fix(nyz): fix ppof collect_data and deploy cuda mismatch bug

opendilab · Feb 16, 2023 · f1f0b55
1 parent 8b1f05b
commit f1f0b55
Show file tree

Hide file tree

Showing 2 changed files with 5 additions and 2 deletions.
diff --git a/ding/framework/middleware/collector.py b/ding/framework/middleware/collector.py
@@ -121,6 +121,7 @@ def __call__(self, ctx: "OnlineRLContext") -> None:
             timesteps = self.env.step(action)
             ctx.env_step += len(timesteps)
 
+            obs = obs.cpu()
             for i, timestep in enumerate(timesteps):
                 transition = self.policy.process_transition(obs[i], inference_output[i], timestep)
                 transition.collect_train_iter = ttorch.as_tensor([ctx.train_iter])

diff --git a/ding/policy/common_utils.py b/ding/policy/common_utils.py
@@ -49,14 +49,16 @@ def _forward(obs):
     return _forward
 
 
-def single_env_forward_wrapper_ttorch(forward_fn):
+def single_env_forward_wrapper_ttorch(forward_fn, cuda=True):
 
     def _forward(obs):
         # unsqueeze means add batch dim, i.e. (O, ) -> (1, O)
         obs = ttorch.as_tensor(obs).unsqueeze(0)
+        if cuda and torch.cuda.is_available():
+            obs = obs.cuda()
         action = forward_fn(obs).action
         # squeeze means delete batch dim, i.e. (1, A) -> (A, )
-        action = action.squeeze(0).numpy()
+        action = action.squeeze(0).cpu().numpy()
         return action
 
     return _forward