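Update: extract fresh weights through the weight sync scheme when none are provided, assert the "policy" weight sender exists in the collector test, and strip trailing whitespace in batched_envs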

vmoens · vmoens · commit 5c2d8a80b3c8 · 2025-10-22T21:06:06.000-07:00
[ghstack-poisoned]
diff --git a/test/test_collector.py b/test/test_collector.py
@@ -1512,6 +1512,7 @@ def create_env():
             cudagraph_policy=cudagraph,
             weight_sync_schemes={"policy": MultiProcessWeightSyncScheme()},
         )
+        assert "policy" in collector._weight_senders, collector._weight_senders.keys()
         try:
             # collect state_dict
             state_dict = collector.state_dict()
diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py
@@ -307,6 +307,19 @@ def _extract_weights_if_needed(self, weights: Any, model_id: str) -> Any:
             else None
         )
 
+        # If no weights were provided and a sync scheme exists, extract the latest
+        # weights from the current model using the scheme strategy (state_dict or tensordict).
+        # This ensures we don't return stale cached weights.
+        if weights is None and scheme is not None:
+            from torchrl.weight_update.weight_sync_schemes import (
+                _resolve_model,
+                WeightStrategy,
+            )
+
+            strategy = WeightStrategy(extract_as=scheme.strategy)
+            model = _resolve_model(self, model_id)
+            return strategy.extract_weights(model)
+
         if weights is None:
             if model_id == "policy" and hasattr(self, "policy_weights"):
                 return self.policy_weights
diff --git a/torchrl/envs/batched_envs.py b/torchrl/envs/batched_envs.py
@@ -2492,7 +2492,7 @@ def look_for_cuda(tensor, has_cuda=has_cuda):
             # Set event before sending non-tensor data so parent knows worker is done
             # The recv() call itself will provide synchronization for the pipe
             mp_event.set()
-            
+
             if _non_tensor_keys:
                 child_pipe.send(
                     ("non_tensor", next_td.select(*_non_tensor_keys, strict=False))
@@ -2534,7 +2534,7 @@ def look_for_cuda(tensor, has_cuda=has_cuda):
             # Set event before sending non-tensor data so parent knows worker is done
             # The recv() call itself will provide synchronization for the pipe
             mp_event.set()
-            
+
             if _non_tensor_keys:
                 ntd = root_next_td.select(*_non_tensor_keys)
                 ntd.set("next", td_next.select(*_non_tensor_keys))

Original file line number	Diff line number	Diff line change
`@@ -1512,6 +1512,7 @@ def create_env():`
`1512`	`1512`	`cudagraph_policy=cudagraph,`
`1513`	`1513`	`weight_sync_schemes={"policy": MultiProcessWeightSyncScheme()},`
`1514`	`1514`	`)`
	`1515`	`+ assert "policy" in collector._weight_senders, collector._weight_senders.keys()`
`1515`	`1516`	`try:`
`1516`	`1517`	`# collect state_dict`
`1517`	`1518`	`state_dict = collector.state_dict()`