
Commit 01836e0

Merge pull request #248 from cpnota/release/0.7.1
Release/0.7.1
2 parents 67b27aa + 074d0ca commit 01836e0


46 files changed: +707 -159 lines changed

.github/workflows/python-package.yml

Lines changed: 1 addition & 0 deletions
@@ -30,6 +30,7 @@ jobs:
         pip install torch==1.8.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
         make install
         AutoROM -v
+        python -m atari_py.import_roms $(python -c 'import site; print(site.getsitepackages()[0])')/multi_agent_ale_py/ROM
     - name: Lint code
       run: |
         make lint
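
The added CI step imports the ROMs shipped with multi_agent_ale_py into atari_py. As a rough illustration of what the shell substitution resolves to, the sketch below (a standalone snippet, not taken from the repository) builds the same ROM path in Python:

# Sketch: build the ROM directory path used by the CI step above.
# Assumes multi_agent_ale_py is installed in the first site-packages entry,
# exactly as the workflow command does.
import os
import site

rom_dir = os.path.join(site.getsitepackages()[0], "multi_agent_ale_py", "ROM")
print(rom_dir)  # this path is then passed to `python -m atari_py.import_roms`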

all/agents/a2c.py

Lines changed: 1 addition & 1 deletion
@@ -101,7 +101,7 @@ def _make_buffer(self):
         )
 
 
-class A2CTestAgent(Agent):
+class A2CTestAgent(Agent, ParallelAgent):
     def __init__(self, features, policy):
         self.features = features
         self.policy = policy

all/agents/dqn.py

Lines changed: 3 additions & 7 deletions
@@ -81,12 +81,8 @@ def _should_train(self):
 
 
 class DQNTestAgent(Agent):
-    def __init__(self, q, n_actions, exploration=0.):
-        self.q = q
-        self.n_actions = n_actions
-        self.exploration = 0.001
+    def __init__(self, policy):
+        self.policy = policy
 
     def act(self, state):
-        if np.random.rand() < self.exploration:
-            return np.random.randint(0, self.n_actions)
-        return torch.argmax(self.q.eval(state)).item()
+        return self.policy.eval(state)
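
The test agent no longer implements epsilon-greedy exploration itself; it simply delegates action selection to whatever policy object it is given. A hedged usage sketch (the GreedyPolicy name and signature are assumptions for illustration, not confirmed by this diff):

# Usage sketch: exploration now lives in the policy, not the test agent.
# GreedyPolicy and its arguments are assumed here for illustration.
from all.policies import GreedyPolicy

policy = GreedyPolicy(q, n_actions, epsilon=0.001)  # q and n_actions defined elsewhere
test_agent = DQNTestAgent(policy)
action = test_agent.act(state)  # equivalent to policy.eval(state)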

all/agents/sac.py

Lines changed: 1 addition & 1 deletion
@@ -98,7 +98,7 @@ def _train(self):
 
             # adjust temperature
             temperature_grad = (_log_probs + self.entropy_target).mean()
-            self.temperature += self.lr_temperature * temperature_grad.detach()
+            self.temperature = max(0, self.temperature + self.lr_temperature * temperature_grad.detach())
 
             # additional debugging info
             self.writer.add_loss('entropy', -_log_probs.mean())
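
The temperature update is now clamped at zero, so a large gradient step can no longer drive the entropy temperature negative. A minimal standalone sketch of the arithmetic (plain floats, illustrative values only):

# Minimal sketch of the clamped temperature update with made-up numbers.
temperature = 0.01
lr_temperature = 0.1
temperature_grad = -0.5  # e.g. the value of (log_probs + entropy_target).mean()

temperature = max(0, temperature + lr_temperature * temperature_grad)
print(temperature)  # 0, instead of -0.04 with the old unclamped update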

all/agents/vqn.py

Lines changed: 6 additions & 1 deletion
@@ -50,4 +50,9 @@ def _train(self, reward, next_state):
         self.q.reinforce(loss)
 
 
-VQNTestAgent = DQNTestAgent
+class VQNTestAgent(Agent, ParallelAgent):
+    def __init__(self, policy):
+        self.policy = policy
+
+    def act(self, state):
+        return self.policy.eval(state)

all/agents/vsarsa.py

Lines changed: 2 additions & 3 deletions
@@ -1,7 +1,6 @@
 from torch.nn.functional import mse_loss
-from ._agent import Agent
 from ._parallel_agent import ParallelAgent
-from .dqn import DQNTestAgent
+from .vqn import VQNTestAgent
 
 
 class VSarsa(ParallelAgent):
@@ -47,4 +46,4 @@ def _train(self, reward, next_state, next_action):
         self.q.reinforce(loss)
 
 
-VSarsaTestAgent = DQNTestAgent
+VSarsaTestAgent = VQNTestAgent

all/bodies/atari.py

Lines changed: 3 additions & 2 deletions
@@ -6,7 +6,8 @@
 
 class DeepmindAtariBody(Body):
     def __init__(self, agent, lazy_frames=False, episodic_lives=True, frame_stack=4, clip_rewards=True):
-        agent = FrameStack(agent, lazy=lazy_frames, size=frame_stack)
+        if frame_stack > 1:
+            agent = FrameStack(agent, lazy=lazy_frames, size=frame_stack)
         if clip_rewards:
             agent = ClipRewards(agent)
         if episodic_lives:
@@ -19,7 +20,7 @@ def process_state(self, state):
         if 'life_lost' not in state:
             return state
 
-        if len(state) == 1:
+        if len(state.shape) == 0:
             if state['life_lost']:
                 return state.update('mask', 0.)
             return state
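
The life-lost handling now checks the state's shape rather than its length, distinguishing a single State (empty shape) from a batched StateArray. A toy illustration of the check (shapes are assumed values, not taken from the library):

# Toy illustration of the shape-based check; not the library's State classes.
single_shape = ()      # a single State has an empty shape
batched_shape = (16,)  # a StateArray over 16 sub-environments

print(len(single_shape) == 0)   # True: handle the single-environment case
print(len(batched_shape) == 0)  # False: fall through to the batched case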

all/bodies/vision.py

Lines changed: 2 additions & 3 deletions
@@ -69,10 +69,9 @@ def update(self, key, value):
         x = {}
         for k in self.keys():
             if not k == key:
-                x[k] = super().__getitem__(k)
+                x[k] = dict.__getitem__(self, k)
         x[key] = value
-        state = LazyState(x, device=self.device)
-        state.to_cache = self.to_cache
+        state = LazyState.from_state(x, x['observation'], self.to_cache)
         return state
 
     def to(self, device):
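
Calling dict.__getitem__ directly copies the stored values without going through LazyState's own __getitem__, which presumably evaluates lazy entries on access. A toy example (not the library's LazyState) of the difference:

# Toy dict subclass showing why bypassing the overridden __getitem__ matters.
class LazyDict(dict):
    def __getitem__(self, key):
        value = dict.__getitem__(self, key)
        # eagerly evaluate lazy (callable) entries on normal access
        return value() if callable(value) else value

d = LazyDict(observation=lambda: "decoded frame")
print(d["observation"])                    # "decoded frame" (evaluated)
print(dict.__getitem__(d, "observation"))  # the lambda itself (still lazy)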

all/environments/__init__.py

Lines changed: 6 additions & 1 deletion
@@ -1,9 +1,12 @@
 from ._environment import Environment
-from._multiagent_environment import MultiagentEnvironment
+from ._multiagent_environment import MultiagentEnvironment
+from ._vector_environment import VectorEnvironment
 from .gym import GymEnvironment
 from .atari import AtariEnvironment
 from .multiagent_atari import MultiagentAtariEnv
 from .multiagent_pettingzoo import MultiagentPettingZooEnv
+from .duplicate_env import DuplicateEnvironment
+from .vector_env import GymVectorEnvironment
 from .pybullet import PybulletEnvironment
 
 __all__ = [
@@ -13,5 +16,7 @@
     "AtariEnvironment",
     "MultiagentAtariEnv",
     "MultiagentPettingZooEnv",
+    "GymVectorEnvironment",
+    "DuplicateEnvironment",
     "PybulletEnvironment",
 ]
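
With these exports, the new vector-environment classes are importable from the package root. A minimal usage sketch (constructor arguments omitted, since they are not part of this diff):

# The new classes can now be imported directly from all.environments.
from all.environments import DuplicateEnvironment, GymVectorEnvironment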
all/environments/_vector_environment.py

Lines changed: 116 additions & 0 deletions
@@ -0,0 +1,116 @@
+from abc import ABC, abstractmethod
+
+
+class VectorEnvironment(ABC):
+    """
+    A reinforcement learning vector Environment.
+
+    Similar to a regular RL environment except many environments are stacked together
+    in the observations, rewards, and dones, and the vector environment expects
+    an action to be given for each environment in step.
+
+    Also, since sub-environments are done at different times, you do not need to
+    manually reset the environments when they are done, rather the vector environment
+    automatically resets environments when they are complete.
+    """
+
+    @property
+    @abstractmethod
+    def name(self):
+        """
+        The name of the environment.
+        """
+
+    @abstractmethod
+    def reset(self):
+        """
+        Reset the environment and return a new initial state.
+
+        Returns
+        -------
+        State
+            The initial state for the next episode.
+        """
+
+    @abstractmethod
+    def step(self, action):
+        """
+        Apply an action and get the next state.
+
+        Parameters
+        ----------
+        action : Action
+            The action to apply at the current time step.
+
+        Returns
+        -------
+        all.environments.State
+            The State of the environment after the action is applied.
+            This State object includes both the done flag and any additional "info"
+        float
+            The reward achieved by the previous action
+        """
+
+    @abstractmethod
+    def close(self):
+        """
+        Clean up any extraneous environment objects.
+        """
+
+    @property
+    @abstractmethod
+    def state_array(self):
+        """
+        A StateArray of the Environments at the current timestep.
+        """
+
+    @property
+    @abstractmethod
+    def state_space(self):
+        """
+        The Space representing the range of observable states for each environment.
+
+        Returns
+        -------
+        Space
+            An object of type Space that represents possible states the agent may observe
+        """
+
+    @property
+    def observation_space(self):
+        """
+        Alias for Environment.state_space.
+
+        Returns
+        -------
+        Space
+            An object of type Space that represents possible states the agent may observe
+        """
+        return self.state_space
+
+    @property
+    @abstractmethod
+    def action_space(self):
+        """
+        The Space representing the range of possible actions for each environment.
+
+        Returns
+        -------
+        Space
+            An object of type Space that represents possible actions the agent may take
+        """
+
+    @property
+    @abstractmethod
+    def device(self):
+        """
+        The torch device the environment lives on.
+        """
+
+    @property
+    @abstractmethod
+    def num_envs(self):
+        """
+        Number of environments in vector. This is the number of actions step() expects as input
+        and the number of observations, dones, etc returned by the environment.
+        """
