Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WiP] Reproducible on- and off-policy sampling #2185

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions src/garage/_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@ class Environment(abc.ABC):
+-----------------------+
| visualize() |
+-----------------------+
| seed() |
+-----------------------+
| close() |
+-----------------------+

Expand Down Expand Up @@ -350,6 +352,16 @@ def _validate_render_mode(self, mode):
'got render mode {} instead.'.format(
self.render_modes, mode))

@abc.abstractmethod
def seed(self, seed):
    """Seed the environment.

    Concrete environments are expected to set every seed that is specific
    to the environment library they are built on.

    Args:
        seed (int): Value to seed the environment with.
    """

def __del__(self):
"""Environment destructor."""
self.close()
Expand Down Expand Up @@ -452,6 +464,13 @@ def visualize(self):
"""Creates a visualization of the wrapped environment."""
self._env.visualize()

def seed(self, seed):
    """Set the seed of the wrapped environment.

    The call is delegated to the wrapped environment so that seeding a
    wrapper actually reaches the environment it wraps, instead of being
    silently dropped.

    Args:
        seed (int): The seed value to set
    """
    self._env.seed(seed)

def close(self):
"""Close the wrapped env."""
self._env.close()
Expand Down
7 changes: 7 additions & 0 deletions src/garage/envs/dm_control/dm_control_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,13 @@ def visualize(self):
self._viewer = DmControlViewer(title=title)
self._viewer.launch(self._env)

def seed(self, seed):
    """Set the environment seed.

    Args:
        seed (int): Value to seed the environment with.
    """
    # NOTE(review): there is no body, so the given seed is currently
    # ignored -- confirm whether the dm_control environment should be
    # seeded here.

def close(self):
"""Close the environment."""
if self._viewer:
Expand Down
7 changes: 7 additions & 0 deletions src/garage/envs/grid_world_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,13 @@ def render(self, mode):
def visualize(self):
    """Create a visualization of the environment.

    This implementation contains no statements and therefore does nothing.
    """

def seed(self, seed):
    """Set the environment seed.

    Args:
        seed (int): Value to seed the environment with.
    """
    # NOTE(review): there is no body, so the given seed is currently
    # ignored -- confirm that this environment has no randomness to seed.

def close(self):
"""Close the env."""

Expand Down
9 changes: 9 additions & 0 deletions src/garage/envs/gym_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,15 @@ def visualize(self):
self._env.render(mode='human')
self._visualize = True

def seed(self, seed):
    """Seed the wrapped environment and this environment's action space.

    The same value is passed first to the wrapped environment and then to
    the action space.

    Args:
        seed (int): Value to seed both with.
    """
    wrapped_env = self._env
    wrapped_env.seed(seed)
    # The action space is seeded separately from the wrapped environment.
    space = self.action_space
    space.seed(seed)

def close(self):
"""Close the wrapped env."""
self._close_viewer_window()
Expand Down
7 changes: 7 additions & 0 deletions src/garage/envs/metaworld_set_task_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,13 @@ def visualize(self):
"""Creates a visualization of the wrapped environment."""
self._current_env.visualize()

def seed(self, seed):
    """Set the environment seed.

    Args:
        seed (int): Value to seed the environment with.
    """
    # NOTE(review): there is no body, so the seed is not forwarded to any
    # of the wrapped environments -- confirm whether that is intended.

def close(self):
"""Close the wrapped env."""
for env in self._envs.values():
Expand Down
8 changes: 8 additions & 0 deletions src/garage/envs/point_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,14 @@ def visualize(self):
def close(self):
    """Close the environment.

    This implementation contains no statements and therefore does nothing.
    """

def seed(self, seed):
    """Set the environment seed.

    Args:
        seed (int): Value to seed the environment with.

    """
    # NOTE(review): there is no body, so the given seed is currently
    # ignored -- confirm that this environment has no randomness to seed.

# pylint: disable=no-self-use
def sample_tasks(self, num_tasks):
"""Sample a list of `num_tasks` tasks.
Expand Down
2 changes: 2 additions & 0 deletions src/garage/sampler/_functions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Functions used by multiple Samplers or Workers."""
from garage import Environment
from garage.experiment import deterministic
from garage.sampler.env_update import EnvUpdate


Expand Down Expand Up @@ -33,6 +34,7 @@ def _apply_env_update(old_env, env_update):
elif isinstance(env_update, Environment):
if old_env is not None:
old_env.close()
env_update.seed(deterministic.get_seed())
return env_update, True
else:
raise TypeError('Unknown environment update type.')
Expand Down
35 changes: 34 additions & 1 deletion tests/garage/sampler/test_local_sampler.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import pytest

from garage.envs import PointEnv
from garage.envs import GymEnv, PointEnv
from garage.experiment.task_sampler import SetTaskSampler
from garage.np.policies import FixedPolicy, ScriptedPolicy
from garage.sampler import LocalSampler, WorkerFactory
Expand Down Expand Up @@ -103,3 +103,36 @@ def test_no_seed():
sampler = LocalSampler.from_worker_factory(workers, policy, env)
episodes = sampler.obtain_samples(0, 160, policy)
assert sum(episodes.lengths) >= 160


def test_deterministic_on_policy_sampling():
    """Check that identically seeded samplers collect identical episodes.

    Two independent LunarLander environments get the same action-space
    seed, the same scripted actions and worker factories built with the
    same seed. The observations and next observations sampled from both
    must then be equal.
    """
    max_episode_length = 1
    n_workers = 1
    seed = 10

    envs = [GymEnv('LunarLander-v2') for _ in range(2)]
    # Fix the action sequence
    for env in envs:
        env.action_space.seed(seed)
    policies = [
        FixedPolicy(env.spec,
                    scripted_actions=[
                        env.action_space.sample()
                        for _ in range(max_episode_length)
                    ]) for env in envs
    ]
    factories = [
        WorkerFactory(seed=seed,
                      max_episode_length=max_episode_length,
                      n_workers=n_workers) for _ in envs
    ]
    # Both samplers are built before either one samples, so any global
    # seeding done at construction happens in the same order for both.
    samplers = [
        LocalSampler.from_worker_factory(factory, policy, env)
        for factory, policy, env in zip(factories, policies, envs)
    ]
    first, second = [
        sampler.obtain_samples(0, 1, policy)
        for sampler, policy in zip(samplers, policies)
    ]
    assert np.array_equal(first.observations, second.observations)
    assert np.array_equal(first.next_observations,
                          second.next_observations)