-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_ddopg_cartpole.py
57 lines (41 loc) · 1.55 KB
/
run_ddopg_cartpole.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Copyright (C) 2019 Robert Bosch GmbH
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
@author: Andreas Doerr
"""
import os
import time
from garage.tf.envs import TfEnv
from garage.experiment import run_experiment
from garage.tf.policies import GaussianMLPPolicy
from garage.envs.box2d.cartpole_env import CartpoleEnv
from policy_gradients import config
from policy_gradients.algos import DDOPG
def run_task(*_):
    """Train a DDOPG agent on the Box2D cartpole environment.

    Any positional arguments supplied by the caller are accepted and
    ignored. The function returns nothing; its effect is the call to
    ``DDOPG.train()``.
    """
    cartpole = TfEnv(CartpoleEnv())

    # The policy is handed to DDOPG as a class plus constructor keyword
    # arguments rather than as an instance (presumably DDOPG builds the
    # policy itself -- confirm against policy_gradients.algos.DDOPG).
    policy_kwargs = {
        "env_spec": cartpole.spec,
        "hidden_sizes": (16, 16),
        "learn_std": False,
    }

    DDOPG(cartpole, GaussianMLPPolicy, policy_kwargs).train()
# All output of this experiment goes to <RESULTS_DIR>/cartpole.
log_dir = os.path.join(config.RESULTS_DIR, "cartpole")

# Launch the training task through garage's experiment runner with a fixed
# seed. snapshot_mode="gap" together with snapshot_gap=200 presumably saves
# a snapshot every 200 iterations -- confirm against the garage docs.
run_experiment(
    run_task,
    n_parallel=1,
    snapshot_mode="gap",
    snapshot_gap=200,
    seed=16,
    log_dir=log_dir,
)