Maze_Simulation_TracesTest.py
import gym
from gym import envs
from agents import TabularSarsaAgent
from agents import ApproximatedSarsaLambdaAgent
from agents import HAApproximatedSarsaLambdaAgent
import numpy as np
from model.maze_model import maze_model
import matplotlib.pyplot as plt
import pickle
from gym_maze.envs.maze_env import *
print(envs.registry.all())
env = gym.make("maze-sample-20x20-v0")

# Defined by Stefano Bromuri
episodes = 200
env._max_episode_steps = 10000
env.reset()

obs_mins = env.observation_space.low
obs_maxs = env.observation_space.high
discretizations = [20, 20]
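
# Illustrative sanity check (not in the original script): the 20x20 maze is
# discretized into 20 bins per observation dimension, so each maze cell gets
# its own bin. The bin widths follow from the observation-space bounds above.
bin_widths = (np.asarray(obs_maxs) - np.asarray(obs_mins)) / np.asarray(discretizations)
print("observation bounds:", obs_mins, obs_maxs, "-> bin widths:", bin_widths)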

def test1():
    # True-online Sarsa(lambda) baseline, no heuristics.
    config_heur = {"Strategy": "TrueOnline",
                   "Pheromone_strategy": "hard",
                   "decrease_exploration": False,  # Mountain Car has a decaying exploration
                   "learning_rate": 0.1,
                   "psi": 0.00001,
                   "rho": 0.1,
                   "eps": 0.1,
                   "nu": 1,  # epsilon in epsilon-greedy policies
                   "lambda": 0.01,
                   "discount": 0.9,
                   "n_iter": env._max_episode_steps}
    # rho, psi, nu (hard): [0.3, 1e-05, 10]
    # rho, psi, nu (soft): [0.9, 1e-05, 5]
    total_result = []
    for j in range(100):
        print(j)
        # Change the agent here.
        ag = ApproximatedSarsaLambdaAgent.ApproximatedSarsaLambdaAgent(
            obs_mins, obs_maxs, env.action_space, discretizations, [10],
            my_config=config_heur)
        res = []
        for i in range(episodes):
            rend = False
            ag.learn(env, rend)
            res = ag.return_last_steps()
            print(res[-1])
        total_result.append(res)
    with open("Maze_simulation_true_online_no_heuristics.pkl", 'wb') as f:
        pickle.dump(total_result, f)

def test2():
    # True-online Sarsa(lambda) with hard pheromone heuristics and a maze model.
    config_heur = {"Strategy": "TrueOnline",
                   "Pheromone_strategy": "hard",
                   "decrease_exploration": False,  # Mountain Car has a decaying exploration
                   "learning_rate": 0.1,
                   "model_based": True,
                   "model": maze_model(),
                   "psi": 0.01,
                   "rho": 0.99,
                   "eps": 0.1,
                   "nu": 1,  # epsilon in epsilon-greedy policies
                   "lambda": 0.01,
                   "discount": 0.9,
                   "n_iter": env._max_episode_steps}
    # rho, psi, nu (hard): [0.3, 1e-05, 10]
    # rho, psi, nu (soft): [0.9, 1e-05, 5]
    total_result = []
    for j in range(100):
        print(j)
        # Change the agent here.
        ag = HAApproximatedSarsaLambdaAgent.HAApproximatedSarsaLambdaAgent(
            obs_mins, obs_maxs, env.action_space, discretizations, [10],
            my_config=config_heur)
        res = []
        for i in range(episodes):
            rend = False
            ag.learn(env, rend)
            res = ag.return_last_steps()
            print(res[-1])
        total_result.append(res)
    with open("Maze_simulation_true_online_heuristics_hard_model_based.pkl", 'wb') as f:
        pickle.dump(total_result, f)

def test3():
    # True-online Sarsa(lambda) with soft pheromone heuristics, model-free.
    config_heur = {"Strategy": "TrueOnline",
                   "Pheromone_strategy": "soft",
                   "decrease_exploration": False,  # Mountain Car has a decaying exploration
                   "learning_rate": 0.1,
                   "model_based": False,
                   "psi": 0.1,
                   "rho": 0.99,
                   "eps": 0.1,
                   "nu": 1,  # epsilon in epsilon-greedy policies
                   "lambda": 0.01,
                   "discount": 0.9,
                   "n_iter": env._max_episode_steps}
    # rho, psi, nu (hard): [0.3, 1e-05, 10]
    # rho, psi, nu (soft): [0.9, 1e-05, 5]
    total_result = []
    for j in range(100):
        print(j)
        # Change the agent here.
        ag = HAApproximatedSarsaLambdaAgent.HAApproximatedSarsaLambdaAgent(
            obs_mins, obs_maxs, env.action_space, discretizations, [10],
            my_config=config_heur)
        res = []
        for i in range(episodes):
            rend = False
            ag.learn(env, rend)
            res = ag.return_last_steps()
            print(res[-1])
        total_result.append(res)
with open("Maze_simulation_true_online_heuristics_hard.pkl", 'wb') as f:
pickle.dump(total_result, f)
#test1()
test2()
#test3()
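
# Hedged sketch (not part of the original experiment): one way the pickled
# results could be inspected afterwards, using the numpy/matplotlib imports
# above. It assumes the .pkl written by one of the tests exists and holds a
# list of per-run lists of steps-per-episode, as built in total_result; the
# path and function name are illustrative.
def plot_results(path="Maze_simulation_true_online_heuristics_hard_model_based.pkl"):
    with open(path, "rb") as f:
        runs = pickle.load(f)
    mean_steps = np.mean(np.asarray(runs), axis=0)  # average over the runs
    plt.plot(mean_steps)
    plt.xlabel("episode")
    plt.ylabel("mean steps per episode")
    plt.show()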