forked from xiaowei-hu/pysc2-agents
-
Notifications
You must be signed in to change notification settings - Fork 1
/
run_loop.py
37 lines (33 loc) · 1.36 KB
/
run_loop.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
def run_loop(agents, env, max_frames=0):
    """Run agents against an environment, yielding one record per step.

    This is a generator. It resets the environment and every agent, then
    steps them in lockstep until the episode ends, and repeats with a
    fresh episode for as long as the caller keeps iterating.

    Only the first agent's data is reported (single-player use).

    Args:
        agents: Sequence of agent objects. Each must provide ``reset()``
            and ``step(timestep)``; the first must also provide
            ``isMLSH()``, and when that returns a truthy value every
            agent must provide ``get_cur_Q_action(timestep)``.
        env: Environment providing ``reset()`` and ``step(actions)``,
            each returning a sequence of timesteps whose first element
            has a ``last()`` method.
        max_frames: Maximum number of steps per episode. A falsy value
            (the default ``0``) means no cap, so the episode ends only
            when the environment reports its last timestep.

    Yields:
        A ``(record, is_done)`` pair. ``record`` is
        ``[last_timestep, action, next_timestep]``, or, when
        ``agents[0].isMLSH()`` is truthy,
        ``[last_timestep, action, subpolicy, is_done, next_timestep]``.

    KeyboardInterrupt raised inside the loop is swallowed, and the total
    elapsed wall-clock time is printed when the generator finishes or is
    closed.
    """
    start_time = time.time()
    try:
        while True:
            # One iteration of this outer loop is one episode.
            num_frames = 0
            timesteps = env.reset()
            for agent in agents:
                agent.reset()

            while True:
                num_frames += 1
                last_timesteps = timesteps
                actions = [
                    agent.step(timestep)
                    for agent, timestep in zip(agents, timesteps)
                ]

                # Query the mode once per step, after the agents have
                # acted, and reuse the answer below so the sub-policy
                # lookup and the yielded record can never disagree.
                is_mlsh = agents[0].isMLSH()
                if is_mlsh:
                    subpolicies_selected = [
                        agent.get_cur_Q_action(timestep)
                        for agent, timestep in zip(agents, timesteps)
                    ]

                timesteps = env.step(actions)

                # Only for a single player!
                # A falsy max_frames means "no cap"; without this guard
                # the default of 0 would end every episode on frame 1.
                hit_frame_cap = bool(max_frames) and num_frames >= max_frames
                is_done = hit_frame_cap or timesteps[0].last()

                if is_mlsh:
                    yield [
                        last_timesteps[0],
                        actions[0],
                        subpolicies_selected[0],
                        is_done,
                        timesteps[0],
                    ], is_done
                else:
                    yield [last_timesteps[0], actions[0], timesteps[0]], is_done

                if is_done:
                    break
    except KeyboardInterrupt:
        # Deliberate: let the user stop a long run with Ctrl-C and still
        # get the timing report below.
        pass
    finally:
        elapsed_time = time.time() - start_time
        print("Took %.3f seconds" % elapsed_time)