From d012609d53a35789955367ef0a2efe8cf1784378 Mon Sep 17 00:00:00 2001
From: quantumiracle <1402434478@qq.com>
Date: Sun, 23 May 2021 23:10:09 +0800
Subject: [PATCH 01/11] add dppo distributed

---
 dppo_clip_distributed/__init__.py            |   0
 dppo_clip_distributed/dppo_global_manager.py |  62 ++++++++
 dppo_clip_distributed/dppo_infer_server.py   | 159 +++++++++++++++
 dppo_clip_distributed/dppo_learner.py        | 155 ++++++++++++++
 dppo_clip_distributed/dppo_sampler.py        |  38 +++++
 dppo_clip_distributed/queue_data.json        | Bin 0 -> 94690 bytes
 dppo_clip_distributed/run_dppo_clip.py       |  73 +++++++++
 rlzoo/common/policy_networks.py              |  25 ---
 8 files changed, 487 insertions(+), 25 deletions(-)
 create mode 100644 dppo_clip_distributed/__init__.py
 create mode 100644 dppo_clip_distributed/dppo_global_manager.py
 create mode 100644 dppo_clip_distributed/dppo_infer_server.py
 create mode 100644 dppo_clip_distributed/dppo_learner.py
 create mode 100644 dppo_clip_distributed/dppo_sampler.py
 create mode 100644 dppo_clip_distributed/queue_data.json
 create mode 100644 dppo_clip_distributed/run_dppo_clip.py

diff --git a/dppo_clip_distributed/__init__.py b/dppo_clip_distributed/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/dppo_clip_distributed/dppo_global_manager.py b/dppo_clip_distributed/dppo_global_manager.py
new file mode 100644
index 0000000..5a9dc1e
--- /dev/null
+++ b/dppo_clip_distributed/dppo_global_manager.py
@@ -0,0 +1,62 @@
+from rlzoo.common.policy_networks import StochasticPolicyNetwork
+from rlzoo.common.value_networks import ValueNetwork
+import numpy as np
+from rlzoo.common.utils import *
+import pickle
+
+
+class DPPOGlobalManager:
+    def __init__(self, net_builder, opt_builder, param_pipe_list, name='DPPO_CLIP'):
+        networks = net_builder()
+        optimizers_list = opt_builder()
+        assert len(networks) == 2
+        assert len(optimizers_list) == 2
+        self.critic, self.actor = networks
+        assert isinstance(self.critic, ValueNetwork)
+        assert isinstance(self.actor, StochasticPolicyNetwork)
+        self.critic_opt, self.actor_opt = optimizers_list
+        self.param_pipe_list = param_pipe_list
+        self.name = name
+
+    def run(self, traj_queue, grad_queue, should_stop, should_update, barrier,
+            max_update_num=1000, update_interval=100, save_interval=10, env_name='CartPole-v0'):
+        update_cnt = 0
+        while update_cnt < max_update_num:
+            batch_a_grad, batch_c_grad = [], []
+            for _ in range(update_interval):
+                a_grad, c_grad = grad_queue.get()
+                batch_a_grad.append(a_grad)
+                batch_c_grad.append(c_grad)
+
+            # update
+            should_update.set()
+            self.update_model(batch_a_grad, batch_c_grad)
+            self.send_param()
+
+            traj_queue.empty()
+            for q in grad_queue: q.empty()
+
+            barrier.wait()
+            should_update.clear()
+
+            update_cnt += 1
+            if update_cnt % save_interval == 0:
+                self.save_model(env_name)
+        should_stop.set()
+
+    def send_param(self):
+        params = self.critic.trainable_weights + self.actor.trainable_weights
+        for pipe_connection in self.param_pipe_list:
+            pipe_connection.send(params)
+
+    def update_model(self, batch_a_grad, batch_c_grad):
+        a_grad = np.mean(batch_a_grad, axis=0)
+        c_grad = np.mean(batch_c_grad, axis=0)
+        self.actor_opt.apply_gradients(zip(a_grad, self.actor.trainable_weights))
+        self.critic_opt.apply_gradients(zip(c_grad, self.critic.trainable_weights))
+
+    def save_model(self, env_name):
+        save_model(self.actor, 'actor', self.name, env_name)
+        save_model(self.critic, 'critic', self.name, env_name)
+
+    # todo load model
diff --git a/dppo_clip_distributed/dppo_infer_server.py b/dppo_clip_distributed/dppo_infer_server.py
new file mode 100644
index 0000000..46c804a
--- /dev/null
+++ b/dppo_clip_distributed/dppo_infer_server.py
@@ -0,0 +1,159 @@
+from rlzoo.common.policy_networks import StochasticPolicyNetwork
+from rlzoo.common.value_networks import ValueNetwork
+import numpy as np
+import copy
+import pickle
+
+
+class DPPOInferServer:
+    def __init__(self, net_builder, net_param_pipe, n_step=1000, gamma=0.9):
+        networks = net_builder()
+        assert len(networks) == 2
+        self.critic, self.actor = networks
+        assert isinstance(self.critic, ValueNetwork)
+        assert isinstance(self.actor, StochasticPolicyNetwork)
+        self.state_buffer = []
+        self.action_buffer = []
+        self.reward_buffer = []
+        self.done_buffer = []
+        self.logp_buffer = []
+        self.gamma = gamma
+        self.n_step = n_step
+        self.net_param_pipe = net_param_pipe
+
+    def _cal_adv(self):
+        dc_r = self._cal_discounted_r()
+        s_shape = np.shape(self.state_buffer)
+        s = np.reshape(self.state_buffer, [-1, s_shape[-1]])
+        v = self.critic(s).numpy().reshape([-1, s_shape[1]])
+        dc_r = np.array(dc_r, dtype=np.float32)
+        advs = dc_r - v
+        advs = (advs - np.mean(advs)) / (np.std(advs) + 1e-8)
+        return advs
+
+    def _get_v(self, s):
+        return np.reshape(self.critic(s.astype(np.float32)), [-1])
+
+    def _cal_discounted_r(self):
+        discounted_r = np.zeros_like(self.reward_buffer)  # compute discounted reward
+        v_s_ = self._get_v(self.state_buffer[-1]) * (1 - self.done_buffer[-1])
+        for i in range(len(self.reward_buffer) - 1, -1, -1):
+            discounted_r[i] = v_s_ = self.reward_buffer[i] + (1 - self.done_buffer[i]) * self.gamma * v_s_
+        return discounted_r
+
+    def _get_traj(self):
+        traj = []
+        for element in [self.state_buffer, self.action_buffer, self.reward_buffer, self.done_buffer, self._cal_adv(),
+                        self.logp_buffer]:
+            axes = list(range(len(np.shape(element))))
+            axes[0], axes[1] = 1, 0
+            traj.append(np.transpose(element, axes))
+            if type(element) == list:
+                element.clear()
+        return traj
+
+    def inference_service(self, batch_s):
+        print(batch_s)
+        batch_a = self.actor(batch_s).numpy()
+        batch_log_p = self.actor.policy_dist.get_param()
+        return batch_a, batch_log_p
+
+    def collect_data(self, s, a, r, d, log_p):
+        self.state_buffer.append(s)
+        self.action_buffer.append(a)
+        self.reward_buffer.append(r)
+        self.done_buffer.append(d)
+        self.logp_buffer.append(log_p)
+
+    def upload_data(self, que):
+        traj_data = self._get_traj()
+        que.put(traj_data)
+        print('\rupdated, queue size: {}, current data shape: {}'.format(que.qsize(), [np.shape(i) for i in traj_data]))
+
+    def run(self, pipe_list, traj_queue, should_stop, should_update, barrier, ):
+        states, rewards, dones, infos = zip(*[remote.recv() for remote in pipe_list])
+        states, rewards, dones, infos = np.stack(states), np.stack(rewards), np.stack(dones), np.stack(infos)
+
+        while not should_stop.is_set():
+            if should_update.is_set():
+                self.update_model()
+                barrier.wait()
+            actions, log_ps = self.inference_service(states)
+            for (remote, a) in zip(pipe_list, actions):
+                remote.send(a)
+
+            states, rewards, dones, infos = zip(*[remote.recv() for remote in pipe_list])
+            states, rewards, dones, infos = np.stack(states), np.stack(rewards), np.stack(dones), np.stack(infos)
+            self.collect_data(states, actions, rewards, dones, log_ps)
+
+            print('\rsampling, {}'.format(len(self.state_buffer)), end='')
+            if len(self.state_buffer) >= self.n_step:
+                self.upload_data(traj_queue)
+
+    def update_model(self):
+        params = self.net_param_pipe.recv()
+        for i, j in zip(self.critic.trainable_weights + self.actor.trainable_weights, params):
+            i.assign(j)
+        self.state_buffer.clear()
+        self.action_buffer.clear()
+        self.reward_buffer.clear()
+        self.done_buffer.clear()
+        self.logp_buffer.clear()
+
+
+if __name__ == '__main__':
+    import multiprocessing as mp
+
+    from rlzoo.common.env_wrappers import build_env
+    from dppo_clip_distributed.dppo_sampler import DPPOSampler
+    import copy, json, pickle
+    from gym.spaces.box import Box
+    from gym.spaces.discrete import Discrete
+    import cloudpickle
+
+    should_stop_event = mp.Event()
+    should_stop_event.clear()
+
+    # build_sampler
+    nenv = 3
+
+
+    def build_func():
+        return build_env('CartPole-v0', 'classic_control')
+
+
+    pipe_list = []
+    for _ in range(nenv):
+        sampler = DPPOSampler(build_func)
+        remote_a, remote_b = mp.Pipe()
+        p = mp.Process(target=sampler.run, args=(remote_a, should_stop_event))
+        p.daemon = True  # todo daemon process dependency
+        p.start()
+        pipe_list.append(remote_b)
+
+    traj_queue = mp.Queue(maxsize=10000)
+    grad_queue = mp.Queue(maxsize=10000), mp.Queue(maxsize=10000),
+    should_update_event = mp.Event()
+    should_update_event.clear()
+    barrier = mp.Barrier(1)  # sampler + updater
+
+    """ build networks for the algorithm """
+    name = 'DPPO_CLIP'
+    hidden_dim = 64
+    num_hidden_layer = 2
+    critic = ValueNetwork(Box(0, 1, (4,)), [hidden_dim] * num_hidden_layer, name=name + '_value')
+    actor = StochasticPolicyNetwork(Box(0, 1, (4,)), Discrete(2),
+                                    [hidden_dim] * num_hidden_layer,
+                                    trainable=True,
+                                    name=name + '_policy')
+
+    actor = copy.deepcopy(actor)
+    global_nets = critic, actor
+
+    global_nets = cloudpickle.dumps(global_nets)
+    # p = mp.Process(
+    #     target=DPPOInferServer(global_nets).run,
+    #     args=(traj_queue, should_stop_event, should_update_event, barrier)
+    # )
+    # p.start()
+    DPPOInferServer(global_nets).run(pipe_list, traj_queue, should_stop_event, should_update_event, barrier)
diff --git a/dppo_clip_distributed/dppo_learner.py b/dppo_clip_distributed/dppo_learner.py
new file mode 100644
index 0000000..081c303
--- /dev/null
+++ b/dppo_clip_distributed/dppo_learner.py
@@ -0,0 +1,155 @@
+import queue
+
+from rlzoo.common.utils import *
+from rlzoo.common.policy_networks import *
+from rlzoo.common.value_networks import *
+
+EPS = 1e-8  # epsilon
+
+
+class DPPOLearner(object):
+    """
+    PPO class
+    """
+
+    def __init__(self, net_builder, net_param_pipe, epsilon=0.2):
+        """
+        :param net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization
+        :param optimizers_list: a list of optimizers for all networks and differentiable variables
+        :param state_dim: dimension of state for the environment
+        :param action_dim: dimension of action for the environment
+        :param a_bounds: a list of [min_action, max_action] action bounds for the environment
+        :param epsilon: clip parameter
+        """
+        networks = net_builder()
+        assert len(networks) == 2
+        self.name = 'DPPO_CLIP'
+
+        self.epsilon = epsilon
+
+        self.critic, self.actor = networks
+        self.net_param_pipe = net_param_pipe
+
+        assert isinstance(self.critic, ValueNetwork)
+        assert isinstance(self.actor, StochasticPolicyNetwork)
+
+    def a_train(self, s, a, adv, oldpi_prob):
+        """
+        Update policy network
+
+        :param s: state
+        :param a: act
+        :param adv: advantage
+        :param oldpi_prob: old pi probability of a in s
+
+        :return: actor gradient
+        """
+        with tf.GradientTape() as tape:
+            _ = self.actor(s)
+            pi_prob = tf.exp(self.actor.policy_dist.logp(a))
+            ratio = pi_prob / (oldpi_prob + EPS)
+
+            surr = ratio * adv
+            aloss = -tf.reduce_mean(
+                tf.minimum(surr, tf.clip_by_value(ratio, 1. - self.epsilon, 1. + self.epsilon) * adv))
+        a_grad = tape.gradient(aloss, self.actor.trainable_weights)
+        return a_grad
+
+    def c_train(self, dc_r, s):
+        """
+        Update critic network
+
+        :param dc_r: cumulative reward
+        :param s: state
+
+        :return: critic gradient
+        """
+        dc_r = np.array(dc_r, dtype=np.float32)
+        with tf.GradientTape() as tape:
+            v = self.critic(s)
+            advantage = dc_r - v
+            closs = tf.reduce_mean(tf.square(advantage))
+        c_grad = tape.gradient(closs, self.critic.trainable_weights)
+        return c_grad
+
+    def update_model(self):
+        params = self.net_param_pipe.recv()
+        for i, j in zip(self.critic.trainable_weights + self.actor.trainable_weights, params):
+            i.assign(j)
+
+    def run(self, traj_queue, grad_queue, should_stop, should_update, barrier,
+            batch_length=10, a_update_steps=1, c_update_steps=1):  # todo a, c update step
+        # todo max episode
+        a_grad_queue, c_grad_queue = grad_queue
+
+        while not should_stop.is_set():
+            if should_update.is_set():
+                self.update_model()
+                barrier.wait()
+            batch_data = batch_s, batch_a, batch_r, batch_d, batch_adv, batch_logp = [], [], [], [], [], []
+            for _ in range(batch_length):
+                b_s, b_a, b_r, b_d, b_adv, b_logp = traj_queue.get()
+                batch_s.extend(b_s)
+                batch_a.extend(b_a)
+                batch_r.extend(b_r)
+                batch_d.extend(b_d)
+                batch_adv.extend(b_adv)
+                batch_logp.extend(b_logp)
+                pass
+            for s, a, r, d, adv, logp in zip(*batch_data):
+                s, a, r, d, adv, logp = np.vstack(s), np.vstack(a), np.vstack(r), np.vstack(d), \
+                                        np.vstack(adv), np.vstack(logp)
+                s, a, r = np.array(s), np.array(a, np.float32), np.array(r, np.float32),
+                adv, logp = np.array(adv, np.float32), np.array(logp, np.float32),
+
+                # update actor
+                for _ in range(a_update_steps):
+                    a_grad_queue.put(self.a_train(s, a, adv, logp))  # todo optimize this
+
+                # update critic
+                for _ in range(c_update_steps):
+                    c_grad_queue.put(self.c_train(r, s))  # todo optimize this
+
+
+if __name__ == '__main__':
+    import multiprocessing as mp
+    import cloudpickle
+    from rlzoo.common.env_wrappers import build_env
+    import copy, json, pickle
+    from gym.spaces.box import Box
+    from gym.spaces.discrete import Discrete
+
+    traj_queue = mp.Queue(maxsize=10000)
+    grad_queue = mp.Queue(maxsize=10000), queue.Queue(maxsize=10000),
+    should_stop_event = mp.Event()
+    should_stop_event.clear()
+    should_update_event = mp.Event()
+    should_update_event.clear()
+    barrier = mp.Barrier(2)  # sampler + updater
+
+    """ build networks for the algorithm """
+    name = 'DPPO_CLIP'
+    hidden_dim = 64
+    num_hidden_layer = 2
+    critic = ValueNetwork(Box(0, 1, (4,)), [hidden_dim] * num_hidden_layer, name=name + '_value')
+    actor = StochasticPolicyNetwork(Box(0, 1, (4,)), Discrete(2),
+                                    [hidden_dim] * num_hidden_layer,
+                                    trainable=True,
+                                    name=name + '_policy')
+
+    actor = copy.deepcopy(actor)
+    global_nets = critic, actor
+
+    with open('queue_data.json', 'rb') as file:
+        queue_data = pickle.load(file)
+    for data in queue_data:
+        traj_queue.put(data)
+    print(traj_queue.qsize())
+    actor_lr = 1e-4
+    critic_lr = 2e-4
+    optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)]
+    dcu = DPPOLearner(global_nets, traj_queue, grad_queue, should_stop_event, should_update_event, barrier)
+    global_nets = cloudpickle.dumps(global_nets)
+    p = mp.Process(target=dcu.run, args=())
+    p.daemon = True
+    p.start()
diff --git a/dppo_clip_distributed/dppo_sampler.py b/dppo_clip_distributed/dppo_sampler.py
new file mode 100644
index 0000000..3cd7076
--- /dev/null
+++ b/dppo_clip_distributed/dppo_sampler.py
@@ -0,0 +1,38 @@
+import multiprocessing as mp
+
+
+class DPPOSampler:
+    def __init__(self, create_env_func):
+        self.env = create_env_func()
+
+    def run(self, pipe, should_stop: mp.Event):
+        state = self.env.reset()
+        done, reward, _ = True, 0, {}
+        while not should_stop.is_set():
+            pipe.send((state, reward, done, _))
+            action = pipe.recv()
+            state, reward, done, _ = self.env.step(action)
+            if done:
+                state = self.env.reset()
+
+
+if __name__ == '__main__':
+    from rlzoo.common.env_wrappers import build_env
+    import multiprocessing as mp
+
+    def build_func():
+        return build_env('CartPole-v0', 'classic_control')
+
+    remote_a, remote_b = mp.Pipe()
+    should_stop = mp.Event()
+    should_stop.clear()
+
+    spl = DPPOSampler(build_func)
+    p = mp.Process(target=spl.run, args=(remote_a, should_stop))
+    p.daemon = True
+    p.start()
+
+    while True:
+        s, r, d, _ = remote_b.recv()
+        remote_b.send(1)
+        print(s, r, d, _)
diff --git a/dppo_clip_distributed/queue_data.json b/dppo_clip_distributed/queue_data.json
new file mode 100644
index 0000000000000000000000000000000000000000..2058e92bfe09b3dbfbeb372356e8056f499e494c
GIT binary patch
literal 94690
[94690 bytes of base85-encoded binary patch data omitted]
z=%(%SGg}hD*?>F6>}eYu&k%T+R#Jz*-e;dnsSDvtZ<5>*qX8fK@N=ISkZoM14q7Kj}r7iowHxd*Z za}$M?_XU zVhRny?|UrV%O;B9vG6awsAFm9_Q+$1op%UhuC^S?mMsE?^5>nBYo>u-&;F%S%NGw; z+>yO`G71i5ejHLy@&v6Hcf(VDd7+c#&CvC?e2`_o)4Y*+6~MatV)rWvPk16eyxN6U zgQp7KUSC3ezm61*|NK1J0^Dzx-wtee3Q5;B#qt((<1}mNah?VCI1+sIki~F0FcTu0 zN2jiXt3mfVzXdfYxU+Aug~WYCxlEs*V=bV<6*c)eLlcV#}`((ea)`NRI zLMt+<@nc$V%zMh8p^x+~J}>4POj%1y*c<-{YGt`obbe63y9zVx>|gKMGFHH?U0rn3L{C&}HDVN_HV~{*JRwWkxd3;k#bl)6Q+i-6;P|Oo~&?8^SC(H%?0Tz<&KBGy39TxY93nAIIuJ6g>4}^VQWM@KLYJ zc{fKZwejNhRE1MK=CH>_%JB7|sIvV}ey<4N7q(uUA>RYd#}`F%&AdU&%8b@kY+tbU z$cYUGobgaS9V5MaZ!0{I)d;>{mWJ9F_2T;VKOoDgg3iIxM2Na_V>raM0(^>h4he~8 z;`+N2^8F0WXo5Dr+m)%t5C%3X_dlP zgWX?X1X&(orxN>Tst z1->UA-@p}dCsxJXmB9PL)|@k#Ku_t{Rl3(ZakbBpO?CT{L6o)cg~9FD@O0(YXwzt)N+dF06t8nwzZm<{aBZcJMVnC;5I_Xpxm}?3LC?a@ov;cm&zpg7MRGp z`g1MX&>3HxU6PGUoz5n0%qju*EI~}f_<1x9J`5r8}7HrLZQG$Knx8B+AHj0Wz zPaczc5Q0IX!3;q<8gS@CUE^lIJDBk+yKJ|L53CGkmAGgUf==>-Z%!U9hJzeOEejR( zv3`Qr@0)BD{@Soqw8Ah5zi`SgxWwB8Z^|_t_g}ZcSH1@8t}N+7k80L-nI<31@p^k= z!>MAh9%OtMf-f-7AonqYOBEIvMZA|Pdx3P5h--D(+HkaZ%5TMGKYV2So;Nwc0VJ5Y z&b}sJ;HZ=8$}{doSh~|~S0|4j=6{2j z5O<07!Nxn-KOnp_)6WOh1Va~8+$)AFw#_`Bd0*hFXZyERYt*3AuDG`yPhQ}T8x0M| zC7Ph)N&OARvwpZ$*5mf+!`-MGv4dlGuLB-vJn+bip%}?T{F{t4oic~7qwsiO3kwyu~+Y> zp!S0=mLH`jQO06bHtl0D41YF!y-c7Nq*e9=wTjV@=N?PX!D|kE6tnRxyJ8?u$yZ!(#)Q6XK_bxRt$U__vKOMl_Qwo0=i;8D!4*CS#_?W) zvRXM?3Z$HMm$K#QhVsj=W^ADlQv}@>4_z8YS?Pk%ybb9fSGhFgs9rDBr;ERKURZ*6 z%on_W-|`K|Vwfx(S5f^e&Zx0k3q^w6d?N+3kq~5fyQ!VDwZPj9+~y(@Pw-26N&Drs zxfrR}B1|X<9TrY#eWQ0$70UHG zx1@XGfz3ZfM8rFhD@wK6dYLC0T|2P(X=xw0EE>t&el-YdPQTV)I$no5B=g;EGPf}= zXLa@?u}(0$NiAj>8|hN=&x*g^@AWNe@#_|a%0~K>{PPgX;J0fIQHnDCKfY%Df5ZRB z!T*c%-yJ_2H#>Az|NjbSkNY>Ad!E0={deuN$DKQF);^1W^UvD<7B}mk#lOYP+W%dg zd%U^h=KAOAv+;B7v-&K~jhk!#|Jy(Jd~@}==l`4jPxgQE|I>MA{eQ#19e*}{u7B1( zi?jY&oVCy5tbG>e+Gq8-_)qb({=cEoZOUV8bcd3EQk@~C{7!X76e=6trR1MfXZ+js zAEFfXDg1|c@?TT&KlkB3_u=;)_&R5miBo7`&6iM}uzJ`a|0pOkYT#AL-55A1o7gmI>k~+BNHE68# zgy2j8iR|%Mzz{d+o?1?>$%zKF0T-B54%VP^bv?ieE8ID+2Crkc5?tK2kEw zp`cGyX41H|Ns1x)6?AlP!BBOBIO}(R_|d}vD(YLv2i*QZW37@pd*lr{UGED?Bkoc) zjY}}-f+xIi+92g+cMQkwctBB7j6@`#Ey@W$1KHI-2;>T-8ihE(7v}(SS5EV#H5a9Olps?7 z4D_EKpbD-_0RO(Duwmp3;GV58Ksx};vC@!Ku@v~MxM5mm4-8t4N_FVZgA#HbNP zIocG(G*Kj{5Mw)~TuPd>SPFH{|9;*|MavjVL`r0t8Zsmi4)b?iuK995AD*wj>;3ZF z?|s)a*#(I8?2cvF*iB11V)t@puIP1Ku4ur(%5M98ee%BW86&qq6sJ~9OmiQN2pLXy zb5vNJ^aIsz&Ly#67ON0HiYY}+vwe_$hP%wx>Hpq{DT z%Vsp{8LQ;~loj%4IUzT5uQbHFm(&f`;ZtxXqhl>5k(7}oH+DxhART6EPzbvE*zSb4ru-7Du?z$X;U--cTj&eK#R? 
zbs|ORXbPD22`bIj^m1o5h9!N3+L<0`jkln&KT6@{+Jg}718rwv@HPi=5tq0bYc!#sVgX%|ItmlB5Mgow z;*tYrrbA|&Rfcy>!{+%VRioGA zBo`W@L!09IFDaZ+(JRmm|;(*oedf3qhn?_%XOfjWN5?u-jXhW?|F*@e` z%p3Cbkk@I%DOKnp*>wbLTlu82_Pv~==piJ3N^s}RTKwkT$Qh(+!1zi(SEn6@A$%Iw zj94?AP5|d{JrDnc&xh%8JxX5Nh{F4U^pilJq%H?Yf2td`Scg;1+p#q7-UP06S6Ic8 zt9#LNHw-rGG^snd9V6n>;kG&$pN%ie_sunz<(S!mnb=^=i)#7Y(;~UA^I^OUjKcIZ zZPIk>MAlVrGB6oOhb~H}A;+77GEY&0S`xL+NvCi2deNfh Date: Sun, 23 May 2021 23:27:38 +0800 Subject: [PATCH 02/11] update run_dppo_clip --- dppo_clip_distributed/run_dppo_clip.py | 50 ++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/dppo_clip_distributed/run_dppo_clip.py b/dppo_clip_distributed/run_dppo_clip.py index 5e0fab0..88f1405 100644 --- a/dppo_clip_distributed/run_dppo_clip.py +++ b/dppo_clip_distributed/run_dppo_clip.py @@ -51,10 +51,11 @@ def make_infer_server_process( def make_learner_process( - num, build_net_func, traj_queue, grad_queue, should_stop_event, should_update_event, barrier, net_param_pipe): + num, build_net_func, traj_queue, grad_queue, should_stop_event, should_update_event, barrier, + net_param_pipe_list): process_list = [] - for _ in range(num): - learner = DPPOLearner(build_net_func, net_param_pipe) + for i in range(num): + learner = DPPOLearner(build_net_func, net_param_pipe_list[i]) p = mp.Process( target=learner.run, args=(traj_queue, grad_queue, should_stop_event, should_update_event, barrier)) p.daemon = True @@ -71,3 +72,46 @@ def make_global_manager( return [p], param_pipe_b +if __name__ == '__main__': + + n_sampler = 3 + name = 'DPPO_CLIP' + build_env = partial(build_env, ('CartPole-v0', 'classic_control')) + env = build_env() + observation_space, action_space = env.observation_space, env.action_space + build_network = partial(make_network, (observation_space, action_space, name)) + + traj_queue = mp.Queue(maxsize=10000) + should_stop_event = mp.Event() + should_stop_event.clear() + should_update_event = mp.Event() + should_update_event.clear() + grad_queue = mp.Queue(maxsize=10000), mp.Queue(maxsize=10000), + + n_learner = 2 + barrier = mp.Barrier(2 + n_learner) # InferServer + Updater + GlobalManager + + process_list = [] + + p_list, param_pipe_b = make_global_manager( + build_env, make_opt, n_learner + 1, traj_queue, grad_queue, should_stop_event, should_update_event, barrier) + process_list.extend(p_list) + + p_list, sample_pipe_list = make_sampler_process( + n_sampler, build_env, should_stop_event + ) + process_list.extend(p_list) + + p_list = make_infer_server_process( + build_network, build_env, sample_pipe_list, traj_queue, should_stop_event, should_update_event, barrier, + param_pipe_b[0]) + process_list.extend(p_list) + + p_list = make_learner_process( + n_learner, build_network, traj_queue, grad_queue, should_stop_event, should_update_event, barrier, + param_pipe_b[1:] + ) + process_list.extend(p_list) + + while True: + print(grad_queue[0].qsize) From 0fa819956bcb271d7b3a2cd79dc6b41f61767d1c Mon Sep 17 00:00:00 2001 From: quantumiracle <1402434478@qq.com> Date: Wed, 26 May 2021 09:58:11 +0800 Subject: [PATCH 03/11] update dppo --- dppo_clip_distributed/dppo_global_manager.py | 89 ++++++++---- dppo_clip_distributed/dppo_infer_server.py | 136 ++++++++---------- dppo_clip_distributed/dppo_learner.py | 142 ++++++++----------- dppo_clip_distributed/dppo_roles_func.py | 84 +++++++++++ dppo_clip_distributed/dppo_sampler.py | 19 ++- dppo_clip_distributed/run_dppo_clip.py | 25 ++-- dppo_clip_distributed/start.py | 59 ++++++++ 7 files changed, 355 
insertions(+), 199 deletions(-) create mode 100644 dppo_clip_distributed/dppo_roles_func.py create mode 100644 dppo_clip_distributed/start.py diff --git a/dppo_clip_distributed/dppo_global_manager.py b/dppo_clip_distributed/dppo_global_manager.py index 5a9dc1e..d554a65 100644 --- a/dppo_clip_distributed/dppo_global_manager.py +++ b/dppo_clip_distributed/dppo_global_manager.py @@ -1,53 +1,90 @@ from rlzoo.common.policy_networks import StochasticPolicyNetwork from rlzoo.common.value_networks import ValueNetwork -import numpy as np from rlzoo.common.utils import * -import pickle +import queue + + +def write_log(text: str): + pass + # print('global manager: '+text) + # with open('global_manager_log.txt', 'a') as f: + # f.write(str(text) + '\n') class DPPOGlobalManager: - def __init__(self, net_builder, opt_builder, param_pipe_list, name='DPPO_CLIP'): - networks = net_builder() - optimizers_list = opt_builder() + def __init__(self, net_builder, opt_builder, name='DPPO_CLIP'): + self.net_builder, self.opt_builder = net_builder, opt_builder + self.name = name + self.critic, self.actor = None, None + self.critic_opt, self.actor_opt = None, None + + def init_components(self): + networks = self.net_builder() + optimizers_list = self.opt_builder() assert len(networks) == 2 assert len(optimizers_list) == 2 self.critic, self.actor = networks assert isinstance(self.critic, ValueNetwork) assert isinstance(self.actor, StochasticPolicyNetwork) self.critic_opt, self.actor_opt = optimizers_list - self.param_pipe_list = param_pipe_list - self.name = name - def run(self, traj_queue, grad_queue, should_stop, should_update, barrier, + def run(self, traj_queue, grad_queue, should_stop, should_update, barrier, param_pipe_list, max_update_num=1000, update_interval=100, save_interval=10, env_name='CartPole-v0'): + + self.init_components() + + if should_update.is_set(): + write_log('syn model') + self.send_param(param_pipe_list) + write_log('wait for barrier') + barrier.wait() + should_update.clear() + update_cnt = 0 + batch_a_grad, batch_c_grad = [], [] while update_cnt < max_update_num: - batch_a_grad, batch_c_grad = [], [] - for _ in range(update_interval): - a_grad, c_grad = grad_queue.get() + # print('\rupdate cnt {}, traj_que {}, grad_que {}'.format( + # update_cnt, traj_queue.qsize(), grad_queue[0].qsize()), end='') + print('update cnt {}, traj_que {}, grad_que {}'.format( + update_cnt, traj_queue.qsize(), grad_queue[0].qsize())) + try: + a_grad, c_grad = [q.get(timeout=1) for q in grad_queue] batch_a_grad.append(a_grad) batch_c_grad.append(c_grad) + write_log('got grad') + except queue.Empty: + continue - # update - should_update.set() - self.update_model(batch_a_grad, batch_c_grad) - self.send_param() + if len(batch_a_grad) > update_interval and len(batch_c_grad) > update_interval: + # write_log('ready to update') + # update + should_update.set() + write_log('update model') + self.update_model(batch_a_grad, batch_c_grad) + write_log('send_param') + self.send_param(param_pipe_list) - traj_queue.empty() - for q in grad_queue: q.empty() - - barrier.wait() - should_update.clear() + write_log('empty queue') + traj_queue.empty() + for q in grad_queue: + q.empty() + batch_a_grad.clear() + batch_c_grad.clear() - update_cnt += 1 - if update_cnt // save_interval == 0: - self.save_model(env_name) + write_log('wait for barrier') + barrier.wait() + should_update.clear() + barrier.reset() + update_cnt += 1 + if update_cnt // save_interval == 0: + self.save_model(env_name) should_stop.set() - def send_param(self): + def 
send_param(self, param_pipe_list): params = self.critic.trainable_weights + self.actor.trainable_weights - for pipe_connection in self.param_pipe_list: - pipe_connection.send(params) + params = [p.numpy() for p in params] + for i, pipe_connection in enumerate(param_pipe_list): + pipe_connection.put(params) def update_model(self, batch_a_grad, batch_c_grad): a_grad = np.mean(batch_a_grad, axis=0) diff --git a/dppo_clip_distributed/dppo_infer_server.py b/dppo_clip_distributed/dppo_infer_server.py index 46c804a..da57268 100644 --- a/dppo_clip_distributed/dppo_infer_server.py +++ b/dppo_clip_distributed/dppo_infer_server.py @@ -5,13 +5,17 @@ import pickle +def write_log(text: str): + pass + # print('infer server: '+text) + # with open('infer_server_log.txt', 'a') as f: + # f.write(str(text) + '\n') + + class DPPOInferServer: - def __init__(self, net_builder, net_param_pipe, n_step=1000, gamma=0.9): - networks = net_builder() - assert len(networks) == 2 - self.critic, self.actor = networks - assert isinstance(self.critic, ValueNetwork) - assert isinstance(self.actor, StochasticPolicyNetwork) + def __init__(self, net_builder, n_step=100, gamma=0.9): + self.critic, self.actor = None, None + self.net_builder = net_builder self.state_buffer = [] self.action_buffer = [] self.reward_buffer = [] @@ -19,7 +23,13 @@ def __init__(self, net_builder, net_param_pipe, n_step=1000, gamma=0.9): self.logp_buffer = [] self.gamma = gamma self.n_step = n_step - self.net_param_pipe = net_param_pipe + + def init_components(self): + networks = self.net_builder() + assert len(networks) == 2 + self.critic, self.actor = networks + assert isinstance(self.critic, ValueNetwork) + assert isinstance(self.actor, StochasticPolicyNetwork) def _cal_adv(self): dc_r = self._cal_discounted_r() @@ -42,19 +52,24 @@ def _cal_discounted_r(self): return discounted_r def _get_traj(self): - traj = [] + traj_list = [] for element in [self.state_buffer, self.action_buffer, self.reward_buffer, self.done_buffer, self._cal_adv(), self.logp_buffer]: axes = list(range(len(np.shape(element)))) axes[0], axes[1] = 1, 0 - traj.append(np.transpose(element, axes)) + traj_list.append(np.transpose(element, axes)) if type(element) == list: element.clear() - return traj + traj_list = list(zip(*traj_list)) + return traj_list def inference_service(self, batch_s): - print(batch_s) + write_log('get action') + # write_log(self.actor.trainable_weights) + # write_log(batch_s) + batch_s = np.array(batch_s) batch_a = self.actor(batch_s).numpy() + write_log('get log p') batch_log_p = self.actor.policy_dist.get_param() return batch_a, batch_log_p @@ -66,32 +81,50 @@ def collect_data(self, s, a, r, d, log_p): self.logp_buffer.append(log_p) def upload_data(self, que): - traj_data = self._get_traj() - que.put(traj_data) - print('\rupdated, queue size: {}, current data shape: {}'.format(que.qsize(), [np.shape(i) for i in traj_data])) - - def run(self, pipe_list, traj_queue, should_stop, should_update, barrier, ): - states, rewards, dones, infos = zip(*[remote.recv() for remote in pipe_list]) + traj_list = self._get_traj() + traj = [] + for traj in traj_list: + que.put(traj) + # print('\rinfer server: updated, queue size: {}, current data shape: {}'.format(que.qsize(), [np.shape(i) for i in traj])) + write_log('\rupdated, queue size: {}, current data shape: {}'.format(que.qsize(), [np.shape(i) for i in traj])) + + def run(self, pipe_list, traj_queue, should_stop, should_update, barrier, param_que): + self.init_components() + data = [] + for i, remote_connect in 
enumerate(pipe_list): + write_log('recv {}'.format(i)) + data.append(remote_connect.recv()) + write_log('first recved') + states, rewards, dones, infos = zip(*data) + # states, rewards, dones, infos = zip(*[remote.recv() for remote in pipe_list]) states, rewards, dones, infos = np.stack(states), np.stack(rewards), np.stack(dones), np.stack(infos) - + write_log('before while') while not should_stop.is_set(): + write_log('into while') if should_update.is_set(): - self.update_model() + write_log('update_model') + self.update_model(param_que) + write_log('barrier.wait') barrier.wait() + write_log('befor infer') actions, log_ps = self.inference_service(states) + write_log('before send') for (remote, a) in zip(pipe_list, actions): remote.send(a) - + write_log('recv from pipe') states, rewards, dones, infos = zip(*[remote.recv() for remote in pipe_list]) states, rewards, dones, infos = np.stack(states), np.stack(rewards), np.stack(dones), np.stack(infos) self.collect_data(states, actions, rewards, dones, log_ps) - print('\rsampling, {}'.format(len(self.state_buffer)), end='') + write_log('sampling, {}'.format(len(self.state_buffer))) + # print('\rsampling, {}'.format(len(self.state_buffer)), end='') if len(self.state_buffer) >= self.n_step: self.upload_data(traj_queue) - def update_model(self): - params = self.net_param_pipe.recv() + def update_model(self, param_que): + write_log('get from param_que') + params = param_que.get() + write_log('assign param') for i, j in zip(self.critic.trainable_weights + self.actor.trainable_weights, params): i.assign(j) self.state_buffer.clear() @@ -100,60 +133,3 @@ def update_model(self): self.done_buffer.clear() self.logp_buffer.clear() - -if __name__ == '__main__': - import multiprocessing as mp - - from rlzoo.common.env_wrappers import build_env - from dppo_clip_distributed.dppo_sampler import DPPOSampler - import copy, json, pickle - from gym.spaces.box import Box - from gym.spaces.discrete import Discrete - import cloudpickle - - should_stop_event = mp.Event() - should_stop_event.clear() - - # build_sampler - nenv = 3 - - - def build_func(): - return build_env('CartPole-v0', 'classic_control') - - - pipe_list = [] - for _ in range(nenv): - sampler = DPPOSampler(build_func) - remote_a, remote_b = mp.Pipe() - p = mp.Process(target=sampler.run, args=(remote_a, should_stop_event)) - p.daemon = True # todo 守护进程的依赖关系 - p.start() - pipe_list.append(remote_b) - - traj_queue = mp.Queue(maxsize=10000) - grad_queue = mp.Queue(maxsize=10000), mp.Queue(maxsize=10000), - should_update_event = mp.Event() - should_update_event.clear() - barrier = mp.Barrier(1) # sampler + updater - - """ build networks for the algorithm """ - name = 'DPPO_CLIP' - hidden_dim = 64 - num_hidden_layer = 2 - critic = ValueNetwork(Box(0, 1, (4,)), [hidden_dim] * num_hidden_layer, name=name + '_value') - actor = StochasticPolicyNetwork(Box(0, 1, (4,)), Discrete(2), - [hidden_dim] * num_hidden_layer, - trainable=True, - name=name + '_policy') - - actor = copy.deepcopy(actor) - global_nets = critic, actor - - global_nets = cloudpickle.dumps(global_nets) - # p = mp.Process( - # target=DPPOInferServer(global_nets).run, - # args=(traj_queue, should_stop_event, should_update_event, barrier) - # ) - # p.start() - DPPOInferServer(global_nets).run(pipe_list, traj_queue, should_stop_event, should_update_event, barrier) diff --git a/dppo_clip_distributed/dppo_learner.py b/dppo_clip_distributed/dppo_learner.py index 081c303..32e8a46 100644 --- a/dppo_clip_distributed/dppo_learner.py +++ 
b/dppo_clip_distributed/dppo_learner.py @@ -7,28 +7,28 @@ EPS = 1e-8 # epsilon +def write_log(text: str): + pass + # print('learner: ' + text) + # with open('learner_log.txt', 'a') as f: + # f.write(str(text) + '\n') + + class DPPOLearner(object): """ PPO class """ - def __init__(self, net_builder, net_param_pipe, epsilon=0.2): - """ - :param net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization - :param optimizers_list: a list of optimizers for all networks and differentiable variables - :param state_dim: dimension of action for the environment - :param action_dim: dimension of state for the environment - :param a_bounds: a list of [min_action, max_action] action bounds for the environment - :param epsilon: clip parameter - """ - networks = net_builder() - assert len(networks) == 2 + def __init__(self, net_builder, epsilon=0.2): + self.net_builder = net_builder self.name = 'DPPO_CLIP' - self.epsilon = epsilon + self.critic, self.actor = None, None + def init_components(self): + networks = self.net_builder() + assert len(networks) == 2 self.critic, self.actor = networks - self.net_param_pipe = net_param_pipe assert isinstance(self.critic, ValueNetwork) assert isinstance(self.actor, StochasticPolicyNetwork) @@ -72,84 +72,58 @@ def c_train(self, dc_r, s): c_grad = tape.gradient(closs, self.critic.trainable_weights) return c_grad - def update_model(self): - params = self.net_param_pipe.recv() + def update_model(self, param_que_list): + params = param_que_list.get() for i, j in zip(self.critic.trainable_weights + self.actor.trainable_weights, params): i.assign(j) - def run(self, traj_queue, grad_queue, should_stop, should_update, barrier, + def run(self, traj_queue, grad_queue, should_stop, should_update, barrier, param_que_list, batch_length=10, a_update_steps=1, c_update_steps=1): # todo a, c update step # todo max episode + self.init_components() a_grad_queue, c_grad_queue = grad_queue + batch_data = batch_s, batch_a, batch_r, batch_d, batch_adv, batch_logp = [], [], [], [], [], [] while not should_stop.is_set(): + write_log('grad_queue size: {}'.format(grad_queue[0].qsize())) if should_update.is_set(): - self.update_model() + write_log('update_model') + self.update_model(param_que_list) + write_log('barrier.wait') barrier.wait() - batch_data = batch_s, batch_a, batch_r, batch_d, batch_adv, batch_logp = [], [], [], [], [], [] - for _ in range(batch_length): - b_s, b_a, b_r, b_d, b_adv, b_logp = traj_queue.get() - batch_s.extend(b_s) - batch_a.extend(b_a) - batch_r.extend(b_r) - batch_d.extend(b_d) - batch_adv.extend(b_adv) - batch_logp.extend(b_logp) - pass - for s, a, r, d, adv, logp in zip(*batch_data): - s, a, r, d, adv, logp = np.vstack(s), np.vstack(a), np.vstack(r), np.vstack(d), \ - np.vstack(adv), np.vstack(logp) - s, a, r = np.array(s), np.array(a, np.float32), np.array(r, np.float32), - adv, logp = np.array(adv, np.float32), np.array(logp, np.float32), - - # update actor - for _ in range(a_update_steps): - a_grad_queue.put(self.a_train(s, a, adv, logp)) # todo 这里待优化 - - # update critic - for _ in range(c_update_steps): - c_grad_queue.put(self.c_train(r, s)) # todo 这里待优化 - - -if __name__ == '__main__': - import multiprocessing as mp - import cloudpickle - from rlzoo.common.env_wrappers import build_env - import copy, json, pickle - from gym.spaces.box import Box - from gym.spaces.discrete import Discrete - - traj_queue = mp.Queue(maxsize=10000) - grad_queue = mp.Queue(maxsize=10000), queue.Queue(maxsize=10000), - 
should_stop_event = mp.Event() - should_stop_event.clear() - should_update_event = mp.Event() - should_update_event.clear() - barrier = mp.Barrier(2) # sampler + updater - - """ build networks for the algorithm """ - name = 'DPPO_CLIP' - hidden_dim = 64 - num_hidden_layer = 2 - critic = ValueNetwork(Box(0, 1, (4,)), [hidden_dim] * num_hidden_layer, name=name + '_value') - actor = StochasticPolicyNetwork(Box(0, 1, (4,)), Discrete(2), - [hidden_dim] * num_hidden_layer, - trainable=True, - name=name + '_policy') - - actor = copy.deepcopy(actor) - global_nets = critic, actor - - with open('queue_data.json', 'rb') as file: - queue_data = pickle.load(file) - for data in queue_data: - traj_queue.put(data) - print(traj_queue.qsize()) - actor_lr = 1e-4 - critic_lr = 2e-4 - optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] - dcu = DPPOLearner(global_nets, traj_queue, grad_queue, should_stop_event, should_update_event, barrier) - global_nets = cloudpickle.dumps(global_nets) - p = mp.Process(target=dcu.run, args=()) - p.daemon = True - p.start() + for d in batch_data: + d.clear() + write_log('get traj_queue {}'.format(traj_queue.qsize())) + try: + b_s, b_a, b_r, b_d, b_adv, b_logp = traj_queue.get(timeout=0.5) + batch_s.append(b_s) + batch_a.append(b_a) + batch_r.append(b_r) + batch_d.append(b_d) + batch_adv.append(b_adv) + batch_logp.append(b_logp) + except queue.Empty: + continue + if len(batch_s) >= batch_length: + write_log('batch data collected {}'.format([np.shape(i) for i in batch_data])) + for s, a, r, d, adv, logp in zip(*batch_data): + s, a, r, d, adv, logp = np.vstack(s), np.vstack(a), np.vstack(r), np.vstack(d), \ + np.vstack(adv), np.vstack(logp) + s, a, r = np.array(s), np.array(a, np.float32), np.array(r, np.float32), + adv, logp = np.array(adv, np.float32), np.array(logp, np.float32), + + # write_log('update actor') + # update actor + for _ in range(a_update_steps): + a_grad_queue.put(self.a_train(s, a, adv, logp)) # todo 这里待优化 + # write_log('put a_grad_queue') + + # write_log('update critic') + # update critic + for _ in range(c_update_steps): + c_grad_queue.put(self.c_train(r, s)) # todo 这里待优化 + # write_log('put c_update_steps') + + for d in batch_data: + d.clear() + diff --git a/dppo_clip_distributed/dppo_roles_func.py b/dppo_clip_distributed/dppo_roles_func.py new file mode 100644 index 0000000..7900a45 --- /dev/null +++ b/dppo_clip_distributed/dppo_roles_func.py @@ -0,0 +1,84 @@ +import multiprocessing as mp +from functools import partial + + +def build_network(observation_space, action_space, name='DPPO_CLIP'): + """ build networks for the algorithm """ + from rlzoo.common.policy_networks import StochasticPolicyNetwork + from rlzoo.common.value_networks import ValueNetwork + + hidden_dim = 64 + num_hidden_layer = 2 + critic = ValueNetwork(observation_space, [hidden_dim] * num_hidden_layer, name=name + '_value') + + actor = StochasticPolicyNetwork(observation_space, action_space, + [hidden_dim] * num_hidden_layer, + trainable=True, + name=name + '_policy') + return critic, actor + + +def build_opt(actor_lr=1e-4, critic_lr=2e-4): + import tensorflow as tf + return [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] + + +def make_sampler_process(num, create_env_func, should_stop): + from dppo_clip_distributed.dppo_sampler import DPPOSampler + process_list = [] + sample_pipe_list = [] + for _ in range(num): + sampler = DPPOSampler(create_env_func) + pipe_a, pipe_b = mp.Pipe() + p = mp.Process(target=sampler.run, args=(pipe_a, should_stop)) + 
p.daemon = True + process_list.append(p) + sample_pipe_list.append(pipe_b) + return process_list, sample_pipe_list + + +def make_infer_server_process( + build_net_func, sample_pipe_list, traj_queue, should_stop, should_update, barrier, + net_param_pipe): + from dppo_clip_distributed.dppo_infer_server import DPPOInferServer + build_net_func = partial(build_net_func, 'DPPO_CLIP_INFER_SERVER') + + infer_server = DPPOInferServer(build_net_func) + p = mp.Process( + target=infer_server.run, + args=(sample_pipe_list, traj_queue, should_stop, should_update, barrier, net_param_pipe)) + p.daemon = True + return [p] + + +def make_learner_process( + num, build_net_func, traj_queue, grad_queue, should_stop_event, should_update_event, barrier, + param_que_list): + process_list = [] + l = len(str(num)) + + from dppo_clip_distributed.dppo_learner import DPPOLearner + + for i in range(num): + net_func = partial(build_net_func, 'DPPO_CLIP_LEARNER_{}'.format(str(i).zfill(l))) + learner = DPPOLearner(net_func) + p = mp.Process( + target=learner.run, + args=(traj_queue, grad_queue, should_stop_event, should_update_event, barrier, param_que_list[i])) + p.daemon = True + process_list.append(p) + return process_list + + +def make_global_manager( + build_net_func, build_opt_func, traj_queue, grad_queue, should_stop, should_update, barrier, param_que_list): + from dppo_clip_distributed.dppo_global_manager import DPPOGlobalManager + build_net_func = partial(build_net_func, 'DPPO_CLIP_GLOBAL') + global_manager = DPPOGlobalManager(build_net_func, build_opt_func) + + p = mp.Process( + target=global_manager.run, + args=(traj_queue, grad_queue, should_stop, should_update, barrier, param_que_list)) + p.daemon = True + return [p] + diff --git a/dppo_clip_distributed/dppo_sampler.py b/dppo_clip_distributed/dppo_sampler.py index 3cd7076..65ea98e 100644 --- a/dppo_clip_distributed/dppo_sampler.py +++ b/dppo_clip_distributed/dppo_sampler.py @@ -1,15 +1,30 @@ import multiprocessing as mp +def write_log(text: str): + pass + # with open('sampler_log.txt', 'a') as f: + # f.write(text+'\n') + + class DPPOSampler: def __init__(self, create_env_func): - self.env = create_env_func() + self.env_builder = create_env_func + self.env = None + + def init_components(self): + self.env = self.env_builder() def run(self, pipe, should_stop: mp.Event): + self.init_components() + write_log('---------------' * 10) state = self.env.reset() done, reward, _ = True, 0, {} + write_log('going into while') while not should_stop.is_set(): + write_log('sending data') pipe.send((state, reward, done, _)) + write_log('recving data') action = pipe.recv() state, reward, done, _ = self.env.step(action) if done: @@ -20,9 +35,11 @@ def run(self, pipe, should_stop: mp.Event): from rlzoo.common.env_wrappers import build_env import multiprocessing as mp + def build_func(): return build_env('CartPole-v0', 'classic_control') + remote_a, remote_b = mp.Pipe() should_stop = mp.Event() should_stop.clear() diff --git a/dppo_clip_distributed/run_dppo_clip.py b/dppo_clip_distributed/run_dppo_clip.py index 88f1405..2c175d7 100644 --- a/dppo_clip_distributed/run_dppo_clip.py +++ b/dppo_clip_distributed/run_dppo_clip.py @@ -10,7 +10,7 @@ from functools import partial -def make_network(observation_space, action_space, name='DPPO_CLIP'): +def build_network(observation_space, action_space, name='DPPO_CLIP'): """ build networks for the algorithm """ hidden_dim = 64 num_hidden_layer = 2 @@ -23,7 +23,7 @@ def make_network(observation_space, action_space, name='DPPO_CLIP'): return 
critic, actor -def make_opt(actor_lr=1e-4, critic_lr=2e-4): +def build_opt(actor_lr=1e-4, critic_lr=2e-4): return [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] @@ -43,19 +43,22 @@ def make_sampler_process(num, create_env_func, should_stop): def make_infer_server_process( build_net_func, create_env_func, sample_pipe_list, traj_queue, should_stop, should_update, barrier, net_param_pipe): + build_net_func = partial(build_net_func, 'DPPO_CLIP_INFER_SERVER') infer_server = DPPOInferServer(build_net_func, net_param_pipe) p = mp.Process(target=infer_server.run, args=(create_env_func, sample_pipe_list, traj_queue, should_stop, should_update, barrier)) p.daemon = True - return [infer_server] + return [p] def make_learner_process( num, build_net_func, traj_queue, grad_queue, should_stop_event, should_update_event, barrier, net_param_pipe_list): process_list = [] + l = len(str(num)) for i in range(num): - learner = DPPOLearner(build_net_func, net_param_pipe_list[i]) + net_func = partial(build_net_func, 'DPPO_CLIP_LEARNER_{}'.format(str(i).zfill(l))) + learner = DPPOLearner(net_func, net_param_pipe_list[i]) p = mp.Process( target=learner.run, args=(traj_queue, grad_queue, should_stop_event, should_update_event, barrier)) p.daemon = True @@ -66,6 +69,7 @@ def make_learner_process( def make_global_manager( build_net_func, build_opt_func, n_nets, traj_queue, grad_queue, should_stop, should_update, barrier, ): param_pipe_a, param_pipe_b = zip(*[mp.Pipe() for _ in range(n_nets)]) + build_net_func = partial(build_net_func, 'DPPO_CLIP_GLOBAL') global_manager = DPPOGlobalManager(build_net_func, build_opt_func, param_pipe_a) p = mp.Process(target=global_manager.run, args=(traj_queue, grad_queue, should_stop, should_update, barrier,)) p.daemon = True @@ -76,10 +80,10 @@ def make_global_manager( n_sampler = 3 name = 'DPPO_CLIP' - build_env = partial(build_env, ('CartPole-v0', 'classic_control')) + build_env = partial(build_env, 'CartPole-v0', 'classic_control') env = build_env() observation_space, action_space = env.observation_space, env.action_space - build_network = partial(make_network, (observation_space, action_space, name)) + build_network = partial(build_network, observation_space, action_space) traj_queue = mp.Queue(maxsize=10000) should_stop_event = mp.Event() @@ -94,7 +98,8 @@ def make_global_manager( process_list = [] p_list, param_pipe_b = make_global_manager( - build_env, make_opt, n_learner + 1, traj_queue, grad_queue, should_stop_event, should_update_event, barrier) + build_network, build_opt, n_learner + 1, traj_queue, grad_queue, should_stop_event, should_update_event, + barrier) process_list.extend(p_list) p_list, sample_pipe_list = make_sampler_process( @@ -113,5 +118,9 @@ def make_global_manager( ) process_list.extend(p_list) + for p in process_list: + p.start() + while True: - print(grad_queue[0].qsize) + print('traj_queue', traj_queue.qsize()) + print('grad_queue', grad_queue[0].qsize()) diff --git a/dppo_clip_distributed/start.py b/dppo_clip_distributed/start.py new file mode 100644 index 0000000..837a747 --- /dev/null +++ b/dppo_clip_distributed/start.py @@ -0,0 +1,59 @@ +from dppo_clip_distributed.dppo_roles_func import * +from rlzoo.common.env_wrappers import build_env + +if __name__ == '__main__': + + n_sampler = 3 + name = 'DPPO_CLIP' + build_env = partial(build_env, 'CartPole-v0', 'classic_control') + env = build_env() + observation_space, action_space = env.observation_space, env.action_space + build_network = partial(build_network, observation_space, action_space) 
+ + traj_queue = mp.Queue(maxsize=10000) + should_stop_event = mp.Event() + should_stop_event.clear() + should_update_event = mp.Event() + should_update_event.set() + grad_queue = mp.Queue(maxsize=10000), mp.Queue(maxsize=10000), + + n_learner = 2 + update_barrier = mp.Barrier(2 + n_learner) # InferServer + Learner + GlobalManager + param_que_list = [mp.Queue() for _ in range(1 + n_learner)] # InferServer + Learner + + process_list = [] + + p_list, sample_pipe_list = make_sampler_process( + n_sampler, build_env, should_stop_event + ) + process_list.extend(p_list) + + p_list = make_infer_server_process( + build_network, sample_pipe_list, traj_queue, should_stop_event, should_update_event, update_barrier, + param_que_list[0]) + process_list.extend(p_list) + + p_list = make_learner_process( + n_learner, build_network, traj_queue, grad_queue, should_stop_event, should_update_event, update_barrier, + param_que_list[1:]) + process_list.extend(p_list) + + if True: + for p in process_list: + p.start() + from dppo_clip_distributed.dppo_global_manager import DPPOGlobalManager + build_net_func = partial(build_network, 'DPPO_CLIP_GLOBAL') + global_manager = DPPOGlobalManager(build_net_func, build_opt) + + global_manager.run(traj_queue, grad_queue, should_stop_event, should_update_event, update_barrier, param_que_list) + else: + import time + p_list = make_global_manager( + build_network, build_opt, traj_queue, grad_queue, should_stop_event, should_update_event, update_barrier, + param_que_list) + process_list.extend(p_list) + for p in process_list: + p.start() + while True: + print('traj_queue {} grad_queue {}'.format(traj_queue.qsize(), grad_queue[0].qsize())) + time.sleep(1) From 2cb1ab59c2fb3d3a13589e64e9038475b274267d Mon Sep 17 00:00:00 2001 From: gqc666 <251843801@qq.com> Date: Thu, 10 Jun 2021 13:52:29 +0100 Subject: [PATCH 04/11] Update Kungfu distribution support --- .../Kungfu_dppo/kungfu_dppo_learner.py | 141 ++++++++++++++++++ .../Kungfu_dppo/kungfu_start.py | 82 ++++++++++ 2 files changed, 223 insertions(+) create mode 100755 dppo_clip_distributed/Kungfu_dppo/kungfu_dppo_learner.py create mode 100755 dppo_clip_distributed/Kungfu_dppo/kungfu_start.py diff --git a/dppo_clip_distributed/Kungfu_dppo/kungfu_dppo_learner.py b/dppo_clip_distributed/Kungfu_dppo/kungfu_dppo_learner.py new file mode 100755 index 0000000..6b917be --- /dev/null +++ b/dppo_clip_distributed/Kungfu_dppo/kungfu_dppo_learner.py @@ -0,0 +1,141 @@ +import queue +from rlzoo.common.utils import * +from rlzoo.common.policy_networks import * +from rlzoo.common.value_networks import * + +EPS = 1e-8 # epsilon + +def write_log(text: str): + pass + # print('learner: ' + text) + # with open('learner_log.txt', 'a') as f: + # f.write(str(text) + '\n') + + +class DPPOLearner(object): + """ + PPO class + """ + + def __init__(self, net_builder, epsilon=0.2): + self.net_builder = net_builder + self.name = 'DPPO_CLIP' + self.epsilon = epsilon + self.critic, self.actor = None, None + + def init_components(self): + networks = self.net_builder() + assert len(networks) == 2 + self.critic, self.actor = networks + + assert isinstance(self.critic, ValueNetwork) + assert isinstance(self.actor, StochasticPolicyNetwork) + + def a_train(self, s, a, adv, oldpi_prob): + """ + Update policy network + + :param s: state + :param a: act + :param adv: advantage + :param oldpi_prob: old pi probability of a in s + + :return: + """ + from kungfu.tensorflow.ops import all_reduce + with tf.GradientTape() as tape: + _ = self.actor(s) + pi_prob = 
tf.exp(self.actor.policy_dist.logp(a)) + ratio = pi_prob / (oldpi_prob + EPS) + + surr = ratio * adv + aloss = -tf.reduce_mean( + tf.minimum(surr, tf.clip_by_value(ratio, 1. - self.epsilon, 1. + self.epsilon) * adv)) + + a_gard = tape.gradient(aloss, self.actor.trainable_weights) + for i in range(len(a_gard)): + a_gard[i] = all_reduce(a_gard[i],op="sum") + for i in range(len(a_gard)): + a_gard[i] = a_gard[i]/2 + write_log("Kungfu learner allreduce and average") + + return a_gard + + def c_train(self, dc_r, s): + """ + Update actor network + + :param dc_r: cumulative reward + :param s: state + + :return: None + """ + from kungfu.tensorflow.ops import all_reduce + dc_r = np.array(dc_r, dtype=np.float32) + with tf.GradientTape() as tape: + v = self.critic(s) + advantage = dc_r - v + closs = tf.reduce_mean(tf.square(advantage)) + #c_grad = all_reduce(tape.gradient(closs, self.critic.trainable_weights)) + c_grad = tape.gradient(closs, self.critic.trainable_weights) + for i in range(len(c_grad)): + c_grad[i] = (all_reduce(c_grad[i],op="sum")) + for i in range(len(c_grad)): + c_grad[i] = c_grad[i]/2 + write_log("Kungfu learner allreduce and average") + return c_grad + + def update_model(self, param_que_list): + params = param_que_list.get() + for i, j in zip(self.critic.trainable_weights + self.actor.trainable_weights, params): + i.assign(j) + + def run(self, traj_queue, grad_queue, should_stop, should_update, barrier, param_que_list, + batch_length=10, a_update_steps=1, c_update_steps=1): # todo a, c update step + # todo max episode + self.init_components() + a_grad_queue, c_grad_queue = grad_queue + batch_data = batch_s, batch_a, batch_r, batch_d, batch_adv, batch_logp = [], [], [], [], [], [] + while not should_stop.is_set(): + #write_log('grad_queue size: {}'.format(grad_queue[0].qsize())) + if should_update.is_set(): + write_log('update_model') + self.update_model(param_que_list) + write_log('barrier.wait') + barrier.wait() + for d in batch_data: + d.clear() + #write_log('get traj_queue {}'.format(traj_queue.qsize())) + try: + b_s, b_a, b_r, b_d, b_adv, b_logp = traj_queue.get(timeout=0.5) + batch_s.append(b_s) + batch_a.append(b_a) + batch_r.append(b_r) + batch_d.append(b_d) + batch_adv.append(b_adv) + batch_logp.append(b_logp) + except queue.Empty: + continue + if len(batch_s) >= batch_length: + write_log('batch data collected {}'.format([np.shape(i) for i in batch_data])) + for s, a, r, d, adv, logp in zip(*batch_data): + s, a, r, d, adv, logp = np.vstack(s), np.vstack(a), np.vstack(r), np.vstack(d), \ + np.vstack(adv), np.vstack(logp) + s, a, r = np.array(s), np.array(a, np.float32), np.array(r, np.float32), + adv, logp = np.array(adv, np.float32), np.array(logp, np.float32), + + # write_log('update actor') + # update actor + for _ in range(a_update_steps): + a_grad_queue.put(self.a_train(s, a, adv, logp)) # todo 这里待优化 + # write_log('put a_grad_queue') + + # write_log('update critic') + # update critic + for _ in range(c_update_steps): + c_grad_queue.put(self.c_train(r, s)) # todo 这里待优化 + #c_grad_queue.append(self.c_train(r, s)) + # write_log('put c_update_steps') + + for d in batch_data: + d.clear() \ No newline at end of file diff --git a/dppo_clip_distributed/Kungfu_dppo/kungfu_start.py b/dppo_clip_distributed/Kungfu_dppo/kungfu_start.py new file mode 100755 index 0000000..95f8948 --- /dev/null +++ b/dppo_clip_distributed/Kungfu_dppo/kungfu_start.py @@ -0,0 +1,82 @@ +from dppo_clip_distributed.dppo_roles_func import * +from rlzoo.common.env_wrappers import build_env +import tensorflow 
as tf +from kungfu.cmd import launch_multiprocess +from tensorflow.python.util import deprecation + +deprecation._PRINT_DEPRECATION_WARNINGS = False + + +'''Design multiprocessing of learners''' +def learner_run(rank): + from dppo_clip_distributed.dppo_learner import DPPOLearner + l = len(str(n_learner)) + from dppo_learner import DPPOLearner + net_func = partial(build_network, 'DPPO_CLIP_LEARNER_{}'.format(str(rank).zfill(l))) + learner = DPPOLearner(net_func) + learner.run(traj_queue, grad_queue, should_stop_event, should_update_event, update_barrier, + param_que_list[1:][rank]) + + +''' ''' + +if __name__ == '__main__': + + n_sampler = 3 + name = 'DPPO_CLIP' + build_env = partial(build_env, 'CartPole-v0', 'classic_control') + env = build_env() + observation_space, action_space = env.observation_space, env.action_space + build_network = partial(build_network, observation_space, action_space) + + traj_queue = mp.Queue(maxsize=10000) + should_stop_event = mp.Event() + should_stop_event.clear() + should_update_event = mp.Event() + should_update_event.set() + grad_queue = mp.Queue(maxsize=10000), mp.Queue(maxsize=10000), + #grad_queue = ([],[]) + + + n_learner = 2 + update_barrier = mp.Barrier(2 + n_learner) # InferServer + Learner + GlobalManager + param_que_list = [mp.Queue() for _ in range(1 + n_learner)] # InferServer + Learner + + process_list = [] + p2 = [] + p_list, sample_pipe_list = make_sampler_process( + n_sampler, build_env, should_stop_event + ) + process_list.extend(p_list) + + p_list = make_infer_server_process( + build_network, sample_pipe_list, traj_queue, should_stop_event, should_update_event, update_barrier, + param_que_list[0]) + process_list.extend(p_list) + + p_list = make_learner_process( + n_learner, build_network, traj_queue, grad_queue, should_stop_event, should_update_event, update_barrier, + param_que_list[1:]) + process_list.extend(p_list) + + + if True: + for p in process_list: + p.start() + '''Launch multiprocessing of learners''' + #launch_multiprocess(learner_run, n_learner) + from dppo_clip_distributed.dppo_global_manager import DPPOGlobalManager + build_net_func = partial(build_network, 'DPPO_CLIP_GLOBAL') + global_manager = DPPOGlobalManager(build_net_func, build_opt) + global_manager.run(traj_queue, grad_queue, should_stop_event, should_update_event, update_barrier, param_que_list) + else: + import time + p_list = make_global_manager( + build_network, build_opt, traj_queue, grad_queue, should_stop_event, should_update_event, update_barrier, + param_que_list) + process_list.extend(p_list) + for p in process_list: + p.start() + while True: + print('traj_queue {} grad_queue {}'.format(traj_queue.qsize(), grad_queue[0].qsize())) + time.sleep(1) \ No newline at end of file From 79b60e47a81447da5749359025a2a085e5b4224e Mon Sep 17 00:00:00 2001 From: quantumiracle <1402434478@qq.com> Date: Sat, 26 Jun 2021 12:39:20 +0800 Subject: [PATCH 05/11] finish distributed rlzoo code --- .../Kungfu_dppo/kungfu_dppo_learner.py | 141 - .../Kungfu_dppo/kungfu_start.py | 82 - dppo_clip_distributed/dppo_global_manager.py | 99 - dppo_clip_distributed/dppo_infer_server.py | 135 - dppo_clip_distributed/dppo_learner.py | 129 - dppo_clip_distributed/dppo_roles_func.py | 84 - dppo_clip_distributed/dppo_sampler.py | 55 - dppo_clip_distributed/queue_data.json | Bin 94690 -> 0 bytes dppo_clip_distributed/run_dppo_clip.py | 126 - dppo_clip_distributed/start.py | 59 - rlzoo/.gitignore | 8 +- rlzoo/__init__.py | 0 rlzoo/algorithms/__init__.py | 28 +- rlzoo/algorithms/a3c/__init__.py | 0 
rlzoo/algorithms/a3c/a3c.py | 550 +- rlzoo/algorithms/a3c/default.py | 752 +- rlzoo/algorithms/a3c/run_a3c.py | 134 +- rlzoo/algorithms/ac/__init__.py | 0 rlzoo/algorithms/ac/ac.py | 374 +- rlzoo/algorithms/ac/default.py | 576 +- rlzoo/algorithms/ac/run_ac.py | 118 +- rlzoo/algorithms/ddpg/__init__.py | 0 rlzoo/algorithms/ddpg/ddpg.py | 548 +- rlzoo/algorithms/ddpg/default.py | 652 +- rlzoo/algorithms/ddpg/run_ddpg.py | 132 +- rlzoo/algorithms/dppo/__init__.py | 0 rlzoo/algorithms/dppo/default.py | 668 +- rlzoo/algorithms/dppo/dppo.py | 0 rlzoo/algorithms/dppo_clip/__init__.py | 0 rlzoo/algorithms/dppo_clip/dppo_clip.py | 0 rlzoo/algorithms/dppo_clip/run_dppo_clip.py | 0 .../dppo_clip_distributed}/__init__.py | 0 .../dppo_clip_distributed/dppo_clip.py | 279 + rlzoo/algorithms/dppo_penalty/__init__.py | 0 rlzoo/algorithms/dppo_penalty/dppo_penalty.py | 0 .../dppo_penalty/run_dppo_penalty.py | 0 rlzoo/algorithms/dqn/__init__.py | 0 rlzoo/algorithms/dqn/default.py | 420 +- rlzoo/algorithms/dqn/dqn.py | 496 +- rlzoo/algorithms/dqn/run_dqn.py | 164 +- rlzoo/algorithms/pg/__init__.py | 0 rlzoo/algorithms/pg/default.py | 518 +- rlzoo/algorithms/pg/pg.py | 434 +- rlzoo/algorithms/pg/run_pg.py | 0 rlzoo/algorithms/ppo/__init__.py | 0 rlzoo/algorithms/ppo/default.py | 644 +- rlzoo/algorithms/ppo/ppo.py | 0 rlzoo/algorithms/ppo_clip/__init__.py | 0 rlzoo/algorithms/ppo_clip/ppo_clip.py | 0 rlzoo/algorithms/ppo_clip/run_ppo_clip.py | 118 +- rlzoo/algorithms/ppo_penalty/__init__.py | 0 rlzoo/algorithms/ppo_penalty/ppo_penalty.py | 0 .../algorithms/ppo_penalty/run_ppo_penalty.py | 120 +- rlzoo/algorithms/sac/__init__.py | 0 rlzoo/algorithms/sac/default.py | 728 +- rlzoo/algorithms/sac/run_sac.py | 164 +- rlzoo/algorithms/sac/sac.py | 572 +- rlzoo/algorithms/td3/__init__.py | 0 rlzoo/algorithms/td3/default.py | 742 +- rlzoo/algorithms/td3/run_td3.py | 166 +- rlzoo/algorithms/td3/td3.py | 628 +- rlzoo/algorithms/trpo/__init__.py | 0 rlzoo/algorithms/trpo/default.py | 660 +- rlzoo/algorithms/trpo/run_trpo.py | 116 +- rlzoo/algorithms/trpo/trpo.py | 0 rlzoo/common/__init__.py | 0 rlzoo/common/basic_nets.py | 298 +- rlzoo/common/buffer.py | 612 +- rlzoo/common/build_rlbench_env.py | 324 +- rlzoo/common/distributions.py | 414 +- rlzoo/common/env_list.py | 1804 +-- rlzoo/common/env_wrappers.py | 1274 +- rlzoo/common/math_utils.py | 30 +- rlzoo/common/policy_networks.py | 707 +- rlzoo/common/utils.py | 290 +- rlzoo/common/value_networks.py | 772 +- rlzoo/distributed/__init__.py | 0 rlzoo/distributed/dis_components.py | 128 + rlzoo/distributed/run_dis_train.sh | 46 + rlzoo/distributed/start_dis_role.py | 206 + rlzoo/distributed/training_components.py | 62 + rlzoo/interactive/.gitignore | 6 +- rlzoo/interactive/common.py | 248 +- rlzoo/interactive/components.py | 926 +- rlzoo/interactive/main.ipynb | 12112 ++++++++-------- rlzoo/run_rlzoo.py | 120 +- 86 files changed, 16317 insertions(+), 16481 deletions(-) delete mode 100755 dppo_clip_distributed/Kungfu_dppo/kungfu_dppo_learner.py delete mode 100755 dppo_clip_distributed/Kungfu_dppo/kungfu_start.py delete mode 100644 dppo_clip_distributed/dppo_global_manager.py delete mode 100644 dppo_clip_distributed/dppo_infer_server.py delete mode 100644 dppo_clip_distributed/dppo_learner.py delete mode 100644 dppo_clip_distributed/dppo_roles_func.py delete mode 100644 dppo_clip_distributed/dppo_sampler.py delete mode 100644 dppo_clip_distributed/queue_data.json delete mode 100644 dppo_clip_distributed/run_dppo_clip.py delete mode 100644 dppo_clip_distributed/start.py mode 
change 100644 => 100755 rlzoo/.gitignore mode change 100644 => 100755 rlzoo/__init__.py mode change 100644 => 100755 rlzoo/algorithms/__init__.py mode change 100644 => 100755 rlzoo/algorithms/a3c/__init__.py mode change 100644 => 100755 rlzoo/algorithms/a3c/a3c.py mode change 100644 => 100755 rlzoo/algorithms/a3c/default.py mode change 100644 => 100755 rlzoo/algorithms/a3c/run_a3c.py mode change 100644 => 100755 rlzoo/algorithms/ac/__init__.py mode change 100644 => 100755 rlzoo/algorithms/ac/ac.py mode change 100644 => 100755 rlzoo/algorithms/ac/default.py mode change 100644 => 100755 rlzoo/algorithms/ac/run_ac.py mode change 100644 => 100755 rlzoo/algorithms/ddpg/__init__.py mode change 100644 => 100755 rlzoo/algorithms/ddpg/ddpg.py mode change 100644 => 100755 rlzoo/algorithms/ddpg/default.py mode change 100644 => 100755 rlzoo/algorithms/ddpg/run_ddpg.py mode change 100644 => 100755 rlzoo/algorithms/dppo/__init__.py mode change 100644 => 100755 rlzoo/algorithms/dppo/default.py mode change 100644 => 100755 rlzoo/algorithms/dppo/dppo.py mode change 100644 => 100755 rlzoo/algorithms/dppo_clip/__init__.py mode change 100644 => 100755 rlzoo/algorithms/dppo_clip/dppo_clip.py mode change 100644 => 100755 rlzoo/algorithms/dppo_clip/run_dppo_clip.py rename {dppo_clip_distributed => rlzoo/algorithms/dppo_clip_distributed}/__init__.py (100%) mode change 100644 => 100755 create mode 100755 rlzoo/algorithms/dppo_clip_distributed/dppo_clip.py mode change 100644 => 100755 rlzoo/algorithms/dppo_penalty/__init__.py mode change 100644 => 100755 rlzoo/algorithms/dppo_penalty/dppo_penalty.py mode change 100644 => 100755 rlzoo/algorithms/dppo_penalty/run_dppo_penalty.py mode change 100644 => 100755 rlzoo/algorithms/dqn/__init__.py mode change 100644 => 100755 rlzoo/algorithms/dqn/default.py mode change 100644 => 100755 rlzoo/algorithms/dqn/dqn.py mode change 100644 => 100755 rlzoo/algorithms/dqn/run_dqn.py mode change 100644 => 100755 rlzoo/algorithms/pg/__init__.py mode change 100644 => 100755 rlzoo/algorithms/pg/default.py mode change 100644 => 100755 rlzoo/algorithms/pg/pg.py mode change 100644 => 100755 rlzoo/algorithms/pg/run_pg.py mode change 100644 => 100755 rlzoo/algorithms/ppo/__init__.py mode change 100644 => 100755 rlzoo/algorithms/ppo/default.py mode change 100644 => 100755 rlzoo/algorithms/ppo/ppo.py mode change 100644 => 100755 rlzoo/algorithms/ppo_clip/__init__.py mode change 100644 => 100755 rlzoo/algorithms/ppo_clip/ppo_clip.py mode change 100644 => 100755 rlzoo/algorithms/ppo_clip/run_ppo_clip.py mode change 100644 => 100755 rlzoo/algorithms/ppo_penalty/__init__.py mode change 100644 => 100755 rlzoo/algorithms/ppo_penalty/ppo_penalty.py mode change 100644 => 100755 rlzoo/algorithms/ppo_penalty/run_ppo_penalty.py mode change 100644 => 100755 rlzoo/algorithms/sac/__init__.py mode change 100644 => 100755 rlzoo/algorithms/sac/default.py mode change 100644 => 100755 rlzoo/algorithms/sac/run_sac.py mode change 100644 => 100755 rlzoo/algorithms/sac/sac.py mode change 100644 => 100755 rlzoo/algorithms/td3/__init__.py mode change 100644 => 100755 rlzoo/algorithms/td3/default.py mode change 100644 => 100755 rlzoo/algorithms/td3/run_td3.py mode change 100644 => 100755 rlzoo/algorithms/td3/td3.py mode change 100644 => 100755 rlzoo/algorithms/trpo/__init__.py mode change 100644 => 100755 rlzoo/algorithms/trpo/default.py mode change 100644 => 100755 rlzoo/algorithms/trpo/run_trpo.py mode change 100644 => 100755 rlzoo/algorithms/trpo/trpo.py mode change 100644 => 100755 rlzoo/common/__init__.py mode 
change 100644 => 100755 rlzoo/common/basic_nets.py mode change 100644 => 100755 rlzoo/common/buffer.py mode change 100644 => 100755 rlzoo/common/build_rlbench_env.py mode change 100644 => 100755 rlzoo/common/distributions.py mode change 100644 => 100755 rlzoo/common/env_list.py mode change 100644 => 100755 rlzoo/common/env_wrappers.py mode change 100644 => 100755 rlzoo/common/math_utils.py mode change 100644 => 100755 rlzoo/common/policy_networks.py mode change 100644 => 100755 rlzoo/common/utils.py mode change 100644 => 100755 rlzoo/common/value_networks.py create mode 100755 rlzoo/distributed/__init__.py create mode 100755 rlzoo/distributed/dis_components.py create mode 100755 rlzoo/distributed/run_dis_train.sh create mode 100755 rlzoo/distributed/start_dis_role.py create mode 100755 rlzoo/distributed/training_components.py mode change 100644 => 100755 rlzoo/interactive/.gitignore mode change 100644 => 100755 rlzoo/interactive/common.py mode change 100644 => 100755 rlzoo/interactive/components.py mode change 100644 => 100755 rlzoo/interactive/main.ipynb mode change 100644 => 100755 rlzoo/run_rlzoo.py diff --git a/dppo_clip_distributed/Kungfu_dppo/kungfu_dppo_learner.py b/dppo_clip_distributed/Kungfu_dppo/kungfu_dppo_learner.py deleted file mode 100755 index 6b917be..0000000 --- a/dppo_clip_distributed/Kungfu_dppo/kungfu_dppo_learner.py +++ /dev/null @@ -1,141 +0,0 @@ -import queue -from rlzoo.common.utils import * -from rlzoo.common.policy_networks import * -from rlzoo.common.value_networks import * - -EPS = 1e-8 # epsilon - -def write_log(text: str): - pass - # print('learner: ' + text) - # with open('learner_log.txt', 'a') as f: - # f.write(str(text) + '\n') - - -class DPPOLearner(object): - """ - PPO class - """ - - def __init__(self, net_builder, epsilon=0.2): - self.net_builder = net_builder - self.name = 'DPPO_CLIP' - self.epsilon = epsilon - self.critic, self.actor = None, None - - def init_components(self): - networks = self.net_builder() - assert len(networks) == 2 - self.critic, self.actor = networks - - assert isinstance(self.critic, ValueNetwork) - assert isinstance(self.actor, StochasticPolicyNetwork) - - def a_train(self, s, a, adv, oldpi_prob): - """ - Update policy network - - :param s: state - :param a: act - :param adv: advantage - :param oldpi_prob: old pi probability of a in s - - :return: - """ - from kungfu.tensorflow.ops import all_reduce - with tf.GradientTape() as tape: - _ = self.actor(s) - pi_prob = tf.exp(self.actor.policy_dist.logp(a)) - ratio = pi_prob / (oldpi_prob + EPS) - - surr = ratio * adv - aloss = -tf.reduce_mean( - tf.minimum(surr, tf.clip_by_value(ratio, 1. - self.epsilon, 1. 
+ self.epsilon) * adv)) - - a_gard = tape.gradient(aloss, self.actor.trainable_weights) - for i in range(len(a_gard)): - a_gard[i] = all_reduce(a_gard[i],op="sum") - for i in range(len(a_gard)): - a_gard[i] = a_gard[i]/2 - write_log("Kungfu learner allreduce and average") - - return a_gard - - def c_train(self, dc_r, s): - """ - Update actor network - - :param dc_r: cumulative reward - :param s: state - - :return: None - """ - from kungfu.tensorflow.ops import all_reduce - dc_r = np.array(dc_r, dtype=np.float32) - with tf.GradientTape() as tape: - v = self.critic(s) - advantage = dc_r - v - closs = tf.reduce_mean(tf.square(advantage)) - #c_grad = all_reduce(tape.gradient(closs, self.critic.trainable_weights)) - c_grad = tape.gradient(closs, self.critic.trainable_weights) - for i in range(len(c_grad)): - c_grad[i] = (all_reduce(c_grad[i],op="sum")) - for i in range(len(c_grad)): - c_grad[i] = c_grad[i]/2 - write_log("Kungfu learner allreduce and average") - return c_grad - - def update_model(self, param_que_list): - params = param_que_list.get() - for i, j in zip(self.critic.trainable_weights + self.actor.trainable_weights, params): - i.assign(j) - - def run(self, traj_queue, grad_queue, should_stop, should_update, barrier, param_que_list, - batch_length=10, a_update_steps=1, c_update_steps=1): # todo a, c update step - # todo max episode - self.init_components() - a_grad_queue, c_grad_queue = grad_queue - batch_data = batch_s, batch_a, batch_r, batch_d, batch_adv, batch_logp = [], [], [], [], [], [] - while not should_stop.is_set(): - #write_log('grad_queue size: {}'.format(grad_queue[0].qsize())) - if should_update.is_set(): - write_log('update_model') - self.update_model(param_que_list) - write_log('barrier.wait') - barrier.wait() - for d in batch_data: - d.clear() - #write_log('get traj_queue {}'.format(traj_queue.qsize())) - try: - b_s, b_a, b_r, b_d, b_adv, b_logp = traj_queue.get(timeout=0.5) - batch_s.append(b_s) - batch_a.append(b_a) - batch_r.append(b_r) - batch_d.append(b_d) - batch_adv.append(b_adv) - batch_logp.append(b_logp) - except queue.Empty: - continue - if len(batch_s) >= batch_length: - write_log('batch data collected {}'.format([np.shape(i) for i in batch_data])) - for s, a, r, d, adv, logp in zip(*batch_data): - s, a, r, d, adv, logp = np.vstack(s), np.vstack(a), np.vstack(r), np.vstack(d), \ - np.vstack(adv), np.vstack(logp) - s, a, r = np.array(s), np.array(a, np.float32), np.array(r, np.float32), - adv, logp = np.array(adv, np.float32), np.array(logp, np.float32), - - # write_log('update actor') - # update actor - for _ in range(a_update_steps): - a_grad_queue.put(self.a_train(s, a, adv, logp)) # todo 这里待优化 - # write_log('put a_grad_queue') - - # write_log('update critic') - # update critic - for _ in range(c_update_steps): - c_grad_queue.put(self.c_train(r, s)) # todo 这里待优化 - #c_grad_queue.append(self.c_train(r, s)) - # write_log('put c_update_steps') - - for d in batch_data: - d.clear() \ No newline at end of file diff --git a/dppo_clip_distributed/Kungfu_dppo/kungfu_start.py b/dppo_clip_distributed/Kungfu_dppo/kungfu_start.py deleted file mode 100755 index 95f8948..0000000 --- a/dppo_clip_distributed/Kungfu_dppo/kungfu_start.py +++ /dev/null @@ -1,82 +0,0 @@ -from dppo_clip_distributed.dppo_roles_func import * -from rlzoo.common.env_wrappers import build_env -import tensorflow as tf -from kungfu.cmd import launch_multiprocess -from tensorflow.python.util import deprecation - -deprecation._PRINT_DEPRECATION_WARNINGS = False - - -'''Design multiprocessing of 
learners''' -def learner_run(rank): - from dppo_clip_distributed.dppo_learner import DPPOLearner - l = len(str(n_learner)) - from dppo_learner import DPPOLearner - net_func = partial(build_network, 'DPPO_CLIP_LEARNER_{}'.format(str(rank).zfill(l))) - learner = DPPOLearner(net_func) - learner.run(traj_queue, grad_queue, should_stop_event, should_update_event, update_barrier, - param_que_list[1:][rank]) - - -''' ''' - -if __name__ == '__main__': - - n_sampler = 3 - name = 'DPPO_CLIP' - build_env = partial(build_env, 'CartPole-v0', 'classic_control') - env = build_env() - observation_space, action_space = env.observation_space, env.action_space - build_network = partial(build_network, observation_space, action_space) - - traj_queue = mp.Queue(maxsize=10000) - should_stop_event = mp.Event() - should_stop_event.clear() - should_update_event = mp.Event() - should_update_event.set() - grad_queue = mp.Queue(maxsize=10000), mp.Queue(maxsize=10000), - #grad_queue = ([],[]) - - - n_learner = 2 - update_barrier = mp.Barrier(2 + n_learner) # InferServer + Learner + GlobalManager - param_que_list = [mp.Queue() for _ in range(1 + n_learner)] # InferServer + Learner - - process_list = [] - p2 = [] - p_list, sample_pipe_list = make_sampler_process( - n_sampler, build_env, should_stop_event - ) - process_list.extend(p_list) - - p_list = make_infer_server_process( - build_network, sample_pipe_list, traj_queue, should_stop_event, should_update_event, update_barrier, - param_que_list[0]) - process_list.extend(p_list) - - p_list = make_learner_process( - n_learner, build_network, traj_queue, grad_queue, should_stop_event, should_update_event, update_barrier, - param_que_list[1:]) - process_list.extend(p_list) - - - if True: - for p in process_list: - p.start() - '''Launch multiprocessing of learners''' - #launch_multiprocess(learner_run, n_learner) - from dppo_clip_distributed.dppo_global_manager import DPPOGlobalManager - build_net_func = partial(build_network, 'DPPO_CLIP_GLOBAL') - global_manager = DPPOGlobalManager(build_net_func, build_opt) - global_manager.run(traj_queue, grad_queue, should_stop_event, should_update_event, update_barrier, param_que_list) - else: - import time - p_list = make_global_manager( - build_network, build_opt, traj_queue, grad_queue, should_stop_event, should_update_event, update_barrier, - param_que_list) - process_list.extend(p_list) - for p in process_list: - p.start() - while True: - print('traj_queue {} grad_queue {}'.format(traj_queue.qsize(), grad_queue[0].qsize())) - time.sleep(1) \ No newline at end of file diff --git a/dppo_clip_distributed/dppo_global_manager.py b/dppo_clip_distributed/dppo_global_manager.py deleted file mode 100644 index d554a65..0000000 --- a/dppo_clip_distributed/dppo_global_manager.py +++ /dev/null @@ -1,99 +0,0 @@ -from rlzoo.common.policy_networks import StochasticPolicyNetwork -from rlzoo.common.value_networks import ValueNetwork -from rlzoo.common.utils import * -import queue - - -def write_log(text: str): - pass - # print('global manager: '+text) - # with open('global_manager_log.txt', 'a') as f: - # f.write(str(text) + '\n') - - -class DPPOGlobalManager: - def __init__(self, net_builder, opt_builder, name='DPPO_CLIP'): - self.net_builder, self.opt_builder = net_builder, opt_builder - self.name = name - self.critic, self.actor = None, None - self.critic_opt, self.actor_opt = None, None - - def init_components(self): - networks = self.net_builder() - optimizers_list = self.opt_builder() - assert len(networks) == 2 - assert len(optimizers_list) == 
2 - self.critic, self.actor = networks - assert isinstance(self.critic, ValueNetwork) - assert isinstance(self.actor, StochasticPolicyNetwork) - self.critic_opt, self.actor_opt = optimizers_list - - def run(self, traj_queue, grad_queue, should_stop, should_update, barrier, param_pipe_list, - max_update_num=1000, update_interval=100, save_interval=10, env_name='CartPole-v0'): - - self.init_components() - - if should_update.is_set(): - write_log('syn model') - self.send_param(param_pipe_list) - write_log('wait for barrier') - barrier.wait() - should_update.clear() - - update_cnt = 0 - batch_a_grad, batch_c_grad = [], [] - while update_cnt < max_update_num: - # print('\rupdate cnt {}, traj_que {}, grad_que {}'.format( - # update_cnt, traj_queue.qsize(), grad_queue[0].qsize()), end='') - print('update cnt {}, traj_que {}, grad_que {}'.format( - update_cnt, traj_queue.qsize(), grad_queue[0].qsize())) - try: - a_grad, c_grad = [q.get(timeout=1) for q in grad_queue] - batch_a_grad.append(a_grad) - batch_c_grad.append(c_grad) - write_log('got grad') - except queue.Empty: - continue - - if len(batch_a_grad) > update_interval and len(batch_c_grad) > update_interval: - # write_log('ready to update') - # update - should_update.set() - write_log('update model') - self.update_model(batch_a_grad, batch_c_grad) - write_log('send_param') - self.send_param(param_pipe_list) - - write_log('empty queue') - traj_queue.empty() - for q in grad_queue: - q.empty() - batch_a_grad.clear() - batch_c_grad.clear() - - write_log('wait for barrier') - barrier.wait() - should_update.clear() - barrier.reset() - update_cnt += 1 - if update_cnt // save_interval == 0: - self.save_model(env_name) - should_stop.set() - - def send_param(self, param_pipe_list): - params = self.critic.trainable_weights + self.actor.trainable_weights - params = [p.numpy() for p in params] - for i, pipe_connection in enumerate(param_pipe_list): - pipe_connection.put(params) - - def update_model(self, batch_a_grad, batch_c_grad): - a_grad = np.mean(batch_a_grad, axis=0) - c_grad = np.mean(batch_c_grad, axis=0) - self.actor_opt.apply_gradients(zip(a_grad, self.actor.trainable_weights)) - self.critic_opt.apply_gradients(zip(c_grad, self.critic.trainable_weights)) - - def save_model(self, env_name): - save_model(self.actor, 'actor', self.name, env_name) - save_model(self.critic, 'critic', self.name, env_name) - - # todo load model diff --git a/dppo_clip_distributed/dppo_infer_server.py b/dppo_clip_distributed/dppo_infer_server.py deleted file mode 100644 index da57268..0000000 --- a/dppo_clip_distributed/dppo_infer_server.py +++ /dev/null @@ -1,135 +0,0 @@ -from rlzoo.common.policy_networks import StochasticPolicyNetwork -from rlzoo.common.value_networks import ValueNetwork -import numpy as np -import copy -import pickle - - -def write_log(text: str): - pass - # print('infer server: '+text) - # with open('infer_server_log.txt', 'a') as f: - # f.write(str(text) + '\n') - - -class DPPOInferServer: - def __init__(self, net_builder, n_step=100, gamma=0.9): - self.critic, self.actor = None, None - self.net_builder = net_builder - self.state_buffer = [] - self.action_buffer = [] - self.reward_buffer = [] - self.done_buffer = [] - self.logp_buffer = [] - self.gamma = gamma - self.n_step = n_step - - def init_components(self): - networks = self.net_builder() - assert len(networks) == 2 - self.critic, self.actor = networks - assert isinstance(self.critic, ValueNetwork) - assert isinstance(self.actor, StochasticPolicyNetwork) - - def _cal_adv(self): - dc_r = 
self._cal_discounted_r() - s_shape = np.shape(self.state_buffer) - s = np.reshape(self.state_buffer, [-1, s_shape[-1]]) - v = self.critic(s).numpy().reshape([-1, s_shape[1]]) - dc_r = np.array(dc_r, dtype=np.float32) - advs = dc_r - v - advs = (advs - np.mean(advs)) / (np.std(advs) + 1e-8) - return advs - - def _get_v(self, s): - return np.reshape(self.critic(s.astype(np.float32)), [-1]) - - def _cal_discounted_r(self): - discounted_r = np.zeros_like(self.reward_buffer) # compute discounted reward - v_s_ = self._get_v(self.state_buffer[-1]) * (1 - self.done_buffer[-1]) - for i in range(len(self.reward_buffer) - 1, -1, -1): - discounted_r[i] = v_s_ = self.reward_buffer[i] + (1 - self.done_buffer[i]) * self.gamma * v_s_ - return discounted_r - - def _get_traj(self): - traj_list = [] - for element in [self.state_buffer, self.action_buffer, self.reward_buffer, self.done_buffer, self._cal_adv(), - self.logp_buffer]: - axes = list(range(len(np.shape(element)))) - axes[0], axes[1] = 1, 0 - traj_list.append(np.transpose(element, axes)) - if type(element) == list: - element.clear() - traj_list = list(zip(*traj_list)) - return traj_list - - def inference_service(self, batch_s): - write_log('get action') - # write_log(self.actor.trainable_weights) - # write_log(batch_s) - batch_s = np.array(batch_s) - batch_a = self.actor(batch_s).numpy() - write_log('get log p') - batch_log_p = self.actor.policy_dist.get_param() - return batch_a, batch_log_p - - def collect_data(self, s, a, r, d, log_p): - self.state_buffer.append(s) - self.action_buffer.append(a) - self.reward_buffer.append(r) - self.done_buffer.append(d) - self.logp_buffer.append(log_p) - - def upload_data(self, que): - traj_list = self._get_traj() - traj = [] - for traj in traj_list: - que.put(traj) - # print('\rinfer server: updated, queue size: {}, current data shape: {}'.format(que.qsize(), [np.shape(i) for i in traj])) - write_log('\rupdated, queue size: {}, current data shape: {}'.format(que.qsize(), [np.shape(i) for i in traj])) - - def run(self, pipe_list, traj_queue, should_stop, should_update, barrier, param_que): - self.init_components() - data = [] - for i, remote_connect in enumerate(pipe_list): - write_log('recv {}'.format(i)) - data.append(remote_connect.recv()) - write_log('first recved') - states, rewards, dones, infos = zip(*data) - # states, rewards, dones, infos = zip(*[remote.recv() for remote in pipe_list]) - states, rewards, dones, infos = np.stack(states), np.stack(rewards), np.stack(dones), np.stack(infos) - write_log('before while') - while not should_stop.is_set(): - write_log('into while') - if should_update.is_set(): - write_log('update_model') - self.update_model(param_que) - write_log('barrier.wait') - barrier.wait() - write_log('befor infer') - actions, log_ps = self.inference_service(states) - write_log('before send') - for (remote, a) in zip(pipe_list, actions): - remote.send(a) - write_log('recv from pipe') - states, rewards, dones, infos = zip(*[remote.recv() for remote in pipe_list]) - states, rewards, dones, infos = np.stack(states), np.stack(rewards), np.stack(dones), np.stack(infos) - self.collect_data(states, actions, rewards, dones, log_ps) - - write_log('sampling, {}'.format(len(self.state_buffer))) - # print('\rsampling, {}'.format(len(self.state_buffer)), end='') - if len(self.state_buffer) >= self.n_step: - self.upload_data(traj_queue) - - def update_model(self, param_que): - write_log('get from param_que') - params = param_que.get() - write_log('assign param') - for i, j in 
zip(self.critic.trainable_weights + self.actor.trainable_weights, params): - i.assign(j) - self.state_buffer.clear() - self.action_buffer.clear() - self.reward_buffer.clear() - self.done_buffer.clear() - self.logp_buffer.clear() - diff --git a/dppo_clip_distributed/dppo_learner.py b/dppo_clip_distributed/dppo_learner.py deleted file mode 100644 index 32e8a46..0000000 --- a/dppo_clip_distributed/dppo_learner.py +++ /dev/null @@ -1,129 +0,0 @@ -import queue - -from rlzoo.common.utils import * -from rlzoo.common.policy_networks import * -from rlzoo.common.value_networks import * - -EPS = 1e-8 # epsilon - - -def write_log(text: str): - pass - # print('learner: ' + text) - # with open('learner_log.txt', 'a') as f: - # f.write(str(text) + '\n') - - -class DPPOLearner(object): - """ - PPO class - """ - - def __init__(self, net_builder, epsilon=0.2): - self.net_builder = net_builder - self.name = 'DPPO_CLIP' - self.epsilon = epsilon - self.critic, self.actor = None, None - - def init_components(self): - networks = self.net_builder() - assert len(networks) == 2 - self.critic, self.actor = networks - - assert isinstance(self.critic, ValueNetwork) - assert isinstance(self.actor, StochasticPolicyNetwork) - - def a_train(self, s, a, adv, oldpi_prob): - """ - Update policy network - - :param s: state - :param a: act - :param adv: advantage - :param oldpi_prob: old pi probability of a in s - - :return: - """ - with tf.GradientTape() as tape: - _ = self.actor(s) - pi_prob = tf.exp(self.actor.policy_dist.logp(a)) - ratio = pi_prob / (oldpi_prob + EPS) - - surr = ratio * adv - aloss = -tf.reduce_mean( - tf.minimum(surr, tf.clip_by_value(ratio, 1. - self.epsilon, 1. + self.epsilon) * adv)) - a_gard = tape.gradient(aloss, self.actor.trainable_weights) - return a_gard - - def c_train(self, dc_r, s): - """ - Update actor network - - :param dc_r: cumulative reward - :param s: state - - :return: None - """ - dc_r = np.array(dc_r, dtype=np.float32) - with tf.GradientTape() as tape: - v = self.critic(s) - advantage = dc_r - v - closs = tf.reduce_mean(tf.square(advantage)) - c_grad = tape.gradient(closs, self.critic.trainable_weights) - return c_grad - - def update_model(self, param_que_list): - params = param_que_list.get() - for i, j in zip(self.critic.trainable_weights + self.actor.trainable_weights, params): - i.assign(j) - - def run(self, traj_queue, grad_queue, should_stop, should_update, barrier, param_que_list, - batch_length=10, a_update_steps=1, c_update_steps=1): # todo a, c update step - # todo max episode - self.init_components() - a_grad_queue, c_grad_queue = grad_queue - batch_data = batch_s, batch_a, batch_r, batch_d, batch_adv, batch_logp = [], [], [], [], [], [] - - while not should_stop.is_set(): - write_log('grad_queue size: {}'.format(grad_queue[0].qsize())) - if should_update.is_set(): - write_log('update_model') - self.update_model(param_que_list) - write_log('barrier.wait') - barrier.wait() - for d in batch_data: - d.clear() - write_log('get traj_queue {}'.format(traj_queue.qsize())) - try: - b_s, b_a, b_r, b_d, b_adv, b_logp = traj_queue.get(timeout=0.5) - batch_s.append(b_s) - batch_a.append(b_a) - batch_r.append(b_r) - batch_d.append(b_d) - batch_adv.append(b_adv) - batch_logp.append(b_logp) - except queue.Empty: - continue - if len(batch_s) >= batch_length: - write_log('batch data collected {}'.format([np.shape(i) for i in batch_data])) - for s, a, r, d, adv, logp in zip(*batch_data): - s, a, r, d, adv, logp = np.vstack(s), np.vstack(a), np.vstack(r), np.vstack(d), \ - np.vstack(adv), 
np.vstack(logp) - s, a, r = np.array(s), np.array(a, np.float32), np.array(r, np.float32), - adv, logp = np.array(adv, np.float32), np.array(logp, np.float32), - - # write_log('update actor') - # update actor - for _ in range(a_update_steps): - a_grad_queue.put(self.a_train(s, a, adv, logp)) # todo 这里待优化 - # write_log('put a_grad_queue') - - # write_log('update critic') - # update critic - for _ in range(c_update_steps): - c_grad_queue.put(self.c_train(r, s)) # todo 这里待优化 - # write_log('put c_update_steps') - - for d in batch_data: - d.clear() - diff --git a/dppo_clip_distributed/dppo_roles_func.py b/dppo_clip_distributed/dppo_roles_func.py deleted file mode 100644 index 7900a45..0000000 --- a/dppo_clip_distributed/dppo_roles_func.py +++ /dev/null @@ -1,84 +0,0 @@ -import multiprocessing as mp -from functools import partial - - -def build_network(observation_space, action_space, name='DPPO_CLIP'): - """ build networks for the algorithm """ - from rlzoo.common.policy_networks import StochasticPolicyNetwork - from rlzoo.common.value_networks import ValueNetwork - - hidden_dim = 64 - num_hidden_layer = 2 - critic = ValueNetwork(observation_space, [hidden_dim] * num_hidden_layer, name=name + '_value') - - actor = StochasticPolicyNetwork(observation_space, action_space, - [hidden_dim] * num_hidden_layer, - trainable=True, - name=name + '_policy') - return critic, actor - - -def build_opt(actor_lr=1e-4, critic_lr=2e-4): - import tensorflow as tf - return [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] - - -def make_sampler_process(num, create_env_func, should_stop): - from dppo_clip_distributed.dppo_sampler import DPPOSampler - process_list = [] - sample_pipe_list = [] - for _ in range(num): - sampler = DPPOSampler(create_env_func) - pipe_a, pipe_b = mp.Pipe() - p = mp.Process(target=sampler.run, args=(pipe_a, should_stop)) - p.daemon = True - process_list.append(p) - sample_pipe_list.append(pipe_b) - return process_list, sample_pipe_list - - -def make_infer_server_process( - build_net_func, sample_pipe_list, traj_queue, should_stop, should_update, barrier, - net_param_pipe): - from dppo_clip_distributed.dppo_infer_server import DPPOInferServer - build_net_func = partial(build_net_func, 'DPPO_CLIP_INFER_SERVER') - - infer_server = DPPOInferServer(build_net_func) - p = mp.Process( - target=infer_server.run, - args=(sample_pipe_list, traj_queue, should_stop, should_update, barrier, net_param_pipe)) - p.daemon = True - return [p] - - -def make_learner_process( - num, build_net_func, traj_queue, grad_queue, should_stop_event, should_update_event, barrier, - param_que_list): - process_list = [] - l = len(str(num)) - - from dppo_clip_distributed.dppo_learner import DPPOLearner - - for i in range(num): - net_func = partial(build_net_func, 'DPPO_CLIP_LEARNER_{}'.format(str(i).zfill(l))) - learner = DPPOLearner(net_func) - p = mp.Process( - target=learner.run, - args=(traj_queue, grad_queue, should_stop_event, should_update_event, barrier, param_que_list[i])) - p.daemon = True - process_list.append(p) - return process_list - - -def make_global_manager( - build_net_func, build_opt_func, traj_queue, grad_queue, should_stop, should_update, barrier, param_que_list): - from dppo_clip_distributed.dppo_global_manager import DPPOGlobalManager - build_net_func = partial(build_net_func, 'DPPO_CLIP_GLOBAL') - global_manager = DPPOGlobalManager(build_net_func, build_opt_func) - - p = mp.Process( - target=global_manager.run, - args=(traj_queue, grad_queue, should_stop, should_update, barrier, 
param_que_list)) - p.daemon = True - return [p] - diff --git a/dppo_clip_distributed/dppo_sampler.py b/dppo_clip_distributed/dppo_sampler.py deleted file mode 100644 index 65ea98e..0000000 --- a/dppo_clip_distributed/dppo_sampler.py +++ /dev/null @@ -1,55 +0,0 @@ -import multiprocessing as mp - - -def write_log(text: str): - pass - # with open('sampler_log.txt', 'a') as f: - # f.write(text+'\n') - - -class DPPOSampler: - def __init__(self, create_env_func): - self.env_builder = create_env_func - self.env = None - - def init_components(self): - self.env = self.env_builder() - - def run(self, pipe, should_stop: mp.Event): - self.init_components() - write_log('---------------' * 10) - state = self.env.reset() - done, reward, _ = True, 0, {} - write_log('going into while') - while not should_stop.is_set(): - write_log('sending data') - pipe.send((state, reward, done, _)) - write_log('recving data') - action = pipe.recv() - state, reward, done, _ = self.env.step(action) - if done: - state = self.env.reset() - - -if __name__ == '__main__': - from rlzoo.common.env_wrappers import build_env - import multiprocessing as mp - - - def build_func(): - return build_env('CartPole-v0', 'classic_control') - - - remote_a, remote_b = mp.Pipe() - should_stop = mp.Event() - should_stop.clear() - - spl = DPPOSampler(build_func) - p = mp.Process(target=spl.run, args=(remote_a, should_stop)) - p.daemon = True - p.start() - - while True: - s, r, d, _ = remote_b.recv() - remote_b.send(1) - print(s, r, d, _) diff --git a/dppo_clip_distributed/queue_data.json b/dppo_clip_distributed/queue_data.json deleted file mode 100644 index 2058e92bfe09b3dbfbeb372356e8056f499e494c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 94690 [94,690 bytes of base85-encoded binary data omitted]
zqQ9l;A;fEJHR1dOrTT|1X)P{B=2tseV#;6%p;rJ4Y8to-}oo6@)7CHqA>KrIT1-70;*Y=m<0D?Ce_irS=+2a2y2H_nulG*a<5%<#x_oq8DIShn z=oQq92G0lEs|8X(Imtd^MR_mWNxx6KQCy33_QRz-pQuh<+*b=rPiKJFp*R8diDCF0 z&&;@rtsXl+h#qZC?!^*rcD0A>BnVtkbH3x+4{RN3g(p@u;ENS^y?)wuET zy`aYJArg1HVTbR>j1BG71D~C|;uznImS1eo`jzH@YYE7-u{FT94P6T}ZZx8|RKv5- z&~6MWliK?(Jr{ngQ1}0G_#@CNe3sM*G~pM638CX7&8YC;SY7bLeAw?!2S=*2U5IbC zcu6MJk>O0`e3m0+SXc7NzB#%8772=pH}JfJk=M4(sSBua#rOB`Jk)~wS~5*4#0sH5 zx+F-4p$i=D-KrONqaG;fvTJdC?Z$P%Pvb3Oia^`6pP8ID2>cSCwn)u90GF*%Us6oM zv2}&6<*kE(prX>VDn;@c);6X88hmCB8*((QHeD~r4^Lmdoj3Rrio@kEKfhWHI`Y|* z47b!kqSIC@<3l(~G@N?P@rl~co3&zre6azf#jDsSRLX($ulJnHw&nQw*8QY`p?WM4$%@CkS|yVcUbq>D7f09TAROS&-_T-6TI`%O)?4?6fI`D17g5rTkO54 z8}=A1A<3^m5phVKbvWB* zsq@j0`%7bVyQnSt^nu>7yD>OMgf%NIj)M>uu?s(66$5{lwYtYSdwj&dExPb^1-hmz zH{{`Z4IIUy=Y|zJVBEdWWTE+UH2%=%UT`cHcdZ$`x+L-qI58E3YYUe{l=AHsr*P zdPD@0dtaJtC9`qWAmhGIaY49@X#;HWVEM zTlewsgk#XbJ#AK*5qL~mOhWWFmA5wr@Sl1dijk2945|gWKvAc7%?zVZ#b9wnlX4p} z`kp$jFdU3>Tn-akDPo9dj-lz3Q}HO8MXGA>7b9P%E7PQE2s-qp{mPjt!%Ms0JLV`* z1fmnE8Sc%=;K-p7)rMhsj5%^UUVj z07YDg-@JXrU<}FzZYcaT)=kxek{14ElYlP5l~1fT_u=GqGFZ$h3^!hP{IRoe08Gt@ z*yBUV$nc=cu9Vu=a#(|{?q0MPUa)R{ai6IhOrP4vg|AP?0b#zjP@X>2RO<^pIcqZ+%4|?f(D=?L-+j2Lt3mDjrFWq2H;--ZQomHvv z$G4x}hYvi!H{n+}eqC$2u^yc*(wEpgB~`0{;_0#m^i&IUG1|ku3{Zr zm*L=pOV;NoMV@&9S63$7SJ>+hUZLq{k_z+TrW*H+a9$r=_WjY@3qw`-LvMUWH#QIs z)LeMKM!Nw7LXO@RYjGivWwP%({5z=W449eVDz zgQ!>Nvzg>jNO5GXJa?fR6Bo3W?Ffs(=I@(a3ynU*!L1eT4;K1EWAm6|I)5`Jd0Ba0 zstUmxqkUPvTi(O7+rvFhk?t_%F5P>I>a4uvwtq8&VgSD8rN6aO&xJd{xyj#=a9 z;q!h?GZI;6zAIDpI8QZv+2PyVLVbS*KjomCfLQw!d*dGJ_oMZ}%MPbpF>&eehA^3I zIB~^fU&KvSXq@tA@6~rhpAxgoLN;ftU;9j8O?@8DKSzI1Uw4M;_*Ob1ys;ER=#37I z@2T-j=M^Wo=umx9EOD${2YCbIg6n5oamlrmE3Z6Vu-dDz%aFMM4c9p=TJ=N^AD$8Y zbg`oV_%7QioTzd`D^6FI&5PR5^+t|$8rv>-rhmARakv2@Y7ckr(Dz5~4r=QWF z$cZh={vkxlcpT^y>w=Z@92pk4hojbw#G>?7!{{oefga8tu+96 z_cxXbTOR7gNQWCcO2qs@q2hqH;g&*>b~qLE>{JSNu`0ARI(MK`k%VIFU?2$OmiE1< za6;ve)dRA_X}C){Ww_^?99a|;_QR(v8G&3P`Zce z-*(FnI?ip^k2JN%1x~zjMf@2UqH4>_pIL*Ap!lO($PZ)|dInzISb_%rgVv=~ebLDu zhxLXiqO3*2YVcm<1uTtj;(2$QP!e-Y*}lKUKIa|U&cdbeVNtF8s(fc4H0!pg^mpNj z9mkg4=*~c&5Yxx|_qKuT@-v&Zp9sf*Q2RI5J-ujJzE?cIKNo^l*sO}rpMZq5SG7;L z#p8-AL#Go~4dIo-pvUHK3&C7}Z)@lFpP+N3|I|%3dwh4{Dedr^C`h!VpK@BIg>;{V zqV~V!F(q`{cq*4IPL!;@aFXiWoIn0^<&dNnW-raMc=f6pXzpkdw$>4sSgqb>nHdH1 zhSHUG-_`>LzFW*pVp{m-YBJve-Y4ihV0h)-)(WsZ6+9ENNDnM}dLQdmS7Yf|h?1j{ zIWm_d{aWyvs(>hG+*WS6lXZ^C1_4i`A6q$l~Jk- z*uCtNyCK^HdyMHct06xeK;4jK<^x!+zRP0sd4JFhWlR3b(+Z^ZqGmSF2yE53vYG2z zH>N~fPm|mk0^Wl2eZ-V|V3T*yvX@11xcY9-g__-iIHhGjJsKMY4R`rVY`pq_JmctT zzC0O6`u3J}JCEW=Fo|dKPK3j&*6=v1_rQ*5AIY~$Z;>gf@SOLfFDRXoaVn-M8Nw`F z_C-haf#pqmzBPg*)+c>8y>?>^zpdG*6Tp)JDbBn}b*4kWA1ESKqLzcfI=mV8L#L?y zB|0|0GzjoM?0e^AZXbx=s81ihUx1UH)e(!{jw8v$$L&`~2d4A~;T=zgfa!<-qtu~d zXQZ)mC9b}vYcFBtRgXyPrOS&w&@!o?K zJ3RElK-$cOIkmMLzU_M0#P#SYm>yvcoADUHTMYBnUl+u{-G)9F`!#)Vr`U13Ej4q?kiE!aD4+2AEX2Cwsb?qwow(vEPo7p0Mj-m16aG;$BIUzK$m38;Y#j4 zsBKPLZ!;BvAN^`0hL?=us=^3kh5iii7XLNUMb*2T4xCm{_k4qDY%A!KqZ3qJrNmIx zH6(D42wNrb_dy^_roKPCMJ?W{bG$*LxUr~p_sz}OKpWD&zg=_)*81tUoOyzH{1p2> z*MdnLtVqbprsYETwUh`y=mTB1x2MfAbMcAH_Cw#uF_g}Y7CTd)56pJVq7f^4;Kfe* z>4QrO(RF%$PjAl<4l8ewdT2;>a#Y`1bK_bca0_iK5~fof?j&^5&uD%_yPFFW49X%w zE2cm-SK1d7NhPPv!>%Y5LF;>QVUbvm zL42(>>Jcxst$Zl{^$j|<1BA9TtlGZ zFz(l(83=nW~boUd=J7LF=*WIR-R3Ok82 z&x2f>@lLgT@VnqZu<;vSoP%yfuMeE z>rQd3mN&!H-itW>t{0!h+N!i2P5>eCChv}%QMmP8=;8f(FI=bh%u3U<8y}}~T`w3- zg&d~~rS#%2&}Bc&YL^gvU25&c9gD3QS~@Zz(ljWr!g36>CB|YFeWM7;<^|Jx zIfhWBu4LRNoeqW9dD%2$rXViXmHpX;D`6cjIQ`XL0tp|D^wSz|FEi*x;d(`W6oIQKYn_1U<&I5%!q|6AOweKzjD 
zi?ieZzs7%hA9K$)8~;!C*|=Gp8*d~_d5n!tQSwi!lcSX1sZO3kWg`Vj{z-L;zg_O+5|9oP|e|^;dd7uBe55Mogf98Ra5|zA+jZRbYPpVV?ozJLLcZNb`BNa;iNp)v` zyZ#WRDCPgf>wk>;%h3PD{J;G9Z=b~I95u?==sYF=T+Iba`F%C26e=5Cq~xEgx%Auh zFQU}bWpNO9&J8YWbmQ~vSg0~}0@akobmWYLVHbO_Wi^0XRJ)0Xc@}_YRbln#AV^8j zhqFIUf?$R}SOs5%4$J*ev&0L8t{j0;6COAx<^gMd?gXpsc_8WL0@1hG!SwSxsXk8! zIP~J1lxmNI)Z}_Qc&U>n_3{08$-%(~(EiL^N}A*)_I2pO34$g`(>h0fxpNup1p)~s zpGvYI;TQy&+#xF?c@ai-gHB2fd7f0m`VDKTy^=h*Ys?k<0w$%(&IiM zX?iUgPPI~?Cl5(p6ym|dyMp2NvZgQG0Nk)yVV@)Fa;QzOYfWmgI;}LcbH9 zFdEH=X>x6-VdMrbBUkXsxkYr-a3^S>-p88v_HfY81}s5$3EemVc90fJ1DwXbyaH4fR6!~() z&jr7voLA3>@%Ia1&#^M8kFP7Go=(1#DvYp`T6*BAl+xwLQk^a=Qh_@bN-=I;F6F8s zMw`g`LMy%0bMWwy!{n#JljN6m;^f(EIvL27M{d5BPa0lWgIh0gq1@;?T->UPAA?Wh z=iQgE<&g^-2RR{8>w=3PCgSmR3Ao}xGTKm&l*clfFhimZ6MUGc3Li{#tzjm*ks3A0 zjEyc+^3RmK@_XJ%0}vx{MVHHO_aL!+xah|6w9)U#A|z7f~sgmD^qz)>vmQm zUQHMfDvdF;AVfWC5 zh{=0O1QkD_sY_N6^DbEvt@D`33qL!EliYly@NQo6emX1JbFPGVdUOYQ*PfF+qrQ?1 zOim*nO)`=STxqe$2btI{Hc0<%(sFA)cZ<%wv&$F%VY{VbvOeFTRS3vvHx zBjF?$MdYWl6IF>lw9kogvKnebPlC)xB1ahB!1EsO;NR`ktlZgCW+eYmiccD2NqSg*!U9BpDGrK!qR!<#G#hw;sX zul)k@juDOEF?JyKWo#j8!-{A(g{!kH3dMb(QdwYOuN3Rju!W8AE8m7Nn4xeLCg9^ z5^ciHMDo6EB>U4ZL|7%&yG?LAxkj6xJeTr?sGKP#iUXe#Sqc0UHKd+4n3zgi# z3Oz|&IKoO4h4|A(bl8bTvkOFo=xbtZc``AswUB&8$dQ#cBIJ>NGcxp*cm1Py~O(g%gPb{@6%2JK@&sr$zNgT8@B5n@&XXzgB%i5^$ zf=Jc9O$c`{BO_bi6P?kgNtF^_@~+-eGPu8q*f?>Tv`^YVCf#r%j~bjNi=EGssZHz2 zNU5jfdAHN#eD~L+s;f15eWN>h^OrJt*gA#OXt5#t6a{D^TV=AU+C7M?U*w1nUs=gx z7fOh?-g2aX1Q&U-&7ORtdW?LZew;j9%}!pAvLgqkq{&FTFtU5?T~hG6J=q*|h|Kkm zAcuDuk))I(*|PpH8P#x!3`%1q^9o%_sh-26+#+|{S9vK~MdVfDsMT%4Zj%M=ex(5I zjPZHm=@T8|rkgU2opTMXBl9$&eM^JTJ4Fym5~W19|8YX(gbJY@`#vi(=|$EttCK{% z$7$louS`NAA(z;)mzk9AUrP4w6eJa&ijqIAm`N+kMPz;I0a9{-Ao(a-oh+MwiIim& zC!_gyl13U-@-jBkq~vd+l-BReHcOPc{yVQxNl%+XWuqIvUH_9PrSseQFQSxtF_(bj zfId8ldc9*W-zOOH=+&y$ivyd+El##$eb~aDSefhW4z&K90~{9zVYB95`KZ!RI2EUO z#=d2Lk|sM6p5_X6%Qdg8XXr!T^g#`tWig;AFL*{=We^xz z(hN;5!RyOf=7rrI5U}OAabkEPvMF6C*K_N{?{`$!`P-1Nwz_%k-tC`2K0d4Z z*u7*FW~%-dp}8ruFD30k;HGBw+G>Syo|hNTRPs3-X5^3ZUlo)j4w95p9|Y+ z*vYdk!_dcgdiSOe6tRq|C15Z6Gd?+m9dGN?)Ud0za>^R|3 zJQ0RUrH{+1^L(M~mT`E@J8B_VCB{`aD<5T+v!_<2h9Re8p5pa~H9)tsX9-vy1~Xf) z$j2%*V8znCn*_asadYg^J>k?2bz3X>i?39I;NrY2*`%&cYzj>~JtXXnqV{7`tZDtQ zQeo4|PGL`2Vm|u7W&LN28<{AW>AQ!c`+{0N>&rH* zb%$R9Qn#d5PQVM1b-Wi9oKZsMf|;Dc2woJGa6eZa2@$=gu39urLwMvwT9s1>>IX5W zKcja1{*Y`o^sA;8{_kEKUVM$=klMkouKF7j@fp{1>GdijXnM$ZaEk(hZP)aR6ssQ~ zryke+X(kmL$JJ{i?|;E(MjfjU3g>|YLl@7fdy|kRWL%xtmWg^gwFfn1KjTw>rg&!F zB1n47SR@!U1}5i!DvZiT;b#XsZ|^*3yz{EahN;mE)zbtSmIbI{nxBb+l|d9T!jzis zp+ZpYD3X3RX@=VmuV^wL0gzGn(&z?eDYR5g&2!S!m;ZjL?cR zx{?}!Uk?TyJ`q!bx2hFwYcEg@BnpO_ZFsBUz|za9rVUZZ`!iW*RYen;1{A84Kd`_H z=YtPr%6GvJF1ua+lX19IL;sr4mTrnvD83@<@?9`-tV9OCe(lbghgbC%*0k9zXeka5DW((1M>$=zECOxjrr($_{EiKYFPL*4*d*N&kS@pZ3`L2=$=* z45OEL*q2PWo9Z>~^pY+_hnD2beB*0F(Dc5^DwvBRN~RxV+H|!UXEuwzNU6<%ce?~_ z-{NQB5ozU%-<*d>42AqkmcPS$p-Q5z6?uSPV9D3X4hYO0-f}S|57m9YyYyc6N5O5h z`}{i#AR2N|Jv#buab*(U2wZG%2iZ_&c%ld8&=)?mF!vf3&hPdod z-Mu>}d!S4*?GyKU2iSGjibbC6LxJOCtGrfPLqN;v^6M*FflJYTO8%fRwrUThT1j`} z(MtXp>BnxM=%cnZdEv!EN0GzT)vh%Gg(b_*Yo$VwY8RXjbh)796U_)7Hw7fd_ zWo{g2aOp$_QQuR!C+N6aEy>!YzX(T6cb`+$utCPd-|h94E7A2ryXVCsI%>JQMLP^N z;>Fv3qfXTSmIOAetrM>fhCM2Wg=vM^$i4W9)gtQsOV`BRaJ}J-x?`5VPQ=v$$D3si zEyr?k&BD^}UBjPIUvpk1gQ_LoDjduI;o1!=Uur+dJerTpW4WA9_I|~CJ92l-Bkw_{ zLh`n^tphNiGtGB1uMkV(jZ|M#<2ThZrJeZf4OgTB+WEb{={LXVfW1)|$s#{679(KG`uq?k5S?!4+R2@cZg)wcske(z`Odk)szM zHhh1$ds7BjavH1)dGHaKlU|TKRAsomI_>c*I=b+bt`NUAEekkw9`8y!6EFaL3{s}0vvbWQ=$I_x+(rT?AL9hrxRzv>@B;PRPT?tU#7 zud+Dr+Gq z*GC&C_7@-rvvhOky*fPM0MU=SUg4z&eBLk3NRTVAo%d{8E_4c*Hgi6z!*AVGY1FM{ 
z=%(%SGg}hD*?>F6>}eYu&k%T+R#Jz*-e;dnsSDvtZ<5>*qX8fK@N=ISkZoM14q7Kj}r7iowHxd*Z za}$M?_XU zVhRny?|UrV%O;B9vG6awsAFm9_Q+$1op%UhuC^S?mMsE?^5>nBYo>u-&;F%S%NGw; z+>yO`G71i5ejHLy@&v6Hcf(VDd7+c#&CvC?e2`_o)4Y*+6~MatV)rWvPk16eyxN6U zgQp7KUSC3ezm61*|NK1J0^Dzx-wtee3Q5;B#qt((<1}mNah?VCI1+sIki~F0FcTu0 zN2jiXt3mfVzXdfYxU+Aug~WYCxlEs*V=bV<6*c)eLlcV#}`((ea)`NRI zLMt+<@nc$V%zMh8p^x+~J}>4POj%1y*c<-{YGt`obbe63y9zVx>|gKMGFHH?U0rn3L{C&}HDVN_HV~{*JRwWkxd3;k#bl)6Q+i-6;P|Oo~&?8^SC(H%?0Tz<&KBGy39TxY93nAIIuJ6g>4}^VQWM@KLYJ zc{fKZwejNhRE1MK=CH>_%JB7|sIvV}ey<4N7q(uUA>RYd#}`F%&AdU&%8b@kY+tbU z$cYUGobgaS9V5MaZ!0{I)d;>{mWJ9F_2T;VKOoDgg3iIxM2Na_V>raM0(^>h4he~8 z;`+N2^8F0WXo5Dr+m)%t5C%3X_dlP zgWX?X1X&(orxN>Tst z1->UA-@p}dCsxJXmB9PL)|@k#Ku_t{Rl3(ZakbBpO?CT{L6o)cg~9FD@O0(YXwzt)N+dF06t8nwzZm<{aBZcJMVnC;5I_Xpxm}?3LC?a@ov;cm&zpg7MRGp z`g1MX&>3HxU6PGUoz5n0%qju*EI~}f_<1x9J`5r8}7HrLZQG$Knx8B+AHj0Wz zPaczc5Q0IX!3;q<8gS@CUE^lIJDBk+yKJ|L53CGkmAGgUf==>-Z%!U9hJzeOEejR( zv3`Qr@0)BD{@Soqw8Ah5zi`SgxWwB8Z^|_t_g}ZcSH1@8t}N+7k80L-nI<31@p^k= z!>MAh9%OtMf-f-7AonqYOBEIvMZA|Pdx3P5h--D(+HkaZ%5TMGKYV2So;Nwc0VJ5Y z&b}sJ;HZ=8$}{doSh~|~S0|4j=6{2j z5O<07!Nxn-KOnp_)6WOh1Va~8+$)AFw#_`Bd0*hFXZyERYt*3AuDG`yPhQ}T8x0M| zC7Ph)N&OARvwpZ$*5mf+!`-MGv4dlGuLB-vJn+bip%}?T{F{t4oic~7qwsiO3kwyu~+Y> zp!S0=mLH`jQO06bHtl0D41YF!y-c7Nq*e9=wTjV@=N?PX!D|kE6tnRxyJ8?u$yZ!(#)Q6XK_bxRt$U__vKOMl_Qwo0=i;8D!4*CS#_?W) zvRXM?3Z$HMm$K#QhVsj=W^ADlQv}@>4_z8YS?Pk%ybb9fSGhFgs9rDBr;ERKURZ*6 z%on_W-|`K|Vwfx(S5f^e&Zx0k3q^w6d?N+3kq~5fyQ!VDwZPj9+~y(@Pw-26N&Drs zxfrR}B1|X<9TrY#eWQ0$70UHG zx1@XGfz3ZfM8rFhD@wK6dYLC0T|2P(X=xw0EE>t&el-YdPQTV)I$no5B=g;EGPf}= zXLa@?u}(0$NiAj>8|hN=&x*g^@AWNe@#_|a%0~K>{PPgX;J0fIQHnDCKfY%Df5ZRB z!T*c%-yJ_2H#>Az|NjbSkNY>Ad!E0={deuN$DKQF);^1W^UvD<7B}mk#lOYP+W%dg zd%U^h=KAOAv+;B7v-&K~jhk!#|Jy(Jd~@}==l`4jPxgQE|I>MA{eQ#19e*}{u7B1( zi?jY&oVCy5tbG>e+Gq8-_)qb({=cEoZOUV8bcd3EQk@~C{7!X76e=6trR1MfXZ+js zAEFfXDg1|c@?TT&KlkB3_u=;)_&R5miBo7`&6iM}uzJ`a|0pOkYT#AL-55A1o7gmI>k~+BNHE68# zgy2j8iR|%Mzz{d+o?1?>$%zKF0T-B54%VP^bv?ieE8ID+2Crkc5?tK2kEw zp`cGyX41H|Ns1x)6?AlP!BBOBIO}(R_|d}vD(YLv2i*QZW37@pd*lr{UGED?Bkoc) zjY}}-f+xIi+92g+cMQkwctBB7j6@`#Ey@W$1KHI-2;>T-8ihE(7v}(SS5EV#H5a9Olps?7 z4D_EKpbD-_0RO(Duwmp3;GV58Ksx};vC@!Ku@v~MxM5mm4-8t4N_FVZgA#HbNP zIocG(G*Kj{5Mw)~TuPd>SPFH{|9;*|MavjVL`r0t8Zsmi4)b?iuK995AD*wj>;3ZF z?|s)a*#(I8?2cvF*iB11V)t@puIP1Ku4ur(%5M98ee%BW86&qq6sJ~9OmiQN2pLXy zb5vNJ^aIsz&Ly#67ON0HiYY}+vwe_$hP%wx>Hpq{DT z%Vsp{8LQ;~loj%4IUzT5uQbHFm(&f`;ZtxXqhl>5k(7}oH+DxhART6EPzbvE*zSb4ru-7Du?z$X;U--cTj&eK#R? zbs|ORXbPD22`bIj^m1o5h9!N3+L<0`jkln&KT6@{+Jg}718rwv@HPi=5tq0bYc!#sVgX%|ItmlB5Mgow z;*tYrrbA|&Rfcy>!{+%VRioGA zBo`W@L!09IFDaZ+(JRmm|;(*oedf3qhn?_%XOfjWN5?u-jXhW?|F*@e` z%p3Cbkk@I%DOKnp*>wbLTlu82_Pv~==piJ3N^s}RTKwkT$Qh(+!1zi(SEn6@A$%Iw zj94?AP5|d{JrDnc&xh%8JxX5Nh{F4U^pilJq%H?Yf2td`Scg;1+p#q7-UP06S6Ic8 zt9#LNHw-rGG^snd9V6n>;kG&$pN%ie_sunz<(S!mnb=^=i)#7Y(;~UA^I^OUjKcIZ zZPIk>MAlVrGB6oOhb~H}A;+77GEY&0S`xL+NvCi2deNfh DDPG (for continuous action space) > AC - -Advantage ----------- -Train faster and more stable than AC. - -Disadvantage -------------- -Have bias. - -Reference ----------- -Original Paper: https://arxiv.org/pdf/1602.01783.pdf -MorvanZhou's tutorial: https://morvanzhou.github.io/tutorials/ -MorvanZhou's code: https://github.com/MorvanZhou/Reinforcement-learning-with-tensorflow/ -Environment ------------ -BipedalWalker-v2 : https://gym.openai.com/envs/BipedalWalker-v2 - -Reward is given for moving forward, total 300+ points up to the far end. -If the robot falls, it gets -100. Applying motor torque costs a small amount of -points, more optimal agent will get better score. 
State consists of hull angle -speed, angular velocity, horizontal speed, vertical speed, position of joints -and joints angular speed, legs contact with ground, and 10 lidar rangefinder -measurements. There's no coordinates in the state vector. - -Prerequisites --------------- -tensorflow 2.0.0a0 -tensorflow-probability 0.6.0 -tensorlayer 2.0.0 -&& -pip install box2d box2d-kengz --user - -""" - -import multiprocessing -import threading -import time - -from rlzoo.common.utils import * -from rlzoo.common.buffer import * - - -# tl.logging.set_verbosity(tl.logging.DEBUG) -################### Asynchronous Advantage Actor Critic (A3C) #################################### -class ACNet(object): - - def __init__(self, net_list, scope, entropy_beta): - self.ENTROPY_BETA = entropy_beta - self.actor, self.critic = net_list - - # @tf.function # shouldn't use here! - def update_global( - self, buffer_s, buffer_a, buffer_v_target, globalAC - ): # refer to the global Actor-Crtic network for updating it with samples - """ update the global critic """ - with tf.GradientTape() as tape: - self.v = self.critic(buffer_s) - self.v_target = buffer_v_target - td = tf.subtract(self.v_target, self.v, name='TD_error') - self.c_loss = tf.reduce_mean(tf.square(td)) - self.c_grads = tape.gradient(self.c_loss, self.critic.trainable_weights) - OPT_C.apply_gradients(zip(self.c_grads, globalAC.critic.trainable_weights)) # local grads applies to global net - del tape # Drop the reference to the tape - """ update the global actor """ - with tf.GradientTape() as tape: - self.actor(buffer_s) - self.a_his = buffer_a # float32 - log_prob = self.actor.policy_dist.logp(self.a_his) - exp_v = log_prob * td # td is from the critic part, no gradients for it - entropy = self.actor.policy_dist.entropy() # encourage exploration - self.exp_v = self.ENTROPY_BETA * entropy + exp_v - self.a_loss = tf.reduce_mean(-self.exp_v) - self.a_grads = tape.gradient(self.a_loss, self.actor.trainable_weights) - OPT_A.apply_gradients(zip(self.a_grads, globalAC.actor.trainable_weights)) # local grads applies to global net - del tape # Drop the reference to the tape - - # @tf.function - def pull_global(self, globalAC): # run by a local, pull weights from the global nets - for l_p, g_p in zip(self.actor.trainable_weights, globalAC.actor.trainable_weights): - l_p.assign(g_p) - for l_p, g_p in zip(self.critic.trainable_weights, globalAC.critic.trainable_weights): - l_p.assign(g_p) - - def get_action(self, s): # run by a local - return self.actor(np.array([s])).numpy()[0] - - def get_action_greedy(self, s): - return self.actor(np.array([s]), greedy=True)[0].numpy() - - def save_ckpt(self, env_name): # save trained weights - save_model(self.actor, 'model_actor', 'A3C', env_name) - save_model(self.critic, 'model_critic', 'A3C', env_name) - - def load_ckpt(self, env_name): # load trained weights - load_model(self.actor, 'model_actor', 'A3C', env_name) - load_model(self.critic, 'model_critic', 'A3C', env_name) - - -class Worker(object): - def __init__(self, env, net_list, name, train_episodes, max_steps, gamma, update_itr, entropy_beta, - render, plot_func): - self.name = name - self.AC = ACNet(net_list, name, entropy_beta) - self.MAX_GLOBAL_EP = train_episodes - self.UPDATE_GLOBAL_ITER = update_itr - self.GAMMA = gamma - self.env = env - self.max_steps = max_steps - self.render = render - self.plot_func = plot_func - - def work(self, globalAC): - global COORD, GLOBAL_RUNNING_R, GLOBAL_EP, OPT_A, OPT_C, t0, SAVE_INTERVAL - total_step = 1 - save_cnt = 1 - buffer_s, 
buffer_a, buffer_r = [], [], [] - while not COORD.should_stop() and GLOBAL_EP < self.MAX_GLOBAL_EP: - s = self.env.reset() - ep_r = 0 - for epi_step in range(self.max_steps): - # visualize Worker_0 during training - if self.name == 'Worker_0' and total_step % 30 == 0 and self.render: - self.env.render() - s = s.astype('float32') # double to float - a = self.AC.get_action(s) - s_, r, done, _info = self.env.step(a) - - s_ = s_.astype('float32') # double to float - - ep_r += r - buffer_s.append(s) - buffer_a.append(a) - buffer_r.append(r) - - if total_step % self.UPDATE_GLOBAL_ITER == 0 or done: # update global and assign to local net - - if done: - v_s_ = 0 # terminal - else: - v_s_ = self.AC.critic(s_[np.newaxis, :])[0, 0] # reduce dim from 2 to 0 - - buffer_v_target = [] - - for r in buffer_r[::-1]: # reverse buffer r - v_s_ = r + self.GAMMA * v_s_ - buffer_v_target.append(v_s_) - - buffer_v_target.reverse() - buffer_s = buffer_s if len(buffer_s[0].shape) > 1 else np.vstack( - buffer_s) # no vstack for raw-pixel input - buffer_a, buffer_v_target = ( - np.vstack(buffer_a), np.vstack(buffer_v_target) - ) - - # update gradients on global network - self.AC.update_global(buffer_s, buffer_a, buffer_v_target.astype('float32'), globalAC) - buffer_s, buffer_a, buffer_r = [], [], [] - - # update local network from global network - self.AC.pull_global(globalAC) - - s = s_ - total_step += 1 - if self.name == 'Worker_0' and GLOBAL_EP >= save_cnt * SAVE_INTERVAL: - plot_save_log(GLOBAL_RUNNING_R, algorithm_name=self.name, env_name=self.env.spec.id) - globalAC.save_ckpt(env_name=self.env.spec.id) - save_cnt += 1 - if done: - break - - GLOBAL_RUNNING_R.append(ep_r) - if self.name == 'Worker_0' and self.plot_func is not None: - self.plot_func(GLOBAL_RUNNING_R) - print('{}, Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}' \ - .format(self.name, GLOBAL_EP, self.MAX_GLOBAL_EP, ep_r, time.time() - t0)) - GLOBAL_EP += 1 - - -class A3C(): - def __init__(self, net_list, optimizers_list, entropy_beta=0.005): - """ - :param entropy_beta: factor for entropy boosted exploration - """ - self.net_list = net_list - self.optimizers_list = optimizers_list - self.GLOBAL_AC = ACNet(self.net_list[0], 'global', entropy_beta) # we only need its params - self.entropy_beta = entropy_beta - self.name = 'A3C' - - def learn(self, env, train_episodes=1000, test_episodes=10, max_steps=150, render=False, n_workers=1, update_itr=10, - gamma=0.99, save_interval=500, mode='train', plot_func=None): - - """ - :param env: a list of same learning environments - :param train_episodes: total number of episodes for training - :param test_episodes: total number of episodes for testing - :param max_steps: maximum number of steps for one episode - :param render: render or not - :param n_workers: manually set number of workers - :param update_itr: update global policy after several episodes - :param gamma: reward discount factor - :param save_interval: timesteps for saving the weights and plotting the results - :param mode: train or test - :param plot_func: additional function for interactive module - """ - global COORD, GLOBAL_RUNNING_R, GLOBAL_EP, OPT_A, OPT_C, t0, SAVE_INTERVAL - SAVE_INTERVAL = save_interval - COORD = tf.train.Coordinator() - GLOBAL_RUNNING_R = [] - GLOBAL_EP = 0 # will increase during training, stop training when it >= MAX_GLOBAL_EP - N_WORKERS = n_workers if n_workers > 0 else multiprocessing.cpu_count() - - self.plot_func = plot_func - if mode == 'train': - # ============================= TRAINING 
=============================== - print('Training... | Algorithm: {} | Environment: {}'.format(self.name, env[0].spec.id)) - t0 = time.time() - with tf.device("/cpu:0"): - [OPT_A, OPT_C] = self.optimizers_list - - workers = [] - # Create worker - for i in range(N_WORKERS): - i_name = 'Worker_%i' % i # worker name - workers.append( - Worker(env[i], self.net_list[i + 1], i_name, train_episodes, max_steps, gamma, - update_itr, self.entropy_beta, render, plot_func)) - - # start TF threading - worker_threads = [] - for worker in workers: - # t = threading.Thread(target=worker.work) - job = lambda: worker.work(self.GLOBAL_AC) - t = threading.Thread(target=job) - t.start() - worker_threads.append(t) - - COORD.join(worker_threads) - - plot_save_log(GLOBAL_RUNNING_R, algorithm_name=self.name, env_name=env[0].spec.id) - self.GLOBAL_AC.save_ckpt(env_name=env[0].spec.id) - - elif mode == 'test': - # ============================= EVALUATION ============================= - env = env[0] # only need one env for test - self.GLOBAL_AC.load_ckpt(env_name=env.spec.id) - print('Testing... | Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) - frame_idx = 0 - for eps in range(test_episodes): - s = env.reset() - rall = 0 - for step in range(max_steps): - env.render() - frame_idx += 1 - s = s.astype('float32') # double to float - a = self.GLOBAL_AC.get_action_greedy(s) - s, r, d, _ = env.step(a) - if render: - env.render() - rall += r - if d: - break - - print('Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format( - eps, test_episodes, rall, time.time() - t0)) - - elif mode is not 'test': - print('unknow mode type') +""" +Asynchronous Advantage Actor Critic (A3C) with Continuous Action Space. + +Actor Critic History +---------------------- +A3C > DDPG (for continuous action space) > AC + +Advantage +---------- +Train faster and more stable than AC. + +Disadvantage +------------- +Have bias. + +Reference +---------- +Original Paper: https://arxiv.org/pdf/1602.01783.pdf +MorvanZhou's tutorial: https://morvanzhou.github.io/tutorials/ +MorvanZhou's code: https://github.com/MorvanZhou/Reinforcement-learning-with-tensorflow/ +Environment +----------- +BipedalWalker-v2 : https://gym.openai.com/envs/BipedalWalker-v2 + +Reward is given for moving forward, total 300+ points up to the far end. +If the robot falls, it gets -100. Applying motor torque costs a small amount of +points, more optimal agent will get better score. State consists of hull angle +speed, angular velocity, horizontal speed, vertical speed, position of joints +and joints angular speed, legs contact with ground, and 10 lidar rangefinder +measurements. There's no coordinates in the state vector. + +Prerequisites +-------------- +tensorflow 2.0.0a0 +tensorflow-probability 0.6.0 +tensorlayer 2.0.0 +&& +pip install box2d box2d-kengz --user + +""" + +import multiprocessing +import threading +import time + +from rlzoo.common.utils import * +from rlzoo.common.buffer import * + + +# tl.logging.set_verbosity(tl.logging.DEBUG) +################### Asynchronous Advantage Actor Critic (A3C) #################################### +class ACNet(object): + + def __init__(self, net_list, scope, entropy_beta): + self.ENTROPY_BETA = entropy_beta + self.actor, self.critic = net_list + + # @tf.function # shouldn't use here! 
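    # The update below combines two losses. As a rough standalone illustration
    # with made-up numbers (not taken from the code above or below): the critic
    # minimises the squared TD error (v_target - v)^2, and the actor minimises
    # -(ENTROPY_BETA * entropy + log_prob * td), i.e. it maximises the
    # TD-weighted log-probability of the taken actions plus a small entropy
    # bonus (weighted by entropy_beta, default 0.005) that keeps exploration alive.
    #
    #     td = 1.5 - 1.0                          # v_target - v  -> 0.5
    #     c_loss = td ** 2                        # critic loss   -> 0.25
    #     a_loss = -(0.005 * 1.2 + (-0.7) * td)   # actor loss    -> 0.344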
+ def update_global( + self, buffer_s, buffer_a, buffer_v_target, globalAC + ): # refer to the global Actor-Crtic network for updating it with samples + """ update the global critic """ + with tf.GradientTape() as tape: + self.v = self.critic(buffer_s) + self.v_target = buffer_v_target + td = tf.subtract(self.v_target, self.v, name='TD_error') + self.c_loss = tf.reduce_mean(tf.square(td)) + self.c_grads = tape.gradient(self.c_loss, self.critic.trainable_weights) + OPT_C.apply_gradients(zip(self.c_grads, globalAC.critic.trainable_weights)) # local grads applies to global net + del tape # Drop the reference to the tape + """ update the global actor """ + with tf.GradientTape() as tape: + self.actor(buffer_s) + self.a_his = buffer_a # float32 + log_prob = self.actor.policy_dist.logp(self.a_his) + exp_v = log_prob * td # td is from the critic part, no gradients for it + entropy = self.actor.policy_dist.entropy() # encourage exploration + self.exp_v = self.ENTROPY_BETA * entropy + exp_v + self.a_loss = tf.reduce_mean(-self.exp_v) + self.a_grads = tape.gradient(self.a_loss, self.actor.trainable_weights) + OPT_A.apply_gradients(zip(self.a_grads, globalAC.actor.trainable_weights)) # local grads applies to global net + del tape # Drop the reference to the tape + + # @tf.function + def pull_global(self, globalAC): # run by a local, pull weights from the global nets + for l_p, g_p in zip(self.actor.trainable_weights, globalAC.actor.trainable_weights): + l_p.assign(g_p) + for l_p, g_p in zip(self.critic.trainable_weights, globalAC.critic.trainable_weights): + l_p.assign(g_p) + + def get_action(self, s): # run by a local + return self.actor(np.array([s])).numpy()[0] + + def get_action_greedy(self, s): + return self.actor(np.array([s]), greedy=True)[0].numpy() + + def save_ckpt(self, env_name): # save trained weights + save_model(self.actor, 'model_actor', 'A3C', env_name) + save_model(self.critic, 'model_critic', 'A3C', env_name) + + def load_ckpt(self, env_name): # load trained weights + load_model(self.actor, 'model_actor', 'A3C', env_name) + load_model(self.critic, 'model_critic', 'A3C', env_name) + + +class Worker(object): + def __init__(self, env, net_list, name, train_episodes, max_steps, gamma, update_itr, entropy_beta, + render, plot_func): + self.name = name + self.AC = ACNet(net_list, name, entropy_beta) + self.MAX_GLOBAL_EP = train_episodes + self.UPDATE_GLOBAL_ITER = update_itr + self.GAMMA = gamma + self.env = env + self.max_steps = max_steps + self.render = render + self.plot_func = plot_func + + def work(self, globalAC): + global COORD, GLOBAL_RUNNING_R, GLOBAL_EP, OPT_A, OPT_C, t0, SAVE_INTERVAL + total_step = 1 + save_cnt = 1 + buffer_s, buffer_a, buffer_r = [], [], [] + while not COORD.should_stop() and GLOBAL_EP < self.MAX_GLOBAL_EP: + s = self.env.reset() + ep_r = 0 + for epi_step in range(self.max_steps): + # visualize Worker_0 during training + if self.name == 'Worker_0' and total_step % 30 == 0 and self.render: + self.env.render() + s = s.astype('float32') # double to float + a = self.AC.get_action(s) + s_, r, done, _info = self.env.step(a) + + s_ = s_.astype('float32') # double to float + + ep_r += r + buffer_s.append(s) + buffer_a.append(a) + buffer_r.append(r) + + if total_step % self.UPDATE_GLOBAL_ITER == 0 or done: # update global and assign to local net + + if done: + v_s_ = 0 # terminal + else: + v_s_ = self.AC.critic(s_[np.newaxis, :])[0, 0] # reduce dim from 2 to 0 + + buffer_v_target = [] + + for r in buffer_r[::-1]: # reverse buffer r + v_s_ = r + self.GAMMA * v_s_ + 
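                        # Worked example with illustrative values: for GAMMA = 0.9,
                        # buffer_r = [1.0, 0.0, 2.0] and a bootstrap value v_s_ = 0.5
                        # for the state after the last step, the reversed loop gives
                        # 2.0 + 0.9*0.5 = 2.45, then 0.0 + 0.9*2.45 = 2.205, then
                        # 1.0 + 0.9*2.205 = 2.9845; after reverse() the targets are
                        # [2.9845, 2.205, 2.45].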
buffer_v_target.append(v_s_) + + buffer_v_target.reverse() + buffer_s = buffer_s if len(buffer_s[0].shape) > 1 else np.vstack( + buffer_s) # no vstack for raw-pixel input + buffer_a, buffer_v_target = ( + np.vstack(buffer_a), np.vstack(buffer_v_target) + ) + + # update gradients on global network + self.AC.update_global(buffer_s, buffer_a, buffer_v_target.astype('float32'), globalAC) + buffer_s, buffer_a, buffer_r = [], [], [] + + # update local network from global network + self.AC.pull_global(globalAC) + + s = s_ + total_step += 1 + if self.name == 'Worker_0' and GLOBAL_EP >= save_cnt * SAVE_INTERVAL: + plot_save_log(GLOBAL_RUNNING_R, algorithm_name=self.name, env_name=self.env.spec.id) + globalAC.save_ckpt(env_name=self.env.spec.id) + save_cnt += 1 + if done: + break + + GLOBAL_RUNNING_R.append(ep_r) + if self.name == 'Worker_0' and self.plot_func is not None: + self.plot_func(GLOBAL_RUNNING_R) + print('{}, Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}' \ + .format(self.name, GLOBAL_EP, self.MAX_GLOBAL_EP, ep_r, time.time() - t0)) + GLOBAL_EP += 1 + + +class A3C(): + def __init__(self, net_list, optimizers_list, entropy_beta=0.005): + """ + :param entropy_beta: factor for entropy boosted exploration + """ + self.net_list = net_list + self.optimizers_list = optimizers_list + self.GLOBAL_AC = ACNet(self.net_list[0], 'global', entropy_beta) # we only need its params + self.entropy_beta = entropy_beta + self.name = 'A3C' + + def learn(self, env, train_episodes=1000, test_episodes=10, max_steps=150, render=False, n_workers=1, update_itr=10, + gamma=0.99, save_interval=500, mode='train', plot_func=None): + + """ + :param env: a list of same learning environments + :param train_episodes: total number of episodes for training + :param test_episodes: total number of episodes for testing + :param max_steps: maximum number of steps for one episode + :param render: render or not + :param n_workers: manually set number of workers + :param update_itr: update global policy after several episodes + :param gamma: reward discount factor + :param save_interval: timesteps for saving the weights and plotting the results + :param mode: train or test + :param plot_func: additional function for interactive module + """ + global COORD, GLOBAL_RUNNING_R, GLOBAL_EP, OPT_A, OPT_C, t0, SAVE_INTERVAL + SAVE_INTERVAL = save_interval + COORD = tf.train.Coordinator() + GLOBAL_RUNNING_R = [] + GLOBAL_EP = 0 # will increase during training, stop training when it >= MAX_GLOBAL_EP + N_WORKERS = n_workers if n_workers > 0 else multiprocessing.cpu_count() + + self.plot_func = plot_func + if mode == 'train': + # ============================= TRAINING =============================== + print('Training... 
| Algorithm: {} | Environment: {}'.format(self.name, env[0].spec.id)) + t0 = time.time() + with tf.device("/cpu:0"): + [OPT_A, OPT_C] = self.optimizers_list + + workers = [] + # Create worker + for i in range(N_WORKERS): + i_name = 'Worker_%i' % i # worker name + workers.append( + Worker(env[i], self.net_list[i + 1], i_name, train_episodes, max_steps, gamma, + update_itr, self.entropy_beta, render, plot_func)) + + # start TF threading + worker_threads = [] + for worker in workers: + # t = threading.Thread(target=worker.work) + job = lambda: worker.work(self.GLOBAL_AC) + t = threading.Thread(target=job) + t.start() + worker_threads.append(t) + + COORD.join(worker_threads) + + plot_save_log(GLOBAL_RUNNING_R, algorithm_name=self.name, env_name=env[0].spec.id) + self.GLOBAL_AC.save_ckpt(env_name=env[0].spec.id) + + elif mode == 'test': + # ============================= EVALUATION ============================= + env = env[0] # only need one env for test + self.GLOBAL_AC.load_ckpt(env_name=env.spec.id) + print('Testing... | Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) + frame_idx = 0 + for eps in range(test_episodes): + s = env.reset() + rall = 0 + for step in range(max_steps): + env.render() + frame_idx += 1 + s = s.astype('float32') # double to float + a = self.GLOBAL_AC.get_action_greedy(s) + s, r, d, _ = env.step(a) + if render: + env.render() + rall += r + if d: + break + + print('Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format( + eps, test_episodes, rall, time.time() - t0)) + + elif mode is not 'test': + print('unknow mode type') diff --git a/rlzoo/algorithms/a3c/default.py b/rlzoo/algorithms/a3c/default.py old mode 100644 new mode 100755 index 3cb373c..ec987c9 --- a/rlzoo/algorithms/a3c/default.py +++ b/rlzoo/algorithms/a3c/default.py @@ -1,377 +1,377 @@ -from rlzoo.common.policy_networks import * -from rlzoo.common.value_networks import * -from rlzoo.common.utils import set_seed - -""" -full list of algorithm parameters (alg_params) ------------------------------------------------ -net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization -optimizers_list: a list of optimizers for all networks and differentiable variables -entropy_beta: factor for entropy boosted exploration ------------------------------------------------ - -full list of learning parameters (learn_params) ------------------------------------------------ -train_episodes: total number of episodes for training -test_episodes: total number of episodes for testing -max_steps: maximum number of steps for one episode -n_workers: manually set number of workers -update_itr: update global policy after several episodes -gamma: reward discount factor -save_interval: timesteps for saving the weights and plotting the results -mode: train or test ------------------------------------------------- -""" - - -def atari(env, default_seed=True): - if default_seed: - assert isinstance(env, list) - seed = np.arange(len(env)).tolist() # a list of seeds for each env - set_seed(seed, env) # reproducible - - # for multi-threading - if isinstance(env, list): # judge if multiple envs are passed in for parallel computing - num_env = len(env) # number of envs passed in - env = env[0] # take one of the env as they are all the same - else: - num_env = 1 - - alg_params = dict( - entropy_beta=0.005 - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 4 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the 
networks - net_list2 = [] # networks list of networks list, each item for single thread/process - for _ in range(num_env + 1): # additional one for global - with tf.name_scope('AC'): - with tf.name_scope('Critic'): - critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Actor'): - actor = StochasticPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - net_list = [actor, critic] - net_list2.append(net_list) - alg_params['net_list'] = net_list2 - if alg_params.get('optimizers_list') is None: - a_lr, c_lr = 1e-3, 1e-3 # a_lr: learning rate of the actor; c_lr: learning rate of the critic - a_optimizer = tf.optimizers.RMSprop(a_lr, name='RMS_optimizer_actor') - c_optimizer = tf.optimizers.RMSprop(c_lr, name='RMS_optimizer_critic') - optimizers_list = [a_optimizer, c_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=100, - gamma=0.9, - train_episodes=1000, - test_episodes=10, - save_interval=100, - update_itr=10, - n_workers=num_env - ) - - return alg_params, learn_params - - -def classic_control(env, default_seed=True): - if default_seed: - assert isinstance(env, list) - seed = np.arange(len(env)).tolist() # a list of seeds for each env - set_seed(seed, env) # reproducible - - # for multi-threading - if isinstance(env, list): # judge if multiple envs are passed in for parallel computing - num_env = len(env) # number of envs passed in - env = env[0] # take one of the env as they are all the same - else: - num_env = 1 - - alg_params = dict( - entropy_beta=0.005 - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 4 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - net_list2 = [] # networks list of networks list, each item for single thread/process - for _ in range(num_env + 1): # additional one for global - with tf.name_scope('AC'): - with tf.name_scope('Critic'): - critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Actor'): - actor = StochasticPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - net_list = [actor, critic] - net_list2.append(net_list) - alg_params['net_list'] = net_list2 - if alg_params.get('optimizers_list') is None: - a_lr, c_lr = 1e-3, 1e-3 # a_lr: learning rate of the actor; c_lr: learning rate of the critic - a_optimizer = tf.optimizers.RMSprop(a_lr, name='RMS_optimizer_actor') - c_optimizer = tf.optimizers.RMSprop(c_lr, name='RMS_optimizer_critic') - optimizers_list = [a_optimizer, c_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=100, - gamma=0.9, - train_episodes=1000, - test_episodes=10, - save_interval=100, - update_itr=10, - n_workers=num_env - ) - - return alg_params, learn_params - - -def box2d(env, default_seed=True): - if default_seed: - assert isinstance(env, list) - seed = np.arange(len(env)).tolist() # a list of seeds for each env - set_seed(seed, env) # reproducible - - # for multi-threading - if isinstance(env, list): # judge if multiple envs are passed in for parallel computing - num_env = len(env) # number of envs passed in - env = env[0] # take one of the env as they are all the same - else: - num_env = 1 - - alg_params = dict( - entropy_beta=0.005 - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 4 # number of hidden layers for the networks - 
hidden_dim = 64 # dimension of hidden layers for the networks - net_list2 = [] # networks list of networks list, each item for single thread/process - for _ in range(num_env + 1): # additional one for global - with tf.name_scope('AC'): - with tf.name_scope('Critic'): - critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Actor'): - actor = StochasticPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - net_list = [actor, critic] - net_list2.append(net_list) - alg_params['net_list'] = net_list2 - if alg_params.get('optimizers_list') is None: - a_lr, c_lr = 1e-3, 1e-3 # a_lr: learning rate of the actor; c_lr: learning rate of the critic - a_optimizer = tf.optimizers.RMSprop(a_lr, name='RMS_optimizer_actor') - c_optimizer = tf.optimizers.RMSprop(c_lr, name='RMS_optimizer_critic') - optimizers_list = [a_optimizer, c_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=20000, - gamma=0.9, - train_episodes=20000, - test_episodes=10, - save_interval=500, - update_itr=10, - n_workers=num_env - ) - - return alg_params, learn_params - - -def mujoco(env, default_seed=True): - if default_seed: - assert isinstance(env, list) - seed = np.arange(len(env)).tolist() # a list of seeds for each env - set_seed(seed, env) # reproducible - - # for multi-threading - if isinstance(env, list): # judge if multiple envs are passed in for parallel computing - num_env = len(env) # number of envs passed in - env = env[0] # take one of the env as they are all the same - else: - num_env = 1 - - alg_params = dict( - entropy_beta=0.005 - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 4 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - net_list2 = [] # networks list of networks list, each item for single thread/process - for _ in range(num_env + 1): # additional one for global - with tf.name_scope('AC'): - with tf.name_scope('Critic'): - critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Actor'): - actor = StochasticPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - net_list = [actor, critic] - net_list2.append(net_list) - alg_params['net_list'] = net_list2 - if alg_params.get('optimizers_list') is None: - a_lr, c_lr = 1e-3, 1e-3 # a_lr: learning rate of the actor; c_lr: learning rate of the critic - a_optimizer = tf.optimizers.RMSprop(a_lr, name='RMS_optimizer_actor') - c_optimizer = tf.optimizers.RMSprop(c_lr, name='RMS_optimizer_critic') - optimizers_list = [a_optimizer, c_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=100, - gamma=0.9, - train_episodes=1000, - test_episodes=10, - save_interval=100, - update_itr=10, - n_workers=num_env - ) - - return alg_params, learn_params - - -def robotics(env, default_seed=True): - if default_seed: - assert isinstance(env, list) - seed = np.arange(len(env)).tolist() # a list of seeds for each env - set_seed(seed, env) # reproducible - - # for multi-threading - if isinstance(env, list): # judge if multiple envs are passed in for parallel computing - num_env = len(env) # number of envs passed in - env = env[0] # take one of the env as they are all the same - else: - num_env = 1 - - alg_params = dict( - entropy_beta=0.005 - ) - if alg_params.get('net_list') is None: - 
num_hidden_layer = 4 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - net_list2 = [] # networks list of networks list, each item for single thread/process - for _ in range(num_env + 1): # additional one for global - with tf.name_scope('AC'): - with tf.name_scope('Critic'): - critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Actor'): - actor = StochasticPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - net_list = [actor, critic] - net_list2.append(net_list) - alg_params['net_list'] = net_list2 - if alg_params.get('optimizers_list') is None: - a_lr, c_lr = 1e-3, 1e-3 # a_lr: learning rate of the actor; c_lr: learning rate of the critic - a_optimizer = tf.optimizers.RMSprop(a_lr, name='RMS_optimizer_actor') - c_optimizer = tf.optimizers.RMSprop(c_lr, name='RMS_optimizer_critic') - optimizers_list = [a_optimizer, c_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=100, - gamma=0.9, - train_episodes=1000, - test_episodes=10, - save_interval=100, - update_itr=10, - n_workers=num_env - - ) - - return alg_params, learn_params - - -def dm_control(env, default_seed=True): - if default_seed: - assert isinstance(env, list) - seed = np.arange(len(env)).tolist() # a list of seeds for each env - set_seed(seed, env) # reproducible - - # for multi-threading - if isinstance(env, list): # judge if multiple envs are passed in for parallel computing - num_env = len(env) # number of envs passed in - env = env[0] # take one of the env as they are all the same - else: - num_env = 1 - - alg_params = dict( - entropy_beta=0.005 - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 4 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - net_list2 = [] # networks list of networks list, each item for single thread/process - for _ in range(num_env + 1): # additional one for global - with tf.name_scope('AC'): - with tf.name_scope('Critic'): - critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Actor'): - actor = StochasticPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - net_list = [actor, critic] - net_list2.append(net_list) - alg_params['net_list'] = net_list2 - if alg_params.get('optimizers_list') is None: - a_lr, c_lr = 1e-3, 1e-3 # a_lr: learning rate of the actor; c_lr: learning rate of the critic - a_optimizer = tf.optimizers.RMSprop(a_lr, name='RMS_optimizer_actor') - c_optimizer = tf.optimizers.RMSprop(c_lr, name='RMS_optimizer_critic') - optimizers_list = [a_optimizer, c_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=100, - gamma=0.9, - train_episodes=1000, - test_episodes=10, - save_interval=100, - update_itr=10, - n_workers=num_env - - ) - - return alg_params, learn_params - - -def rlbench(env, default_seed=True): - if default_seed: - assert isinstance(env, list) - seed = np.arange(len(env)).tolist() # a list of seeds for each env - set_seed(seed, env) # reproducible - - # for multi-threading - if isinstance(env, list): # judge if multiple envs are passed in for parallel computing - num_env = len(env) # number of envs passed in - env = env[0] # take one of the env as they are all the same - else: - num_env = 1 - - alg_params = dict( - 
entropy_beta=0.005 - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 4 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - net_list2 = [] # networks list of networks list, each item for single thread/process - for _ in range(num_env + 1): # additional one for global - with tf.name_scope('AC'): - with tf.name_scope('Critic'): - critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Actor'): - actor = StochasticPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - net_list = [actor, critic] - net_list2.append(net_list) - alg_params['net_list'] = net_list2 - if alg_params.get('optimizers_list') is None: - a_lr, c_lr = 1e-3, 1e-3 # a_lr: learning rate of the actor; c_lr: learning rate of the critic - a_optimizer = tf.optimizers.RMSprop(a_lr, name='RMS_optimizer_actor') - c_optimizer = tf.optimizers.RMSprop(c_lr, name='RMS_optimizer_critic') - optimizers_list = [a_optimizer, c_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=100, - gamma=0.9, - train_episodes=1000, - test_episodes=10, - save_interval=100, - update_itr=10, - n_workers=num_env - - ) - +from rlzoo.common.policy_networks import * +from rlzoo.common.value_networks import * +from rlzoo.common.utils import set_seed + +""" +full list of algorithm parameters (alg_params) +----------------------------------------------- +net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization +optimizers_list: a list of optimizers for all networks and differentiable variables +entropy_beta: factor for entropy boosted exploration +----------------------------------------------- + +full list of learning parameters (learn_params) +----------------------------------------------- +train_episodes: total number of episodes for training +test_episodes: total number of episodes for testing +max_steps: maximum number of steps for one episode +n_workers: manually set number of workers +update_itr: update global policy after several episodes +gamma: reward discount factor +save_interval: timesteps for saving the weights and plotting the results +mode: train or test +------------------------------------------------ +""" + + +def atari(env, default_seed=True): + if default_seed: + assert isinstance(env, list) + seed = np.arange(len(env)).tolist() # a list of seeds for each env + set_seed(seed, env) # reproducible + + # for multi-threading + if isinstance(env, list): # judge if multiple envs are passed in for parallel computing + num_env = len(env) # number of envs passed in + env = env[0] # take one of the env as they are all the same + else: + num_env = 1 + + alg_params = dict( + entropy_beta=0.005 + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 4 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + net_list2 = [] # networks list of networks list, each item for single thread/process + for _ in range(num_env + 1): # additional one for global + with tf.name_scope('AC'): + with tf.name_scope('Critic'): + critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Actor'): + actor = StochasticPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + net_list = [actor, critic] + net_list2.append(net_list) + 
alg_params['net_list'] = net_list2 + if alg_params.get('optimizers_list') is None: + a_lr, c_lr = 1e-3, 1e-3 # a_lr: learning rate of the actor; c_lr: learning rate of the critic + a_optimizer = tf.optimizers.RMSprop(a_lr, name='RMS_optimizer_actor') + c_optimizer = tf.optimizers.RMSprop(c_lr, name='RMS_optimizer_critic') + optimizers_list = [a_optimizer, c_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=100, + gamma=0.9, + train_episodes=1000, + test_episodes=10, + save_interval=100, + update_itr=10, + n_workers=num_env + ) + + return alg_params, learn_params + + +def classic_control(env, default_seed=True): + if default_seed: + assert isinstance(env, list) + seed = np.arange(len(env)).tolist() # a list of seeds for each env + set_seed(seed, env) # reproducible + + # for multi-threading + if isinstance(env, list): # judge if multiple envs are passed in for parallel computing + num_env = len(env) # number of envs passed in + env = env[0] # take one of the env as they are all the same + else: + num_env = 1 + + alg_params = dict( + entropy_beta=0.005 + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 4 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + net_list2 = [] # networks list of networks list, each item for single thread/process + for _ in range(num_env + 1): # additional one for global + with tf.name_scope('AC'): + with tf.name_scope('Critic'): + critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Actor'): + actor = StochasticPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + net_list = [actor, critic] + net_list2.append(net_list) + alg_params['net_list'] = net_list2 + if alg_params.get('optimizers_list') is None: + a_lr, c_lr = 1e-3, 1e-3 # a_lr: learning rate of the actor; c_lr: learning rate of the critic + a_optimizer = tf.optimizers.RMSprop(a_lr, name='RMS_optimizer_actor') + c_optimizer = tf.optimizers.RMSprop(c_lr, name='RMS_optimizer_critic') + optimizers_list = [a_optimizer, c_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=100, + gamma=0.9, + train_episodes=1000, + test_episodes=10, + save_interval=100, + update_itr=10, + n_workers=num_env + ) + + return alg_params, learn_params + + +def box2d(env, default_seed=True): + if default_seed: + assert isinstance(env, list) + seed = np.arange(len(env)).tolist() # a list of seeds for each env + set_seed(seed, env) # reproducible + + # for multi-threading + if isinstance(env, list): # judge if multiple envs are passed in for parallel computing + num_env = len(env) # number of envs passed in + env = env[0] # take one of the env as they are all the same + else: + num_env = 1 + + alg_params = dict( + entropy_beta=0.005 + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 4 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + net_list2 = [] # networks list of networks list, each item for single thread/process + for _ in range(num_env + 1): # additional one for global + with tf.name_scope('AC'): + with tf.name_scope('Critic'): + critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Actor'): + actor = StochasticPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + 
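                # one actor/critic pair is built per parallel worker; the loop runs
                # num_env + 1 times so that one extra pair can serve as the shared
                # global network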
net_list = [actor, critic] + net_list2.append(net_list) + alg_params['net_list'] = net_list2 + if alg_params.get('optimizers_list') is None: + a_lr, c_lr = 1e-3, 1e-3 # a_lr: learning rate of the actor; c_lr: learning rate of the critic + a_optimizer = tf.optimizers.RMSprop(a_lr, name='RMS_optimizer_actor') + c_optimizer = tf.optimizers.RMSprop(c_lr, name='RMS_optimizer_critic') + optimizers_list = [a_optimizer, c_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=20000, + gamma=0.9, + train_episodes=20000, + test_episodes=10, + save_interval=500, + update_itr=10, + n_workers=num_env + ) + + return alg_params, learn_params + + +def mujoco(env, default_seed=True): + if default_seed: + assert isinstance(env, list) + seed = np.arange(len(env)).tolist() # a list of seeds for each env + set_seed(seed, env) # reproducible + + # for multi-threading + if isinstance(env, list): # judge if multiple envs are passed in for parallel computing + num_env = len(env) # number of envs passed in + env = env[0] # take one of the env as they are all the same + else: + num_env = 1 + + alg_params = dict( + entropy_beta=0.005 + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 4 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + net_list2 = [] # networks list of networks list, each item for single thread/process + for _ in range(num_env + 1): # additional one for global + with tf.name_scope('AC'): + with tf.name_scope('Critic'): + critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Actor'): + actor = StochasticPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + net_list = [actor, critic] + net_list2.append(net_list) + alg_params['net_list'] = net_list2 + if alg_params.get('optimizers_list') is None: + a_lr, c_lr = 1e-3, 1e-3 # a_lr: learning rate of the actor; c_lr: learning rate of the critic + a_optimizer = tf.optimizers.RMSprop(a_lr, name='RMS_optimizer_actor') + c_optimizer = tf.optimizers.RMSprop(c_lr, name='RMS_optimizer_critic') + optimizers_list = [a_optimizer, c_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=100, + gamma=0.9, + train_episodes=1000, + test_episodes=10, + save_interval=100, + update_itr=10, + n_workers=num_env + ) + + return alg_params, learn_params + + +def robotics(env, default_seed=True): + if default_seed: + assert isinstance(env, list) + seed = np.arange(len(env)).tolist() # a list of seeds for each env + set_seed(seed, env) # reproducible + + # for multi-threading + if isinstance(env, list): # judge if multiple envs are passed in for parallel computing + num_env = len(env) # number of envs passed in + env = env[0] # take one of the env as they are all the same + else: + num_env = 1 + + alg_params = dict( + entropy_beta=0.005 + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 4 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + net_list2 = [] # networks list of networks list, each item for single thread/process + for _ in range(num_env + 1): # additional one for global + with tf.name_scope('AC'): + with tf.name_scope('Critic'): + critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Actor'): + actor = StochasticPolicyNetwork(env.observation_space, env.action_space, + 
hidden_dim_list=num_hidden_layer * [hidden_dim]) + net_list = [actor, critic] + net_list2.append(net_list) + alg_params['net_list'] = net_list2 + if alg_params.get('optimizers_list') is None: + a_lr, c_lr = 1e-3, 1e-3 # a_lr: learning rate of the actor; c_lr: learning rate of the critic + a_optimizer = tf.optimizers.RMSprop(a_lr, name='RMS_optimizer_actor') + c_optimizer = tf.optimizers.RMSprop(c_lr, name='RMS_optimizer_critic') + optimizers_list = [a_optimizer, c_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=100, + gamma=0.9, + train_episodes=1000, + test_episodes=10, + save_interval=100, + update_itr=10, + n_workers=num_env + + ) + + return alg_params, learn_params + + +def dm_control(env, default_seed=True): + if default_seed: + assert isinstance(env, list) + seed = np.arange(len(env)).tolist() # a list of seeds for each env + set_seed(seed, env) # reproducible + + # for multi-threading + if isinstance(env, list): # judge if multiple envs are passed in for parallel computing + num_env = len(env) # number of envs passed in + env = env[0] # take one of the env as they are all the same + else: + num_env = 1 + + alg_params = dict( + entropy_beta=0.005 + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 4 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + net_list2 = [] # networks list of networks list, each item for single thread/process + for _ in range(num_env + 1): # additional one for global + with tf.name_scope('AC'): + with tf.name_scope('Critic'): + critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Actor'): + actor = StochasticPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + net_list = [actor, critic] + net_list2.append(net_list) + alg_params['net_list'] = net_list2 + if alg_params.get('optimizers_list') is None: + a_lr, c_lr = 1e-3, 1e-3 # a_lr: learning rate of the actor; c_lr: learning rate of the critic + a_optimizer = tf.optimizers.RMSprop(a_lr, name='RMS_optimizer_actor') + c_optimizer = tf.optimizers.RMSprop(c_lr, name='RMS_optimizer_critic') + optimizers_list = [a_optimizer, c_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=100, + gamma=0.9, + train_episodes=1000, + test_episodes=10, + save_interval=100, + update_itr=10, + n_workers=num_env + + ) + + return alg_params, learn_params + + +def rlbench(env, default_seed=True): + if default_seed: + assert isinstance(env, list) + seed = np.arange(len(env)).tolist() # a list of seeds for each env + set_seed(seed, env) # reproducible + + # for multi-threading + if isinstance(env, list): # judge if multiple envs are passed in for parallel computing + num_env = len(env) # number of envs passed in + env = env[0] # take one of the env as they are all the same + else: + num_env = 1 + + alg_params = dict( + entropy_beta=0.005 + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 4 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + net_list2 = [] # networks list of networks list, each item for single thread/process + for _ in range(num_env + 1): # additional one for global + with tf.name_scope('AC'): + with tf.name_scope('Critic'): + critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Actor'): + actor = 
StochasticPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + net_list = [actor, critic] + net_list2.append(net_list) + alg_params['net_list'] = net_list2 + if alg_params.get('optimizers_list') is None: + a_lr, c_lr = 1e-3, 1e-3 # a_lr: learning rate of the actor; c_lr: learning rate of the critic + a_optimizer = tf.optimizers.RMSprop(a_lr, name='RMS_optimizer_actor') + c_optimizer = tf.optimizers.RMSprop(c_lr, name='RMS_optimizer_critic') + optimizers_list = [a_optimizer, c_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=100, + gamma=0.9, + train_episodes=1000, + test_episodes=10, + save_interval=100, + update_itr=10, + n_workers=num_env + + ) + return alg_params, learn_params \ No newline at end of file diff --git a/rlzoo/algorithms/a3c/run_a3c.py b/rlzoo/algorithms/a3c/run_a3c.py old mode 100644 new mode 100755 index 4684a26..fcb75ba --- a/rlzoo/algorithms/a3c/run_a3c.py +++ b/rlzoo/algorithms/a3c/run_a3c.py @@ -1,67 +1,67 @@ -from rlzoo.algorithms.a3c.a3c import A3C -from rlzoo.common.policy_networks import * -from rlzoo.common.value_networks import * -import gym - -""" load environment """ -env_id = 'BipedalWalker-v2' -env = gym.make(env_id).unwrapped -# env = DummyVecEnv([lambda: env]) # The algorithms require a vectorized/wrapped environment to run -action_shape = env.action_space.shape -state_shape = env.observation_space.shape -# reproducible -seed = 2 -np.random.seed(seed) -tf.random.set_seed(seed) -env.seed(seed) - -""" build networks for the algorithm """ -num_hidden_layer = 4 # number of hidden layers for the networks -hidden_dim = 64 # dimension of hidden layers for the networks -num_workers = 2 -net_list2 = [] -for i in range(num_workers + 1): - with tf.name_scope('A3C'): - with tf.name_scope('Actor'): - actor = StochasticPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Critic'): - critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) - net_list = [actor, critic] - net_list2.append(net_list) - -""" choose optimizers """ -actor_lr, critic_lr = 5e-5, 1e-4 # learning rate -a_optimizer = tf.optimizers.RMSprop(actor_lr) -c_optimizer = tf.optimizers.RMSprop(critic_lr) -optimizers_list = [a_optimizer, c_optimizer] - -model = A3C(net_list2, optimizers_list, entropy_beta=0.005) -""" -full list of arguments for the algorithm ----------------------------------------- -net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization -optimizers_list: a list of optimizers for all networks and differentiable variables -entropy_beta: factor for entropy boosted exploration -""" - -env_list = [] -for i in range(num_workers): - env_list.append(gym.make(env_id).unwrapped) -model.learn(env_list, train_episodes=20000, test_episodes=100, max_steps=20000, n_workers=num_workers, update_itr=10, - gamma=0.99, save_interval=500, mode='train') -""" -full list of parameters for training ---------------------------------------- -env_list: a list of same learning environments -train_episodes: total number of episodes for training -test_episodes: total number of episodes for testing -max_steps: maximum number of steps for one episode -n_workers: manually set number of workers -update_itr: update global policy after several episodes -gamma: reward discount factor -save_interval: timesteps for saving the weights and plotting the results -mode: 
train or test -""" -# test -model.learn(env_list, test_episodes=100, max_steps=20000, mode='test', render=True) +from rlzoo.algorithms.a3c.a3c import A3C +from rlzoo.common.policy_networks import * +from rlzoo.common.value_networks import * +import gym + +""" load environment """ +env_id = 'BipedalWalker-v2' +env = gym.make(env_id).unwrapped +# env = DummyVecEnv([lambda: env]) # The algorithms require a vectorized/wrapped environment to run +action_shape = env.action_space.shape +state_shape = env.observation_space.shape +# reproducible +seed = 2 +np.random.seed(seed) +tf.random.set_seed(seed) +env.seed(seed) + +""" build networks for the algorithm """ +num_hidden_layer = 4 # number of hidden layers for the networks +hidden_dim = 64 # dimension of hidden layers for the networks +num_workers = 2 +net_list2 = [] +for i in range(num_workers + 1): + with tf.name_scope('A3C'): + with tf.name_scope('Actor'): + actor = StochasticPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Critic'): + critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) + net_list = [actor, critic] + net_list2.append(net_list) + +""" choose optimizers """ +actor_lr, critic_lr = 5e-5, 1e-4 # learning rate +a_optimizer = tf.optimizers.RMSprop(actor_lr) +c_optimizer = tf.optimizers.RMSprop(critic_lr) +optimizers_list = [a_optimizer, c_optimizer] + +model = A3C(net_list2, optimizers_list, entropy_beta=0.005) +""" +full list of arguments for the algorithm +---------------------------------------- +net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization +optimizers_list: a list of optimizers for all networks and differentiable variables +entropy_beta: factor for entropy boosted exploration +""" + +env_list = [] +for i in range(num_workers): + env_list.append(gym.make(env_id).unwrapped) +model.learn(env_list, train_episodes=20000, test_episodes=100, max_steps=20000, n_workers=num_workers, update_itr=10, + gamma=0.99, save_interval=500, mode='train') +""" +full list of parameters for training +--------------------------------------- +env_list: a list of same learning environments +train_episodes: total number of episodes for training +test_episodes: total number of episodes for testing +max_steps: maximum number of steps for one episode +n_workers: manually set number of workers +update_itr: update global policy after several episodes +gamma: reward discount factor +save_interval: timesteps for saving the weights and plotting the results +mode: train or test +""" +# test +model.learn(env_list, test_episodes=100, max_steps=20000, mode='test', render=True) diff --git a/rlzoo/algorithms/ac/__init__.py b/rlzoo/algorithms/ac/__init__.py old mode 100644 new mode 100755 diff --git a/rlzoo/algorithms/ac/ac.py b/rlzoo/algorithms/ac/ac.py old mode 100644 new mode 100755 index 7fc1027..bf56db8 --- a/rlzoo/algorithms/ac/ac.py +++ b/rlzoo/algorithms/ac/ac.py @@ -1,187 +1,187 @@ -""" -Actor-Critic -------------- -It uses TD-error as the Advantage. - -Actor Critic History ----------------------- -A3C > DDPG > AC - -Advantage ----------- -AC converge faster than Policy Gradient. - -Disadvantage (IMPORTANT) ------------------------- -The Policy is oscillated (difficult to converge), DDPG can solve -this problem using advantage of DQN. 
- -Reference ----------- -paper: https://papers.nips.cc/paper/1786-actor-critic-algorithms.pdf -View more on MorvanZhou's tutorial page: https://morvanzhou.github.io/tutorials/ -MorvanZhou's code: https://github.com/MorvanZhou/Reinforcement-learning-with-tensorflow/ - -Environment ------------- -CartPole-v0: https://gym.openai.com/envs/CartPole-v0 - -A pole is attached by an un-actuated joint to a cart, which moves along a -frictionless track. The system is controlled by applying a force of +1 or -1 -to the cart. The pendulum starts upright, and the goal is to prevent it from -falling over. - -A reward of +1 is provided for every timestep that the pole remains upright. -The episode ends when the pole is more than 15 degrees from vertical, or the -cart moves more than 2.4 units from the center. - - -Prerequisites --------------- -tensorflow >=2.0.0a0 -tensorlayer >=2.0.0 - -""" -import time - -import tensorlayer as tl - -from rlzoo.common.utils import * -from rlzoo.common.value_networks import * -from rlzoo.common.policy_networks import * - -tl.logging.set_verbosity(tl.logging.DEBUG) - - -############################### Actor-Critic #################################### -class AC: - def __init__(self, net_list, optimizers_list, gamma=0.9): - assert len(net_list) == 2 - assert len(optimizers_list) == 2 - self.name = 'AC' - self.actor, self.critic = net_list - assert isinstance(self.critic, ValueNetwork) - assert isinstance(self.actor, StochasticPolicyNetwork) - self.a_optimizer, self.c_optimizer = optimizers_list - self.GAMMA = gamma - - def update(self, s, a, r, s_): - # critic update - v_ = self.critic(np.array([s_])) - with tf.GradientTape() as tape: - v = self.critic(np.array([s])) - td_error = r + self.GAMMA * v_ - v # TD_error = r + lambd * V(newS) - V(S) - loss = tf.square(td_error) - grad = tape.gradient(loss, self.critic.trainable_weights) - self.c_optimizer.apply_gradients(zip(grad, self.critic.trainable_weights)) - - # actor update - with tf.GradientTape() as tape: - # _logits = self.actor(np.array([s])) - ## cross-entropy loss weighted by td-error (advantage), - # the cross-entropy mearsures the difference of two probability distributions: the predicted logits and sampled action distribution, - # then weighted by the td-error: small difference of real and predict actions for large td-error (advantage); and vice versa. 
- - _ = self.actor(np.array([s])) - neg_log_prob = self.actor.policy_dist.neglogp([a]) - _exp_v = tf.reduce_mean(neg_log_prob * td_error) - grad = tape.gradient(_exp_v, self.actor.trainable_weights) - self.a_optimizer.apply_gradients(zip(grad, self.actor.trainable_weights)) - return _exp_v - - def get_action(self, s): - return self.actor(np.array([s]))[0].numpy() - - def get_action_greedy(self, s): - return self.actor(np.array([s]), greedy=True)[0].numpy() - - def save_ckpt(self, env_name): # save trained weights - save_model(self.actor, 'model_actor', self.name, env_name) - save_model(self.critic, 'model_critic', self.name, env_name) - - def load_ckpt(self, env_name): # load trained weights - load_model(self.actor, 'model_actor', self.name, env_name) - load_model(self.critic, 'model_critic', self.name, env_name) - - def learn(self, env, train_episodes=1000, test_episodes=500, max_steps=200, - save_interval=100, mode='train', render=False, plot_func=None): - """ - :param env: learning environment - :param train_episodes: total number of episodes for training - :param test_episodes: total number of episodes for testing - :param max_steps: maximum number of steps for one episode - :param save_interval: time steps for saving the weights and plotting the results - :param mode: 'train' or 'test' - :param render: if true, visualize the environment - :param plot_func: additional function for interactive module - """ - - t0 = time.time() - if mode == 'train': - print('Training... | Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) - reward_buffer = [] - for i_episode in range(train_episodes): - s = env.reset() - ep_rs_sum = 0 # rewards of all steps - - for step in range(max_steps): - - if render: - env.render() - - a = self.get_action(s) - s_new, r, done, info = env.step(a) - ep_rs_sum += r - - try: - self.update(s, a, r, s_new) # learn Policy : true_gradient = grad[logPi(s, a) * td_error] - except KeyboardInterrupt: # if Ctrl+C at running actor.learn(), then save model, or exit if not at actor.learn() - self.save_ckpt(env_name=env.spec.id) - plot_save_log(reward_buffer, algorithm_name=self.name, env_name=env.spec.id) - - s = s_new - - if done: - break - - reward_buffer.append(ep_rs_sum) - if plot_func is not None: - plot_func(reward_buffer) - print('Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}' \ - .format(i_episode, train_episodes, ep_rs_sum, time.time() - t0)) - - if i_episode % save_interval == 0: - self.save_ckpt(env_name=env.spec.id) - plot_save_log(reward_buffer, algorithm_name=self.name, env_name=env.spec.id) - - self.save_ckpt(env_name=env.spec.id) - plot_save_log(reward_buffer, algorithm_name=self.name, env_name=env.spec.id) - - elif mode == 'test': - self.load_ckpt(env_name=env.spec.id) - print('Testing... | Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) - - reward_buffer = [] - for i_episode in range(test_episodes): - s = env.reset() - ep_rs_sum = 0 # rewards of all steps - for step in range(max_steps): - if render: env.render() - a = self.get_action_greedy(s) - s_new, r, done, info = env.step(a) - s_new = s_new - - ep_rs_sum += r - s = s_new - - if done: - break - - reward_buffer.append(ep_rs_sum) - if plot_func: - plot_func(reward_buffer) - print('Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format( - i_episode, test_episodes, ep_rs_sum, time.time() - t0)) - - elif mode is not 'test': - print('unknow mode type') +""" +Actor-Critic +------------- +It uses TD-error as the Advantage. 
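+
+For a transition (s, a, r, s_) with discount factor gamma, the advantage is
+estimated by the one-step TD error (this is the quantity computed in AC.update
+below; V denotes the critic value network):
+
+    td_error = r + gamma * V(s_) - V(s)
+
+The policy-gradient term is then weighted by this td_error.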
+ +Actor Critic History +---------------------- +A3C > DDPG > AC + +Advantage +---------- +AC converge faster than Policy Gradient. + +Disadvantage (IMPORTANT) +------------------------ +The Policy is oscillated (difficult to converge), DDPG can solve +this problem using advantage of DQN. + +Reference +---------- +paper: https://papers.nips.cc/paper/1786-actor-critic-algorithms.pdf +View more on MorvanZhou's tutorial page: https://morvanzhou.github.io/tutorials/ +MorvanZhou's code: https://github.com/MorvanZhou/Reinforcement-learning-with-tensorflow/ + +Environment +------------ +CartPole-v0: https://gym.openai.com/envs/CartPole-v0 + +A pole is attached by an un-actuated joint to a cart, which moves along a +frictionless track. The system is controlled by applying a force of +1 or -1 +to the cart. The pendulum starts upright, and the goal is to prevent it from +falling over. + +A reward of +1 is provided for every timestep that the pole remains upright. +The episode ends when the pole is more than 15 degrees from vertical, or the +cart moves more than 2.4 units from the center. + + +Prerequisites +-------------- +tensorflow >=2.0.0a0 +tensorlayer >=2.0.0 + +""" +import time + +import tensorlayer as tl + +from rlzoo.common.utils import * +from rlzoo.common.value_networks import * +from rlzoo.common.policy_networks import * + +tl.logging.set_verbosity(tl.logging.DEBUG) + + +############################### Actor-Critic #################################### +class AC: + def __init__(self, net_list, optimizers_list, gamma=0.9): + assert len(net_list) == 2 + assert len(optimizers_list) == 2 + self.name = 'AC' + self.actor, self.critic = net_list + assert isinstance(self.critic, ValueNetwork) + assert isinstance(self.actor, StochasticPolicyNetwork) + self.a_optimizer, self.c_optimizer = optimizers_list + self.GAMMA = gamma + + def update(self, s, a, r, s_): + # critic update + v_ = self.critic(np.array([s_])) + with tf.GradientTape() as tape: + v = self.critic(np.array([s])) + td_error = r + self.GAMMA * v_ - v # TD_error = r + lambd * V(newS) - V(S) + loss = tf.square(td_error) + grad = tape.gradient(loss, self.critic.trainable_weights) + self.c_optimizer.apply_gradients(zip(grad, self.critic.trainable_weights)) + + # actor update + with tf.GradientTape() as tape: + # _logits = self.actor(np.array([s])) + ## cross-entropy loss weighted by td-error (advantage), + # the cross-entropy mearsures the difference of two probability distributions: the predicted logits and sampled action distribution, + # then weighted by the td-error: small difference of real and predict actions for large td-error (advantage); and vice versa. 
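+        # in the code below, _exp_v = mean( neg_log_prob * td_error ) with
+        # neg_log_prob = -log pi(a|s); minimizing _exp_v raises the
+        # log-probability of actions whose td_error (advantage) is positive
+        # and lowers it for actions whose td_error is negative.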
+ + _ = self.actor(np.array([s])) + neg_log_prob = self.actor.policy_dist.neglogp([a]) + _exp_v = tf.reduce_mean(neg_log_prob * td_error) + grad = tape.gradient(_exp_v, self.actor.trainable_weights) + self.a_optimizer.apply_gradients(zip(grad, self.actor.trainable_weights)) + return _exp_v + + def get_action(self, s): + return self.actor(np.array([s]))[0].numpy() + + def get_action_greedy(self, s): + return self.actor(np.array([s]), greedy=True)[0].numpy() + + def save_ckpt(self, env_name): # save trained weights + save_model(self.actor, 'model_actor', self.name, env_name) + save_model(self.critic, 'model_critic', self.name, env_name) + + def load_ckpt(self, env_name): # load trained weights + load_model(self.actor, 'model_actor', self.name, env_name) + load_model(self.critic, 'model_critic', self.name, env_name) + + def learn(self, env, train_episodes=1000, test_episodes=500, max_steps=200, + save_interval=100, mode='train', render=False, plot_func=None): + """ + :param env: learning environment + :param train_episodes: total number of episodes for training + :param test_episodes: total number of episodes for testing + :param max_steps: maximum number of steps for one episode + :param save_interval: time steps for saving the weights and plotting the results + :param mode: 'train' or 'test' + :param render: if true, visualize the environment + :param plot_func: additional function for interactive module + """ + + t0 = time.time() + if mode == 'train': + print('Training... | Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) + reward_buffer = [] + for i_episode in range(train_episodes): + s = env.reset() + ep_rs_sum = 0 # rewards of all steps + + for step in range(max_steps): + + if render: + env.render() + + a = self.get_action(s) + s_new, r, done, info = env.step(a) + ep_rs_sum += r + + try: + self.update(s, a, r, s_new) # learn Policy : true_gradient = grad[logPi(s, a) * td_error] + except KeyboardInterrupt: # if Ctrl+C at running actor.learn(), then save model, or exit if not at actor.learn() + self.save_ckpt(env_name=env.spec.id) + plot_save_log(reward_buffer, algorithm_name=self.name, env_name=env.spec.id) + + s = s_new + + if done: + break + + reward_buffer.append(ep_rs_sum) + if plot_func is not None: + plot_func(reward_buffer) + print('Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}' \ + .format(i_episode, train_episodes, ep_rs_sum, time.time() - t0)) + + if i_episode % save_interval == 0: + self.save_ckpt(env_name=env.spec.id) + plot_save_log(reward_buffer, algorithm_name=self.name, env_name=env.spec.id) + + self.save_ckpt(env_name=env.spec.id) + plot_save_log(reward_buffer, algorithm_name=self.name, env_name=env.spec.id) + + elif mode == 'test': + self.load_ckpt(env_name=env.spec.id) + print('Testing... 
| Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) + + reward_buffer = [] + for i_episode in range(test_episodes): + s = env.reset() + ep_rs_sum = 0 # rewards of all steps + for step in range(max_steps): + if render: env.render() + a = self.get_action_greedy(s) + s_new, r, done, info = env.step(a) + s_new = s_new + + ep_rs_sum += r + s = s_new + + if done: + break + + reward_buffer.append(ep_rs_sum) + if plot_func: + plot_func(reward_buffer) + print('Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format( + i_episode, test_episodes, ep_rs_sum, time.time() - t0)) + + elif mode is not 'test': + print('unknow mode type') diff --git a/rlzoo/algorithms/ac/default.py b/rlzoo/algorithms/ac/default.py old mode 100644 new mode 100755 index 7b56efb..3ea0ce2 --- a/rlzoo/algorithms/ac/default.py +++ b/rlzoo/algorithms/ac/default.py @@ -1,288 +1,288 @@ -import tensorflow as tf -import tensorlayer as tl - -from rlzoo.common import math_utils -from rlzoo.common.value_networks import * -from rlzoo.common.policy_networks import * -from gym import spaces -from rlzoo.common.utils import set_seed - -""" -full list of algorithm parameters (alg_params) ------------------------------------------------ -net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization -optimizers_list: a list of optimizers for all networks and differentiable variables -gamma: discounted factor of reward -action_range: scale of action values ------------------------------------------------ - -full list of learning parameters (learn_params) ------------------------------------------------ -env: learning environment -train_episodes: total number of episodes for training -test_episodes: total number of episodes for testing -max_steps: maximum number of steps for one episode -save_interval: time steps for saving the weights and plotting the results -mode: 'train' or 'test' -render: if true, visualize the environment ------------------------------------------------- -""" - - -def atari(env, default_seed=True): - if default_seed: - seed = 1 - set_seed(seed, env) # reproducible - - alg_params = dict( - gamma=0.9, - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('AC'): - with tf.name_scope('Critic'): - critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Actor'): - actor = StochasticPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim], - output_activation=tf.nn.tanh) - net_list = [actor, critic] - alg_params['net_list'] = net_list - if alg_params.get('optimizers_list') is None: - a_lr, c_lr = 1e-4, 2e-4 # a_lr: learning rate of the actor; c_lr: learning rate of the critic - a_optimizer = tf.optimizers.Adam(a_lr) - c_optimizer = tf.optimizers.Adam(c_lr) - optimizers_list = [a_optimizer, c_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=200, - train_episodes=500, - test_episodes=100, - save_interval=50, - ) - - return alg_params, learn_params - - -def classic_control(env, default_seed=True): - if default_seed: - seed = 1 - set_seed(seed, env) # reproducible - - alg_params = dict( - gamma=0.9, - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the 
networks - with tf.name_scope('AC'): - with tf.name_scope('Critic'): - critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Actor'): - actor = StochasticPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim], - output_activation=tf.nn.tanh) - net_list = [actor, critic] - alg_params['net_list'] = net_list - if alg_params.get('optimizers_list') is None: - a_lr, c_lr = 1e-4, 2e-4 # a_lr: learning rate of the actor; c_lr: learning rate of the critic - a_optimizer = tf.optimizers.Adam(a_lr) - c_optimizer = tf.optimizers.Adam(c_lr) - optimizers_list = [a_optimizer, c_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=200, - train_episodes=500, - test_episodes=100, - save_interval=50, - ) - - return alg_params, learn_params - - -def box2d(env, default_seed=True): - if default_seed: - seed = 1 - set_seed(seed, env) # reproducible - - alg_params = dict( - gamma=0.9, - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('AC'): - with tf.name_scope('Critic'): - critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Actor'): - actor = StochasticPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim], - output_activation=tf.nn.tanh) - net_list = [actor, critic] - alg_params['net_list'] = net_list - if alg_params.get('optimizers_list') is None: - a_lr, c_lr = 1e-4, 2e-4 # a_lr: learning rate of the actor; c_lr: learning rate of the critic - a_optimizer = tf.optimizers.Adam(a_lr) - c_optimizer = tf.optimizers.Adam(c_lr) - optimizers_list = [a_optimizer, c_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=200, - train_episodes=500, - test_episodes=100, - save_interval=50, - ) - - return alg_params, learn_params - - -def mujoco(env, default_seed=True): - if default_seed: - seed = 1 - set_seed(seed, env) # reproducible - - alg_params = dict( - gamma=0.9, - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('AC'): - with tf.name_scope('Critic'): - critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Actor'): - actor = StochasticPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim], - output_activation=tf.nn.tanh) - net_list = [actor, critic] - alg_params['net_list'] = net_list - if alg_params.get('optimizers_list') is None: - a_lr, c_lr = 1e-4, 2e-4 # a_lr: learning rate of the actor; c_lr: learning rate of the critic - a_optimizer = tf.optimizers.Adam(a_lr) - c_optimizer = tf.optimizers.Adam(c_lr) - optimizers_list = [a_optimizer, c_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=200, - train_episodes=500, - test_episodes=100, - save_interval=50, - ) - - return alg_params, learn_params - - -def robotics(env, default_seed=True): - if default_seed: - seed = 1 - set_seed(seed, env) # reproducible - - alg_params = dict( - gamma=0.9, - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 
# dimension of hidden layers for the networks - with tf.name_scope('AC'): - with tf.name_scope('Critic'): - critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Actor'): - actor = StochasticPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim], - output_activation=tf.nn.tanh) - net_list = [actor, critic] - alg_params['net_list'] = net_list - if alg_params.get('optimizers_list') is None: - a_lr, c_lr = 1e-4, 2e-4 # a_lr: learning rate of the actor; c_lr: learning rate of the critic - a_optimizer = tf.optimizers.Adam(a_lr) - c_optimizer = tf.optimizers.Adam(c_lr) - optimizers_list = [a_optimizer, c_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=200, - train_episodes=500, - test_episodes=100, - save_interval=50, - ) - - return alg_params, learn_params - - -def dm_control(env, default_seed=True): - if default_seed: - seed = 1 - set_seed(seed, env) # reproducible - - alg_params = dict( - gamma=0.9, - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('AC'): - with tf.name_scope('Critic'): - critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Actor'): - actor = StochasticPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim], - output_activation=tf.nn.tanh) - net_list = [actor, critic] - alg_params['net_list'] = net_list - if alg_params.get('optimizers_list') is None: - a_lr, c_lr = 1e-4, 2e-4 # a_lr: learning rate of the actor; c_lr: learning rate of the critic - a_optimizer = tf.optimizers.Adam(a_lr) - c_optimizer = tf.optimizers.Adam(c_lr) - optimizers_list = [a_optimizer, c_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=200, - train_episodes=500, - test_episodes=100, - save_interval=50, - ) - - return alg_params, learn_params - - -def rlbench(env, default_seed=True): - if default_seed: - seed = 1 - set_seed(seed, env) # reproducible - - alg_params = dict( - gamma=0.9, - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('AC'): - with tf.name_scope('Critic'): - critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Actor'): - actor = StochasticPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim], - output_activation=tf.nn.tanh) - net_list = [actor, critic] - alg_params['net_list'] = net_list - if alg_params.get('optimizers_list') is None: - a_lr, c_lr = 1e-4, 2e-4 # a_lr: learning rate of the actor; c_lr: learning rate of the critic - a_optimizer = tf.optimizers.Adam(a_lr) - c_optimizer = tf.optimizers.Adam(c_lr) - optimizers_list = [a_optimizer, c_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=200, - train_episodes=500, - test_episodes=100, - save_interval=50, - ) - - return alg_params, learn_params +import tensorflow as tf +import tensorlayer as tl + +from rlzoo.common import math_utils +from rlzoo.common.value_networks import * +from rlzoo.common.policy_networks import * +from gym import spaces +from rlzoo.common.utils 
import set_seed + +""" +full list of algorithm parameters (alg_params) +----------------------------------------------- +net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization +optimizers_list: a list of optimizers for all networks and differentiable variables +gamma: discounted factor of reward +action_range: scale of action values +----------------------------------------------- + +full list of learning parameters (learn_params) +----------------------------------------------- +env: learning environment +train_episodes: total number of episodes for training +test_episodes: total number of episodes for testing +max_steps: maximum number of steps for one episode +save_interval: time steps for saving the weights and plotting the results +mode: 'train' or 'test' +render: if true, visualize the environment +------------------------------------------------ +""" + + +def atari(env, default_seed=True): + if default_seed: + seed = 1 + set_seed(seed, env) # reproducible + + alg_params = dict( + gamma=0.9, + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('AC'): + with tf.name_scope('Critic'): + critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Actor'): + actor = StochasticPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim], + output_activation=tf.nn.tanh) + net_list = [actor, critic] + alg_params['net_list'] = net_list + if alg_params.get('optimizers_list') is None: + a_lr, c_lr = 1e-4, 2e-4 # a_lr: learning rate of the actor; c_lr: learning rate of the critic + a_optimizer = tf.optimizers.Adam(a_lr) + c_optimizer = tf.optimizers.Adam(c_lr) + optimizers_list = [a_optimizer, c_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=200, + train_episodes=500, + test_episodes=100, + save_interval=50, + ) + + return alg_params, learn_params + + +def classic_control(env, default_seed=True): + if default_seed: + seed = 1 + set_seed(seed, env) # reproducible + + alg_params = dict( + gamma=0.9, + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('AC'): + with tf.name_scope('Critic'): + critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Actor'): + actor = StochasticPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim], + output_activation=tf.nn.tanh) + net_list = [actor, critic] + alg_params['net_list'] = net_list + if alg_params.get('optimizers_list') is None: + a_lr, c_lr = 1e-4, 2e-4 # a_lr: learning rate of the actor; c_lr: learning rate of the critic + a_optimizer = tf.optimizers.Adam(a_lr) + c_optimizer = tf.optimizers.Adam(c_lr) + optimizers_list = [a_optimizer, c_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=200, + train_episodes=500, + test_episodes=100, + save_interval=50, + ) + + return alg_params, learn_params + + +def box2d(env, default_seed=True): + if default_seed: + seed = 1 + set_seed(seed, env) # reproducible + + alg_params = dict( + gamma=0.9, + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of 
hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('AC'): + with tf.name_scope('Critic'): + critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Actor'): + actor = StochasticPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim], + output_activation=tf.nn.tanh) + net_list = [actor, critic] + alg_params['net_list'] = net_list + if alg_params.get('optimizers_list') is None: + a_lr, c_lr = 1e-4, 2e-4 # a_lr: learning rate of the actor; c_lr: learning rate of the critic + a_optimizer = tf.optimizers.Adam(a_lr) + c_optimizer = tf.optimizers.Adam(c_lr) + optimizers_list = [a_optimizer, c_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=200, + train_episodes=500, + test_episodes=100, + save_interval=50, + ) + + return alg_params, learn_params + + +def mujoco(env, default_seed=True): + if default_seed: + seed = 1 + set_seed(seed, env) # reproducible + + alg_params = dict( + gamma=0.9, + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('AC'): + with tf.name_scope('Critic'): + critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Actor'): + actor = StochasticPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim], + output_activation=tf.nn.tanh) + net_list = [actor, critic] + alg_params['net_list'] = net_list + if alg_params.get('optimizers_list') is None: + a_lr, c_lr = 1e-4, 2e-4 # a_lr: learning rate of the actor; c_lr: learning rate of the critic + a_optimizer = tf.optimizers.Adam(a_lr) + c_optimizer = tf.optimizers.Adam(c_lr) + optimizers_list = [a_optimizer, c_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=200, + train_episodes=500, + test_episodes=100, + save_interval=50, + ) + + return alg_params, learn_params + + +def robotics(env, default_seed=True): + if default_seed: + seed = 1 + set_seed(seed, env) # reproducible + + alg_params = dict( + gamma=0.9, + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('AC'): + with tf.name_scope('Critic'): + critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Actor'): + actor = StochasticPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim], + output_activation=tf.nn.tanh) + net_list = [actor, critic] + alg_params['net_list'] = net_list + if alg_params.get('optimizers_list') is None: + a_lr, c_lr = 1e-4, 2e-4 # a_lr: learning rate of the actor; c_lr: learning rate of the critic + a_optimizer = tf.optimizers.Adam(a_lr) + c_optimizer = tf.optimizers.Adam(c_lr) + optimizers_list = [a_optimizer, c_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=200, + train_episodes=500, + test_episodes=100, + save_interval=50, + ) + + return alg_params, learn_params + + +def dm_control(env, default_seed=True): + if default_seed: + seed = 1 + set_seed(seed, env) # reproducible + + alg_params = dict( + gamma=0.9, + ) + if alg_params.get('net_list') 
is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('AC'): + with tf.name_scope('Critic'): + critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Actor'): + actor = StochasticPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim], + output_activation=tf.nn.tanh) + net_list = [actor, critic] + alg_params['net_list'] = net_list + if alg_params.get('optimizers_list') is None: + a_lr, c_lr = 1e-4, 2e-4 # a_lr: learning rate of the actor; c_lr: learning rate of the critic + a_optimizer = tf.optimizers.Adam(a_lr) + c_optimizer = tf.optimizers.Adam(c_lr) + optimizers_list = [a_optimizer, c_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=200, + train_episodes=500, + test_episodes=100, + save_interval=50, + ) + + return alg_params, learn_params + + +def rlbench(env, default_seed=True): + if default_seed: + seed = 1 + set_seed(seed, env) # reproducible + + alg_params = dict( + gamma=0.9, + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('AC'): + with tf.name_scope('Critic'): + critic = ValueNetwork(env.observation_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Actor'): + actor = StochasticPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim], + output_activation=tf.nn.tanh) + net_list = [actor, critic] + alg_params['net_list'] = net_list + if alg_params.get('optimizers_list') is None: + a_lr, c_lr = 1e-4, 2e-4 # a_lr: learning rate of the actor; c_lr: learning rate of the critic + a_optimizer = tf.optimizers.Adam(a_lr) + c_optimizer = tf.optimizers.Adam(c_lr) + optimizers_list = [a_optimizer, c_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=200, + train_episodes=500, + test_episodes=100, + save_interval=50, + ) + + return alg_params, learn_params diff --git a/rlzoo/algorithms/ac/run_ac.py b/rlzoo/algorithms/ac/run_ac.py old mode 100644 new mode 100755 index 2162fc5..bace465 --- a/rlzoo/algorithms/ac/run_ac.py +++ b/rlzoo/algorithms/ac/run_ac.py @@ -1,59 +1,59 @@ -from rlzoo.common.utils import set_seed -from rlzoo.algorithms.ac.ac import AC -from rlzoo.common.value_networks import * -from rlzoo.common.policy_networks import * -import gym - -""" load environment """ -# env = gym.make('CartPole-v0').unwrapped -env = gym.make('Pendulum-v0').unwrapped -obs_space = env.observation_space -act_space = env.action_space -# reproducible -seed = 1 -set_seed(seed, env) - -# env = DummyVecEnv([lambda: env]) # The algorithms require a vectorized/wrapped environment to run - - -""" build networks for the algorithm """ -num_hidden_layer = 2 # number of hidden layers for the networks -hidden_dim = 64 # dimension of hidden layers for the networks -with tf.name_scope('AC'): - with tf.name_scope('Critic'): - critic = ValueNetwork(obs_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Actor'): - actor = StochasticPolicyNetwork(obs_space, act_space, hidden_dim_list=num_hidden_layer * [hidden_dim], - output_activation=tf.nn.tanh) -net_list = [actor, critic] - -""" choose optimizers """ -a_lr, c_lr = 1e-4, 2e-4 # a_lr: learning rate of the actor; c_lr: 
learning rate of the critic -a_optimizer = tf.optimizers.Adam(a_lr) -c_optimizer = tf.optimizers.Adam(c_lr) -optimizers_list = [a_optimizer, c_optimizer] - -model = AC(net_list, optimizers_list) -""" -full list of arguments for the algorithm ----------------------------------------- -net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization -optimizers_list: a list of optimizers for all networks and differentiable variables -gamma: discounted factor of reward -action_range: scale of action values -""" - -model.learn(env, train_episodes=500, max_steps=200, - save_interval=50, mode='train', render=False) -""" -full list of parameters for training ---------------------------------------- -env: learning environment -train_episodes: total number of episodes for training -test_episodes: total number of episodes for testing -max_steps: maximum number of steps for one episode -save_interval: time steps for saving the weights and plotting the results -mode: 'train' or 'test' -render: if true, visualize the environment -""" -model.learn(env, test_episodes=100, max_steps=200, mode='test', render=True) +from rlzoo.common.utils import set_seed +from rlzoo.algorithms.ac.ac import AC +from rlzoo.common.value_networks import * +from rlzoo.common.policy_networks import * +import gym + +""" load environment """ +# env = gym.make('CartPole-v0').unwrapped +env = gym.make('Pendulum-v0').unwrapped +obs_space = env.observation_space +act_space = env.action_space +# reproducible +seed = 1 +set_seed(seed, env) + +# env = DummyVecEnv([lambda: env]) # The algorithms require a vectorized/wrapped environment to run + + +""" build networks for the algorithm """ +num_hidden_layer = 2 # number of hidden layers for the networks +hidden_dim = 64 # dimension of hidden layers for the networks +with tf.name_scope('AC'): + with tf.name_scope('Critic'): + critic = ValueNetwork(obs_space, hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Actor'): + actor = StochasticPolicyNetwork(obs_space, act_space, hidden_dim_list=num_hidden_layer * [hidden_dim], + output_activation=tf.nn.tanh) +net_list = [actor, critic] + +""" choose optimizers """ +a_lr, c_lr = 1e-4, 2e-4 # a_lr: learning rate of the actor; c_lr: learning rate of the critic +a_optimizer = tf.optimizers.Adam(a_lr) +c_optimizer = tf.optimizers.Adam(c_lr) +optimizers_list = [a_optimizer, c_optimizer] + +model = AC(net_list, optimizers_list) +""" +full list of arguments for the algorithm +---------------------------------------- +net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization +optimizers_list: a list of optimizers for all networks and differentiable variables +gamma: discounted factor of reward +action_range: scale of action values +""" + +model.learn(env, train_episodes=500, max_steps=200, + save_interval=50, mode='train', render=False) +""" +full list of parameters for training +--------------------------------------- +env: learning environment +train_episodes: total number of episodes for training +test_episodes: total number of episodes for testing +max_steps: maximum number of steps for one episode +save_interval: time steps for saving the weights and plotting the results +mode: 'train' or 'test' +render: if true, visualize the environment +""" +model.learn(env, test_episodes=100, max_steps=200, mode='test', render=True) diff --git a/rlzoo/algorithms/ddpg/__init__.py b/rlzoo/algorithms/ddpg/__init__.py old mode 100644 new mode 100755 diff 
--git a/rlzoo/algorithms/ddpg/ddpg.py b/rlzoo/algorithms/ddpg/ddpg.py old mode 100644 new mode 100755 index 569e98a..d5feb1c --- a/rlzoo/algorithms/ddpg/ddpg.py +++ b/rlzoo/algorithms/ddpg/ddpg.py @@ -1,275 +1,275 @@ -""" -Deep Deterministic Policy Gradient (DDPG) ------------------------------------------ -An algorithm concurrently learns a Q-function and a policy. -It uses off-policy data and the Bellman equation to learn the Q-function, -and uses the Q-function to learn the policy. -Reference ---------- -Deterministic Policy Gradient Algorithms, Silver et al. 2014 -Continuous Control With Deep Reinforcement Learning, Lillicrap et al. 2016 -MorvanZhou's tutorial page: https://morvanzhou.github.io/tutorials/ -MorvanZhou's code: https://github.com/MorvanZhou/Reinforcement-learning-with-tensorflow/ - -Prerequisites -------------- -tensorflow >=2.0.0a0 -tensorflow-probability 0.6.0 -tensorlayer >=2.0.0 -""" - -import time - -from rlzoo.common.utils import * -from rlzoo.common.buffer import * -from rlzoo.common.policy_networks import * -from rlzoo.common.value_networks import * - - -############################### DDPG #################################### - - -class DDPG(object): - """ - DDPG class - """ - - def __init__(self, net_list, optimizers_list, replay_buffer_size, action_range=1., tau=0.01): - """ - :param net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization - :param optimizers_list: a list of optimizers for all networks and differentiable variables - :param replay_buffer_size: the size of buffer for storing explored samples - :param tau: soft update factor - """ - assert len(net_list) == 4 - assert len(optimizers_list) == 2 - self.name = 'DDPG' - - self.critic, self.critic_target, self.actor, self.actor_target = net_list - - assert isinstance(self.critic, QNetwork) - assert isinstance(self.critic_target, QNetwork) - assert isinstance(self.actor, DeterministicPolicyNetwork) - assert isinstance(self.actor_target, DeterministicPolicyNetwork) - assert isinstance(self.actor.action_space, gym.spaces.Box) - - def copy_para(from_model, to_model): - for i, j in zip(from_model.trainable_weights, to_model.trainable_weights): - j.assign(i) - - copy_para(self.actor, self.actor_target) - copy_para(self.critic, self.critic_target) - - self.replay_buffer_size = replay_buffer_size - self.buffer = ReplayBuffer(replay_buffer_size) - - self.ema = tf.train.ExponentialMovingAverage(decay=1 - tau) # soft replacement - self.action_range = action_range - - self.critic_opt, self.actor_opt = optimizers_list - - def ema_update(self): - """ - Soft updating by exponential smoothing - - :return: None - """ - paras = self.actor.trainable_weights + self.critic.trainable_weights - self.ema.apply(paras) - for i, j in zip(self.actor_target.trainable_weights + self.critic_target.trainable_weights, paras): - i.assign(self.ema.average(j)) - - def sample_action(self): - """ generate random actions for exploration """ - a = tf.random.uniform(self.actor.action_space.shape, self.actor.action_space.low, self.actor.action_space.high) - return a - - def get_action(self, s, noise_scale): - """ - Choose action with exploration - - :param s: state - - :return: action - """ - a = self.actor([s])[0].numpy()*self.action_range - - # add randomness to action selection for exploration - noise = np.random.normal(0, 1, a.shape) * noise_scale - a += noise - a = np.clip(a, self.actor.action_space.low, self.actor.action_space.high) - - return a - - def get_action_greedy(self, s): - """ 
- Choose action - - :param s: state - - :return: action - """ - return self.actor([s])[0].numpy()*self.action_range - - def update(self, batch_size, gamma): - """ - Update parameters - - :param batch_size: update batch size - :param gamma: reward decay factor - - :return: - """ - bs, ba, br, bs_, bd = self.buffer.sample(batch_size) - - ba_ = self.actor_target(bs_)*self.action_range - - q_ = self.critic_target([bs_, ba_]) - y = br + (1 - bd) * gamma * q_ - with tf.GradientTape() as tape: - q = self.critic([bs, ba]) - td_error = tf.losses.mean_squared_error(y, q) - c_grads = tape.gradient(td_error, self.critic.trainable_weights) - self.critic_opt.apply_gradients(zip(c_grads, self.critic.trainable_weights)) - - with tf.GradientTape() as tape: - a = self.actor(bs)*self.action_range - q = self.critic([bs, a]) - a_loss = - tf.reduce_mean(q) # maximize the q - a_grads = tape.gradient(a_loss, self.actor.trainable_weights) - self.actor_opt.apply_gradients(zip(a_grads, self.actor.trainable_weights)) - self.ema_update() - - def store_transition(self, s, a, r, s_, d): - """ - Store data in data buffer - - :param s: state - :param a: act - :param r: reward - :param s_: next state - - :return: None - """ - d = 1 if d else 0 - - self.buffer.push(s, a, [r], s_, d) - - def save_ckpt(self, env_name): - """ - save trained weights - - :return: None - """ - save_model(self.actor, 'model_policy_net', self.name, env_name) - save_model(self.actor_target, 'model_target_policy_net', self.name, env_name) - save_model(self.critic, 'model_q_net', self.name, env_name) - save_model(self.critic_target, 'model_target_q_net', self.name, env_name) - - def load_ckpt(self, env_name): - """ - load trained weights - - :return: None - """ - load_model(self.actor, 'model_policy_net', self.name, env_name) - load_model(self.actor_target, 'model_target_policy_net', self.name, env_name) - load_model(self.critic, 'model_q_net', self.name, env_name) - load_model(self.critic_target, 'model_target_q_net', self.name, env_name) - - def learn(self, env, train_episodes=200, test_episodes=100, max_steps=200, save_interval=10, explore_steps=500, - mode='train', render=False, batch_size=32, gamma=0.9, noise_scale=1., noise_scale_decay=0.995, - plot_func=None): - """ - learn function - - :param env: learning environment - :param train_episodes: total number of episodes for training - :param test_episodes: total number of episodes for testing - :param max_steps: maximum number of steps for one episode - :param save_interval: time steps for saving - :param explore_steps: for random action sampling in the beginning of training - :param mode: train or test mode - :param render: render each step - :param batch_size: update batch size - :param gamma: reward decay factor - :param noise_scale: range of action noise for exploration - :param noise_scale_decay: noise scale decay factor - :param plot_func: additional function for interactive module - :return: None - """ - - t0 = time.time() - - if mode == 'train': # train - print('Training... 
| Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) - reward_buffer = [] - frame_idx = 0 - for i in range(1, train_episodes + 1): - s = env.reset() - ep_reward = 0 - - for j in range(max_steps): - if render: - env.render() - # Add exploration noise - if frame_idx > explore_steps: - a = self.get_action(s, noise_scale) - else: - a = self.sample_action() - frame_idx += 1 - - s_, r, done, info = env.step(a) - - self.store_transition(s, a, r, s_, done) - if len(self.buffer) >= self.replay_buffer_size: - self.update(batch_size, gamma) - noise_scale *= noise_scale_decay - s = s_ - ep_reward += r - - if done: - break - - print( - 'Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format( - i, train_episodes, ep_reward, - time.time() - t0 - ) - ) - - reward_buffer.append(ep_reward) - if plot_func is not None: - plot_func(reward_buffer) - if i and not i % save_interval: - self.save_ckpt(env_name=env.spec.id) - plot_save_log(reward_buffer, algorithm_name=self.name, env_name=env.spec.id) - - self.save_ckpt(env_name=env.spec.id) - plot_save_log(reward_buffer, algorithm_name=self.name, env_name=env.spec.id) - - # test - elif mode == 'test': - self.load_ckpt(env_name=env.spec.id) - print('Testing... | Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) - reward_buffer = [] - for eps in range(1, test_episodes+1): - ep_rs_sum = 0 - s = env.reset() - for step in range(max_steps): - if render: - env.render() - action = self.get_action_greedy(s) - s, reward, done, info = env.step(action) - ep_rs_sum += reward - if done: - break - - print('Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format( - eps, test_episodes, ep_rs_sum, time.time() - t0) - ) - reward_buffer.append(ep_rs_sum) - if plot_func: - plot_func(reward_buffer) - else: +""" +Deep Deterministic Policy Gradient (DDPG) +----------------------------------------- +An algorithm concurrently learns a Q-function and a policy. +It uses off-policy data and the Bellman equation to learn the Q-function, +and uses the Q-function to learn the policy. +Reference +--------- +Deterministic Policy Gradient Algorithms, Silver et al. 2014 +Continuous Control With Deep Reinforcement Learning, Lillicrap et al. 
2016 +MorvanZhou's tutorial page: https://morvanzhou.github.io/tutorials/ +MorvanZhou's code: https://github.com/MorvanZhou/Reinforcement-learning-with-tensorflow/ + +Prerequisites +------------- +tensorflow >=2.0.0a0 +tensorflow-probability 0.6.0 +tensorlayer >=2.0.0 +""" + +import time + +from rlzoo.common.utils import * +from rlzoo.common.buffer import * +from rlzoo.common.policy_networks import * +from rlzoo.common.value_networks import * + + +############################### DDPG #################################### + + +class DDPG(object): + """ + DDPG class + """ + + def __init__(self, net_list, optimizers_list, replay_buffer_size, action_range=1., tau=0.01): + """ + :param net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization + :param optimizers_list: a list of optimizers for all networks and differentiable variables + :param replay_buffer_size: the size of buffer for storing explored samples + :param tau: soft update factor + """ + assert len(net_list) == 4 + assert len(optimizers_list) == 2 + self.name = 'DDPG' + + self.critic, self.critic_target, self.actor, self.actor_target = net_list + + assert isinstance(self.critic, QNetwork) + assert isinstance(self.critic_target, QNetwork) + assert isinstance(self.actor, DeterministicPolicyNetwork) + assert isinstance(self.actor_target, DeterministicPolicyNetwork) + assert isinstance(self.actor.action_space, gym.spaces.Box) + + def copy_para(from_model, to_model): + for i, j in zip(from_model.trainable_weights, to_model.trainable_weights): + j.assign(i) + + copy_para(self.actor, self.actor_target) + copy_para(self.critic, self.critic_target) + + self.replay_buffer_size = replay_buffer_size + self.buffer = ReplayBuffer(replay_buffer_size) + + self.ema = tf.train.ExponentialMovingAverage(decay=1 - tau) # soft replacement + self.action_range = action_range + + self.critic_opt, self.actor_opt = optimizers_list + + def ema_update(self): + """ + Soft updating by exponential smoothing + + :return: None + """ + paras = self.actor.trainable_weights + self.critic.trainable_weights + self.ema.apply(paras) + for i, j in zip(self.actor_target.trainable_weights + self.critic_target.trainable_weights, paras): + i.assign(self.ema.average(j)) + + def sample_action(self): + """ generate random actions for exploration """ + a = tf.random.uniform(self.actor.action_space.shape, self.actor.action_space.low, self.actor.action_space.high) + return a + + def get_action(self, s, noise_scale): + """ + Choose action with exploration + + :param s: state + + :return: action + """ + a = self.actor([s])[0].numpy()*self.action_range + + # add randomness to action selection for exploration + noise = np.random.normal(0, 1, a.shape) * noise_scale + a += noise + a = np.clip(a, self.actor.action_space.low, self.actor.action_space.high) + + return a + + def get_action_greedy(self, s): + """ + Choose action + + :param s: state + + :return: action + """ + return self.actor([s])[0].numpy()*self.action_range + + def update(self, batch_size, gamma): + """ + Update parameters + + :param batch_size: update batch size + :param gamma: reward decay factor + + :return: + """ + bs, ba, br, bs_, bd = self.buffer.sample(batch_size) + + ba_ = self.actor_target(bs_)*self.action_range + + q_ = self.critic_target([bs_, ba_]) + y = br + (1 - bd) * gamma * q_ + with tf.GradientTape() as tape: + q = self.critic([bs, ba]) + td_error = tf.losses.mean_squared_error(y, q) + c_grads = tape.gradient(td_error, self.critic.trainable_weights) + 
self.critic_opt.apply_gradients(zip(c_grads, self.critic.trainable_weights)) + + with tf.GradientTape() as tape: + a = self.actor(bs)*self.action_range + q = self.critic([bs, a]) + a_loss = - tf.reduce_mean(q) # maximize the q + a_grads = tape.gradient(a_loss, self.actor.trainable_weights) + self.actor_opt.apply_gradients(zip(a_grads, self.actor.trainable_weights)) + self.ema_update() + + def store_transition(self, s, a, r, s_, d): + """ + Store data in data buffer + + :param s: state + :param a: act + :param r: reward + :param s_: next state + + :return: None + """ + d = 1 if d else 0 + + self.buffer.push(s, a, [r], s_, d) + + def save_ckpt(self, env_name): + """ + save trained weights + + :return: None + """ + save_model(self.actor, 'model_policy_net', self.name, env_name) + save_model(self.actor_target, 'model_target_policy_net', self.name, env_name) + save_model(self.critic, 'model_q_net', self.name, env_name) + save_model(self.critic_target, 'model_target_q_net', self.name, env_name) + + def load_ckpt(self, env_name): + """ + load trained weights + + :return: None + """ + load_model(self.actor, 'model_policy_net', self.name, env_name) + load_model(self.actor_target, 'model_target_policy_net', self.name, env_name) + load_model(self.critic, 'model_q_net', self.name, env_name) + load_model(self.critic_target, 'model_target_q_net', self.name, env_name) + + def learn(self, env, train_episodes=200, test_episodes=100, max_steps=200, save_interval=10, explore_steps=500, + mode='train', render=False, batch_size=32, gamma=0.9, noise_scale=1., noise_scale_decay=0.995, + plot_func=None): + """ + learn function + + :param env: learning environment + :param train_episodes: total number of episodes for training + :param test_episodes: total number of episodes for testing + :param max_steps: maximum number of steps for one episode + :param save_interval: time steps for saving + :param explore_steps: for random action sampling in the beginning of training + :param mode: train or test mode + :param render: render each step + :param batch_size: update batch size + :param gamma: reward decay factor + :param noise_scale: range of action noise for exploration + :param noise_scale_decay: noise scale decay factor + :param plot_func: additional function for interactive module + :return: None + """ + + t0 = time.time() + + if mode == 'train': # train + print('Training... 
| Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) + reward_buffer = [] + frame_idx = 0 + for i in range(1, train_episodes + 1): + s = env.reset() + ep_reward = 0 + + for j in range(max_steps): + if render: + env.render() + # Add exploration noise + if frame_idx > explore_steps: + a = self.get_action(s, noise_scale) + else: + a = self.sample_action() + frame_idx += 1 + + s_, r, done, info = env.step(a) + + self.store_transition(s, a, r, s_, done) + if len(self.buffer) >= self.replay_buffer_size: + self.update(batch_size, gamma) + noise_scale *= noise_scale_decay + s = s_ + ep_reward += r + + if done: + break + + print( + 'Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format( + i, train_episodes, ep_reward, + time.time() - t0 + ) + ) + + reward_buffer.append(ep_reward) + if plot_func is not None: + plot_func(reward_buffer) + if i and not i % save_interval: + self.save_ckpt(env_name=env.spec.id) + plot_save_log(reward_buffer, algorithm_name=self.name, env_name=env.spec.id) + + self.save_ckpt(env_name=env.spec.id) + plot_save_log(reward_buffer, algorithm_name=self.name, env_name=env.spec.id) + + # test + elif mode == 'test': + self.load_ckpt(env_name=env.spec.id) + print('Testing... | Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) + reward_buffer = [] + for eps in range(1, test_episodes+1): + ep_rs_sum = 0 + s = env.reset() + for step in range(max_steps): + if render: + env.render() + action = self.get_action_greedy(s) + s, reward, done, info = env.step(action) + ep_rs_sum += reward + if done: + break + + print('Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format( + eps, test_episodes, ep_rs_sum, time.time() - t0) + ) + reward_buffer.append(ep_rs_sum) + if plot_func: + plot_func(reward_buffer) + else: print('unknown mode type') \ No newline at end of file diff --git a/rlzoo/algorithms/ddpg/default.py b/rlzoo/algorithms/ddpg/default.py old mode 100644 new mode 100755 index 7e79b1c..0a5cc0a --- a/rlzoo/algorithms/ddpg/default.py +++ b/rlzoo/algorithms/ddpg/default.py @@ -1,327 +1,327 @@ -from rlzoo.common.policy_networks import * -from rlzoo.common.value_networks import * -from rlzoo.common.utils import set_seed - -""" -full list of algorithm parameters (alg_params) ------------------------------------------------ -net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization -optimizers_list: a list of optimizers for all networks and differentiable variables -replay_buffer_size: the size of buffer for storing explored samples -tau: soft update factor ------------------------------------------------ - -full list of learning parameters (learn_params) ------------------------------------------------ -train_episodes: total number of episodes for training -test_episodes: total number of episodes for testing -max_steps: maximum number of steps for one episode -save_interval: time steps for saving -explore_steps: for random action sampling in the beginning of training -mode: train or test mode -render: render each step -batch_size: update batch size -gamma: reward decay factor -noise_scale: range of action noise for exploration -noise_scale_decay: noise scale decay factor ------------------------------------------------ -""" - -def classic_control(env, default_seed=True): - if default_seed: - # reproducible - seed = 2 - set_seed(seed, env) - - alg_params = dict( - replay_buffer_size=10000, - tau=0.01, - ) - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of 
hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('DDPG'): - with tf.name_scope('Q_Net'): - q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net'): - target_q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim]) - with tf.name_scope('Policy'): - policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Policy'): - target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - num_hidden_layer * [hidden_dim]) - - net_list = [q_net, target_q_net, policy_net, target_policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - actor_lr = 1e-3 - critic_lr = 2e-3 - optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - train_episodes=100, - test_episodes=10, - max_steps=200, - save_interval=10, - explore_steps=500, - batch_size=32, - gamma=0.9, - noise_scale=1., - noise_scale_decay=0.995 - ) - - return alg_params, learn_params - - -def box2d(env, default_seed=True): - if default_seed: - # reproducible - seed = 2 - set_seed(seed, env) - - alg_params = dict( - replay_buffer_size=10000, - tau=0.01, - ) - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('DDPG'): - with tf.name_scope('Q_Net'): - q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net'): - target_q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim]) - with tf.name_scope('Policy'): - policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Policy'): - target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - num_hidden_layer * [hidden_dim]) - - net_list = [q_net, target_q_net, policy_net, target_policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - actor_lr = 1e-3 - critic_lr = 2e-3 - optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - train_episodes=100, - test_episodes=10, - max_steps=200, - save_interval=10, - explore_steps=500, - batch_size=32, - gamma=0.9, - noise_scale=1., - noise_scale_decay=0.995 - ) - - return alg_params, learn_params - - -def mujoco(env, default_seed=True): - if default_seed: - # reproducible - seed = 2 - set_seed(seed, env) - - alg_params = dict( - replay_buffer_size=10000, - tau=0.01, - ) - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('DDPG'): - with tf.name_scope('Q_Net'): - q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net'): - target_q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim]) - with tf.name_scope('Policy'): - policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - 
num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Policy'): - target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - num_hidden_layer * [hidden_dim]) - - net_list = [q_net, target_q_net, policy_net, target_policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - actor_lr = 1e-3 - critic_lr = 2e-3 - optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - train_episodes=100, - test_episodes=10, - max_steps=200, - save_interval=10, - explore_steps=500, - batch_size=32, - gamma=0.9, - noise_scale=1., - noise_scale_decay=0.995 - ) - - return alg_params, learn_params - - -def robotics(env, default_seed=True): - if default_seed: - # reproducible - seed = 2 - set_seed(seed, env) - - alg_params = dict( - replay_buffer_size=10000, - tau=0.01, - ) - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('DDPG'): - with tf.name_scope('Q_Net'): - q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net'): - target_q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim]) - with tf.name_scope('Policy'): - policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Policy'): - target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - num_hidden_layer * [hidden_dim]) - - net_list = [q_net, target_q_net, policy_net, target_policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - actor_lr = 1e-3 - critic_lr = 2e-3 - optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - train_episodes=100, - test_episodes=10, - max_steps=200, - save_interval=10, - explore_steps=500, - batch_size=32, - gamma=0.9, - noise_scale=1., - noise_scale_decay=0.995 - ) - - return alg_params, learn_params - - -def dm_control(env, default_seed=True): - if default_seed: - # reproducible - seed = 2 - set_seed(seed, env) - - alg_params = dict( - replay_buffer_size=10000, - tau=0.01, - ) - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('DDPG'): - with tf.name_scope('Q_Net'): - q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net'): - target_q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim]) - with tf.name_scope('Policy'): - policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Policy'): - target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - num_hidden_layer * [hidden_dim]) - - net_list = [q_net, target_q_net, policy_net, target_policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - actor_lr = 1e-3 - critic_lr = 2e-3 - optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] - alg_params['optimizers_list'] = 
optimizers_list - - learn_params = dict( - train_episodes=100, - test_episodes=10, - max_steps=200, - save_interval=10, - explore_steps=500, - batch_size=32, - gamma=0.9, - noise_scale=1., - noise_scale_decay=0.995 - ) - - return alg_params, learn_params - - -def rlbench(env, default_seed=True): - if default_seed: - # reproducible - seed = 2 - set_seed(seed, env) - - alg_params = dict( - replay_buffer_size=1000, - tau=0.01, - ) - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('DDPG'): - with tf.name_scope('Q_Net'): - q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net'): - target_q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim]) - with tf.name_scope('Policy'): - policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Policy'): - target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - num_hidden_layer * [hidden_dim]) - - net_list = [q_net, target_q_net, policy_net, target_policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - actor_lr = 1e-3 - critic_lr = 2e-3 - optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - train_episodes=100, - test_episodes=10, - max_steps=200, - save_interval=10, - explore_steps=500, - batch_size=32, - gamma=0.9, - noise_scale=1., - noise_scale_decay=0.995 - ) - +from rlzoo.common.policy_networks import * +from rlzoo.common.value_networks import * +from rlzoo.common.utils import set_seed + +""" +full list of algorithm parameters (alg_params) +----------------------------------------------- +net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization +optimizers_list: a list of optimizers for all networks and differentiable variables +replay_buffer_size: the size of buffer for storing explored samples +tau: soft update factor +----------------------------------------------- + +full list of learning parameters (learn_params) +----------------------------------------------- +train_episodes: total number of episodes for training +test_episodes: total number of episodes for testing +max_steps: maximum number of steps for one episode +save_interval: time steps for saving +explore_steps: for random action sampling in the beginning of training +mode: train or test mode +render: render each step +batch_size: update batch size +gamma: reward decay factor +noise_scale: range of action noise for exploration +noise_scale_decay: noise scale decay factor +----------------------------------------------- +""" + +def classic_control(env, default_seed=True): + if default_seed: + # reproducible + seed = 2 + set_seed(seed, env) + + alg_params = dict( + replay_buffer_size=10000, + tau=0.01, + ) + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('DDPG'): + with tf.name_scope('Q_Net'): + q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net'): + target_q_net = QNetwork(env.observation_space, 
env.action_space, num_hidden_layer * [hidden_dim]) + with tf.name_scope('Policy'): + policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Policy'): + target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + num_hidden_layer * [hidden_dim]) + + net_list = [q_net, target_q_net, policy_net, target_policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + actor_lr = 1e-3 + critic_lr = 2e-3 + optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + train_episodes=100, + test_episodes=10, + max_steps=200, + save_interval=10, + explore_steps=500, + batch_size=32, + gamma=0.9, + noise_scale=1., + noise_scale_decay=0.995 + ) + + return alg_params, learn_params + + +def box2d(env, default_seed=True): + if default_seed: + # reproducible + seed = 2 + set_seed(seed, env) + + alg_params = dict( + replay_buffer_size=10000, + tau=0.01, + ) + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('DDPG'): + with tf.name_scope('Q_Net'): + q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net'): + target_q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim]) + with tf.name_scope('Policy'): + policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Policy'): + target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + num_hidden_layer * [hidden_dim]) + + net_list = [q_net, target_q_net, policy_net, target_policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + actor_lr = 1e-3 + critic_lr = 2e-3 + optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + train_episodes=100, + test_episodes=10, + max_steps=200, + save_interval=10, + explore_steps=500, + batch_size=32, + gamma=0.9, + noise_scale=1., + noise_scale_decay=0.995 + ) + + return alg_params, learn_params + + +def mujoco(env, default_seed=True): + if default_seed: + # reproducible + seed = 2 + set_seed(seed, env) + + alg_params = dict( + replay_buffer_size=10000, + tau=0.01, + ) + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('DDPG'): + with tf.name_scope('Q_Net'): + q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net'): + target_q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim]) + with tf.name_scope('Policy'): + policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Policy'): + target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + num_hidden_layer * [hidden_dim]) + + net_list = [q_net, target_q_net, policy_net, target_policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: 
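            # The optimizer list built below follows rlzoo's [critic_optimizer, actor_optimizer] ordering,
            # which DDPG.__init__ unpacks as `self.critic_opt, self.actor_opt = optimizers_list`.
            # Minimal usage sketch of the defaults in this file (illustrative only):
            #     alg_params, learn_params = mujoco(env)
            #     model = DDPG(**alg_params)
            #     model.learn(env, mode='train', **learn_params)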
+ actor_lr = 1e-3 + critic_lr = 2e-3 + optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + train_episodes=100, + test_episodes=10, + max_steps=200, + save_interval=10, + explore_steps=500, + batch_size=32, + gamma=0.9, + noise_scale=1., + noise_scale_decay=0.995 + ) + + return alg_params, learn_params + + +def robotics(env, default_seed=True): + if default_seed: + # reproducible + seed = 2 + set_seed(seed, env) + + alg_params = dict( + replay_buffer_size=10000, + tau=0.01, + ) + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('DDPG'): + with tf.name_scope('Q_Net'): + q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net'): + target_q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim]) + with tf.name_scope('Policy'): + policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Policy'): + target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + num_hidden_layer * [hidden_dim]) + + net_list = [q_net, target_q_net, policy_net, target_policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + actor_lr = 1e-3 + critic_lr = 2e-3 + optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + train_episodes=100, + test_episodes=10, + max_steps=200, + save_interval=10, + explore_steps=500, + batch_size=32, + gamma=0.9, + noise_scale=1., + noise_scale_decay=0.995 + ) + + return alg_params, learn_params + + +def dm_control(env, default_seed=True): + if default_seed: + # reproducible + seed = 2 + set_seed(seed, env) + + alg_params = dict( + replay_buffer_size=10000, + tau=0.01, + ) + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('DDPG'): + with tf.name_scope('Q_Net'): + q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net'): + target_q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim]) + with tf.name_scope('Policy'): + policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Policy'): + target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + num_hidden_layer * [hidden_dim]) + + net_list = [q_net, target_q_net, policy_net, target_policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + actor_lr = 1e-3 + critic_lr = 2e-3 + optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + train_episodes=100, + test_episodes=10, + max_steps=200, + save_interval=10, + explore_steps=500, + batch_size=32, + gamma=0.9, + noise_scale=1., + noise_scale_decay=0.995 + ) + + return alg_params, learn_params + + +def rlbench(env, default_seed=True): + if default_seed: + # reproducible + seed = 2 + 
set_seed(seed, env) + + alg_params = dict( + replay_buffer_size=1000, + tau=0.01, + ) + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('DDPG'): + with tf.name_scope('Q_Net'): + q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net'): + target_q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim]) + with tf.name_scope('Policy'): + policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Policy'): + target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + num_hidden_layer * [hidden_dim]) + + net_list = [q_net, target_q_net, policy_net, target_policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + actor_lr = 1e-3 + critic_lr = 2e-3 + optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + train_episodes=100, + test_episodes=10, + max_steps=200, + save_interval=10, + explore_steps=500, + batch_size=32, + gamma=0.9, + noise_scale=1., + noise_scale_decay=0.995 + ) + return alg_params, learn_params \ No newline at end of file diff --git a/rlzoo/algorithms/ddpg/run_ddpg.py b/rlzoo/algorithms/ddpg/run_ddpg.py old mode 100644 new mode 100755 index bc292dc..8723466 --- a/rlzoo/algorithms/ddpg/run_ddpg.py +++ b/rlzoo/algorithms/ddpg/run_ddpg.py @@ -1,66 +1,66 @@ -from rlzoo.common.utils import make_env, set_seed -from rlzoo.algorithms.ddpg.ddpg import DDPG -from rlzoo.common.policy_networks import * -from rlzoo.common.value_networks import * -import gym - -""" load environment """ -env = gym.make('Pendulum-v0').unwrapped - -obs_space = env.observation_space -act_space = env.action_space - -# reproducible -seed = 2 -set_seed(seed, env) - -""" build networks for the algorithm """ -name = 'DDPG' -num_hidden_layer = 2 # number of hidden layers for the networks -hidden_dim = 64 # dimension of hidden layers for the networks - -actor = DeterministicPolicyNetwork(obs_space, act_space, [hidden_dim] * num_hidden_layer) -critic = QNetwork(obs_space, act_space, [hidden_dim] * num_hidden_layer) - -actor_target = DeterministicPolicyNetwork(obs_space, act_space, [hidden_dim] * num_hidden_layer, trainable=False) - -critic_target = QNetwork(obs_space, act_space, [hidden_dim] * num_hidden_layer, trainable=False) - -net_list = [critic, critic_target, actor, actor_target] - -""" create model """ -actor_lr = 1e-3 -critic_lr = 2e-3 -optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] -replay_buffer_size = 10000 -model = DDPG(net_list, optimizers_list, replay_buffer_size) -""" -full list of arguments for the algorithm ----------------------------------------- -net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization -optimizers_list: a list of optimizers for all networks and differentiable variables -replay_buffer_size: the size of buffer for storing explored samples -tau: soft update factor -""" - -model.learn(env, train_episodes=100, max_steps=200, save_interval=10, - mode='train', render=False, batch_size=32, gamma=0.9, noise_scale=1., noise_scale_decay=0.995) -""" -full list of parameters for training 
---------------------------------------- -env: learning environment -train_episodes: total number of episodes for training -test_episodes: total number of episodes for testing -max_steps: maximum number of steps for one episode -save_interval: time steps for saving -explore_steps: for random action sampling in the beginning of training -mode: train or test mode -render: render each step -batch_size: update batch size -gamma: reward decay factor -noise_scale: range of action noise for exploration -noise_scale_decay: noise scale decay factor -""" - -model.learn(env, test_episodes=10, max_steps=200, mode='test', render=True) - +from rlzoo.common.utils import make_env, set_seed +from rlzoo.algorithms.ddpg.ddpg import DDPG +from rlzoo.common.policy_networks import * +from rlzoo.common.value_networks import * +import gym + +""" load environment """ +env = gym.make('Pendulum-v0').unwrapped + +obs_space = env.observation_space +act_space = env.action_space + +# reproducible +seed = 2 +set_seed(seed, env) + +""" build networks for the algorithm """ +name = 'DDPG' +num_hidden_layer = 2 # number of hidden layers for the networks +hidden_dim = 64 # dimension of hidden layers for the networks + +actor = DeterministicPolicyNetwork(obs_space, act_space, [hidden_dim] * num_hidden_layer) +critic = QNetwork(obs_space, act_space, [hidden_dim] * num_hidden_layer) + +actor_target = DeterministicPolicyNetwork(obs_space, act_space, [hidden_dim] * num_hidden_layer, trainable=False) + +critic_target = QNetwork(obs_space, act_space, [hidden_dim] * num_hidden_layer, trainable=False) + +net_list = [critic, critic_target, actor, actor_target] + +""" create model """ +actor_lr = 1e-3 +critic_lr = 2e-3 +optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] +replay_buffer_size = 10000 +model = DDPG(net_list, optimizers_list, replay_buffer_size) +""" +full list of arguments for the algorithm +---------------------------------------- +net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization +optimizers_list: a list of optimizers for all networks and differentiable variables +replay_buffer_size: the size of buffer for storing explored samples +tau: soft update factor +""" + +model.learn(env, train_episodes=100, max_steps=200, save_interval=10, + mode='train', render=False, batch_size=32, gamma=0.9, noise_scale=1., noise_scale_decay=0.995) +""" +full list of parameters for training +--------------------------------------- +env: learning environment +train_episodes: total number of episodes for training +test_episodes: total number of episodes for testing +max_steps: maximum number of steps for one episode +save_interval: time steps for saving +explore_steps: for random action sampling in the beginning of training +mode: train or test mode +render: render each step +batch_size: update batch size +gamma: reward decay factor +noise_scale: range of action noise for exploration +noise_scale_decay: noise scale decay factor +""" + +model.learn(env, test_episodes=10, max_steps=200, mode='test', render=True) + diff --git a/rlzoo/algorithms/dppo/__init__.py b/rlzoo/algorithms/dppo/__init__.py old mode 100644 new mode 100755 diff --git a/rlzoo/algorithms/dppo/default.py b/rlzoo/algorithms/dppo/default.py old mode 100644 new mode 100755 index 01a79d8..7db1693 --- a/rlzoo/algorithms/dppo/default.py +++ b/rlzoo/algorithms/dppo/default.py @@ -1,334 +1,334 @@ -from rlzoo.common.policy_networks import * -from rlzoo.common.value_networks import * -from 
rlzoo.common.utils import set_seed - -""" -full list of algorithm parameters (alg_params) ------------------------------------------------ -net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization -optimizers_list: a list of optimizers for all networks and differentiable variables -epsilon: clip parameter (for method 'clip') -kl_target: controls bounds of policy update and adaptive lambda (for method 'penalty') -lam: KL-regularization coefficient (for method 'penalty') ------------------------------------------------ - -full list of learning parameters (learn_params) ------------------------------------------------ -train_episodes: total number of episodes for training -test_episodes: total number of episodes for testing -max_steps: maximum number of steps for one episode -save_interval: time steps for saving -gamma: reward discount factor -mode: train or test -batch_size: update batch size -a_update_steps: actor update iteration steps -c_update_steps: critic update iteration steps -n_worker: number of workers ------------------------------------------------ -""" - - -def atari(env, default_seed=True): - if default_seed: - assert isinstance(env, list) - seed = np.arange(len(env)).tolist() # a list of seeds for each env - set_seed(seed, env) # reproducible - - # for multi-threading - if isinstance(env, list): # judge if multiple envs are passed in for parallel computing - num_env = len(env) # number of envs passed in - env = env[0] # take one of the env as they are all the same - else: - num_env = 1 - - alg_params = dict(method='clip', # method can be clip or penalty - epsilon=0.2, # for method 'clip' - kl_target=0.01, # for method 'penalty' - lam=0.5 # for method 'penalty' - ) - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('DPPO'): - with tf.name_scope('V_Net'): - v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - [hidden_dim] * num_hidden_layer) - - net_list = v_net, policy_net - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - actor_lr = 1e-4 - critic_lr = 2e-4 - optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict(train_episodes=1000, - test_episodes=100, - max_steps=200, - save_interval=50, - gamma=0.9, - a_update_steps=10, - c_update_steps=10, - n_workers=num_env, - batch_size=32) - - return alg_params, learn_params - - -def classic_control(env, default_seed=True): - if default_seed: - assert isinstance(env, list) - seed = np.arange(len(env)).tolist() # a list of seeds for each env - set_seed(seed, env) # reproducible - - # for multi-threading - if isinstance(env, list): # judge if multiple envs are passed in for parallel computing - num_env = len(env) # number of envs passed in - env = env[0] # take one of the env as they are all the same - else: - num_env = 1 - - alg_params = dict(method='clip', # method can be clip or penalty - epsilon=0.2, # for method 'clip' - kl_target=0.01, # for method 'penalty' - lam=0.5 # for method 'penalty' - ) - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with 
tf.name_scope('DPPO'): - with tf.name_scope('V_Net'): - v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - [hidden_dim] * num_hidden_layer) - - net_list = v_net, policy_net - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - actor_lr = 1e-4 - critic_lr = 2e-4 - optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict(train_episodes=1000, - test_episodes=100, - max_steps=200, - save_interval=50, - gamma=0.9, - a_update_steps=10, - c_update_steps=10, - n_workers=num_env, - batch_size=32) - - return alg_params, learn_params - - -def box2d(env, default_seed=True): - if default_seed: - assert isinstance(env, list) - seed = np.arange(len(env)).tolist() # a list of seeds for each env - set_seed(seed, env) # reproducible - - # for multi-threading - if isinstance(env, list): # judge if multiple envs are passed in for parallel computing - num_env = len(env) # number of envs passed in - env = env[0] # take one of the env as they are all the same - else: - num_env = 1 - - alg_params = dict(method='clip', # method can be clip or penalty - epsilon=0.2, # for method 'clip' - kl_target=0.01, # for method 'penalty' - lam=0.5 # for method 'penalty' - ) - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('DPPO'): - with tf.name_scope('V_Net'): - v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - [hidden_dim] * num_hidden_layer) - - net_list = v_net, policy_net - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - actor_lr = 1e-4 - critic_lr = 2e-4 - optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict(train_episodes=1000, - test_episodes=100, - max_steps=200, - save_interval=50, - gamma=0.9, - a_update_steps=10, - c_update_steps=10, - n_workers=num_env, - batch_size=32) - - return alg_params, learn_params - - -def mujoco(env, default_seed=True): - if default_seed: - assert isinstance(env, list) - seed = np.arange(len(env)).tolist() # a list of seeds for each env - set_seed(seed, env) # reproducible - - # for multi-threading - if isinstance(env, list): # judge if multiple envs are passed in for parallel computing - num_env = len(env) # number of envs passed in - env = env[0] # take one of the env as they are all the same - else: - num_env = 1 - - alg_params = dict(method='clip', # method can be clip or penalty - epsilon=0.2, # for method 'clip' - kl_target=0.01, # for method 'penalty' - lam=0.5 # for method 'penalty' - ) - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('DPPO'): - with tf.name_scope('V_Net'): - v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - [hidden_dim] * num_hidden_layer) - - net_list = v_net, policy_net - alg_params['net_list'] = 
net_list - - if alg_params.get('optimizers_list') is None: - actor_lr = 1e-4 - critic_lr = 2e-4 - optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict(train_episodes=1000, - test_episodes=100, - max_steps=200, - save_interval=50, - gamma=0.9, - a_update_steps=10, - c_update_steps=10, - n_workers=num_env, - batch_size=32) - - return alg_params, learn_params - - -def robotics(env, default_seed=True): - if default_seed: - assert isinstance(env, list) - seed = np.arange(len(env)).tolist() # a list of seeds for each env - set_seed(seed, env) # reproducible - - # for multi-threading - if isinstance(env, list): # judge if multiple envs are passed in for parallel computing - num_env = len(env) # number of envs passed in - env = env[0] # take one of the env as they are all the same - else: - num_env = 1 - - alg_params = dict(method='clip', # method can be clip or penalty - epsilon=0.2, # for method 'clip' - kl_target=0.01, # for method 'penalty' - lam=0.5 # for method 'penalty' - ) - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('DPPO'): - with tf.name_scope('V_Net'): - v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - [hidden_dim] * num_hidden_layer) - - net_list = v_net, policy_net - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - actor_lr = 1e-4 - critic_lr = 2e-4 - optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict(train_episodes=1000, - test_episodes=100, - max_steps=200, - save_interval=50, - gamma=0.9, - a_update_steps=10, - c_update_steps=10, - n_workers=num_env, - batch_size=32) - - return alg_params, learn_params - - -def dm_control(env, default_seed=True): - if default_seed: - assert isinstance(env, list) - seed = np.arange(len(env)).tolist() # a list of seeds for each env - set_seed(seed, env) # reproducible - - # for multi-threading - if isinstance(env, list): # judge if multiple envs are passed in for parallel computing - num_env = len(env) # number of envs passed in - env = env[0] # take one of the env as they are all the same - else: - num_env = 1 - - alg_params = dict(method='clip', # method can be clip or penalty - epsilon=0.2, # for method 'clip' - kl_target=0.01, # for method 'penalty' - lam=0.5 # for method 'penalty' - ) - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('DPPO'): - with tf.name_scope('V_Net'): - v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - [hidden_dim] * num_hidden_layer) - - net_list = v_net, policy_net - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - actor_lr = 1e-4 - critic_lr = 2e-4 - optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict(train_episodes=1000, - test_episodes=100, - max_steps=200, - save_interval=50, - 
gamma=0.9, - a_update_steps=10, - c_update_steps=10, - n_workers=num_env, - batch_size=32) - - return alg_params, learn_params +from rlzoo.common.policy_networks import * +from rlzoo.common.value_networks import * +from rlzoo.common.utils import set_seed + +""" +full list of algorithm parameters (alg_params) +----------------------------------------------- +net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization +optimizers_list: a list of optimizers for all networks and differentiable variables +epsilon: clip parameter (for method 'clip') +kl_target: controls bounds of policy update and adaptive lambda (for method 'penalty') +lam: KL-regularization coefficient (for method 'penalty') +----------------------------------------------- + +full list of learning parameters (learn_params) +----------------------------------------------- +train_episodes: total number of episodes for training +test_episodes: total number of episodes for testing +max_steps: maximum number of steps for one episode +save_interval: time steps for saving +gamma: reward discount factor +mode: train or test +batch_size: update batch size +a_update_steps: actor update iteration steps +c_update_steps: critic update iteration steps +n_worker: number of workers +----------------------------------------------- +""" + + +def atari(env, default_seed=True): + if default_seed: + assert isinstance(env, list) + seed = np.arange(len(env)).tolist() # a list of seeds for each env + set_seed(seed, env) # reproducible + + # for multi-threading + if isinstance(env, list): # judge if multiple envs are passed in for parallel computing + num_env = len(env) # number of envs passed in + env = env[0] # take one of the env as they are all the same + else: + num_env = 1 + + alg_params = dict(method='clip', # method can be clip or penalty + epsilon=0.2, # for method 'clip' + kl_target=0.01, # for method 'penalty' + lam=0.5 # for method 'penalty' + ) + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('DPPO'): + with tf.name_scope('V_Net'): + v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + [hidden_dim] * num_hidden_layer) + + net_list = v_net, policy_net + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + actor_lr = 1e-4 + critic_lr = 2e-4 + optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict(train_episodes=1000, + test_episodes=100, + max_steps=200, + save_interval=50, + gamma=0.9, + a_update_steps=10, + c_update_steps=10, + n_workers=num_env, + batch_size=32) + + return alg_params, learn_params + + +def classic_control(env, default_seed=True): + if default_seed: + assert isinstance(env, list) + seed = np.arange(len(env)).tolist() # a list of seeds for each env + set_seed(seed, env) # reproducible + + # for multi-threading + if isinstance(env, list): # judge if multiple envs are passed in for parallel computing + num_env = len(env) # number of envs passed in + env = env[0] # take one of the env as they are all the same + else: + num_env = 1 + + alg_params = dict(method='clip', # method can be clip or penalty + epsilon=0.2, # for method 'clip' + kl_target=0.01, # for method 
'penalty' + lam=0.5 # for method 'penalty' + ) + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('DPPO'): + with tf.name_scope('V_Net'): + v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + [hidden_dim] * num_hidden_layer) + + net_list = v_net, policy_net + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + actor_lr = 1e-4 + critic_lr = 2e-4 + optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict(train_episodes=1000, + test_episodes=100, + max_steps=200, + save_interval=50, + gamma=0.9, + a_update_steps=10, + c_update_steps=10, + n_workers=num_env, + batch_size=32) + + return alg_params, learn_params + + +def box2d(env, default_seed=True): + if default_seed: + assert isinstance(env, list) + seed = np.arange(len(env)).tolist() # a list of seeds for each env + set_seed(seed, env) # reproducible + + # for multi-threading + if isinstance(env, list): # judge if multiple envs are passed in for parallel computing + num_env = len(env) # number of envs passed in + env = env[0] # take one of the env as they are all the same + else: + num_env = 1 + + alg_params = dict(method='clip', # method can be clip or penalty + epsilon=0.2, # for method 'clip' + kl_target=0.01, # for method 'penalty' + lam=0.5 # for method 'penalty' + ) + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('DPPO'): + with tf.name_scope('V_Net'): + v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + [hidden_dim] * num_hidden_layer) + + net_list = v_net, policy_net + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + actor_lr = 1e-4 + critic_lr = 2e-4 + optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict(train_episodes=1000, + test_episodes=100, + max_steps=200, + save_interval=50, + gamma=0.9, + a_update_steps=10, + c_update_steps=10, + n_workers=num_env, + batch_size=32) + + return alg_params, learn_params + + +def mujoco(env, default_seed=True): + if default_seed: + assert isinstance(env, list) + seed = np.arange(len(env)).tolist() # a list of seeds for each env + set_seed(seed, env) # reproducible + + # for multi-threading + if isinstance(env, list): # judge if multiple envs are passed in for parallel computing + num_env = len(env) # number of envs passed in + env = env[0] # take one of the env as they are all the same + else: + num_env = 1 + + alg_params = dict(method='clip', # method can be clip or penalty + epsilon=0.2, # for method 'clip' + kl_target=0.01, # for method 'penalty' + lam=0.5 # for method 'penalty' + ) + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('DPPO'): + with tf.name_scope('V_Net'): + v_net = ValueNetwork(env.observation_space, [hidden_dim] * 
num_hidden_layer) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + [hidden_dim] * num_hidden_layer) + + net_list = v_net, policy_net + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + actor_lr = 1e-4 + critic_lr = 2e-4 + optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict(train_episodes=1000, + test_episodes=100, + max_steps=200, + save_interval=50, + gamma=0.9, + a_update_steps=10, + c_update_steps=10, + n_workers=num_env, + batch_size=32) + + return alg_params, learn_params + + +def robotics(env, default_seed=True): + if default_seed: + assert isinstance(env, list) + seed = np.arange(len(env)).tolist() # a list of seeds for each env + set_seed(seed, env) # reproducible + + # for multi-threading + if isinstance(env, list): # judge if multiple envs are passed in for parallel computing + num_env = len(env) # number of envs passed in + env = env[0] # take one of the env as they are all the same + else: + num_env = 1 + + alg_params = dict(method='clip', # method can be clip or penalty + epsilon=0.2, # for method 'clip' + kl_target=0.01, # for method 'penalty' + lam=0.5 # for method 'penalty' + ) + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('DPPO'): + with tf.name_scope('V_Net'): + v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + [hidden_dim] * num_hidden_layer) + + net_list = v_net, policy_net + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + actor_lr = 1e-4 + critic_lr = 2e-4 + optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict(train_episodes=1000, + test_episodes=100, + max_steps=200, + save_interval=50, + gamma=0.9, + a_update_steps=10, + c_update_steps=10, + n_workers=num_env, + batch_size=32) + + return alg_params, learn_params + + +def dm_control(env, default_seed=True): + if default_seed: + assert isinstance(env, list) + seed = np.arange(len(env)).tolist() # a list of seeds for each env + set_seed(seed, env) # reproducible + + # for multi-threading + if isinstance(env, list): # judge if multiple envs are passed in for parallel computing + num_env = len(env) # number of envs passed in + env = env[0] # take one of the env as they are all the same + else: + num_env = 1 + + alg_params = dict(method='clip', # method can be clip or penalty + epsilon=0.2, # for method 'clip' + kl_target=0.01, # for method 'penalty' + lam=0.5 # for method 'penalty' + ) + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('DPPO'): + with tf.name_scope('V_Net'): + v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + [hidden_dim] * num_hidden_layer) + + net_list = v_net, policy_net + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + actor_lr = 1e-4 + critic_lr = 2e-4 + 
optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict(train_episodes=1000, + test_episodes=100, + max_steps=200, + save_interval=50, + gamma=0.9, + a_update_steps=10, + c_update_steps=10, + n_workers=num_env, + batch_size=32) + + return alg_params, learn_params diff --git a/rlzoo/algorithms/dppo/dppo.py b/rlzoo/algorithms/dppo/dppo.py old mode 100644 new mode 100755 diff --git a/rlzoo/algorithms/dppo_clip/__init__.py b/rlzoo/algorithms/dppo_clip/__init__.py old mode 100644 new mode 100755 diff --git a/rlzoo/algorithms/dppo_clip/dppo_clip.py b/rlzoo/algorithms/dppo_clip/dppo_clip.py old mode 100644 new mode 100755 diff --git a/rlzoo/algorithms/dppo_clip/run_dppo_clip.py b/rlzoo/algorithms/dppo_clip/run_dppo_clip.py old mode 100644 new mode 100755 diff --git a/dppo_clip_distributed/__init__.py b/rlzoo/algorithms/dppo_clip_distributed/__init__.py old mode 100644 new mode 100755 similarity index 100% rename from dppo_clip_distributed/__init__.py rename to rlzoo/algorithms/dppo_clip_distributed/__init__.py diff --git a/rlzoo/algorithms/dppo_clip_distributed/dppo_clip.py b/rlzoo/algorithms/dppo_clip_distributed/dppo_clip.py new file mode 100755 index 0000000..343073c --- /dev/null +++ b/rlzoo/algorithms/dppo_clip_distributed/dppo_clip.py @@ -0,0 +1,279 @@ +from rlzoo.common.policy_networks import StochasticPolicyNetwork +from rlzoo.common.value_networks import ValueNetwork +from rlzoo.common.utils import * +import tensorflow as tf +import numpy as np +import copy +import pickle + + +def write_log(text: str): + pass + # print('infer server: '+text) + # with open('infer_server_log.txt', 'a') as f: + # f.write(str(text) + '\n') + + +EPS = 1e-8 + + +class RLAlgorithm: + def __init__(self): + self.state_buffer = [] # shape: (None, [n_env], [state_shape]) + self.action_buffer = [] + self.reward_buffer = [] + self.done_buffer = [] + self.next_state_buffer = [] + self.logp_buffer = [] + self.all_buffer = self.state_buffer, self.action_buffer, self.reward_buffer, self.done_buffer, \ + self.next_state_buffer, self.logp_buffer + self.traj_list = [] + self.gamma = 0.9 + self.name = 'NotNamed' + + @property + def all_weights(self): + raise NotImplementedError + + def update_model(self, params): + raise NotImplementedError + + def _get_value(self, batch_state): + """ + return: value: tf.Tensor + """ + raise NotImplementedError + + def _get_action(self, batch_state): + """ + return: action: tf.Tensor, log_p: tf.Tensor + """ + raise NotImplementedError + + @property + def logp_shape(self): + raise NotImplementedError + + def save_ckpt(self, env_name): + """ + save trained weights + + :return: None + """ + raise NotImplementedError + + def plot_save_log(self, running_reward, env_name): + plot_save_log(running_reward, algorithm_name=self.name, env_name=env_name) + + def collect_data(self, s, a, r, d, s_, log_p, batch_data=False): + if not batch_data: + s, a, r, d, s_, log_p = [s], [a], [r], [d], [s_], [log_p] + for i, data in enumerate([s, a, r, d, s_, log_p]): + self.all_buffer[i].append(data) + + def get_value(self, state, batch_data=False): + if not batch_data: + state = [state] + value = self._get_value(np.array(state)) + value_shape = np.shape(value) + value = tf.reshape(value, value_shape[:-1]) + return value + + def get_action(self, state, batch_data=False): + if not batch_data: + state = [state] + + state = np.array(state) + action, log_p = self._get_action(state) + action, log_p = action.numpy(), 
log_p.numpy() + action_shape = np.shape(action) + # squeeze the trailing dim of size 1: batch data is squeezed unless the array is already 1-D, + # non-batch data is squeezed only when the result is 1-D + if action_shape[-1] == 1 and batch_data ^ (len(action_shape) == 1): + # ((batch_data and not len(action_shape) == 1) or (not batch_data and len(action_shape) == 1)): + action = np.reshape(action, action_shape[:-1]) # squeeze + log_p = np.reshape(log_p, log_p.shape[:-1]) + return action, log_p + + # def _cal_discounted_r(self, state_list, reward_list, done_list, batch_data=False): + # discounted_r = [] + # for r in reward_list[::-1]: + # v_s_ = r + 0.9 * v_s_ + # discounted_r.append(v_s_) + + def _cal_discounted_r(self, next_state_list, reward_list, done_list, batch_data=False): + discounted_r = np.zeros_like(reward_list) # reward_buffer shape: [-1, n_env] + # done_list = np.array(done_list, dtype=np.int) + done_list = np.array(done_list) + v_s_ = self.get_value(next_state_list[-1], batch_data) * (1 - done_list[-1]) + for i in range(len(reward_list) - 1, -1, -1): + # discounted_r[i] = v_s_ = reward_list[i] + self.gamma * v_s_ + discounted_r[i] = v_s_ = reward_list[i] + (1 - done_list[i]) * self.gamma * v_s_ + return discounted_r + + def _cal_adv(self, state_list, reward_list, done_list, next_state_list, batch_data=False): + dc_r = self._cal_discounted_r(next_state_list, reward_list, done_list, batch_data) + # dc_r = np.array( + # [[6.5132155], [6.125795], [5.6953278], [5.217031], [4.68559], [4.0951], [3.439], [2.71], [1.9], [1.]]) + if batch_data: + s_shape = np.shape(self.state_buffer) # state_buffer shape: [-1, n_env, *obs_shape] + state_list = np.reshape(self.state_buffer, [-1, *s_shape[2:]]) + v = self.get_value(state_list, batch_data).numpy() + v = v.reshape(*s_shape[:2]) + else: + v = self.get_value(state_list, batch_data).numpy() + + dc_r = np.array(dc_r, dtype=np.float32) + advs = dc_r - v + # advs = (advs - np.mean(advs)) / (np.std(advs) + 1e-8) # norm all env data adv at the same time + return advs + + def _get_traj(self): + traj_list = [] + for element in [ + self.state_buffer, self.action_buffer, self.reward_buffer, self.done_buffer, self.next_state_buffer, + self._cal_adv(self.state_buffer, self.reward_buffer, self.done_buffer, self.next_state_buffer, True), + self.logp_buffer]: + axes = list(range(len(np.shape(element)))) + axes[0], axes[1] = 1, 0 + result = np.transpose(element, axes) + # print(result) + traj_list.append(result) + traj_list = list(zip(*traj_list)) # + return traj_list + + def update_traj_list(self): + self.traj_list.extend(self._get_traj()) + for buffer in self.all_buffer: + buffer.clear() + + +class DPPO_CLIP(RLAlgorithm): + def __init__(self, net_builder, opt_builder, n_step=100, gamma=0.9, epsilon=0.2): + super().__init__() + self.critic, self.actor = None, None + self.net_builder = net_builder + self.gamma = gamma + self.n_step = n_step + self._logp_shape = None + self.epsilon = epsilon + self.name = 'DPPO_CLIP' + self.acter_optimizer, self.critic_optimizer = opt_builder() + + def init_components(self): # todo init process should be placed + networks = self.net_builder() + assert len(networks) == 2 + self.critic, self.actor = networks + assert isinstance(self.critic, ValueNetwork) + assert isinstance(self.actor, StochasticPolicyNetwork) + + @property + def all_weights(self): + return self.critic.trainable_weights + self.actor.trainable_weights + + # api + def _get_action(self, state): + action = self.actor(state) + log_p = self.actor.policy_dist.logp(action) + return action, log_p + + def _get_value(self, state): + return self.critic(state) + + def 
save_ckpt(self, env_name): + """ + save trained weights + + :return: None + """ + save_model(self.actor, 'actor', self.name, env_name) + save_model(self.critic, 'critic', self.name, env_name) + + def load_ckpt(self, env_name): + """ + load trained weights + + :return: None + """ + load_model(self.actor, 'actor', self.name, env_name) + load_model(self.critic, 'critic', self.name, env_name) + + # api + def update_model(self, params): + for i, j in zip(self.all_weights, params): + i.assign(j) + for buffer in self.all_buffer: + buffer.clear() + + def a_train(self, s, a, adv, oldpi_logp): + oldpi_prob = tf.exp(oldpi_logp) + with tf.GradientTape() as tape: + _ = self.actor(s) + pi_prob = tf.exp(self.actor.policy_dist.logp(a)) + ratio = pi_prob / (oldpi_prob + EPS) + + surr = ratio * adv + aloss = -tf.reduce_mean( + tf.minimum(surr, tf.clip_by_value(ratio, 1. - self.epsilon, 1. + self.epsilon) * adv)) + a_gard = tape.gradient(aloss, self.actor.trainable_weights) + return a_gard + + def c_train(self, dc_r, s): + dc_r = np.array(dc_r, dtype=np.float32) + with tf.GradientTape() as tape: + v = self.critic(s) + advantage = dc_r - v + closs = tf.reduce_mean(tf.square(advantage)) + c_grad = tape.gradient(closs, self.critic.trainable_weights) + return c_grad + + def train(self, traj_list, dis_agent=None): + for traj in traj_list: + state_list, action_list, reward_list, done_list, next_state_list, adv_list, logp_list = traj + for _ in range(10): + a_grad = self.a_train(state_list, action_list, adv_list, logp_list) + if dis_agent: + a_grad = [dis_agent.role_all_reduce(grad) for grad in a_grad] + self.acter_optimizer.apply_gradients(zip(a_grad, self.actor.trainable_weights)) + + dc_r = self._cal_discounted_r(next_state_list, reward_list, done_list) + for _ in range(10): + c_grad = self.c_train(dc_r, state_list) + if dis_agent: + c_grad = [dis_agent.role_all_reduce(grad) for grad in c_grad] + self.critic_optimizer.apply_gradients(zip(c_grad, self.critic.trainable_weights)) + + +if __name__ == '__main__': + from rlzoo.distributed.training_components import net_builder, env_maker, opt_builder + from rlzoo.common.utils import set_seed + + env = env_maker() + # set_seed(1, env) + + agent = DPPO_CLIP(net_builder, opt_builder) + agent.init_components() + + running_reward = [] + curr_step, max_step, traj_len = 0, 500 * 200, 200 + s = env.reset() + d = False + cnt = 0 + while curr_step < max_step: + for _ in range(traj_len): + curr_step += 1 + a, logp = agent.get_action(s) + s_, r, d, _ = env.step(a) + agent.collect_data(s, a, r, d, s_, logp) + if d: + s = env.reset() + else: + s = s_ + agent.update_traj_list() + agent.train(agent.traj_list) + avg_eps_reward = min(sum(agent.traj_list[0][2]) / (sum(agent.traj_list[0][3] + 1e-10)), traj_len) + agent.traj_list.clear() + running_reward.append(avg_eps_reward) + cnt += 1 + print(cnt, curr_step, avg_eps_reward) + agent.plot_save_log(running_reward, env.spec.id) diff --git a/rlzoo/algorithms/dppo_penalty/__init__.py b/rlzoo/algorithms/dppo_penalty/__init__.py old mode 100644 new mode 100755 diff --git a/rlzoo/algorithms/dppo_penalty/dppo_penalty.py b/rlzoo/algorithms/dppo_penalty/dppo_penalty.py old mode 100644 new mode 100755 diff --git a/rlzoo/algorithms/dppo_penalty/run_dppo_penalty.py b/rlzoo/algorithms/dppo_penalty/run_dppo_penalty.py old mode 100644 new mode 100755 diff --git a/rlzoo/algorithms/dqn/__init__.py b/rlzoo/algorithms/dqn/__init__.py old mode 100644 new mode 100755 diff --git a/rlzoo/algorithms/dqn/default.py b/rlzoo/algorithms/dqn/default.py old mode 
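# A minimal, illustrative sketch (not part of this patch): the clipped surrogate that
# DPPO_CLIP.a_train above computes with TensorFlow, reproduced as a self-contained NumPy
# check. The names old_logp, new_logp, adv are made up; epsilon=0.2 matches the class
# default and EPS plays the same stabilizing role as in a_train.
import numpy as np

def clipped_surrogate_loss(new_logp, old_logp, adv, epsilon=0.2, EPS=1e-8):
    """-E[min(ratio * adv, clip(ratio, 1 - epsilon, 1 + epsilon) * adv)]"""
    ratio = np.exp(new_logp) / (np.exp(old_logp) + EPS)
    clipped = np.clip(ratio, 1.0 - epsilon, 1.0 + epsilon)
    return -np.mean(np.minimum(ratio * adv, clipped * adv))

# With a positive advantage the ratio is capped at 1 + epsilon, so the incentive to move
# the policy further vanishes once it has drifted far enough from the old policy:
print(clipped_surrogate_loss(np.log([0.9]), np.log([0.3]), np.array([1.0])))  # ~ -1.2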
100644 new mode 100755 index 689afe3..aef6d77 --- a/rlzoo/algorithms/dqn/default.py +++ b/rlzoo/algorithms/dqn/default.py @@ -1,210 +1,210 @@ -from gym.spaces import Discrete - -from rlzoo.common.utils import set_seed -from rlzoo.common.value_networks import * - -""" -full list of algorithm parameters (alg_params) ------------------------------------------------ ------------------------------------------------ - -full list of learning parameters (learn_params) ------------------------------------------------ -double_q (bool): if True double DQN will be used -dueling (bool): if True dueling value estimation will be used -exploration_rate (float): fraction of entire training period over - which the exploration rate is annealed -exploration_final_eps (float): final value of random action probability -batch_size (int): size of a batched sampled from replay buffer for training -train_freq (int): update the model every `train_freq` steps -learning_starts (int): how many steps of the model to collect transitions - for before learning starts -target_network_update_freq (int): update the target network every - `target_network_update_freq` steps -buffer_size (int): size of the replay buffer -prioritized_replay (bool): if True prioritized replay buffer will be used. -prioritized_alpha (float): alpha parameter for prioritized replay -prioritized_beta0 (float): beta parameter for prioritized replay -mode (str): train or test ------------------------------------------------ -""" - - -def atari(env, default_seed=False, **kwargs): - if default_seed: - seed = 2 - set_seed(seed, env) # reproducible - - assert isinstance(env.action_space, Discrete) - - alg_params = dict( - dueling=True, - double_q=True, - buffer_size=1000, - prioritized_replay=True, - prioritized_alpha=0.6, - prioritized_beta0=0.4, - ) - alg_params.update(kwargs) - if alg_params.get('net_list') is None: - alg_params['net_list'] = [QNetwork(env.observation_space, env.action_space, [64], - state_only=True, dueling=alg_params['dueling'])] - - if alg_params.get('optimizers_list') is None: - alg_params['optimizers_list'] = tf.optimizers.Adam(1e-4, epsilon=1e-5, clipnorm=10), - - learn_params = dict( - train_episodes=int(1e5), - test_episodes=10, - max_steps=200, - save_interval=1e4, - batch_size=32, - exploration_rate=0.1, - exploration_final_eps=0.01, - train_freq=4, - learning_starts=10000, - target_network_update_freq=1000, - gamma=0.99, - ) - - return alg_params, learn_params - - -def classic_control(env, default_seed=False, **kwargs): - if default_seed: - seed = 2 - set_seed(seed, env) # reproducible - - assert isinstance(env.action_space, Discrete) - - alg_params = dict( - dueling=True, - double_q=True, - buffer_size=1000, - prioritized_replay=False, - prioritized_alpha=0.6, - prioritized_beta0=0.4, - ) - alg_params.update(kwargs) - if alg_params.get('net_list') is None: - alg_params['net_list'] = [QNetwork(env.observation_space, env.action_space, [64], activation=tf.nn.tanh, - state_only=True, dueling=alg_params['dueling'])] - - if alg_params.get('optimizers_list') is None: - alg_params['optimizers_list'] = tf.optimizers.Adam(5e-3, epsilon=1e-5), - - learn_params = dict( - train_episodes=int(1e3), - test_episodes=10, - max_steps=200, - save_interval=1e3, - batch_size=32, - exploration_rate=0.2, - exploration_final_eps=0.01, - train_freq=4, - learning_starts=200, - target_network_update_freq=50, - gamma=0.99, - ) - - return alg_params, learn_params - - -# class CNNQNet(tl.models.Model): -# def __init__(self, in_dim, act_dim, dueling): -# 
super().__init__() -# self._state_shape = in_dim -# self._action_shape = act_dim, -# self.dueling = dueling -# with tf.name_scope('DQN'): -# with tf.name_scope('CNN'): -# self.cnn = basic_nets.CNNModel(in_dim) -# mlp_in_shape = self.cnn.outputs[0].shape[0] -# with tf.name_scope('QValue'): -# hidden_dim = 256 -# self.preq = tl.layers.Dense( -# hidden_dim, tf.nn.relu, -# tf.initializers.Orthogonal(1.0), -# in_channels=mlp_in_shape -# ) -# self.qout = tl.layers.Dense( -# act_dim, None, -# tf.initializers.Orthogonal(1.0), -# in_channels=hidden_dim -# ) -# if dueling: -# with tf.name_scope('Value'): -# hidden_dim = 256 -# self.prev = tl.layers.Dense( -# hidden_dim, tf.nn.relu, -# tf.initializers.Orthogonal(1.0), -# in_channels=mlp_in_shape -# ) -# self.vout = tl.layers.Dense( -# 1, None, -# tf.initializers.Orthogonal(1.0), -# in_channels=hidden_dim -# ) -# -# def forward(self, obv): -# obv = tf.cast(obv, tf.float32) / 255.0 -# mlp_in = tl.layers.flatten_reshape(self.cnn(obv)) -# q_out = self.qout(self.preq(mlp_in)) -# if self.dueling: -# v_out = self.vout(self.prev(mlp_in)) -# q_out = v_out + q_out - tf.reduce_mean(q_out, 1, True) -# return q_out -# -# @property -# def state_shape(self): -# return copy.deepcopy(self._state_shape) -# -# @property -# def action_shape(self): -# return copy.deepcopy(self._action_shape) -# -# -# class MLPQNet(tl.models.Model): -# def __init__(self, in_dim, act_dim, dueling): -# super().__init__() -# self._state_shape = in_dim, -# self._action_shape = act_dim, -# self.dueling = dueling -# hidden_dim = 64 -# with tf.name_scope('DQN'): -# with tf.name_scope('MLP'): -# self.mlp = tl.layers.Dense( -# hidden_dim, tf.nn.tanh, -# tf.initializers.Orthogonal(1.0), -# in_channels=in_dim -# ) -# with tf.name_scope('QValue'): -# self.qmlp = tl.layers.Dense( -# act_dim, None, -# tf.initializers.Orthogonal(1.0), -# in_channels=hidden_dim -# ) -# if dueling: -# with tf.name_scope('Value'): -# self.vmlp = tl.layers.Dense( -# 1, None, -# tf.initializers.Orthogonal(1.0), -# in_channels=hidden_dim -# ) -# -# def forward(self, obv): -# obv = tf.cast(obv, tf.float32) -# latent = self.mlp(obv) -# q_out = self.qmlp(latent) -# if self.dueling: -# v_out = self.vmlp(latent) -# q_out = v_out + q_out - tf.reduce_mean(q_out, 1, True) -# return q_out -# -# @property -# def state_shape(self): -# return copy.deepcopy(self._state_shape) -# -# @property -# def action_shape(self): -# return copy.deepcopy(self._action_shape) +from gym.spaces import Discrete + +from rlzoo.common.utils import set_seed +from rlzoo.common.value_networks import * + +""" +full list of algorithm parameters (alg_params) +----------------------------------------------- +----------------------------------------------- + +full list of learning parameters (learn_params) +----------------------------------------------- +double_q (bool): if True double DQN will be used +dueling (bool): if True dueling value estimation will be used +exploration_rate (float): fraction of entire training period over + which the exploration rate is annealed +exploration_final_eps (float): final value of random action probability +batch_size (int): size of a batched sampled from replay buffer for training +train_freq (int): update the model every `train_freq` steps +learning_starts (int): how many steps of the model to collect transitions + for before learning starts +target_network_update_freq (int): update the target network every + `target_network_update_freq` steps +buffer_size (int): size of the replay buffer +prioritized_replay (bool): if True 
prioritized replay buffer will be used. +prioritized_alpha (float): alpha parameter for prioritized replay +prioritized_beta0 (float): beta parameter for prioritized replay +mode (str): train or test +----------------------------------------------- +""" + + +def atari(env, default_seed=False, **kwargs): + if default_seed: + seed = 2 + set_seed(seed, env) # reproducible + + assert isinstance(env.action_space, Discrete) + + alg_params = dict( + dueling=True, + double_q=True, + buffer_size=1000, + prioritized_replay=True, + prioritized_alpha=0.6, + prioritized_beta0=0.4, + ) + alg_params.update(kwargs) + if alg_params.get('net_list') is None: + alg_params['net_list'] = [QNetwork(env.observation_space, env.action_space, [64], + state_only=True, dueling=alg_params['dueling'])] + + if alg_params.get('optimizers_list') is None: + alg_params['optimizers_list'] = tf.optimizers.Adam(1e-4, epsilon=1e-5, clipnorm=10), + + learn_params = dict( + train_episodes=int(1e5), + test_episodes=10, + max_steps=200, + save_interval=1e4, + batch_size=32, + exploration_rate=0.1, + exploration_final_eps=0.01, + train_freq=4, + learning_starts=10000, + target_network_update_freq=1000, + gamma=0.99, + ) + + return alg_params, learn_params + + +def classic_control(env, default_seed=False, **kwargs): + if default_seed: + seed = 2 + set_seed(seed, env) # reproducible + + assert isinstance(env.action_space, Discrete) + + alg_params = dict( + dueling=True, + double_q=True, + buffer_size=1000, + prioritized_replay=False, + prioritized_alpha=0.6, + prioritized_beta0=0.4, + ) + alg_params.update(kwargs) + if alg_params.get('net_list') is None: + alg_params['net_list'] = [QNetwork(env.observation_space, env.action_space, [64], activation=tf.nn.tanh, + state_only=True, dueling=alg_params['dueling'])] + + if alg_params.get('optimizers_list') is None: + alg_params['optimizers_list'] = tf.optimizers.Adam(5e-3, epsilon=1e-5), + + learn_params = dict( + train_episodes=int(1e3), + test_episodes=10, + max_steps=200, + save_interval=1e3, + batch_size=32, + exploration_rate=0.2, + exploration_final_eps=0.01, + train_freq=4, + learning_starts=200, + target_network_update_freq=50, + gamma=0.99, + ) + + return alg_params, learn_params + + +# class CNNQNet(tl.models.Model): +# def __init__(self, in_dim, act_dim, dueling): +# super().__init__() +# self._state_shape = in_dim +# self._action_shape = act_dim, +# self.dueling = dueling +# with tf.name_scope('DQN'): +# with tf.name_scope('CNN'): +# self.cnn = basic_nets.CNNModel(in_dim) +# mlp_in_shape = self.cnn.outputs[0].shape[0] +# with tf.name_scope('QValue'): +# hidden_dim = 256 +# self.preq = tl.layers.Dense( +# hidden_dim, tf.nn.relu, +# tf.initializers.Orthogonal(1.0), +# in_channels=mlp_in_shape +# ) +# self.qout = tl.layers.Dense( +# act_dim, None, +# tf.initializers.Orthogonal(1.0), +# in_channels=hidden_dim +# ) +# if dueling: +# with tf.name_scope('Value'): +# hidden_dim = 256 +# self.prev = tl.layers.Dense( +# hidden_dim, tf.nn.relu, +# tf.initializers.Orthogonal(1.0), +# in_channels=mlp_in_shape +# ) +# self.vout = tl.layers.Dense( +# 1, None, +# tf.initializers.Orthogonal(1.0), +# in_channels=hidden_dim +# ) +# +# def forward(self, obv): +# obv = tf.cast(obv, tf.float32) / 255.0 +# mlp_in = tl.layers.flatten_reshape(self.cnn(obv)) +# q_out = self.qout(self.preq(mlp_in)) +# if self.dueling: +# v_out = self.vout(self.prev(mlp_in)) +# q_out = v_out + q_out - tf.reduce_mean(q_out, 1, True) +# return q_out +# +# @property +# def state_shape(self): +# return 
copy.deepcopy(self._state_shape) +# +# @property +# def action_shape(self): +# return copy.deepcopy(self._action_shape) +# +# +# class MLPQNet(tl.models.Model): +# def __init__(self, in_dim, act_dim, dueling): +# super().__init__() +# self._state_shape = in_dim, +# self._action_shape = act_dim, +# self.dueling = dueling +# hidden_dim = 64 +# with tf.name_scope('DQN'): +# with tf.name_scope('MLP'): +# self.mlp = tl.layers.Dense( +# hidden_dim, tf.nn.tanh, +# tf.initializers.Orthogonal(1.0), +# in_channels=in_dim +# ) +# with tf.name_scope('QValue'): +# self.qmlp = tl.layers.Dense( +# act_dim, None, +# tf.initializers.Orthogonal(1.0), +# in_channels=hidden_dim +# ) +# if dueling: +# with tf.name_scope('Value'): +# self.vmlp = tl.layers.Dense( +# 1, None, +# tf.initializers.Orthogonal(1.0), +# in_channels=hidden_dim +# ) +# +# def forward(self, obv): +# obv = tf.cast(obv, tf.float32) +# latent = self.mlp(obv) +# q_out = self.qmlp(latent) +# if self.dueling: +# v_out = self.vmlp(latent) +# q_out = v_out + q_out - tf.reduce_mean(q_out, 1, True) +# return q_out +# +# @property +# def state_shape(self): +# return copy.deepcopy(self._state_shape) +# +# @property +# def action_shape(self): +# return copy.deepcopy(self._action_shape) diff --git a/rlzoo/algorithms/dqn/dqn.py b/rlzoo/algorithms/dqn/dqn.py old mode 100644 new mode 100755 index 43507e6..efbb500 --- a/rlzoo/algorithms/dqn/dqn.py +++ b/rlzoo/algorithms/dqn/dqn.py @@ -1,248 +1,248 @@ -""" -Deep Q Network -""" -import random -from copy import deepcopy - -from rlzoo.common.utils import * -from rlzoo.common.buffer import ReplayBuffer, PrioritizedReplayBuffer -from rlzoo.common.value_networks import * - - -class DQN(object): - """ - Papers: - - Mnih V, Kavukcuoglu K, Silver D, et al. Human-level control through deep - reinforcement learning[J]. Nature, 2015, 518(7540): 529. - - Hessel M, Modayil J, Van Hasselt H, et al. Rainbow: Combining Improvements - in Deep Reinforcement Learning[J]. 2017. - """ - - def __init__(self, net_list, optimizers_list, double_q, dueling, buffer_size, - prioritized_replay, prioritized_alpha, prioritized_beta0, ): - """ - Parameters: - ---------- - :param net_list (list): a list of networks (value and policy) used in the algorithm, from common functions or customization - :param optimizers_list (list): a list of optimizers for all networks and differentiable variables - :param double_q (bool): if True double DQN will be used - :param dueling (bool): if True dueling value estimation will be used - :param buffer_size (int): size of the replay buffer - :param prioritized_replay (bool): if True prioritized replay buffer will be used. 
- :param prioritized_alpha (float): alpha parameter for prioritized replay - :param prioritized_beta0 (float): beta parameter for prioritized replay - """ - assert isinstance(net_list[0], QNetwork) - self.name = 'DQN' - if prioritized_replay: - self.buffer = PrioritizedReplayBuffer( - buffer_size, prioritized_alpha, prioritized_beta0) - else: - self.buffer = ReplayBuffer(buffer_size) - - self.network = net_list[0] - self.target_network = deepcopy(net_list[0]) - self.network.train() - self.target_network.infer() - self.optimizer = optimizers_list[0] - self.double_q = double_q - self.prioritized_replay = prioritized_replay - self.dueling = dueling - - def get_action(self, obv, eps=0.2): - out_dim = self.network.action_shape[0] - if random.random() < eps: - return int(random.random() * out_dim) - else: - obv = np.expand_dims(obv, 0).astype('float32') - return self.network(obv).numpy().argmax(1)[0] - - def get_action_greedy(self, obv): - obv = np.expand_dims(obv, 0).astype('float32') - return self.network(obv).numpy().argmax(1)[0] - - def sync(self): - """Copy q network to target q network""" - - for var, var_tar in zip(self.network.trainable_weights, - self.target_network.trainable_weights): - var_tar.assign(var) - - def save_ckpt(self, env_name): - """ - save trained weights - :return: None - """ - save_model(self.network, 'qnet', 'DQN', env_name) - - def load_ckpt(self, env_name): - """ - load trained weights - :return: None - """ - load_model(self.network, 'qnet', 'DQN', env_name) - - # @tf.function - def _td_error(self, transitions, reward_gamma): - b_o, b_a, b_r, b_o_, b_d = transitions - b_d = tf.cast(b_d, tf.float32) - b_a = tf.cast(b_a, tf.int64) - b_r = tf.cast(b_r, tf.float32) - if self.double_q: - b_a_ = tf.one_hot(tf.argmax(self.network(b_o_), 1), self.network.action_shape[0]) - b_q_ = (1 - b_d) * tf.reduce_sum(self.target_network(b_o_) * b_a_, 1) - else: - b_q_ = (1 - b_d) * tf.reduce_max(self.target_network(b_o_), 1) - - b_q = tf.reduce_sum(self.network(b_o) * tf.one_hot(b_a, self.network.action_shape[0]), 1) - return b_q - (b_r + reward_gamma * b_q_) - - def store_transition(self, s, a, r, s_, d): - self.buffer.push(s, a, r, s_, d) - - def update(self, batch_size, gamma): - if self.prioritized_replay: - # sample from prioritized replay buffer - *transitions, b_w, idxs = self.buffer.sample(batch_size) - # calculate weighted huber loss - with tf.GradientTape() as tape: - priorities = self._td_error(transitions, gamma) - huber_loss = tf.where(tf.abs(priorities) < 1, - tf.square(priorities) * 0.5, - tf.abs(priorities) - 0.5) - loss = tf.reduce_mean(huber_loss * b_w) - # backpropagate - grad = tape.gradient(loss, self.network.trainable_weights) - self.optimizer.apply_gradients(zip(grad, self.network.trainable_weights)) - # update priorities - priorities = np.clip(np.abs(priorities), 1e-6, None) - self.buffer.update_priorities(idxs, priorities) - else: - # sample from prioritized replay buffer - transitions = self.buffer.sample(batch_size) - # calculate huber loss - with tf.GradientTape() as tape: - td_errors = self._td_error(transitions, gamma) - huber_loss = tf.where(tf.abs(td_errors) < 1, - tf.square(td_errors) * 0.5, - tf.abs(td_errors) - 0.5) - loss = tf.reduce_mean(huber_loss) - # backpropagate - grad = tape.gradient(loss, self.network.trainable_weights) - self.optimizer.apply_gradients(zip(grad, self.network.trainable_weights)) - - def learn( - self, env, mode='train', render=False, - train_episodes=1000, test_episodes=10, max_steps=200, - save_interval=1000, gamma=0.99, - 
exploration_rate=0.2, exploration_final_eps=0.01, - target_network_update_freq=50, - batch_size=32, train_freq=4, learning_starts=200, - plot_func=None - ): - - """ - :param env: learning environment - :param mode: train or test - :param render: render each step - :param train_episodes: total number of episodes for training - :param test_episodes: total number of episodes for testing - :param max_steps: maximum number of steps for one episode - :param save_interval: time steps for saving - :param gamma: reward decay factor - :param exploration_rate (float): fraction of entire training period over - which the exploration rate is annealed - :param exploration_final_eps (float): final value of random action probability - :param target_network_update_freq (int): update the target network every - `target_network_update_freq` steps - :param batch_size (int): size of a batched sampled from replay buffer for training - :param train_freq (int): update the model every `train_freq` steps - :param learning_starts (int): how many steps of the model to collect transitions - for before learning starts - :param plot_func: additional function for interactive module - - """ - if mode == 'train': - print('Training... | Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) - reward_buffer = [] - i = 0 - for episode in range(1, train_episodes + 1): - o = env.reset() - ep_reward = 0 - for step in range(1, max_steps + 1): - i += 1 - if render: - env.render() - eps = 1 - (1 - exploration_final_eps) * \ - min(1, i / exploration_rate * (train_episodes * max_steps)) - a = self.get_action(o, eps) - - # execute action and feed to replay buffer - # note that `_` tail in var name means next - o_, r, done, info = env.step(a) - self.store_transition(o, a, r, o_, done) - ep_reward += r - - # update networks - if i >= learning_starts and i % train_freq == 0: - self.update(batch_size, gamma) - - if i % target_network_update_freq == 0: - self.sync() - - # reset current observation - if done: - break - else: - o = o_ - - # saving model - if i % save_interval == 0: - self.save_ckpt(env.spec.id) - print( - 'Time steps so far: {}, episode so far: {}, ' - 'episode reward: {:.4f}, episode length: {}' - .format(i, episode, ep_reward, step) - ) - reward_buffer.append(ep_reward) - if plot_func is not None: - plot_func(reward_buffer) - - elif mode == 'test': - print('Testing... | Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) - - self.load_ckpt(env.spec.id) - self.network.infer() - - reward_buffer = [] - for episode in range(1, test_episodes + 1): - o = env.reset() - ep_reward = 0 - for step in range(1, max_steps + 1): - if render: - env.render() - a = self.get_action_greedy(o) - - # execute action - # note that `_` tail in var name means next - o_, r, done, info = env.step(a) - ep_reward += r - - if done: - break - else: - o = o_ - - print( - 'episode so far: {}, ' - 'episode reward: {:.4f}, episode length: {}' - .format(episode, ep_reward, step) - ) - reward_buffer.append(ep_reward) - if plot_func is not None: - plot_func(reward_buffer) - - else: - print('unknown mode type') +""" +Deep Q Network +""" +import random +from copy import deepcopy + +from rlzoo.common.utils import * +from rlzoo.common.buffer import ReplayBuffer, PrioritizedReplayBuffer +from rlzoo.common.value_networks import * + + +class DQN(object): + """ + Papers: + + Mnih V, Kavukcuoglu K, Silver D, et al. Human-level control through deep + reinforcement learning[J]. Nature, 2015, 518(7540): 529. 
+ + Hessel M, Modayil J, Van Hasselt H, et al. Rainbow: Combining Improvements + in Deep Reinforcement Learning[J]. 2017. + """ + + def __init__(self, net_list, optimizers_list, double_q, dueling, buffer_size, + prioritized_replay, prioritized_alpha, prioritized_beta0, ): + """ + Parameters: + ---------- + :param net_list (list): a list of networks (value and policy) used in the algorithm, from common functions or customization + :param optimizers_list (list): a list of optimizers for all networks and differentiable variables + :param double_q (bool): if True double DQN will be used + :param dueling (bool): if True dueling value estimation will be used + :param buffer_size (int): size of the replay buffer + :param prioritized_replay (bool): if True prioritized replay buffer will be used. + :param prioritized_alpha (float): alpha parameter for prioritized replay + :param prioritized_beta0 (float): beta parameter for prioritized replay + """ + assert isinstance(net_list[0], QNetwork) + self.name = 'DQN' + if prioritized_replay: + self.buffer = PrioritizedReplayBuffer( + buffer_size, prioritized_alpha, prioritized_beta0) + else: + self.buffer = ReplayBuffer(buffer_size) + + self.network = net_list[0] + self.target_network = deepcopy(net_list[0]) + self.network.train() + self.target_network.infer() + self.optimizer = optimizers_list[0] + self.double_q = double_q + self.prioritized_replay = prioritized_replay + self.dueling = dueling + + def get_action(self, obv, eps=0.2): + out_dim = self.network.action_shape[0] + if random.random() < eps: + return int(random.random() * out_dim) + else: + obv = np.expand_dims(obv, 0).astype('float32') + return self.network(obv).numpy().argmax(1)[0] + + def get_action_greedy(self, obv): + obv = np.expand_dims(obv, 0).astype('float32') + return self.network(obv).numpy().argmax(1)[0] + + def sync(self): + """Copy q network to target q network""" + + for var, var_tar in zip(self.network.trainable_weights, + self.target_network.trainable_weights): + var_tar.assign(var) + + def save_ckpt(self, env_name): + """ + save trained weights + :return: None + """ + save_model(self.network, 'qnet', 'DQN', env_name) + + def load_ckpt(self, env_name): + """ + load trained weights + :return: None + """ + load_model(self.network, 'qnet', 'DQN', env_name) + + # @tf.function + def _td_error(self, transitions, reward_gamma): + b_o, b_a, b_r, b_o_, b_d = transitions + b_d = tf.cast(b_d, tf.float32) + b_a = tf.cast(b_a, tf.int64) + b_r = tf.cast(b_r, tf.float32) + if self.double_q: + b_a_ = tf.one_hot(tf.argmax(self.network(b_o_), 1), self.network.action_shape[0]) + b_q_ = (1 - b_d) * tf.reduce_sum(self.target_network(b_o_) * b_a_, 1) + else: + b_q_ = (1 - b_d) * tf.reduce_max(self.target_network(b_o_), 1) + + b_q = tf.reduce_sum(self.network(b_o) * tf.one_hot(b_a, self.network.action_shape[0]), 1) + return b_q - (b_r + reward_gamma * b_q_) + + def store_transition(self, s, a, r, s_, d): + self.buffer.push(s, a, r, s_, d) + + def update(self, batch_size, gamma): + if self.prioritized_replay: + # sample from prioritized replay buffer + *transitions, b_w, idxs = self.buffer.sample(batch_size) + # calculate weighted huber loss + with tf.GradientTape() as tape: + priorities = self._td_error(transitions, gamma) + huber_loss = tf.where(tf.abs(priorities) < 1, + tf.square(priorities) * 0.5, + tf.abs(priorities) - 0.5) + loss = tf.reduce_mean(huber_loss * b_w) + # backpropagate + grad = tape.gradient(loss, self.network.trainable_weights) + self.optimizer.apply_gradients(zip(grad, 
self.network.trainable_weights)) + # update priorities + priorities = np.clip(np.abs(priorities), 1e-6, None) + self.buffer.update_priorities(idxs, priorities) + else: + # sample from prioritized replay buffer + transitions = self.buffer.sample(batch_size) + # calculate huber loss + with tf.GradientTape() as tape: + td_errors = self._td_error(transitions, gamma) + huber_loss = tf.where(tf.abs(td_errors) < 1, + tf.square(td_errors) * 0.5, + tf.abs(td_errors) - 0.5) + loss = tf.reduce_mean(huber_loss) + # backpropagate + grad = tape.gradient(loss, self.network.trainable_weights) + self.optimizer.apply_gradients(zip(grad, self.network.trainable_weights)) + + def learn( + self, env, mode='train', render=False, + train_episodes=1000, test_episodes=10, max_steps=200, + save_interval=1000, gamma=0.99, + exploration_rate=0.2, exploration_final_eps=0.01, + target_network_update_freq=50, + batch_size=32, train_freq=4, learning_starts=200, + plot_func=None + ): + + """ + :param env: learning environment + :param mode: train or test + :param render: render each step + :param train_episodes: total number of episodes for training + :param test_episodes: total number of episodes for testing + :param max_steps: maximum number of steps for one episode + :param save_interval: time steps for saving + :param gamma: reward decay factor + :param exploration_rate (float): fraction of entire training period over + which the exploration rate is annealed + :param exploration_final_eps (float): final value of random action probability + :param target_network_update_freq (int): update the target network every + `target_network_update_freq` steps + :param batch_size (int): size of a batched sampled from replay buffer for training + :param train_freq (int): update the model every `train_freq` steps + :param learning_starts (int): how many steps of the model to collect transitions + for before learning starts + :param plot_func: additional function for interactive module + + """ + if mode == 'train': + print('Training... | Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) + reward_buffer = [] + i = 0 + for episode in range(1, train_episodes + 1): + o = env.reset() + ep_reward = 0 + for step in range(1, max_steps + 1): + i += 1 + if render: + env.render() + eps = 1 - (1 - exploration_final_eps) * \ + min(1, i / exploration_rate * (train_episodes * max_steps)) + a = self.get_action(o, eps) + + # execute action and feed to replay buffer + # note that `_` tail in var name means next + o_, r, done, info = env.step(a) + self.store_transition(o, a, r, o_, done) + ep_reward += r + + # update networks + if i >= learning_starts and i % train_freq == 0: + self.update(batch_size, gamma) + + if i % target_network_update_freq == 0: + self.sync() + + # reset current observation + if done: + break + else: + o = o_ + + # saving model + if i % save_interval == 0: + self.save_ckpt(env.spec.id) + print( + 'Time steps so far: {}, episode so far: {}, ' + 'episode reward: {:.4f}, episode length: {}' + .format(i, episode, ep_reward, step) + ) + reward_buffer.append(ep_reward) + if plot_func is not None: + plot_func(reward_buffer) + + elif mode == 'test': + print('Testing... 
| Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) + + self.load_ckpt(env.spec.id) + self.network.infer() + + reward_buffer = [] + for episode in range(1, test_episodes + 1): + o = env.reset() + ep_reward = 0 + for step in range(1, max_steps + 1): + if render: + env.render() + a = self.get_action_greedy(o) + + # execute action + # note that `_` tail in var name means next + o_, r, done, info = env.step(a) + ep_reward += r + + if done: + break + else: + o = o_ + + print( + 'episode so far: {}, ' + 'episode reward: {:.4f}, episode length: {}' + .format(episode, ep_reward, step) + ) + reward_buffer.append(ep_reward) + if plot_func is not None: + plot_func(reward_buffer) + + else: + print('unknown mode type') diff --git a/rlzoo/algorithms/dqn/run_dqn.py b/rlzoo/algorithms/dqn/run_dqn.py old mode 100644 new mode 100755 index 75f624c..6e8a031 --- a/rlzoo/algorithms/dqn/run_dqn.py +++ b/rlzoo/algorithms/dqn/run_dqn.py @@ -1,82 +1,82 @@ -import gym - -from rlzoo.algorithms.dqn.dqn import DQN -from rlzoo.algorithms.dqn.default import * -from rlzoo.common.value_networks import * -import gym - -""" load environment """ -env = gym.make('CartPole-v0').unwrapped - -obs_space = env.observation_space -act_space = env.action_space - -# reproducible -seed = 2 -set_seed(seed, env) - -in_dim = env.observation_space.shape[0] -act_dim = env.action_space.n -""" build networks for the algorithm """ -name = 'DQN' -Q_net = QNetwork(env.observation_space, env.action_space, [64], activation=tf.nn.tanh, - state_only=True, dueling=True) -net_list = [Q_net] - -""" create model """ -optimizer = tf.optimizers.Adam(5e-3, epsilon=1e-5) -optimizers_list = [optimizer] -model = DQN(net_list, optimizers_list, - double_q=True, - dueling=True, - buffer_size=10000, - prioritized_replay=False, - prioritized_alpha=0.6, - prioritized_beta0=0.4) -""" -full list of arguments for the algorithm ----------------------------------------- -net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization -optimizers_list: a list of optimizers for all networks and differentiable variables -replay_buffer_size: the size of buffer for storing explored samples -tau: soft update factor -""" - -model.learn(env, mode='train', render=False, - train_episodes=1000, - test_episodes=10, - max_steps=200, - save_interval=1e3, - batch_size=32, - exploration_rate=0.2, - exploration_final_eps=0.01, - train_freq=4, - learning_starts=200, - target_network_update_freq=50, - gamma=0.99, ) -""" -full list of parameters for training ---------------------------------------- -env: learning environment -train_episodes: total number of episodes for training -test_episodes: total number of episodes for testing -max_steps: maximum number of steps for one episode -save_interval: time steps for saving -explore_steps: for random action sampling in the beginning of training -mode: train or test mode -render: render each step -batch_size: update batch size -gamma: reward decay factor -noise_scale: range of action noise for exploration -noise_scale_decay: noise scale decay factor -""" - -model.learn(env, mode='test', render=True, - test_episodes=10, - batch_size=32, - exploration_rate=0.2, - exploration_final_eps=0.01, - train_freq=4, - learning_starts=200, - target_network_update_freq=50, - gamma=0.99, ) +import gym + +from rlzoo.algorithms.dqn.dqn import DQN +from rlzoo.algorithms.dqn.default import * +from rlzoo.common.value_networks import * +import gym + +""" load environment """ +env = 
gym.make('CartPole-v0').unwrapped + +obs_space = env.observation_space +act_space = env.action_space + +# reproducible +seed = 2 +set_seed(seed, env) + +in_dim = env.observation_space.shape[0] +act_dim = env.action_space.n +""" build networks for the algorithm """ +name = 'DQN' +Q_net = QNetwork(env.observation_space, env.action_space, [64], activation=tf.nn.tanh, + state_only=True, dueling=True) +net_list = [Q_net] + +""" create model """ +optimizer = tf.optimizers.Adam(5e-3, epsilon=1e-5) +optimizers_list = [optimizer] +model = DQN(net_list, optimizers_list, + double_q=True, + dueling=True, + buffer_size=10000, + prioritized_replay=False, + prioritized_alpha=0.6, + prioritized_beta0=0.4) +""" +full list of arguments for the algorithm +---------------------------------------- +net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization +optimizers_list: a list of optimizers for all networks and differentiable variables +replay_buffer_size: the size of buffer for storing explored samples +tau: soft update factor +""" + +model.learn(env, mode='train', render=False, + train_episodes=1000, + test_episodes=10, + max_steps=200, + save_interval=1e3, + batch_size=32, + exploration_rate=0.2, + exploration_final_eps=0.01, + train_freq=4, + learning_starts=200, + target_network_update_freq=50, + gamma=0.99, ) +""" +full list of parameters for training +--------------------------------------- +env: learning environment +train_episodes: total number of episodes for training +test_episodes: total number of episodes for testing +max_steps: maximum number of steps for one episode +save_interval: time steps for saving +explore_steps: for random action sampling in the beginning of training +mode: train or test mode +render: render each step +batch_size: update batch size +gamma: reward decay factor +noise_scale: range of action noise for exploration +noise_scale_decay: noise scale decay factor +""" + +model.learn(env, mode='test', render=True, + test_episodes=10, + batch_size=32, + exploration_rate=0.2, + exploration_final_eps=0.01, + train_freq=4, + learning_starts=200, + target_network_update_freq=50, + gamma=0.99, ) diff --git a/rlzoo/algorithms/pg/__init__.py b/rlzoo/algorithms/pg/__init__.py old mode 100644 new mode 100755 diff --git a/rlzoo/algorithms/pg/default.py b/rlzoo/algorithms/pg/default.py old mode 100644 new mode 100755 index 09361b6..40836c5 --- a/rlzoo/algorithms/pg/default.py +++ b/rlzoo/algorithms/pg/default.py @@ -1,259 +1,259 @@ -from rlzoo.common.policy_networks import * -from rlzoo.common.utils import set_seed - -""" -full list of algorithm parameters (alg_params) ------------------------------------------------ -net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization -optimizers_list: a list of optimizers for all networks and differentiable variables ------------------------------------------------ - -full list of learning parameters (learn_params) ------------------------------------------------ -train_episodes: total number of episodes for training -test_episodes: total number of episodes for testing -max_steps: maximum number of steps for one episode -save_interval: time steps for saving -mode: train or test -render: render each step -gamma: reward decay ------------------------------------------------ -""" - - -def atari(env, default_seed=True): - if default_seed: - seed = 2 - set_seed(seed, env) # reproducible - - alg_params = dict() - - if alg_params.get('net_list') is None: 
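# Illustrative sketch (not part of this patch): each default builder in this file fills
# alg_params['net_list'] and alg_params['optimizers_list'] only when the caller has not
# already supplied them, then returns (alg_params, learn_params). A hedged example of how
# a run script could consume these defaults; the zoo's actual entry point may differ.
import gym
from rlzoo.algorithms.pg.pg import PG
from rlzoo.algorithms.pg.default import classic_control

env = gym.make('CartPole-v0').unwrapped
alg_params, learn_params = classic_control(env)   # seeds the env and builds default net/optimizer
agent = PG(**alg_params)                           # unpacks net_list and optimizers_list
agent.learn(env, mode='train', render=False, **learn_params)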
- num_hidden_layer = 1 # number of hidden layers for the networks - hidden_dim = 32 # dimension of hidden layers for the networks - with tf.name_scope('PG'): - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - num_hidden_layer * [hidden_dim]) - net_list = [policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - learning_rate = 0.02 - policy_optimizer = tf.optimizers.Adam(learning_rate) - optimizers_list = [policy_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - train_episodes=200, - test_episodes=100, - max_steps=200, - save_interval=20, - gamma=0.95 - ) - - return alg_params, learn_params - - -def classic_control(env, default_seed=True): - if default_seed: - seed = 2 - set_seed(seed, env) # reproducible - - alg_params = dict() - - if alg_params.get('net_list') is None: - num_hidden_layer = 1 # number of hidden layers for the networks - hidden_dim = 32 # dimension of hidden layers for the networks - with tf.name_scope('PG'): - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - num_hidden_layer * [hidden_dim]) - net_list = [policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - learning_rate = 0.02 - policy_optimizer = tf.optimizers.Adam(learning_rate) - optimizers_list = [policy_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - train_episodes=200, - test_episodes=100, - max_steps=200, - save_interval=20, - gamma=0.95 - ) - - return alg_params, learn_params - - -def box2d(env, default_seed=True): - if default_seed: - seed = 2 - set_seed(seed, env) # reproducible - - alg_params = dict() - - if alg_params.get('net_list') is None: - num_hidden_layer = 1 # number of hidden layers for the networks - hidden_dim = 32 # dimension of hidden layers for the networks - with tf.name_scope('PG'): - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - num_hidden_layer * [hidden_dim]) - net_list = [policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - learning_rate = 0.02 - policy_optimizer = tf.optimizers.Adam(learning_rate) - optimizers_list = [policy_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - train_episodes=200, - test_episodes=100, - max_steps=200, - save_interval=20, - gamma=0.95 - ) - - return alg_params, learn_params - - -def mujoco(env, default_seed=True): - if default_seed: - seed = 2 - set_seed(seed, env) # reproducible - - alg_params = dict() - - if alg_params.get('net_list') is None: - num_hidden_layer = 1 # number of hidden layers for the networks - hidden_dim = 32 # dimension of hidden layers for the networks - with tf.name_scope('PG'): - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - num_hidden_layer * [hidden_dim]) - net_list = [policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - learning_rate = 0.02 - policy_optimizer = tf.optimizers.Adam(learning_rate) - optimizers_list = [policy_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - train_episodes=200, - test_episodes=100, - max_steps=200, - save_interval=20, - gamma=0.95 - ) - - return alg_params, learn_params - - -def robotics(env, default_seed=True): - if 
default_seed: - seed = 2 - set_seed(seed, env) # reproducible - - alg_params = dict() - - if alg_params.get('net_list') is None: - num_hidden_layer = 1 # number of hidden layers for the networks - hidden_dim = 32 # dimension of hidden layers for the networks - with tf.name_scope('PG'): - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - num_hidden_layer * [hidden_dim]) - net_list = [policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - learning_rate = 0.02 - policy_optimizer = tf.optimizers.Adam(learning_rate) - optimizers_list = [policy_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - train_episodes=200, - test_episodes=100, - max_steps=200, - save_interval=20, - gamma=0.95 - ) - - return alg_params, learn_params - - -def dm_control(env, default_seed=True): - if default_seed: - seed = 2 - set_seed(seed, env) # reproducible - - alg_params = dict() - - if alg_params.get('net_list') is None: - num_hidden_layer = 1 # number of hidden layers for the networks - hidden_dim = 32 # dimension of hidden layers for the networks - with tf.name_scope('PG'): - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - num_hidden_layer * [hidden_dim]) - net_list = [policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - learning_rate = 0.02 - policy_optimizer = tf.optimizers.Adam(learning_rate) - optimizers_list = [policy_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - train_episodes=200, - test_episodes=100, - max_steps=200, - save_interval=20, - gamma=0.95 - ) - - return alg_params, learn_params - - -def rlbench(env, default_seed=True): - if default_seed: - seed = 2 - set_seed(seed, env) # reproducible - - alg_params = dict() - - if alg_params.get('net_list') is None: - num_hidden_layer = 1 # number of hidden layers for the networks - hidden_dim = 32 # dimension of hidden layers for the networks - with tf.name_scope('PG'): - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - num_hidden_layer * [hidden_dim]) - net_list = [policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - learning_rate = 0.02 - policy_optimizer = tf.optimizers.Adam(learning_rate) - optimizers_list = [policy_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - train_episodes=200, - test_episodes=100, - max_steps=200, - save_interval=20, - gamma=0.95 - ) - - return alg_params, learn_params +from rlzoo.common.policy_networks import * +from rlzoo.common.utils import set_seed + +""" +full list of algorithm parameters (alg_params) +----------------------------------------------- +net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization +optimizers_list: a list of optimizers for all networks and differentiable variables +----------------------------------------------- + +full list of learning parameters (learn_params) +----------------------------------------------- +train_episodes: total number of episodes for training +test_episodes: total number of episodes for testing +max_steps: maximum number of steps for one episode +save_interval: time steps for saving +mode: train or test +render: render each step +gamma: reward decay 
+----------------------------------------------- +""" + + +def atari(env, default_seed=True): + if default_seed: + seed = 2 + set_seed(seed, env) # reproducible + + alg_params = dict() + + if alg_params.get('net_list') is None: + num_hidden_layer = 1 # number of hidden layers for the networks + hidden_dim = 32 # dimension of hidden layers for the networks + with tf.name_scope('PG'): + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + num_hidden_layer * [hidden_dim]) + net_list = [policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + learning_rate = 0.02 + policy_optimizer = tf.optimizers.Adam(learning_rate) + optimizers_list = [policy_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + train_episodes=200, + test_episodes=100, + max_steps=200, + save_interval=20, + gamma=0.95 + ) + + return alg_params, learn_params + + +def classic_control(env, default_seed=True): + if default_seed: + seed = 2 + set_seed(seed, env) # reproducible + + alg_params = dict() + + if alg_params.get('net_list') is None: + num_hidden_layer = 1 # number of hidden layers for the networks + hidden_dim = 32 # dimension of hidden layers for the networks + with tf.name_scope('PG'): + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + num_hidden_layer * [hidden_dim]) + net_list = [policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + learning_rate = 0.02 + policy_optimizer = tf.optimizers.Adam(learning_rate) + optimizers_list = [policy_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + train_episodes=200, + test_episodes=100, + max_steps=200, + save_interval=20, + gamma=0.95 + ) + + return alg_params, learn_params + + +def box2d(env, default_seed=True): + if default_seed: + seed = 2 + set_seed(seed, env) # reproducible + + alg_params = dict() + + if alg_params.get('net_list') is None: + num_hidden_layer = 1 # number of hidden layers for the networks + hidden_dim = 32 # dimension of hidden layers for the networks + with tf.name_scope('PG'): + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + num_hidden_layer * [hidden_dim]) + net_list = [policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + learning_rate = 0.02 + policy_optimizer = tf.optimizers.Adam(learning_rate) + optimizers_list = [policy_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + train_episodes=200, + test_episodes=100, + max_steps=200, + save_interval=20, + gamma=0.95 + ) + + return alg_params, learn_params + + +def mujoco(env, default_seed=True): + if default_seed: + seed = 2 + set_seed(seed, env) # reproducible + + alg_params = dict() + + if alg_params.get('net_list') is None: + num_hidden_layer = 1 # number of hidden layers for the networks + hidden_dim = 32 # dimension of hidden layers for the networks + with tf.name_scope('PG'): + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + num_hidden_layer * [hidden_dim]) + net_list = [policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + learning_rate = 0.02 + policy_optimizer = tf.optimizers.Adam(learning_rate) + optimizers_list = [policy_optimizer] + alg_params['optimizers_list'] = 
optimizers_list + + learn_params = dict( + train_episodes=200, + test_episodes=100, + max_steps=200, + save_interval=20, + gamma=0.95 + ) + + return alg_params, learn_params + + +def robotics(env, default_seed=True): + if default_seed: + seed = 2 + set_seed(seed, env) # reproducible + + alg_params = dict() + + if alg_params.get('net_list') is None: + num_hidden_layer = 1 # number of hidden layers for the networks + hidden_dim = 32 # dimension of hidden layers for the networks + with tf.name_scope('PG'): + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + num_hidden_layer * [hidden_dim]) + net_list = [policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + learning_rate = 0.02 + policy_optimizer = tf.optimizers.Adam(learning_rate) + optimizers_list = [policy_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + train_episodes=200, + test_episodes=100, + max_steps=200, + save_interval=20, + gamma=0.95 + ) + + return alg_params, learn_params + + +def dm_control(env, default_seed=True): + if default_seed: + seed = 2 + set_seed(seed, env) # reproducible + + alg_params = dict() + + if alg_params.get('net_list') is None: + num_hidden_layer = 1 # number of hidden layers for the networks + hidden_dim = 32 # dimension of hidden layers for the networks + with tf.name_scope('PG'): + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + num_hidden_layer * [hidden_dim]) + net_list = [policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + learning_rate = 0.02 + policy_optimizer = tf.optimizers.Adam(learning_rate) + optimizers_list = [policy_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + train_episodes=200, + test_episodes=100, + max_steps=200, + save_interval=20, + gamma=0.95 + ) + + return alg_params, learn_params + + +def rlbench(env, default_seed=True): + if default_seed: + seed = 2 + set_seed(seed, env) # reproducible + + alg_params = dict() + + if alg_params.get('net_list') is None: + num_hidden_layer = 1 # number of hidden layers for the networks + hidden_dim = 32 # dimension of hidden layers for the networks + with tf.name_scope('PG'): + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + num_hidden_layer * [hidden_dim]) + net_list = [policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + learning_rate = 0.02 + policy_optimizer = tf.optimizers.Adam(learning_rate) + optimizers_list = [policy_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + train_episodes=200, + test_episodes=100, + max_steps=200, + save_interval=20, + gamma=0.95 + ) + + return alg_params, learn_params diff --git a/rlzoo/algorithms/pg/pg.py b/rlzoo/algorithms/pg/pg.py old mode 100644 new mode 100755 index cfbe671..5d7252a --- a/rlzoo/algorithms/pg/pg.py +++ b/rlzoo/algorithms/pg/pg.py @@ -1,217 +1,217 @@ -""" -Vanilla Policy Gradient(VPG or REINFORCE) ------------------------------------------ -The policy gradient algorithm works by updating policy parameters via stochastic gradient ascent on policy performance. -It's an on-policy algorithm can be used for environments with either discrete or continuous action spaces. -Here is an example on discrete action space game CartPole-v0. 
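# Illustrative sketch (not part of this patch): what PG.update below does, reduced to plain
# NumPy. Rewards are discounted backwards, normalized, and then used to weight -log pi(a|s).
# The reward values are made up; only the arithmetic mirrors _discount_and_norm_rewards.
import numpy as np

rewards = np.array([1.0, 1.0, 1.0], dtype=np.float32)
gamma = 0.95
returns = np.zeros_like(rewards)
running_add = 0.0
for t in reversed(range(len(rewards))):   # G_t = r_t + gamma * G_{t+1}
    running_add = running_add * gamma + rewards[t]
    returns[t] = running_add
returns -= returns.mean()
if returns.std() != 0:
    returns /= returns.std()              # roughly [1.21, 0.02, -1.24]
# loss = mean(neg_log_prob * returns)  ->  stochastic gradient ascent on expected return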
-To apply it on continuous action space, you need to change the last softmax layer and the get_action function. - -Reference ---------- -Cookbook: Barto A G, Sutton R S. Reinforcement Learning: An Introduction[J]. 1998. -MorvanZhou's tutorial page: https://morvanzhou.github.io/tutorials/ -MorvanZhou's code: https://github.com/MorvanZhou/Reinforcement-learning-with-tensorflow/ - -Prerequisites --------------- -tensorflow >=2.0.0a0 -tensorflow-probability 0.6.0 -tensorlayer >=2.0.0 - -""" -import time - -from rlzoo.common.utils import * -from rlzoo.common.policy_networks import * - - -############################### PG #################################### - - -class PG: - """ - PG class - """ - - def __init__(self, net_list, optimizers_list): - """ - :param net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization - :param optimizers_list: a list of optimizers for all networks and differentiable variables - - """ - assert len(net_list) == 1 - assert len(optimizers_list) == 1 - self.name = 'PG' - self.model = net_list[0] - assert isinstance(self.model, StochasticPolicyNetwork) - self.buffer = [] - print('Policy Network', self.model) - self.optimizer = optimizers_list[0] - - def get_action(self, s): - """ - choose action with probabilities. - - :param s: state - - :return: act - """ - return self.model([s])[0].numpy() - - def get_action_greedy(self, s): - """ - choose action with greedy policy - - :param s: state - - :return: act - """ - return self.model([s], greedy=True).numpy()[0] - - def store_transition(self, s, a, r): - """ - store data in memory buffer - - :param s: state - :param a: act - :param r: reward - - :return: - """ - self.buffer.append([s, np.array(a, np.float32), np.array(r, np.float32)]) - - def update(self, gamma): - """ - update policy parameters via stochastic gradient ascent - - :return: None - """ - # discount and normalize episode reward - s, a, r = zip(*self.buffer) - s, a, r = np.array(s), np.array(a), np.array(r).flatten() - discounted_ep_rs_norm = self._discount_and_norm_rewards(r, gamma) - - with tf.GradientTape() as tape: - self.model(s) - neg_log_prob = self.model.policy_dist.neglogp(a) - loss = tf.reduce_mean(neg_log_prob * discounted_ep_rs_norm) # reward guided loss - - grad = tape.gradient(loss, self.model.trainable_weights) - self.optimizer.apply_gradients(zip(grad, self.model.trainable_weights)) - - self.buffer = [] - return discounted_ep_rs_norm - - def _discount_and_norm_rewards(self, reward_list, gamma): - """ - compute discount_and_norm_rewards - - :return: discount_and_norm_rewards - """ - # discount episode rewards - discounted_ep_rs = np.zeros_like(reward_list) - running_add = 0 - for t in reversed(range(0, len(reward_list))): - running_add = running_add * gamma + reward_list[t] - discounted_ep_rs[t] = running_add - - # normalize episode rewards - discounted_ep_rs -= np.mean(discounted_ep_rs) - std = np.std(discounted_ep_rs) - if std != 0: - discounted_ep_rs /= np.std(discounted_ep_rs) - discounted_ep_rs = discounted_ep_rs[:, np.newaxis] - return discounted_ep_rs - - def save_ckpt(self, env_name): - """ - save trained weights - - :return: None - """ - save_model(self.model, 'model_policy', self.name, env_name) - - def load_ckpt(self, env_name): - """ - load trained weights - - :return: None - """ - load_model(self.model, 'model_policy', self.name, env_name) - - def learn(self, env, train_episodes=200, test_episodes=100, max_steps=200, save_interval=100, - mode='train', render=False, gamma=0.95, 
plot_func=None): - """ - :param env: learning environment - :param train_episodes: total number of episodes for training - :param test_episodes: total number of episodes for testing - :param max_steps: maximum number of steps for one episode - :param save_interval: time steps for saving - :param mode: train or test - :param render: render each step - :param gamma: reward decay - :param plot_func: additional function for interactive module - :return: None - """ - - if mode == 'train': - print('Training... | Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) - reward_buffer = [] - t0 = time.time() - - for i_episode in range(1, train_episodes + 1): - - observation = env.reset() - - ep_rs_sum = 0 - for step in range(max_steps): - if render: - env.render() - action = self.get_action(observation) - observation_, reward, done, info = env.step(action) - self.store_transition(observation, action, reward) - - ep_rs_sum += reward - observation = observation_ - - if done: - break - - print('Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format( - i_episode, train_episodes, ep_rs_sum, time.time() - t0) - ) - reward_buffer.append(ep_rs_sum) - if plot_func is not None: - plot_func(reward_buffer) - - self.update(gamma) - - if i_episode and i_episode % save_interval == 0: - self.save_ckpt(env_name=env.spec.id) - plot_save_log(reward_buffer, algorithm_name='PG', env_name=env.spec.id) - - self.save_ckpt(env_name=env.spec.id) - plot_save_log(reward_buffer, algorithm_name='PG', env_name=env.spec.id) - - elif mode == 'test': - # test - self.load_ckpt(env_name=env.spec.id) - print('Testing... | Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) - t0 = time.time() - for eps in range(test_episodes): - observation = env.reset() - ep_rs_sum = 0 - for step in range(max_steps): - if render: - env.render() - action = self.get_action_greedy(observation) - observation, reward, done, info = env.step(action) - ep_rs_sum += reward - if done: - break - print('Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format( - eps, test_episodes, ep_rs_sum, time.time() - t0) - ) - - else: - print('unknown mode type') +""" +Vanilla Policy Gradient(VPG or REINFORCE) +----------------------------------------- +The policy gradient algorithm works by updating policy parameters via stochastic gradient ascent on policy performance. +It's an on-policy algorithm can be used for environments with either discrete or continuous action spaces. +Here is an example on discrete action space game CartPole-v0. +To apply it on continuous action space, you need to change the last softmax layer and the get_action function. + +Reference +--------- +Cookbook: Barto A G, Sutton R S. Reinforcement Learning: An Introduction[J]. 1998. 
+MorvanZhou's tutorial page: https://morvanzhou.github.io/tutorials/ +MorvanZhou's code: https://github.com/MorvanZhou/Reinforcement-learning-with-tensorflow/ + +Prerequisites +-------------- +tensorflow >=2.0.0a0 +tensorflow-probability 0.6.0 +tensorlayer >=2.0.0 + +""" +import time + +from rlzoo.common.utils import * +from rlzoo.common.policy_networks import * + + +############################### PG #################################### + + +class PG: + """ + PG class + """ + + def __init__(self, net_list, optimizers_list): + """ + :param net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization + :param optimizers_list: a list of optimizers for all networks and differentiable variables + + """ + assert len(net_list) == 1 + assert len(optimizers_list) == 1 + self.name = 'PG' + self.model = net_list[0] + assert isinstance(self.model, StochasticPolicyNetwork) + self.buffer = [] + print('Policy Network', self.model) + self.optimizer = optimizers_list[0] + + def get_action(self, s): + """ + choose action with probabilities. + + :param s: state + + :return: act + """ + return self.model([s])[0].numpy() + + def get_action_greedy(self, s): + """ + choose action with greedy policy + + :param s: state + + :return: act + """ + return self.model([s], greedy=True).numpy()[0] + + def store_transition(self, s, a, r): + """ + store data in memory buffer + + :param s: state + :param a: act + :param r: reward + + :return: + """ + self.buffer.append([s, np.array(a, np.float32), np.array(r, np.float32)]) + + def update(self, gamma): + """ + update policy parameters via stochastic gradient ascent + + :return: None + """ + # discount and normalize episode reward + s, a, r = zip(*self.buffer) + s, a, r = np.array(s), np.array(a), np.array(r).flatten() + discounted_ep_rs_norm = self._discount_and_norm_rewards(r, gamma) + + with tf.GradientTape() as tape: + self.model(s) + neg_log_prob = self.model.policy_dist.neglogp(a) + loss = tf.reduce_mean(neg_log_prob * discounted_ep_rs_norm) # reward guided loss + + grad = tape.gradient(loss, self.model.trainable_weights) + self.optimizer.apply_gradients(zip(grad, self.model.trainable_weights)) + + self.buffer = [] + return discounted_ep_rs_norm + + def _discount_and_norm_rewards(self, reward_list, gamma): + """ + compute discount_and_norm_rewards + + :return: discount_and_norm_rewards + """ + # discount episode rewards + discounted_ep_rs = np.zeros_like(reward_list) + running_add = 0 + for t in reversed(range(0, len(reward_list))): + running_add = running_add * gamma + reward_list[t] + discounted_ep_rs[t] = running_add + + # normalize episode rewards + discounted_ep_rs -= np.mean(discounted_ep_rs) + std = np.std(discounted_ep_rs) + if std != 0: + discounted_ep_rs /= np.std(discounted_ep_rs) + discounted_ep_rs = discounted_ep_rs[:, np.newaxis] + return discounted_ep_rs + + def save_ckpt(self, env_name): + """ + save trained weights + + :return: None + """ + save_model(self.model, 'model_policy', self.name, env_name) + + def load_ckpt(self, env_name): + """ + load trained weights + + :return: None + """ + load_model(self.model, 'model_policy', self.name, env_name) + + def learn(self, env, train_episodes=200, test_episodes=100, max_steps=200, save_interval=100, + mode='train', render=False, gamma=0.95, plot_func=None): + """ + :param env: learning environment + :param train_episodes: total number of episodes for training + :param test_episodes: total number of episodes for testing + :param max_steps: maximum number of 
steps for one episode + :param save_interval: time steps for saving + :param mode: train or test + :param render: render each step + :param gamma: reward decay + :param plot_func: additional function for interactive module + :return: None + """ + + if mode == 'train': + print('Training... | Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) + reward_buffer = [] + t0 = time.time() + + for i_episode in range(1, train_episodes + 1): + + observation = env.reset() + + ep_rs_sum = 0 + for step in range(max_steps): + if render: + env.render() + action = self.get_action(observation) + observation_, reward, done, info = env.step(action) + self.store_transition(observation, action, reward) + + ep_rs_sum += reward + observation = observation_ + + if done: + break + + print('Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format( + i_episode, train_episodes, ep_rs_sum, time.time() - t0) + ) + reward_buffer.append(ep_rs_sum) + if plot_func is not None: + plot_func(reward_buffer) + + self.update(gamma) + + if i_episode and i_episode % save_interval == 0: + self.save_ckpt(env_name=env.spec.id) + plot_save_log(reward_buffer, algorithm_name='PG', env_name=env.spec.id) + + self.save_ckpt(env_name=env.spec.id) + plot_save_log(reward_buffer, algorithm_name='PG', env_name=env.spec.id) + + elif mode == 'test': + # test + self.load_ckpt(env_name=env.spec.id) + print('Testing... | Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) + t0 = time.time() + for eps in range(test_episodes): + observation = env.reset() + ep_rs_sum = 0 + for step in range(max_steps): + if render: + env.render() + action = self.get_action_greedy(observation) + observation, reward, done, info = env.step(action) + ep_rs_sum += reward + if done: + break + print('Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}'.format( + eps, test_episodes, ep_rs_sum, time.time() - t0) + ) + + else: + print('unknown mode type') diff --git a/rlzoo/algorithms/pg/run_pg.py b/rlzoo/algorithms/pg/run_pg.py old mode 100644 new mode 100755 diff --git a/rlzoo/algorithms/ppo/__init__.py b/rlzoo/algorithms/ppo/__init__.py old mode 100644 new mode 100755 diff --git a/rlzoo/algorithms/ppo/default.py b/rlzoo/algorithms/ppo/default.py old mode 100644 new mode 100755 index a8305f8..3723871 --- a/rlzoo/algorithms/ppo/default.py +++ b/rlzoo/algorithms/ppo/default.py @@ -1,322 +1,322 @@ -from rlzoo.common.policy_networks import * -from rlzoo.common.value_networks import * -from rlzoo.common.utils import set_seed - -""" -full list of algorithm parameters (alg_params) ------------------------------------------------ -net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization -optimizers_list: a list of optimizers for all networks and differentiable variables -epsilon: clip parameter (for method 'clip') -kl_target: controls bounds of policy update and adaptive lambda (for method 'penalty') -lam: KL-regularization coefficient (for method 'penalty') ------------------------------------------------ - -full list of learning parameters (learn_params) ------------------------------------------------ -train_episodes: total number of episodes for training -test_episodes: total number of episodes for testing -max_steps: maximum number of steps for one episode -save_interval: time steps for saving -gamma: reward discount factor -mode: train or test -render: render each step -batch_size: UPDATE batch size -a_update_steps: actor update iteration steps -c_update_steps: critic update 
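The PG.update() path above weights each action's negative log-probability by the discounted, standardized episode return. A minimal numpy-only sketch of that return computation (a standalone illustration, not a verbatim extract from pg.py; the toy reward list is invented):

import numpy as np

def discount_and_norm_rewards(rewards, gamma=0.95):
    # discount back-to-front, then standardize (mirrors _discount_and_norm_rewards above)
    discounted = np.zeros_like(rewards, dtype=np.float32)
    running_add = 0.0
    for t in reversed(range(len(rewards))):
        running_add = running_add * gamma + rewards[t]
        discounted[t] = running_add
    discounted -= discounted.mean()
    std = discounted.std()
    if std != 0:
        discounted /= std
    return discounted[:, np.newaxis]

weights = discount_and_norm_rewards(np.array([1.0, 1.0, 0.0, 1.0]))
print(weights.shape)  # (4, 1) -- the PG loss is then mean(neglogp(a) * weights)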
iteration steps ------------------------------------------------ -""" - - -def atari(env, default_seed=True): - if default_seed: - # reproducible - seed = 1 - set_seed(seed, env) - - alg_params = dict(method='clip', # method can be clip or penalty - epsilon=0.2, # for method 'clip' - kl_target=0.01, # for method 'penalty' - lam=0.5,) # for method 'penalty' - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('PPO'): - with tf.name_scope('V_Net'): - v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - [hidden_dim] * num_hidden_layer, - output_activation=tf.nn.tanh, trainable=True) - net_list = [v_net, policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - actor_lr = 1e-4 - critic_lr = 2e-4 - optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict(train_episodes=1000, - test_episodes=100, - max_steps=200, - save_interval=50, - gamma=0.9, - batch_size=32, - a_update_steps=10, - c_update_steps=10) - - return alg_params, learn_params - - -def classic_control(env, default_seed=True): - if default_seed: - # reproducible - seed = 1 - set_seed(seed, env) - - alg_params = dict(method='clip', # method can be clip or penalty - epsilon=0.2, # for method 'clip' - kl_target=0.01, # for method 'penalty' - lam=0.5,) # for method 'penalty' - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('PPO'): - with tf.name_scope('V_Net'): - v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - [hidden_dim] * num_hidden_layer, - output_activation=tf.nn.tanh, trainable=True) - net_list = [v_net, policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - actor_lr = 1e-4 - critic_lr = 2e-4 - optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict(train_episodes=1000, - test_episodes=100, - max_steps=200, - save_interval=50, - gamma=0.9, - batch_size=32, - a_update_steps=10, - c_update_steps=10) - - return alg_params, learn_params - - -def box2d(env, default_seed=True): - if default_seed: - # reproducible - seed = 1 - set_seed(seed, env) - - alg_params = dict(method='clip', # method can be clip or penalty - epsilon=0.2, # for method 'clip' - kl_target=0.01, # for method 'penalty' - lam=0.5,) # for method 'penalty' - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('PPO'): - with tf.name_scope('V_Net'): - v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - [hidden_dim] * num_hidden_layer, - output_activation=tf.nn.tanh, trainable=True) - net_list = [v_net, policy_net] - alg_params['net_list'] = net_list - - if 
alg_params.get('optimizers_list') is None: - actor_lr = 1e-4 - critic_lr = 2e-4 - optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict(train_episodes=1000, - test_episodes=100, - max_steps=200, - save_interval=50, - gamma=0.9, - batch_size=32, - a_update_steps=10, - c_update_steps=10) - - return alg_params, learn_params - - -def mujoco(env, default_seed=True): - if default_seed: - # reproducible - seed = 1 - set_seed(seed, env) - - alg_params = dict(method='clip', # method can be clip or penalty - epsilon=0.2, # for method 'clip' - kl_target=0.01, # for method 'penalty' - lam=0.5,) # for method 'penalty' - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('PPO'): - with tf.name_scope('V_Net'): - v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - [hidden_dim] * num_hidden_layer, - output_activation=tf.nn.tanh, trainable=True) - net_list = [v_net, policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - actor_lr = 1e-4 - critic_lr = 2e-4 - optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict(train_episodes=1000, - test_episodes=100, - max_steps=200, - save_interval=50, - gamma=0.9, - batch_size=32, - a_update_steps=10, - c_update_steps=10) - - return alg_params, learn_params - - -def robotics(env, default_seed=True): - if default_seed: - # reproducible - seed = 1 - set_seed(seed, env) - - alg_params = dict(method='clip', # method can be clip or penalty - epsilon=0.2, # for method 'clip' - kl_target=0.01, # for method 'penalty' - lam=0.5,) # for method 'penalty' - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('PPO'): - with tf.name_scope('V_Net'): - v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - [hidden_dim] * num_hidden_layer, - output_activation=tf.nn.tanh, trainable=True) - net_list = [v_net, policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - actor_lr = 1e-4 - critic_lr = 2e-4 - optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict(train_episodes=1000, - test_episodes=100, - max_steps=200, - save_interval=50, - gamma=0.9, - batch_size=32, - a_update_steps=10, - c_update_steps=10) - - return alg_params, learn_params - - -def dm_control(env, default_seed=True): - if default_seed: - # reproducible - seed = 1 - set_seed(seed, env) - - alg_params = dict(method='clip', # method can be clip or penalty - epsilon=0.2, # for method 'clip' - kl_target=0.01, # for method 'penalty' - lam=0.5,) # for method 'penalty' - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('PPO'): - with tf.name_scope('V_Net'): - v_net = 
ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - [hidden_dim] * num_hidden_layer, - output_activation=tf.nn.tanh, trainable=True) - net_list = [v_net, policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - actor_lr = 1e-4 - critic_lr = 2e-4 - optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict(train_episodes=1000, - test_episodes=100, - max_steps=200, - save_interval=50, - gamma=0.9, - batch_size=32, - a_update_steps=10, - c_update_steps=10) - - return alg_params, learn_params - - -def rlbench(env, default_seed=True): - if default_seed: - # reproducible - seed = 1 - set_seed(seed, env) - - alg_params = dict(method='clip', # method can be clip or penalty - epsilon=0.2, # for method 'clip' - kl_target=0.01, # for method 'penalty' - lam=0.5,) # for method 'penalty' - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('PPO'): - with tf.name_scope('V_Net'): - v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - [hidden_dim] * num_hidden_layer, - output_activation=tf.nn.tanh, trainable=True) - net_list = [v_net, policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - actor_lr = 1e-4 - critic_lr = 2e-4 - optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict(train_episodes=1000, - test_episodes=100, - max_steps=200, - save_interval=50, - gamma=0.9, - batch_size=32, - a_update_steps=10, - c_update_steps=10) - - return alg_params, learn_params +from rlzoo.common.policy_networks import * +from rlzoo.common.value_networks import * +from rlzoo.common.utils import set_seed + +""" +full list of algorithm parameters (alg_params) +----------------------------------------------- +net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization +optimizers_list: a list of optimizers for all networks and differentiable variables +epsilon: clip parameter (for method 'clip') +kl_target: controls bounds of policy update and adaptive lambda (for method 'penalty') +lam: KL-regularization coefficient (for method 'penalty') +----------------------------------------------- + +full list of learning parameters (learn_params) +----------------------------------------------- +train_episodes: total number of episodes for training +test_episodes: total number of episodes for testing +max_steps: maximum number of steps for one episode +save_interval: time steps for saving +gamma: reward discount factor +mode: train or test +render: render each step +batch_size: UPDATE batch size +a_update_steps: actor update iteration steps +c_update_steps: critic update iteration steps +----------------------------------------------- +""" + + +def atari(env, default_seed=True): + if default_seed: + # reproducible + seed = 1 + set_seed(seed, env) + + alg_params = dict(method='clip', # method can be clip or penalty + epsilon=0.2, # for method 'clip' + kl_target=0.01, # for method 'penalty' + lam=0.5,) # for 
method 'penalty' + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('PPO'): + with tf.name_scope('V_Net'): + v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + [hidden_dim] * num_hidden_layer, + output_activation=tf.nn.tanh, trainable=True) + net_list = [v_net, policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + actor_lr = 1e-4 + critic_lr = 2e-4 + optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict(train_episodes=1000, + test_episodes=100, + max_steps=200, + save_interval=50, + gamma=0.9, + batch_size=32, + a_update_steps=10, + c_update_steps=10) + + return alg_params, learn_params + + +def classic_control(env, default_seed=True): + if default_seed: + # reproducible + seed = 1 + set_seed(seed, env) + + alg_params = dict(method='clip', # method can be clip or penalty + epsilon=0.2, # for method 'clip' + kl_target=0.01, # for method 'penalty' + lam=0.5,) # for method 'penalty' + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('PPO'): + with tf.name_scope('V_Net'): + v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + [hidden_dim] * num_hidden_layer, + output_activation=tf.nn.tanh, trainable=True) + net_list = [v_net, policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + actor_lr = 1e-4 + critic_lr = 2e-4 + optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict(train_episodes=1000, + test_episodes=100, + max_steps=200, + save_interval=50, + gamma=0.9, + batch_size=32, + a_update_steps=10, + c_update_steps=10) + + return alg_params, learn_params + + +def box2d(env, default_seed=True): + if default_seed: + # reproducible + seed = 1 + set_seed(seed, env) + + alg_params = dict(method='clip', # method can be clip or penalty + epsilon=0.2, # for method 'clip' + kl_target=0.01, # for method 'penalty' + lam=0.5,) # for method 'penalty' + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('PPO'): + with tf.name_scope('V_Net'): + v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + [hidden_dim] * num_hidden_layer, + output_activation=tf.nn.tanh, trainable=True) + net_list = [v_net, policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + actor_lr = 1e-4 + critic_lr = 2e-4 + optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict(train_episodes=1000, + test_episodes=100, + max_steps=200, + save_interval=50, + gamma=0.9, + batch_size=32, + 
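The epsilon=0.2 entries in these defaults set the PPO clip range. As a sketch of the standard clipped surrogate that this parameter controls (the generic textbook form, not an extract from ppo_clip.py):

import numpy as np

def clipped_surrogate(ratio, advantage, epsilon=0.2):
    # PPO-clip objective per sample: min(r * A, clip(r, 1 - eps, 1 + eps) * A)
    clipped = np.clip(ratio, 1.0 - epsilon, 1.0 + epsilon) * advantage
    return np.minimum(ratio * advantage, clipped)

# ratio = pi_new(a|s) / pi_old(a|s); ratios far above 1 + eps stop contributing gradient
print(clipped_surrogate(np.array([0.5, 1.0, 1.5]), np.array([1.0, 1.0, 1.0])))
# -> [0.5 1.  1.2]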
a_update_steps=10, + c_update_steps=10) + + return alg_params, learn_params + + +def mujoco(env, default_seed=True): + if default_seed: + # reproducible + seed = 1 + set_seed(seed, env) + + alg_params = dict(method='clip', # method can be clip or penalty + epsilon=0.2, # for method 'clip' + kl_target=0.01, # for method 'penalty' + lam=0.5,) # for method 'penalty' + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('PPO'): + with tf.name_scope('V_Net'): + v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + [hidden_dim] * num_hidden_layer, + output_activation=tf.nn.tanh, trainable=True) + net_list = [v_net, policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + actor_lr = 1e-4 + critic_lr = 2e-4 + optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict(train_episodes=1000, + test_episodes=100, + max_steps=200, + save_interval=50, + gamma=0.9, + batch_size=32, + a_update_steps=10, + c_update_steps=10) + + return alg_params, learn_params + + +def robotics(env, default_seed=True): + if default_seed: + # reproducible + seed = 1 + set_seed(seed, env) + + alg_params = dict(method='clip', # method can be clip or penalty + epsilon=0.2, # for method 'clip' + kl_target=0.01, # for method 'penalty' + lam=0.5,) # for method 'penalty' + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('PPO'): + with tf.name_scope('V_Net'): + v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + [hidden_dim] * num_hidden_layer, + output_activation=tf.nn.tanh, trainable=True) + net_list = [v_net, policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + actor_lr = 1e-4 + critic_lr = 2e-4 + optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict(train_episodes=1000, + test_episodes=100, + max_steps=200, + save_interval=50, + gamma=0.9, + batch_size=32, + a_update_steps=10, + c_update_steps=10) + + return alg_params, learn_params + + +def dm_control(env, default_seed=True): + if default_seed: + # reproducible + seed = 1 + set_seed(seed, env) + + alg_params = dict(method='clip', # method can be clip or penalty + epsilon=0.2, # for method 'clip' + kl_target=0.01, # for method 'penalty' + lam=0.5,) # for method 'penalty' + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('PPO'): + with tf.name_scope('V_Net'): + v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + [hidden_dim] * num_hidden_layer, + output_activation=tf.nn.tanh, trainable=True) + net_list = [v_net, policy_net] + alg_params['net_list'] = net_list + + if 
alg_params.get('optimizers_list') is None: + actor_lr = 1e-4 + critic_lr = 2e-4 + optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict(train_episodes=1000, + test_episodes=100, + max_steps=200, + save_interval=50, + gamma=0.9, + batch_size=32, + a_update_steps=10, + c_update_steps=10) + + return alg_params, learn_params + + +def rlbench(env, default_seed=True): + if default_seed: + # reproducible + seed = 1 + set_seed(seed, env) + + alg_params = dict(method='clip', # method can be clip or penalty + epsilon=0.2, # for method 'clip' + kl_target=0.01, # for method 'penalty' + lam=0.5,) # for method 'penalty' + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('PPO'): + with tf.name_scope('V_Net'): + v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + [hidden_dim] * num_hidden_layer, + output_activation=tf.nn.tanh, trainable=True) + net_list = [v_net, policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + actor_lr = 1e-4 + critic_lr = 2e-4 + optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict(train_episodes=1000, + test_episodes=100, + max_steps=200, + save_interval=50, + gamma=0.9, + batch_size=32, + a_update_steps=10, + c_update_steps=10) + + return alg_params, learn_params diff --git a/rlzoo/algorithms/ppo/ppo.py b/rlzoo/algorithms/ppo/ppo.py old mode 100644 new mode 100755 diff --git a/rlzoo/algorithms/ppo_clip/__init__.py b/rlzoo/algorithms/ppo_clip/__init__.py old mode 100644 new mode 100755 diff --git a/rlzoo/algorithms/ppo_clip/ppo_clip.py b/rlzoo/algorithms/ppo_clip/ppo_clip.py old mode 100644 new mode 100755 diff --git a/rlzoo/algorithms/ppo_clip/run_ppo_clip.py b/rlzoo/algorithms/ppo_clip/run_ppo_clip.py old mode 100644 new mode 100755 index fff0853..99645ed --- a/rlzoo/algorithms/ppo_clip/run_ppo_clip.py +++ b/rlzoo/algorithms/ppo_clip/run_ppo_clip.py @@ -1,59 +1,59 @@ -from rlzoo.common.utils import make_env, set_seed -from rlzoo.algorithms.ppo_clip.ppo_clip import PPO_CLIP -from rlzoo.common.policy_networks import * -from rlzoo.common.value_networks import * -import gym - - -""" load environment """ -env = gym.make('Pendulum-v0').unwrapped - -# reproducible -seed = 1 -set_seed(seed, env) - -""" build networks for the algorithm """ -name = 'PPO_CLIP' -hidden_dim = 64 -num_hidden_layer = 2 -critic = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer, name=name + '_value') - -actor = StochasticPolicyNetwork(env.observation_space, env.action_space, [hidden_dim] * num_hidden_layer, - output_activation=tf.nn.tanh, name=name + '_policy') -net_list = critic, actor - -""" create model """ -actor_lr = 1e-4 -critic_lr = 2e-4 -optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] - -model = PPO_CLIP(net_list, optimizers_list,) -""" -full list of arguments for the algorithm ----------------------------------------- -net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization -optimizers_list: a list of optimizers for all networks and differentiable variables -epsilon: clip parameter 
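Each of these default functions returns an (alg_params, learn_params) pair. A hypothetical driver showing how such a pair could be consumed (the PPO class name, its acceptance of these keys as keyword arguments, and the wiring of mode/render are assumptions, not taken from this patch):

import gym
from rlzoo.algorithms.ppo.default import classic_control
from rlzoo.algorithms.ppo.ppo import PPO  # assumed class name inside ppo.py

env = gym.make('Pendulum-v0').unwrapped
alg_params, learn_params = classic_control(env)   # builds nets and optimizers if not supplied
model = PPO(**alg_params)                          # assumes PPO accepts these keys as kwargs
model.learn(env, mode='train', render=False, **learn_params)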
-""" - -model.learn(env, train_episodes=500, max_steps=200, save_interval=50, gamma=0.9, - mode='train', render=False, batch_size=32, a_update_steps=10, c_update_steps=10) - -""" -full list of parameters for training ---------------------------------------- -env: learning environment -train_episodes: total number of episodes for training -test_episodes: total number of episodes for testing -max_steps: maximum number of steps for one episode -save_interval: time steps for saving -gamma: reward discount factor -mode: train or test -render: render each step -batch_size: UPDATE batch size -a_update_steps: actor update iteration steps -c_update_steps: critic update iteration steps -:return: None -""" -model.learn(env, test_episodes=100, max_steps=200, mode='test', render=True) - +from rlzoo.common.utils import make_env, set_seed +from rlzoo.algorithms.ppo_clip.ppo_clip import PPO_CLIP +from rlzoo.common.policy_networks import * +from rlzoo.common.value_networks import * +import gym + + +""" load environment """ +env = gym.make('Pendulum-v0').unwrapped + +# reproducible +seed = 1 +set_seed(seed, env) + +""" build networks for the algorithm """ +name = 'PPO_CLIP' +hidden_dim = 64 +num_hidden_layer = 2 +critic = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer, name=name + '_value') + +actor = StochasticPolicyNetwork(env.observation_space, env.action_space, [hidden_dim] * num_hidden_layer, + output_activation=tf.nn.tanh, name=name + '_policy') +net_list = critic, actor + +""" create model """ +actor_lr = 1e-4 +critic_lr = 2e-4 +optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] + +model = PPO_CLIP(net_list, optimizers_list,) +""" +full list of arguments for the algorithm +---------------------------------------- +net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization +optimizers_list: a list of optimizers for all networks and differentiable variables +epsilon: clip parameter +""" + +model.learn(env, train_episodes=500, max_steps=200, save_interval=50, gamma=0.9, + mode='train', render=False, batch_size=32, a_update_steps=10, c_update_steps=10) + +""" +full list of parameters for training +--------------------------------------- +env: learning environment +train_episodes: total number of episodes for training +test_episodes: total number of episodes for testing +max_steps: maximum number of steps for one episode +save_interval: time steps for saving +gamma: reward discount factor +mode: train or test +render: render each step +batch_size: UPDATE batch size +a_update_steps: actor update iteration steps +c_update_steps: critic update iteration steps +:return: None +""" +model.learn(env, test_episodes=100, max_steps=200, mode='test', render=True) + diff --git a/rlzoo/algorithms/ppo_penalty/__init__.py b/rlzoo/algorithms/ppo_penalty/__init__.py old mode 100644 new mode 100755 diff --git a/rlzoo/algorithms/ppo_penalty/ppo_penalty.py b/rlzoo/algorithms/ppo_penalty/ppo_penalty.py old mode 100644 new mode 100755 diff --git a/rlzoo/algorithms/ppo_penalty/run_ppo_penalty.py b/rlzoo/algorithms/ppo_penalty/run_ppo_penalty.py old mode 100644 new mode 100755 index 4e847d0..de6672b --- a/rlzoo/algorithms/ppo_penalty/run_ppo_penalty.py +++ b/rlzoo/algorithms/ppo_penalty/run_ppo_penalty.py @@ -1,60 +1,60 @@ -from rlzoo.common.utils import make_env, set_seed -from rlzoo.algorithms.ppo_penalty.ppo_penalty import PPO_PENALTY -from rlzoo.common.policy_networks import * -from rlzoo.common.value_networks import * 
-import gym - - -""" load environment """ -env = gym.make('Pendulum-v0').unwrapped - -# reproducible -seed = 1 -set_seed(seed, env) - -""" build networks for the algorithm """ -name = 'PPO_PENALTY' -hidden_dim = 64 -num_hidden_layer = 2 -critic = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer, name=name + '_value') - -actor = StochasticPolicyNetwork(env.observation_space, env.action_space, [hidden_dim] * num_hidden_layer, - output_activation=tf.nn.tanh, name=name + '_policy') -net_list = critic, actor - -""" create model """ -actor_lr = 1e-4 -critic_lr = 2e-4 -optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] - -model = PPO_PENALTY(net_list, optimizers_list,) -""" -full list of arguments for the algorithm ----------------------------------------- -net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization -optimizers_list: a list of optimizers for all networks and differentiable variables -kl_target: controls bounds of policy update and adaptive lambda -lam: KL-regularization coefficient -""" - -model.learn(env, train_episodes=500, max_steps=200, save_interval=50, gamma=0.9, - mode='train', render=False, batch_size=32, a_update_steps=10, c_update_steps=10) - -""" -full list of parameters for training ---------------------------------------- -env: learning environment -train_episodes: total number of episodes for training -test_episodes: total number of episodes for testing -max_steps: maximum number of steps for one episode -save_interval: times teps for saving -gamma: reward discount factor -mode: train or test -render: render each step -batch_size: update batch size -a_update_steps: actor update iteration steps -c_update_steps: critic update iteration steps -:return: None -""" - -model.learn(env, test_episodes=100, max_steps=200, mode='test', render=True) +from rlzoo.common.utils import make_env, set_seed +from rlzoo.algorithms.ppo_penalty.ppo_penalty import PPO_PENALTY +from rlzoo.common.policy_networks import * +from rlzoo.common.value_networks import * +import gym + + +""" load environment """ +env = gym.make('Pendulum-v0').unwrapped + +# reproducible +seed = 1 +set_seed(seed, env) + +""" build networks for the algorithm """ +name = 'PPO_PENALTY' +hidden_dim = 64 +num_hidden_layer = 2 +critic = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer, name=name + '_value') + +actor = StochasticPolicyNetwork(env.observation_space, env.action_space, [hidden_dim] * num_hidden_layer, + output_activation=tf.nn.tanh, name=name + '_policy') +net_list = critic, actor + +""" create model """ +actor_lr = 1e-4 +critic_lr = 2e-4 +optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] + +model = PPO_PENALTY(net_list, optimizers_list,) +""" +full list of arguments for the algorithm +---------------------------------------- +net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization +optimizers_list: a list of optimizers for all networks and differentiable variables +kl_target: controls bounds of policy update and adaptive lambda +lam: KL-regularization coefficient +""" + +model.learn(env, train_episodes=500, max_steps=200, save_interval=50, gamma=0.9, + mode='train', render=False, batch_size=32, a_update_steps=10, c_update_steps=10) + +""" +full list of parameters for training +--------------------------------------- +env: learning environment +train_episodes: total number of episodes for training +test_episodes: 
total number of episodes for testing +max_steps: maximum number of steps for one episode +save_interval: times teps for saving +gamma: reward discount factor +mode: train or test +render: render each step +batch_size: update batch size +a_update_steps: actor update iteration steps +c_update_steps: critic update iteration steps +:return: None +""" + +model.learn(env, test_episodes=100, max_steps=200, mode='test', render=True) diff --git a/rlzoo/algorithms/sac/__init__.py b/rlzoo/algorithms/sac/__init__.py old mode 100644 new mode 100755 diff --git a/rlzoo/algorithms/sac/default.py b/rlzoo/algorithms/sac/default.py old mode 100644 new mode 100755 index 913db21..8fb86ea --- a/rlzoo/algorithms/sac/default.py +++ b/rlzoo/algorithms/sac/default.py @@ -1,364 +1,364 @@ -from rlzoo.common.policy_networks import * -from rlzoo.common.value_networks import * -from rlzoo.common.utils import set_seed - -""" -full list of algorithm parameters (alg_params) ------------------------------------------------ -net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization -optimizers_list: a list of optimizers for all networks and differentiable variables -replay_buffer_capacity: the size of buffer for storing explored samples ------------------------------------------------ - -full list of learning parameters (learn_params) ------------------------------------------------ -train_episodes: total number of episodes for training -test_episodes: total number of episodes for testing -max_steps: maximum number of steps for one episode -batch_size: udpate batchsize -explore_steps: for random action sampling in the beginning of training -update_itr: repeated updates for single step -policy_target_update_interval: delayed update for the policy network and target networks -reward_scale: value range of reward -save_interval: timesteps for saving the weights and plotting the results -mode: 'train' or 'test' -AUTO_ENTROPY: automatically udpating variable alpha for entropy -render: if true, visualize the environment ------------------------------------------------ -""" - - -def classic_control(env, default_seed=True): - if default_seed: - seed = 2 - set_seed(seed, env) # reproducible - - alg_params = dict( - replay_buffer_capacity=5e5, - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks, default as the same for each layer here - with tf.name_scope('SAC'): - with tf.name_scope('Q_Net1'): - soft_q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Q_Net2'): - soft_q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net1'): - target_soft_q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net2'): - target_soft_q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim], - output_activation=None, - state_conditioned=True) - net_list = [soft_q_net1, soft_q_net2, target_soft_q_net1, target_soft_q_net2, policy_net] - alg_params['net_list'] = net_list - if alg_params.get('optimizers_list') is None: - 
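AUTO_ENTROPY in these SAC defaults refers to learning the entropy temperature alpha with its own optimizer (alpha_lr). A minimal TensorFlow sketch of that standard update (target_entropy = -action_dim is a common default and an assumption here, not read from rlzoo's sac.py):

import tensorflow as tf

action_dim = 1                       # assumed 1-D action space for the toy example
target_entropy = -float(action_dim)  # common default: -dim(action_space)
log_alpha = tf.Variable(0.0)
alpha_optimizer = tf.optimizers.Adam(3e-4)

def update_alpha(log_prob_batch):
    # push alpha up when the policy's entropy falls below the target, down otherwise
    with tf.GradientTape() as tape:
        alpha_loss = -tf.reduce_mean(log_alpha * (log_prob_batch + target_entropy))
    grads = tape.gradient(alpha_loss, [log_alpha])
    alpha_optimizer.apply_gradients(zip(grads, [log_alpha]))
    return tf.exp(log_alpha)

print(update_alpha(tf.constant([-1.2, -0.8, -1.0])).numpy())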
soft_q_lr, policy_lr, alpha_lr = 3e-4, 3e-4, 3e-4 # soft_q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network; alpha_lr: learning rate of the variable alpha - soft_q_optimizer1 = tf.optimizers.Adam(soft_q_lr) - soft_q_optimizer2 = tf.optimizers.Adam(soft_q_lr) - policy_optimizer = tf.optimizers.Adam(policy_lr) - alpha_optimizer = tf.optimizers.Adam(alpha_lr) - optimizers_list = [soft_q_optimizer1, soft_q_optimizer2, policy_optimizer, alpha_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=150, - batch_size=64, - explore_steps=200, - update_itr=3, - policy_target_update_interval=3, - reward_scale=1., - AUTO_ENTROPY=True, - train_episodes=100, - test_episodes=10, - save_interval=10, - ) - - return alg_params, learn_params - - -def box2d(env, default_seed=True): - if default_seed: - seed = 2 - set_seed(seed, env) # reproducible - - alg_params = dict( - replay_buffer_capacity=5e5, - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks, default as the same for each layer here - with tf.name_scope('SAC'): - with tf.name_scope('Q_Net1'): - soft_q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Q_Net2'): - soft_q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net1'): - target_soft_q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net2'): - target_soft_q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim], - output_activation=None, - state_conditioned=True) - net_list = [soft_q_net1, soft_q_net2, target_soft_q_net1, target_soft_q_net2, policy_net] - alg_params['net_list'] = net_list - if alg_params.get('optimizers_list') is None: - soft_q_lr, policy_lr, alpha_lr = 3e-4, 3e-4, 3e-4 # soft_q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network; alpha_lr: learning rate of the variable alpha - soft_q_optimizer1 = tf.optimizers.Adam(soft_q_lr) - soft_q_optimizer2 = tf.optimizers.Adam(soft_q_lr) - policy_optimizer = tf.optimizers.Adam(policy_lr) - alpha_optimizer = tf.optimizers.Adam(alpha_lr) - optimizers_list = [soft_q_optimizer1, soft_q_optimizer2, policy_optimizer, alpha_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=150, - batch_size=64, - explore_steps=200, - update_itr=3, - policy_target_update_interval=3, - reward_scale=1., - AUTO_ENTROPY=True, - train_episodes=100, - test_episodes=10, - save_interval=10, - ) - - return alg_params, learn_params - - -def mujoco(env, default_seed=True): - if default_seed: - seed = 2 - set_seed(seed, env) # reproducible - - alg_params = dict( - replay_buffer_capacity=5e5, - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks, default as the same for each layer here - with tf.name_scope('SAC'): - with tf.name_scope('Q_Net1'): - soft_q_net1 = QNetwork(env.observation_space, env.action_space, - 
hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Q_Net2'): - soft_q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net1'): - target_soft_q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net2'): - target_soft_q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim], - output_activation=None, - state_conditioned=True) - net_list = [soft_q_net1, soft_q_net2, target_soft_q_net1, target_soft_q_net2, policy_net] - alg_params['net_list'] = net_list - if alg_params.get('optimizers_list') is None: - soft_q_lr, policy_lr, alpha_lr = 3e-4, 3e-4, 3e-4 # soft_q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network; alpha_lr: learning rate of the variable alpha - soft_q_optimizer1 = tf.optimizers.Adam(soft_q_lr) - soft_q_optimizer2 = tf.optimizers.Adam(soft_q_lr) - policy_optimizer = tf.optimizers.Adam(policy_lr) - alpha_optimizer = tf.optimizers.Adam(alpha_lr) - optimizers_list = [soft_q_optimizer1, soft_q_optimizer2, policy_optimizer, alpha_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=150, - batch_size=64, - explore_steps=200, - update_itr=3, - policy_target_update_interval=3, - reward_scale=1., - AUTO_ENTROPY=True, - train_episodes=100, - test_episodes=10, - save_interval=10, - ) - - return alg_params, learn_params - - -def robotics(env, default_seed=True): - if default_seed: - seed = 2 - set_seed(seed, env) # reproducible - - alg_params = dict( - replay_buffer_capacity=5e5, - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks, default as the same for each layer here - with tf.name_scope('SAC'): - with tf.name_scope('Q_Net1'): - soft_q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Q_Net2'): - soft_q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net1'): - target_soft_q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net2'): - target_soft_q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim], - output_activation=None, - state_conditioned=True) - net_list = [soft_q_net1, soft_q_net2, target_soft_q_net1, target_soft_q_net2, policy_net] - alg_params['net_list'] = net_list - if alg_params.get('optimizers_list') is None: - soft_q_lr, policy_lr, alpha_lr = 3e-4, 3e-4, 3e-4 # soft_q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network; alpha_lr: learning rate of the variable alpha - soft_q_optimizer1 = tf.optimizers.Adam(soft_q_lr) - soft_q_optimizer2 = tf.optimizers.Adam(soft_q_lr) - policy_optimizer = tf.optimizers.Adam(policy_lr) - alpha_optimizer = tf.optimizers.Adam(alpha_lr) - 
optimizers_list = [soft_q_optimizer1, soft_q_optimizer2, policy_optimizer, alpha_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=150, - batch_size=64, - explore_steps=200, - update_itr=3, - policy_target_update_interval=3, - reward_scale=1., - AUTO_ENTROPY=True, - train_episodes=100, - test_episodes=10, - save_interval=10, - ) - - return alg_params, learn_params - - -def dm_control(env, default_seed=True): - if default_seed: - seed = 2 - set_seed(seed, env) # reproducible - - alg_params = dict( - replay_buffer_capacity=5e5, - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks, default as the same for each layer here - with tf.name_scope('SAC'): - with tf.name_scope('Q_Net1'): - soft_q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Q_Net2'): - soft_q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net1'): - target_soft_q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net2'): - target_soft_q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim], - output_activation=None, - state_conditioned=True) - net_list = [soft_q_net1, soft_q_net2, target_soft_q_net1, target_soft_q_net2, policy_net] - alg_params['net_list'] = net_list - if alg_params.get('optimizers_list') is None: - soft_q_lr, policy_lr, alpha_lr = 3e-4, 3e-4, 3e-4 # soft_q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network; alpha_lr: learning rate of the variable alpha - soft_q_optimizer1 = tf.optimizers.Adam(soft_q_lr) - soft_q_optimizer2 = tf.optimizers.Adam(soft_q_lr) - policy_optimizer = tf.optimizers.Adam(policy_lr) - alpha_optimizer = tf.optimizers.Adam(alpha_lr) - optimizers_list = [soft_q_optimizer1, soft_q_optimizer2, policy_optimizer, alpha_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=150, - batch_size=64, - explore_steps=200, - update_itr=3, - policy_target_update_interval=3, - reward_scale=1., - AUTO_ENTROPY=True, - train_episodes=100, - test_episodes=10, - save_interval=10, - ) - - return alg_params, learn_params - - -def rlbench(env, default_seed=True): - if default_seed: - seed = 2 - set_seed(seed, env) # reproducible - - alg_params = dict( - replay_buffer_capacity=5e5, - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks, default as the same for each layer here - with tf.name_scope('SAC'): - with tf.name_scope('Q_Net1'): - soft_q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Q_Net2'): - soft_q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net1'): - target_soft_q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with 
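The Target_Q_Net1/2 copies built in these defaults are typically synchronized with a polyak (soft) update at the policy_target_update_interval. A generic sketch (the tau value is illustrative, not a parameter taken from sac.py):

import tensorflow as tf

def soft_update(target_weights, source_weights, tau=0.005):
    # theta_target <- tau * theta + (1 - tau) * theta_target
    for t, s in zip(target_weights, source_weights):
        t.assign(tau * s + (1.0 - tau) * t)

q_weights = [tf.Variable([1.0, 2.0])]
target_q_weights = [tf.Variable([0.0, 0.0])]
soft_update(target_q_weights, q_weights)
print(target_q_weights[0].numpy())  # [0.005 0.01 ]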
tf.name_scope('Target_Q_Net2'): - target_soft_q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim], - output_activation=None, - state_conditioned=True) - net_list = [soft_q_net1, soft_q_net2, target_soft_q_net1, target_soft_q_net2, policy_net] - alg_params['net_list'] = net_list - if alg_params.get('optimizers_list') is None: - soft_q_lr, policy_lr, alpha_lr = 3e-4, 3e-4, 3e-4 # soft_q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network; alpha_lr: learning rate of the variable alpha - soft_q_optimizer1 = tf.optimizers.Adam(soft_q_lr) - soft_q_optimizer2 = tf.optimizers.Adam(soft_q_lr) - policy_optimizer = tf.optimizers.Adam(policy_lr) - alpha_optimizer = tf.optimizers.Adam(alpha_lr) - optimizers_list = [soft_q_optimizer1, soft_q_optimizer2, policy_optimizer, alpha_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=150, - batch_size=64, - explore_steps=200, - update_itr=3, - policy_target_update_interval=3, - reward_scale=1., - AUTO_ENTROPY=True, - train_episodes=100, - test_episodes=10, - save_interval=10, - ) - - return alg_params, learn_params +from rlzoo.common.policy_networks import * +from rlzoo.common.value_networks import * +from rlzoo.common.utils import set_seed + +""" +full list of algorithm parameters (alg_params) +----------------------------------------------- +net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization +optimizers_list: a list of optimizers for all networks and differentiable variables +replay_buffer_capacity: the size of buffer for storing explored samples +----------------------------------------------- + +full list of learning parameters (learn_params) +----------------------------------------------- +train_episodes: total number of episodes for training +test_episodes: total number of episodes for testing +max_steps: maximum number of steps for one episode +batch_size: udpate batchsize +explore_steps: for random action sampling in the beginning of training +update_itr: repeated updates for single step +policy_target_update_interval: delayed update for the policy network and target networks +reward_scale: value range of reward +save_interval: timesteps for saving the weights and plotting the results +mode: 'train' or 'test' +AUTO_ENTROPY: automatically udpating variable alpha for entropy +render: if true, visualize the environment +----------------------------------------------- +""" + + +def classic_control(env, default_seed=True): + if default_seed: + seed = 2 + set_seed(seed, env) # reproducible + + alg_params = dict( + replay_buffer_capacity=5e5, + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks, default as the same for each layer here + with tf.name_scope('SAC'): + with tf.name_scope('Q_Net1'): + soft_q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Q_Net2'): + soft_q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net1'): + target_soft_q_net1 = QNetwork(env.observation_space, env.action_space, + 
hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net2'): + target_soft_q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim], + output_activation=None, + state_conditioned=True) + net_list = [soft_q_net1, soft_q_net2, target_soft_q_net1, target_soft_q_net2, policy_net] + alg_params['net_list'] = net_list + if alg_params.get('optimizers_list') is None: + soft_q_lr, policy_lr, alpha_lr = 3e-4, 3e-4, 3e-4 # soft_q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network; alpha_lr: learning rate of the variable alpha + soft_q_optimizer1 = tf.optimizers.Adam(soft_q_lr) + soft_q_optimizer2 = tf.optimizers.Adam(soft_q_lr) + policy_optimizer = tf.optimizers.Adam(policy_lr) + alpha_optimizer = tf.optimizers.Adam(alpha_lr) + optimizers_list = [soft_q_optimizer1, soft_q_optimizer2, policy_optimizer, alpha_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=150, + batch_size=64, + explore_steps=200, + update_itr=3, + policy_target_update_interval=3, + reward_scale=1., + AUTO_ENTROPY=True, + train_episodes=100, + test_episodes=10, + save_interval=10, + ) + + return alg_params, learn_params + + +def box2d(env, default_seed=True): + if default_seed: + seed = 2 + set_seed(seed, env) # reproducible + + alg_params = dict( + replay_buffer_capacity=5e5, + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks, default as the same for each layer here + with tf.name_scope('SAC'): + with tf.name_scope('Q_Net1'): + soft_q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Q_Net2'): + soft_q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net1'): + target_soft_q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net2'): + target_soft_q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim], + output_activation=None, + state_conditioned=True) + net_list = [soft_q_net1, soft_q_net2, target_soft_q_net1, target_soft_q_net2, policy_net] + alg_params['net_list'] = net_list + if alg_params.get('optimizers_list') is None: + soft_q_lr, policy_lr, alpha_lr = 3e-4, 3e-4, 3e-4 # soft_q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network; alpha_lr: learning rate of the variable alpha + soft_q_optimizer1 = tf.optimizers.Adam(soft_q_lr) + soft_q_optimizer2 = tf.optimizers.Adam(soft_q_lr) + policy_optimizer = tf.optimizers.Adam(policy_lr) + alpha_optimizer = tf.optimizers.Adam(alpha_lr) + optimizers_list = [soft_q_optimizer1, soft_q_optimizer2, policy_optimizer, alpha_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=150, + batch_size=64, + explore_steps=200, + update_itr=3, + policy_target_update_interval=3, + reward_scale=1., + AUTO_ENTROPY=True, + 
train_episodes=100, + test_episodes=10, + save_interval=10, + ) + + return alg_params, learn_params + + +def mujoco(env, default_seed=True): + if default_seed: + seed = 2 + set_seed(seed, env) # reproducible + + alg_params = dict( + replay_buffer_capacity=5e5, + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks, default as the same for each layer here + with tf.name_scope('SAC'): + with tf.name_scope('Q_Net1'): + soft_q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Q_Net2'): + soft_q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net1'): + target_soft_q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net2'): + target_soft_q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim], + output_activation=None, + state_conditioned=True) + net_list = [soft_q_net1, soft_q_net2, target_soft_q_net1, target_soft_q_net2, policy_net] + alg_params['net_list'] = net_list + if alg_params.get('optimizers_list') is None: + soft_q_lr, policy_lr, alpha_lr = 3e-4, 3e-4, 3e-4 # soft_q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network; alpha_lr: learning rate of the variable alpha + soft_q_optimizer1 = tf.optimizers.Adam(soft_q_lr) + soft_q_optimizer2 = tf.optimizers.Adam(soft_q_lr) + policy_optimizer = tf.optimizers.Adam(policy_lr) + alpha_optimizer = tf.optimizers.Adam(alpha_lr) + optimizers_list = [soft_q_optimizer1, soft_q_optimizer2, policy_optimizer, alpha_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=150, + batch_size=64, + explore_steps=200, + update_itr=3, + policy_target_update_interval=3, + reward_scale=1., + AUTO_ENTROPY=True, + train_episodes=100, + test_episodes=10, + save_interval=10, + ) + + return alg_params, learn_params + + +def robotics(env, default_seed=True): + if default_seed: + seed = 2 + set_seed(seed, env) # reproducible + + alg_params = dict( + replay_buffer_capacity=5e5, + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks, default as the same for each layer here + with tf.name_scope('SAC'): + with tf.name_scope('Q_Net1'): + soft_q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Q_Net2'): + soft_q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net1'): + target_soft_q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net2'): + target_soft_q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim], + output_activation=None, 
+ state_conditioned=True) + net_list = [soft_q_net1, soft_q_net2, target_soft_q_net1, target_soft_q_net2, policy_net] + alg_params['net_list'] = net_list + if alg_params.get('optimizers_list') is None: + soft_q_lr, policy_lr, alpha_lr = 3e-4, 3e-4, 3e-4 # soft_q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network; alpha_lr: learning rate of the variable alpha + soft_q_optimizer1 = tf.optimizers.Adam(soft_q_lr) + soft_q_optimizer2 = tf.optimizers.Adam(soft_q_lr) + policy_optimizer = tf.optimizers.Adam(policy_lr) + alpha_optimizer = tf.optimizers.Adam(alpha_lr) + optimizers_list = [soft_q_optimizer1, soft_q_optimizer2, policy_optimizer, alpha_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=150, + batch_size=64, + explore_steps=200, + update_itr=3, + policy_target_update_interval=3, + reward_scale=1., + AUTO_ENTROPY=True, + train_episodes=100, + test_episodes=10, + save_interval=10, + ) + + return alg_params, learn_params + + +def dm_control(env, default_seed=True): + if default_seed: + seed = 2 + set_seed(seed, env) # reproducible + + alg_params = dict( + replay_buffer_capacity=5e5, + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks, default as the same for each layer here + with tf.name_scope('SAC'): + with tf.name_scope('Q_Net1'): + soft_q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Q_Net2'): + soft_q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net1'): + target_soft_q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net2'): + target_soft_q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim], + output_activation=None, + state_conditioned=True) + net_list = [soft_q_net1, soft_q_net2, target_soft_q_net1, target_soft_q_net2, policy_net] + alg_params['net_list'] = net_list + if alg_params.get('optimizers_list') is None: + soft_q_lr, policy_lr, alpha_lr = 3e-4, 3e-4, 3e-4 # soft_q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network; alpha_lr: learning rate of the variable alpha + soft_q_optimizer1 = tf.optimizers.Adam(soft_q_lr) + soft_q_optimizer2 = tf.optimizers.Adam(soft_q_lr) + policy_optimizer = tf.optimizers.Adam(policy_lr) + alpha_optimizer = tf.optimizers.Adam(alpha_lr) + optimizers_list = [soft_q_optimizer1, soft_q_optimizer2, policy_optimizer, alpha_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=150, + batch_size=64, + explore_steps=200, + update_itr=3, + policy_target_update_interval=3, + reward_scale=1., + AUTO_ENTROPY=True, + train_episodes=100, + test_episodes=10, + save_interval=10, + ) + + return alg_params, learn_params + + +def rlbench(env, default_seed=True): + if default_seed: + seed = 2 + set_seed(seed, env) # reproducible + + alg_params = dict( + replay_buffer_capacity=5e5, + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension 
of hidden layers for the networks, default as the same for each layer here + with tf.name_scope('SAC'): + with tf.name_scope('Q_Net1'): + soft_q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Q_Net2'): + soft_q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net1'): + target_soft_q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net2'): + target_soft_q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim], + output_activation=None, + state_conditioned=True) + net_list = [soft_q_net1, soft_q_net2, target_soft_q_net1, target_soft_q_net2, policy_net] + alg_params['net_list'] = net_list + if alg_params.get('optimizers_list') is None: + soft_q_lr, policy_lr, alpha_lr = 3e-4, 3e-4, 3e-4 # soft_q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network; alpha_lr: learning rate of the variable alpha + soft_q_optimizer1 = tf.optimizers.Adam(soft_q_lr) + soft_q_optimizer2 = tf.optimizers.Adam(soft_q_lr) + policy_optimizer = tf.optimizers.Adam(policy_lr) + alpha_optimizer = tf.optimizers.Adam(alpha_lr) + optimizers_list = [soft_q_optimizer1, soft_q_optimizer2, policy_optimizer, alpha_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=150, + batch_size=64, + explore_steps=200, + update_itr=3, + policy_target_update_interval=3, + reward_scale=1., + AUTO_ENTROPY=True, + train_episodes=100, + test_episodes=10, + save_interval=10, + ) + + return alg_params, learn_params diff --git a/rlzoo/algorithms/sac/run_sac.py b/rlzoo/algorithms/sac/run_sac.py old mode 100644 new mode 100755 index 89efb0a..5a1edcb --- a/rlzoo/algorithms/sac/run_sac.py +++ b/rlzoo/algorithms/sac/run_sac.py @@ -1,82 +1,82 @@ -from rlzoo.algorithms.sac.sac import SAC -from rlzoo.common.policy_networks import * -from rlzoo.common.value_networks import * -import gym - -""" load environment """ -env = gym.make('Pendulum-v0').unwrapped -# env = DummyVecEnv([lambda: env]) # The algorithms require a vectorized/wrapped environment to run -action_shape = env.action_space.shape -state_shape = env.observation_space.shape -# reproducible -seed = 2 -np.random.seed(seed) -tf.random.set_seed(seed) -env.seed(seed) - -""" build networks for the algorithm """ -num_hidden_layer = 2 # number of hidden layers for the networks -hidden_dim = 64 # dimension of hidden layers for the networks, default as the same for each layer here -with tf.name_scope('SAC'): - with tf.name_scope('Q_Net1'): - soft_q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Q_Net2'): - soft_q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net1'): - target_soft_q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net2'): - target_soft_q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Policy'): - policy_net = 
StochasticPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim], - output_activation=None, - state_conditioned=True) -net_list = [soft_q_net1, soft_q_net2, target_soft_q_net1, target_soft_q_net2, policy_net] - -""" choose optimizers """ -soft_q_lr, policy_lr, alpha_lr = 3e-4, 3e-4, 3e-4 # soft_q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network; alpha_lr: learning rate of the variable alpha -soft_q_optimizer1 = tf.optimizers.Adam(soft_q_lr) -soft_q_optimizer2 = tf.optimizers.Adam(soft_q_lr) -policy_optimizer = tf.optimizers.Adam(policy_lr) -alpha_optimizer = tf.optimizers.Adam(alpha_lr) -optimizers_list = [soft_q_optimizer1, soft_q_optimizer2, policy_optimizer, alpha_optimizer] - -model = SAC(net_list, optimizers_list) -""" -full list of arguments for the algorithm ----------------------------------------- -net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization -optimizers_list: a list of optimizers for all networks and differentiable variables -state_dim: dimension of state for the environment -action_dim: dimension of action for the environment -replay_buffer_capacity: the size of buffer for storing explored samples -action_range: value of each action in [-action_range, action_range] -""" - -model.learn(env, train_episodes=100, max_steps=150, batch_size=64, explore_steps=500, \ - update_itr=3, policy_target_update_interval=3, reward_scale=1., save_interval=10, \ - mode='train', AUTO_ENTROPY=True, render=False) -""" -full list of parameters for training ---------------------------------------- -env: learning environment -train_episodes: total number of episodes for training -test_episodes: total number of episodes for testing -max_steps: maximum number of steps for one episode -batch_size: udpate batchsize -explore_steps: for random action sampling in the beginning of training -update_itr: repeated updates for single step -policy_target_update_interval: delayed update for the policy network and target networks -reward_scale: value range of reward -save_interval: timesteps for saving the weights and plotting the results -mode: 'train' or 'test' -AUTO_ENTROPY: automatically udpating variable alpha for entropy -DETERMINISTIC: stochastic action policy if False, otherwise deterministic -render: if true, visualize the environment -""" -# test -model.learn(env, test_episodes=10, max_steps=150, mode='test', render=True) +from rlzoo.algorithms.sac.sac import SAC +from rlzoo.common.policy_networks import * +from rlzoo.common.value_networks import * +import gym + +""" load environment """ +env = gym.make('Pendulum-v0').unwrapped +# env = DummyVecEnv([lambda: env]) # The algorithms require a vectorized/wrapped environment to run +action_shape = env.action_space.shape +state_shape = env.observation_space.shape +# reproducible +seed = 2 +np.random.seed(seed) +tf.random.set_seed(seed) +env.seed(seed) + +""" build networks for the algorithm """ +num_hidden_layer = 2 # number of hidden layers for the networks +hidden_dim = 64 # dimension of hidden layers for the networks, default as the same for each layer here +with tf.name_scope('SAC'): + with tf.name_scope('Q_Net1'): + soft_q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Q_Net2'): + soft_q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net1'): 
+ target_soft_q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net2'): + target_soft_q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim], + output_activation=None, + state_conditioned=True) +net_list = [soft_q_net1, soft_q_net2, target_soft_q_net1, target_soft_q_net2, policy_net] + +""" choose optimizers """ +soft_q_lr, policy_lr, alpha_lr = 3e-4, 3e-4, 3e-4 # soft_q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network; alpha_lr: learning rate of the variable alpha +soft_q_optimizer1 = tf.optimizers.Adam(soft_q_lr) +soft_q_optimizer2 = tf.optimizers.Adam(soft_q_lr) +policy_optimizer = tf.optimizers.Adam(policy_lr) +alpha_optimizer = tf.optimizers.Adam(alpha_lr) +optimizers_list = [soft_q_optimizer1, soft_q_optimizer2, policy_optimizer, alpha_optimizer] + +model = SAC(net_list, optimizers_list) +""" +full list of arguments for the algorithm +---------------------------------------- +net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization +optimizers_list: a list of optimizers for all networks and differentiable variables +state_dim: dimension of state for the environment +action_dim: dimension of action for the environment +replay_buffer_capacity: the size of buffer for storing explored samples +action_range: value of each action in [-action_range, action_range] +""" + +model.learn(env, train_episodes=100, max_steps=150, batch_size=64, explore_steps=500, \ + update_itr=3, policy_target_update_interval=3, reward_scale=1., save_interval=10, \ + mode='train', AUTO_ENTROPY=True, render=False) +""" +full list of parameters for training +--------------------------------------- +env: learning environment +train_episodes: total number of episodes for training +test_episodes: total number of episodes for testing +max_steps: maximum number of steps for one episode +batch_size: udpate batchsize +explore_steps: for random action sampling in the beginning of training +update_itr: repeated updates for single step +policy_target_update_interval: delayed update for the policy network and target networks +reward_scale: value range of reward +save_interval: timesteps for saving the weights and plotting the results +mode: 'train' or 'test' +AUTO_ENTROPY: automatically udpating variable alpha for entropy +DETERMINISTIC: stochastic action policy if False, otherwise deterministic +render: if true, visualize the environment +""" +# test +model.learn(env, test_episodes=10, max_steps=150, mode='test', render=True) diff --git a/rlzoo/algorithms/sac/sac.py b/rlzoo/algorithms/sac/sac.py old mode 100644 new mode 100755 index 8f7041b..84291a5 --- a/rlzoo/algorithms/sac/sac.py +++ b/rlzoo/algorithms/sac/sac.py @@ -1,286 +1,286 @@ -""" -Soft Actor-Critic -using target Q instead of V net: 2 Q net, 2 target Q net, 1 policy net -adding alpha loss -paper: https://arxiv.org/pdf/1812.05905.pdf -Actor policy is stochastic. 
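As a complement to the hand-wired runner script above, the default builders in the SAC default.py are normally consumed by unpacking their two returned dicts into the SAC constructor and its learn() call. A minimal sketch under two assumptions: the builders are importable from rlzoo.algorithms.sac.default (assumed module path), and any environment exposing observation_space/action_space (here Gym's Pendulum) is acceptable to them, since they only read the spaces and seed the env:

import gym
from rlzoo.algorithms.sac.sac import SAC
from rlzoo.algorithms.sac.default import mujoco  # assumed import path for the builders above

env = gym.make('Pendulum-v0').unwrapped
alg_params, learn_params = mujoco(env)  # seeds the env, builds net_list and optimizers_list
model = SAC(**alg_params)               # SAC(net_list, optimizers_list, replay_buffer_capacity)
model.learn(env, mode='train', render=False, **learn_params)
model.learn(env, mode='test', render=True, **learn_params)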
-Env: Openai Gym Pendulum-v0, continuous action space -tensorflow 2.0.0a0 -tensorflow-probability 0.6.0 -tensorlayer 2.0.0 -&& -pip install box2d box2d-kengz --user -""" - -import time - -import tensorflow_probability as tfp -import tensorlayer as tl -from rlzoo.common.utils import * -from rlzoo.common.buffer import * -from rlzoo.common.policy_networks import * -from rlzoo.common.value_networks import * - -tfd = tfp.distributions -Normal = tfd.Normal - -tl.logging.set_verbosity(tl.logging.DEBUG) - - -class SAC(): - """ Soft Actor-Critic """ - - def __init__(self, net_list, optimizers_list, replay_buffer_capacity=5e5): - self.replay_buffer = ReplayBuffer(replay_buffer_capacity) - self.name = 'SAC' - - # get all networks - [self.soft_q_net1, self.soft_q_net2, self.target_soft_q_net1, self.target_soft_q_net2, - self.policy_net] = net_list - - assert isinstance(self.soft_q_net1, QNetwork) - assert isinstance(self.soft_q_net2, QNetwork) - assert isinstance(self.target_soft_q_net1, QNetwork) - assert isinstance(self.target_soft_q_net2, QNetwork) - assert isinstance(self.policy_net, StochasticPolicyNetwork) - assert isinstance(self.policy_net.action_space, gym.spaces.Box) - - self.action_dim = self.policy_net.action_shape[0] - - self.log_alpha = tf.Variable(0, dtype=np.float32, name='log_alpha') - self.alpha = tf.math.exp(self.log_alpha) - print('Soft Q Network (1,2): ', self.soft_q_net1) - print('Policy Network: ', self.policy_net) - - # initialize weights of target networks - self.target_soft_q_net1 = self.target_ini(self.soft_q_net1, self.target_soft_q_net1) - self.target_soft_q_net2 = self.target_ini(self.soft_q_net2, self.target_soft_q_net2) - - [self.soft_q_optimizer1, self.soft_q_optimizer2, self.policy_optimizer, self.alpha_optimizer] = optimizers_list - - def evaluate(self, state, epsilon=1e-6): - """ generate action with state for calculating gradients """ - _ = self.policy_net(state) - mean, log_std = self.policy_net.policy_dist.get_param() # as SAC uses TanhNorm instead of normal distribution, need original mean_std - std = tf.math.exp(log_std) # no clip in evaluation, clip affects gradients flow - - normal = Normal(0, 1) - z = normal.sample(mean.shape) - action_0 = tf.math.tanh(mean + std * z) # TanhNormal distribution as actions; reparameterization trick - # according to original paper, with an extra last term for normalizing different action range - log_prob = Normal(mean, std).log_prob(mean + std * z) - tf.math.log(1. - action_0 ** 2 + epsilon) - # both dims of normal.log_prob and -log(1-a**2) are (N,dim_of_action); - # the Normal.log_prob outputs the same dim of input features instead of 1 dim probability, - # needs sum up across the dim of actions to get 1 dim probability; or else use Multivariate Normal. 
- log_prob = tf.reduce_sum(log_prob, axis=1)[:, np.newaxis] # expand dim as reduce_sum causes 1 dim reduced - - action = action_0 * self.policy_net.policy_dist.action_scale + self.policy_net.policy_dist.action_mean - - return action, log_prob, z, mean, log_std - - def get_action(self, state): - """ generate action with state for interaction with envronment """ - action, _, _, _, _ = self.evaluate(np.array([state])) - return action.numpy()[0] - - def get_action_greedy(self, state): - """ generate action with state for interaction with envronment """ - mean = self.policy_net(np.array([state]), greedy=True).numpy()[0] - action = tf.math.tanh(mean) * self.policy_net.policy_dist.action_scale + self.policy_net.policy_dist.action_mean - return action - - def sample_action(self, ): - """ generate random actions for exploration """ - return self.policy_net.random_sample() - - def target_ini(self, net, target_net): - """ hard-copy update for initializing target networks """ - for target_param, param in zip(target_net.trainable_weights, net.trainable_weights): - target_param.assign(param) - return target_net - - def target_soft_update(self, net, target_net, soft_tau): - """ soft update the target net with Polyak averaging """ - for target_param, param in zip(target_net.trainable_weights, net.trainable_weights): - target_param.assign( # copy weight value into target parameters - target_param * (1.0 - soft_tau) + param * soft_tau - ) - return target_net - - def update(self, batch_size, reward_scale=10., auto_entropy=True, target_entropy=-2, gamma=0.99, soft_tau=1e-2): - """ update all networks in SAC """ - state, action, reward, next_state, done = self.replay_buffer.sample(batch_size) - - reward = reward[:, np.newaxis] # expand dim - done = done[:, np.newaxis] - - reward = reward_scale * (reward - - np.mean(reward, axis=0)) / ( - np.std(reward, axis=0) + 1e-6) # normalize with batch mean and std - - # Training Q Function - new_next_action, next_log_prob, _, _, _ = self.evaluate(next_state) - target_q_min = tf.minimum( - self.target_soft_q_net1([next_state, new_next_action]), - self.target_soft_q_net2([next_state, new_next_action]) - ) - self.alpha * next_log_prob - target_q_value = reward + (1 - done) * gamma * target_q_min # if done==1, only reward - - with tf.GradientTape() as q1_tape: - predicted_q_value1 = self.soft_q_net1([state, action]) - q_value_loss1 = tf.reduce_mean(tf.losses.mean_squared_error(predicted_q_value1, target_q_value)) - q1_grad = q1_tape.gradient(q_value_loss1, self.soft_q_net1.trainable_weights) - self.soft_q_optimizer1.apply_gradients(zip(q1_grad, self.soft_q_net1.trainable_weights)) - - with tf.GradientTape() as q2_tape: - predicted_q_value2 = self.soft_q_net2([state, action]) - q_value_loss2 = tf.reduce_mean(tf.losses.mean_squared_error(predicted_q_value2, target_q_value)) - q2_grad = q2_tape.gradient(q_value_loss2, self.soft_q_net2.trainable_weights) - self.soft_q_optimizer2.apply_gradients(zip(q2_grad, self.soft_q_net2.trainable_weights)) - - # Training Policy Function - with tf.GradientTape() as p_tape: - new_action, log_prob, z, mean, log_std = self.evaluate(state) - """ implementation 1 """ - predicted_new_q_value = tf.minimum(self.soft_q_net1([state, new_action]), - self.soft_q_net2([state, new_action])) - """ implementation 2 """ - # predicted_new_q_value = self.soft_q_net1([state, new_action]) - policy_loss = tf.reduce_mean(self.alpha * log_prob - predicted_new_q_value) - p_grad = p_tape.gradient(policy_loss, self.policy_net.trainable_weights) - 
self.policy_optimizer.apply_gradients(zip(p_grad, self.policy_net.trainable_weights)) - - # Updating alpha w.r.t entropy - # alpha: trade-off between exploration (max entropy) and exploitation (max Q) - if auto_entropy is True: - with tf.GradientTape() as alpha_tape: - alpha_loss = -tf.reduce_mean((self.log_alpha * (log_prob + target_entropy))) - alpha_grad = alpha_tape.gradient(alpha_loss, [self.log_alpha]) - self.alpha_optimizer.apply_gradients(zip(alpha_grad, [self.log_alpha])) - self.alpha = tf.math.exp(self.log_alpha) - else: # fixed alpha - self.alpha = 1. - alpha_loss = 0 - - # Soft update the target value nets - self.target_soft_q_net1 = self.target_soft_update(self.soft_q_net1, self.target_soft_q_net1, soft_tau) - self.target_soft_q_net2 = self.target_soft_update(self.soft_q_net2, self.target_soft_q_net2, soft_tau) - - def save_ckpt(self, env_name): - """ save trained weights """ - save_model(self.soft_q_net1, 'model_q_net1', self.name, env_name) - save_model(self.soft_q_net2, 'model_q_net2', self.name, env_name) - save_model(self.target_soft_q_net1, 'model_target_q_net1', self.name, env_name) - save_model(self.target_soft_q_net2, 'model_target_q_net2', self.name, env_name) - save_model(self.policy_net, 'model_policy_net', self.name, env_name) - - def load_ckpt(self, env_name): - """ load trained weights """ - load_model(self.soft_q_net1, 'model_q_net1', self.name, env_name) - load_model(self.soft_q_net2, 'model_q_net2', self.name, env_name) - load_model(self.target_soft_q_net1, 'model_target_q_net1', self.name, env_name) - load_model(self.target_soft_q_net2, 'model_target_q_net2', self.name, env_name) - load_model(self.policy_net, 'model_policy_net', self.name, env_name) - - def learn(self, env, train_episodes=1000, test_episodes=1000, max_steps=150, batch_size=64, explore_steps=500, - update_itr=3, policy_target_update_interval=3, reward_scale=1., save_interval=20, - mode='train', AUTO_ENTROPY=True, render=False, plot_func=None): - """ - :param env: learning environment - :param train_episodes: total number of episodes for training - :param test_episodes: total number of episodes for testing - :param max_steps: maximum number of steps for one episode - :param batch_size: udpate batchsize - :param explore_steps: for random action sampling in the beginning of training - :param update_itr: repeated updates for single step - :param policy_target_update_interval: delayed update for the policy network and target networks - :param reward_scale: value range of reward - :param save_interval: timesteps for saving the weights and plotting the results - :param mode: 'train' or 'test' - :param AUTO_ENTROPY: automatically updating variable alpha for entropy - :param render: if true, visualize the environment - :param plot_func: additional function for interactive module - """ - - # training loop - if mode == 'train': - print('Training... 
| Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) - frame_idx = 0 - rewards = [] - t0 = time.time() - for eps in range(train_episodes): - state = env.reset() - episode_reward = 0 - - for step in range(max_steps): - if frame_idx > explore_steps: - action = self.get_action(state) - else: - action = self.sample_action() - - next_state, reward, done, _ = env.step(action) - if render: env.render() - done = 1 if done == True else 0 - - self.replay_buffer.push(state, action, reward, next_state, done) - - state = next_state - episode_reward += reward - frame_idx += 1 - - if len(self.replay_buffer) > batch_size: - for i in range(update_itr): - self.update( - batch_size, reward_scale=reward_scale, auto_entropy=AUTO_ENTROPY, - target_entropy=-1. * self.action_dim - ) - - if done: - break - if eps % int(save_interval) == 0: - plot_save_log(rewards, algorithm_name=self.name, env_name=env.spec.id) - self.save_ckpt(env_name=env.spec.id) - print('Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}' \ - .format(eps, train_episodes, episode_reward, time.time() - t0)) - rewards.append(episode_reward) - if plot_func is not None: - plot_func(rewards) - plot_save_log(rewards, algorithm_name=self.name, env_name=env.spec.id) - self.save_ckpt(env_name=env.spec.id) - - elif mode == 'test': - frame_idx = 0 - rewards = [] - t0 = time.time() - self.load_ckpt(env_name=env.spec.id) - print('Testing... | Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) - # set test mode - self.soft_q_net1.eval() - self.soft_q_net2.eval() - self.target_soft_q_net1.eval() - self.target_soft_q_net2.eval() - self.policy_net.eval() - - for eps in range(test_episodes): - state = env.reset() - episode_reward = 0 - - for step in range(max_steps): - action = self.get_action_greedy(state) - next_state, reward, done, _ = env.step(action) - if render: env.render() - done = 1 if done == True else 0 - - state = next_state - episode_reward += reward - frame_idx += 1 - if done: - break - print('Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}' \ - .format(eps, test_episodes, episode_reward, time.time() - t0)) - rewards.append(episode_reward) - if plot_func: - plot_func(rewards) - - else: - print('unknow mode type') +""" +Soft Actor-Critic +using target Q instead of V net: 2 Q net, 2 target Q net, 1 policy net +adding alpha loss +paper: https://arxiv.org/pdf/1812.05905.pdf +Actor policy is stochastic. 
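As the header above notes, the class keeps two target Q networks alongside the two online ones; they are hard-copied at construction (target_ini) and then tracked with Polyak averaging (target_soft_update). A standalone sketch of that mechanic with bare tf.Variables, independent of the RLzoo network classes:

import tensorflow as tf

def hard_copy(src_vars, dst_vars):
    """One-time initialization: target <- online."""
    for dst, src in zip(dst_vars, src_vars):
        dst.assign(src)

def polyak_update(src_vars, dst_vars, soft_tau=1e-2):
    """Soft update: target <- (1 - tau) * target + tau * online."""
    for dst, src in zip(dst_vars, src_vars):
        dst.assign((1.0 - soft_tau) * dst + soft_tau * src)

online = [tf.Variable([1.0, 2.0]), tf.Variable([3.0])]
target = [tf.Variable([0.0, 0.0]), tf.Variable([0.0])]
hard_copy(online, target)                      # targets start as exact copies
polyak_update(online, target, soft_tau=0.01)   # then drift slowly toward the online weights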
+Env: Openai Gym Pendulum-v0, continuous action space +tensorflow 2.0.0a0 +tensorflow-probability 0.6.0 +tensorlayer 2.0.0 +&& +pip install box2d box2d-kengz --user +""" + +import time + +import tensorflow_probability as tfp +import tensorlayer as tl +from rlzoo.common.utils import * +from rlzoo.common.buffer import * +from rlzoo.common.policy_networks import * +from rlzoo.common.value_networks import * + +tfd = tfp.distributions +Normal = tfd.Normal + +tl.logging.set_verbosity(tl.logging.DEBUG) + + +class SAC(): + """ Soft Actor-Critic """ + + def __init__(self, net_list, optimizers_list, replay_buffer_capacity=5e5): + self.replay_buffer = ReplayBuffer(replay_buffer_capacity) + self.name = 'SAC' + + # get all networks + [self.soft_q_net1, self.soft_q_net2, self.target_soft_q_net1, self.target_soft_q_net2, + self.policy_net] = net_list + + assert isinstance(self.soft_q_net1, QNetwork) + assert isinstance(self.soft_q_net2, QNetwork) + assert isinstance(self.target_soft_q_net1, QNetwork) + assert isinstance(self.target_soft_q_net2, QNetwork) + assert isinstance(self.policy_net, StochasticPolicyNetwork) + assert isinstance(self.policy_net.action_space, gym.spaces.Box) + + self.action_dim = self.policy_net.action_shape[0] + + self.log_alpha = tf.Variable(0, dtype=np.float32, name='log_alpha') + self.alpha = tf.math.exp(self.log_alpha) + print('Soft Q Network (1,2): ', self.soft_q_net1) + print('Policy Network: ', self.policy_net) + + # initialize weights of target networks + self.target_soft_q_net1 = self.target_ini(self.soft_q_net1, self.target_soft_q_net1) + self.target_soft_q_net2 = self.target_ini(self.soft_q_net2, self.target_soft_q_net2) + + [self.soft_q_optimizer1, self.soft_q_optimizer2, self.policy_optimizer, self.alpha_optimizer] = optimizers_list + + def evaluate(self, state, epsilon=1e-6): + """ generate action with state for calculating gradients """ + _ = self.policy_net(state) + mean, log_std = self.policy_net.policy_dist.get_param() # as SAC uses TanhNorm instead of normal distribution, need original mean_std + std = tf.math.exp(log_std) # no clip in evaluation, clip affects gradients flow + + normal = Normal(0, 1) + z = normal.sample(mean.shape) + action_0 = tf.math.tanh(mean + std * z) # TanhNormal distribution as actions; reparameterization trick + # according to original paper, with an extra last term for normalizing different action range + log_prob = Normal(mean, std).log_prob(mean + std * z) - tf.math.log(1. - action_0 ** 2 + epsilon) + # both dims of normal.log_prob and -log(1-a**2) are (N,dim_of_action); + # the Normal.log_prob outputs the same dim of input features instead of 1 dim probability, + # needs sum up across the dim of actions to get 1 dim probability; or else use Multivariate Normal. 
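Before the summation on the next line, a standalone numerical sketch of the change-of-variables term described in the comments: for a squashed action a = tanh(u) with u ~ Normal(mean, std), log pi(a) = log N(u; mean, std) - sum_k log(1 - tanh(u_k)^2 + eps), summed over the action dimensions to give one log-probability per sample. The shapes below are arbitrary illustration values; tensorflow_probability is assumed available, as it is imported at the top of this file:

import tensorflow as tf
import tensorflow_probability as tfp

Normal = tfp.distributions.Normal
eps = 1e-6
mean = tf.zeros([4, 2])               # batch of 4 samples, 2-D action
std = tf.exp(tf.fill([4, 2], -0.5))   # std = exp(log_std)

u = mean + std * Normal(0., 1.).sample(mean.shape)  # reparameterized pre-squash sample
a = tf.tanh(u)                                      # squashed action in (-1, 1)
log_prob = Normal(mean, std).log_prob(u) - tf.math.log(1. - a ** 2 + eps)
log_prob = tf.reduce_sum(log_prob, axis=1, keepdims=True)  # shape (4, 1): one value per sample
print(log_prob.shape)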
+ log_prob = tf.reduce_sum(log_prob, axis=1)[:, np.newaxis] # expand dim as reduce_sum causes 1 dim reduced + + action = action_0 * self.policy_net.policy_dist.action_scale + self.policy_net.policy_dist.action_mean + + return action, log_prob, z, mean, log_std + + def get_action(self, state): + """ generate action with state for interaction with envronment """ + action, _, _, _, _ = self.evaluate(np.array([state])) + return action.numpy()[0] + + def get_action_greedy(self, state): + """ generate action with state for interaction with envronment """ + mean = self.policy_net(np.array([state]), greedy=True).numpy()[0] + action = tf.math.tanh(mean) * self.policy_net.policy_dist.action_scale + self.policy_net.policy_dist.action_mean + return action + + def sample_action(self, ): + """ generate random actions for exploration """ + return self.policy_net.random_sample() + + def target_ini(self, net, target_net): + """ hard-copy update for initializing target networks """ + for target_param, param in zip(target_net.trainable_weights, net.trainable_weights): + target_param.assign(param) + return target_net + + def target_soft_update(self, net, target_net, soft_tau): + """ soft update the target net with Polyak averaging """ + for target_param, param in zip(target_net.trainable_weights, net.trainable_weights): + target_param.assign( # copy weight value into target parameters + target_param * (1.0 - soft_tau) + param * soft_tau + ) + return target_net + + def update(self, batch_size, reward_scale=10., auto_entropy=True, target_entropy=-2, gamma=0.99, soft_tau=1e-2): + """ update all networks in SAC """ + state, action, reward, next_state, done = self.replay_buffer.sample(batch_size) + + reward = reward[:, np.newaxis] # expand dim + done = done[:, np.newaxis] + + reward = reward_scale * (reward - + np.mean(reward, axis=0)) / ( + np.std(reward, axis=0) + 1e-6) # normalize with batch mean and std + + # Training Q Function + new_next_action, next_log_prob, _, _, _ = self.evaluate(next_state) + target_q_min = tf.minimum( + self.target_soft_q_net1([next_state, new_next_action]), + self.target_soft_q_net2([next_state, new_next_action]) + ) - self.alpha * next_log_prob + target_q_value = reward + (1 - done) * gamma * target_q_min # if done==1, only reward + + with tf.GradientTape() as q1_tape: + predicted_q_value1 = self.soft_q_net1([state, action]) + q_value_loss1 = tf.reduce_mean(tf.losses.mean_squared_error(predicted_q_value1, target_q_value)) + q1_grad = q1_tape.gradient(q_value_loss1, self.soft_q_net1.trainable_weights) + self.soft_q_optimizer1.apply_gradients(zip(q1_grad, self.soft_q_net1.trainable_weights)) + + with tf.GradientTape() as q2_tape: + predicted_q_value2 = self.soft_q_net2([state, action]) + q_value_loss2 = tf.reduce_mean(tf.losses.mean_squared_error(predicted_q_value2, target_q_value)) + q2_grad = q2_tape.gradient(q_value_loss2, self.soft_q_net2.trainable_weights) + self.soft_q_optimizer2.apply_gradients(zip(q2_grad, self.soft_q_net2.trainable_weights)) + + # Training Policy Function + with tf.GradientTape() as p_tape: + new_action, log_prob, z, mean, log_std = self.evaluate(state) + """ implementation 1 """ + predicted_new_q_value = tf.minimum(self.soft_q_net1([state, new_action]), + self.soft_q_net2([state, new_action])) + """ implementation 2 """ + # predicted_new_q_value = self.soft_q_net1([state, new_action]) + policy_loss = tf.reduce_mean(self.alpha * log_prob - predicted_new_q_value) + p_grad = p_tape.gradient(policy_loss, self.policy_net.trainable_weights) + 
self.policy_optimizer.apply_gradients(zip(p_grad, self.policy_net.trainable_weights)) + + # Updating alpha w.r.t entropy + # alpha: trade-off between exploration (max entropy) and exploitation (max Q) + if auto_entropy is True: + with tf.GradientTape() as alpha_tape: + alpha_loss = -tf.reduce_mean((self.log_alpha * (log_prob + target_entropy))) + alpha_grad = alpha_tape.gradient(alpha_loss, [self.log_alpha]) + self.alpha_optimizer.apply_gradients(zip(alpha_grad, [self.log_alpha])) + self.alpha = tf.math.exp(self.log_alpha) + else: # fixed alpha + self.alpha = 1. + alpha_loss = 0 + + # Soft update the target value nets + self.target_soft_q_net1 = self.target_soft_update(self.soft_q_net1, self.target_soft_q_net1, soft_tau) + self.target_soft_q_net2 = self.target_soft_update(self.soft_q_net2, self.target_soft_q_net2, soft_tau) + + def save_ckpt(self, env_name): + """ save trained weights """ + save_model(self.soft_q_net1, 'model_q_net1', self.name, env_name) + save_model(self.soft_q_net2, 'model_q_net2', self.name, env_name) + save_model(self.target_soft_q_net1, 'model_target_q_net1', self.name, env_name) + save_model(self.target_soft_q_net2, 'model_target_q_net2', self.name, env_name) + save_model(self.policy_net, 'model_policy_net', self.name, env_name) + + def load_ckpt(self, env_name): + """ load trained weights """ + load_model(self.soft_q_net1, 'model_q_net1', self.name, env_name) + load_model(self.soft_q_net2, 'model_q_net2', self.name, env_name) + load_model(self.target_soft_q_net1, 'model_target_q_net1', self.name, env_name) + load_model(self.target_soft_q_net2, 'model_target_q_net2', self.name, env_name) + load_model(self.policy_net, 'model_policy_net', self.name, env_name) + + def learn(self, env, train_episodes=1000, test_episodes=1000, max_steps=150, batch_size=64, explore_steps=500, + update_itr=3, policy_target_update_interval=3, reward_scale=1., save_interval=20, + mode='train', AUTO_ENTROPY=True, render=False, plot_func=None): + """ + :param env: learning environment + :param train_episodes: total number of episodes for training + :param test_episodes: total number of episodes for testing + :param max_steps: maximum number of steps for one episode + :param batch_size: udpate batchsize + :param explore_steps: for random action sampling in the beginning of training + :param update_itr: repeated updates for single step + :param policy_target_update_interval: delayed update for the policy network and target networks + :param reward_scale: value range of reward + :param save_interval: timesteps for saving the weights and plotting the results + :param mode: 'train' or 'test' + :param AUTO_ENTROPY: automatically updating variable alpha for entropy + :param render: if true, visualize the environment + :param plot_func: additional function for interactive module + """ + + # training loop + if mode == 'train': + print('Training... 
| Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) + frame_idx = 0 + rewards = [] + t0 = time.time() + for eps in range(train_episodes): + state = env.reset() + episode_reward = 0 + + for step in range(max_steps): + if frame_idx > explore_steps: + action = self.get_action(state) + else: + action = self.sample_action() + + next_state, reward, done, _ = env.step(action) + if render: env.render() + done = 1 if done == True else 0 + + self.replay_buffer.push(state, action, reward, next_state, done) + + state = next_state + episode_reward += reward + frame_idx += 1 + + if len(self.replay_buffer) > batch_size: + for i in range(update_itr): + self.update( + batch_size, reward_scale=reward_scale, auto_entropy=AUTO_ENTROPY, + target_entropy=-1. * self.action_dim + ) + + if done: + break + if eps % int(save_interval) == 0: + plot_save_log(rewards, algorithm_name=self.name, env_name=env.spec.id) + self.save_ckpt(env_name=env.spec.id) + print('Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}' \ + .format(eps, train_episodes, episode_reward, time.time() - t0)) + rewards.append(episode_reward) + if plot_func is not None: + plot_func(rewards) + plot_save_log(rewards, algorithm_name=self.name, env_name=env.spec.id) + self.save_ckpt(env_name=env.spec.id) + + elif mode == 'test': + frame_idx = 0 + rewards = [] + t0 = time.time() + self.load_ckpt(env_name=env.spec.id) + print('Testing... | Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) + # set test mode + self.soft_q_net1.eval() + self.soft_q_net2.eval() + self.target_soft_q_net1.eval() + self.target_soft_q_net2.eval() + self.policy_net.eval() + + for eps in range(test_episodes): + state = env.reset() + episode_reward = 0 + + for step in range(max_steps): + action = self.get_action_greedy(state) + next_state, reward, done, _ = env.step(action) + if render: env.render() + done = 1 if done == True else 0 + + state = next_state + episode_reward += reward + frame_idx += 1 + if done: + break + print('Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}' \ + .format(eps, test_episodes, episode_reward, time.time() - t0)) + rewards.append(episode_reward) + if plot_func: + plot_func(rewards) + + else: + print('unknown mode type') diff --git a/rlzoo/algorithms/td3/__init__.py b/rlzoo/algorithms/td3/__init__.py old mode 100644 new mode 100755 diff --git a/rlzoo/algorithms/td3/default.py b/rlzoo/algorithms/td3/default.py old mode 100644 new mode 100755 index 6240103..8994450 --- a/rlzoo/algorithms/td3/default.py +++ b/rlzoo/algorithms/td3/default.py @@ -1,371 +1,371 @@ -from rlzoo.common.policy_networks import * -from rlzoo.common.value_networks import * -from rlzoo.common.utils import set_seed - -""" -full list of algorithm parameters (alg_params) ------------------------------------------------ -net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization -optimizers_list: a list of optimizers for all networks and differentiable variables -replay_buffer_capacity: the size of buffer for storing explored samples -policy_target_update_interval: delayed interval for updating the target policy ------------------------------------------------ - -full list of learning parameters (learn_params) ------------------------------------------------ -train_episodes: total number of episodes for training -test_episodes: total number of episodes for testing -max_steps: maximum number of steps for one episode -batch_size: udpate batchsize -explore_steps: for random action sampling
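Stepping back before the TD3 defaults: the SAC update implemented in sac.py above uses the Q target r + gamma * (1 - done) * (min(Q1', Q2') - alpha * log pi(a')) and trains the temperature with the loss -mean(log_alpha * (log pi + target_entropy)). A self-contained numerical sketch with placeholder arrays standing in for network outputs, just to make the shapes and formulas concrete:

import numpy as np

gamma, alpha, target_entropy = 0.99, 0.2, -2.0
reward = np.array([[1.0], [0.5]])
done = np.array([[0.0], [1.0]])
target_q1 = np.array([[10.0], [8.0]])        # stand-in for target_soft_q_net1([s', a'])
target_q2 = np.array([[9.5], [8.2]])         # stand-in for target_soft_q_net2([s', a'])
next_log_prob = np.array([[-1.2], [-0.7]])   # stand-in for log pi(a' | s')

target_q_min = np.minimum(target_q1, target_q2) - alpha * next_log_prob
target_q_value = reward + (1.0 - done) * gamma * target_q_min  # terminal rows keep only the reward

log_prob = np.array([[-1.0], [-0.4]])        # stand-in for log pi(a | s) on the current batch
alpha_loss = -np.mean(np.log(alpha) * (log_prob + target_entropy))
print(target_q_value.ravel(), alpha_loss)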
in the beginning of training -update_itr: repeated updates for single step -reward_scale: value range of reward -save_interval: timesteps for saving the weights and plotting the results -explore_noise_scale: range of action noise for exploration -eval_noise_scale: range of action noise for evaluation of action value -mode: 'train' or 'test' -render: if true, visualize the environment ------------------------------------------------ -""" - - -def classic_control(env, default_seed=True): - if default_seed: - seed = 2 - set_seed(seed, env) # reproducible - - alg_params = dict( - replay_buffer_capacity=5e5, - policy_target_update_interval=5, - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('TD3'): - with tf.name_scope('Q_Net1'): - q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Q_Net2'): - q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net1'): - target_q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net2'): - target_q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Policy'): - policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Policy'): - target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - net_list = [q_net1, q_net2, target_q_net1, target_q_net2, policy_net, target_policy_net] - alg_params['net_list'] = net_list - if alg_params.get('optimizers_list') is None: - q_lr, policy_lr = 3e-4, 3e-4 # q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network - q_optimizer1 = tf.optimizers.Adam(q_lr) - q_optimizer2 = tf.optimizers.Adam(q_lr) - policy_optimizer = tf.optimizers.Adam(policy_lr) - optimizers_list = [q_optimizer1, q_optimizer2, policy_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=150, - batch_size=64, - explore_steps=500, - update_itr=3, - reward_scale=1., - explore_noise_scale=1.0, - eval_noise_scale=0.5, - train_episodes=100, - test_episodes=10, - save_interval=10, - ) - - return alg_params, learn_params - - -def box2d(env, default_seed=True): - if default_seed: - seed = 2 - set_seed(seed, env) # reproducible - - alg_params = dict( - replay_buffer_capacity=5e5, - policy_target_update_interval=5, - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('TD3'): - with tf.name_scope('Q_Net1'): - q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Q_Net2'): - q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net1'): - target_q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net2'): - target_q_net2 = QNetwork(env.observation_space, 
env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Policy'): - policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Policy'): - target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - net_list = [q_net1, q_net2, target_q_net1, target_q_net2, policy_net, target_policy_net] - alg_params['net_list'] = net_list - if alg_params.get('optimizers_list') is None: - q_lr, policy_lr = 3e-4, 3e-4 # q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network - q_optimizer1 = tf.optimizers.Adam(q_lr) - q_optimizer2 = tf.optimizers.Adam(q_lr) - policy_optimizer = tf.optimizers.Adam(policy_lr) - optimizers_list = [q_optimizer1, q_optimizer2, policy_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=150, - batch_size=64, - explore_steps=500, - update_itr=3, - reward_scale=1., - explore_noise_scale=1.0, - eval_noise_scale=0.5, - train_episodes=100, - test_episodes=10, - save_interval=10, - ) - - return alg_params, learn_params - - -def mujoco(env, default_seed=True): - if default_seed: - seed = 2 - set_seed(seed, env) # reproducible - - alg_params = dict( - replay_buffer_capacity=5e5, - policy_target_update_interval=5, - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('TD3'): - with tf.name_scope('Q_Net1'): - q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Q_Net2'): - q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net1'): - target_q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net2'): - target_q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Policy'): - policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Policy'): - target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - net_list = [q_net1, q_net2, target_q_net1, target_q_net2, policy_net, target_policy_net] - alg_params['net_list'] = net_list - if alg_params.get('optimizers_list') is None: - q_lr, policy_lr = 3e-4, 3e-4 # q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network - q_optimizer1 = tf.optimizers.Adam(q_lr) - q_optimizer2 = tf.optimizers.Adam(q_lr) - policy_optimizer = tf.optimizers.Adam(policy_lr) - optimizers_list = [q_optimizer1, q_optimizer2, policy_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=150, - batch_size=64, - explore_steps=500, - update_itr=3, - reward_scale=1., - explore_noise_scale=1.0, - eval_noise_scale=0.5, - train_episodes=100, - test_episodes=10, - save_interval=10, - ) - - return alg_params, learn_params - - -def robotics(env, default_seed=True): - if default_seed: - seed = 2 - set_seed(seed, env) # reproducible - - alg_params = dict( - replay_buffer_capacity=5e5, 
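The policy_target_update_interval entry used throughout these TD3 builders controls TD3's delayed updates: the critics are trained on every sampled batch, while the actor and the target networks are refreshed only every N-th update. A generic sketch of that schedule (an illustration of the idea only, not RLzoo's TD3 update loop, which lives in td3.py):

policy_target_update_interval = 5
update_cnt = 0

def update_critics(batch):
    pass  # one gradient step on q_net1 and q_net2 (omitted here)

def update_actor_and_targets(batch):
    pass  # one gradient step on policy_net, then Polyak-update all target nets (omitted here)

for batch in range(20):  # e.g. 20 sampled mini-batches
    update_critics(batch)
    update_cnt += 1
    if update_cnt % policy_target_update_interval == 0:
        update_actor_and_targets(batch)  # delayed: runs once every 5 critic updates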
- policy_target_update_interval=5, - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('TD3'): - with tf.name_scope('Q_Net1'): - q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Q_Net2'): - q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net1'): - target_q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net2'): - target_q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Policy'): - policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Policy'): - target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - net_list = [q_net1, q_net2, target_q_net1, target_q_net2, policy_net, target_policy_net] - alg_params['net_list'] = net_list - if alg_params.get('optimizers_list') is None: - q_lr, policy_lr = 3e-4, 3e-4 # q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network - q_optimizer1 = tf.optimizers.Adam(q_lr) - q_optimizer2 = tf.optimizers.Adam(q_lr) - policy_optimizer = tf.optimizers.Adam(policy_lr) - optimizers_list = [q_optimizer1, q_optimizer2, policy_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=150, - batch_size=64, - explore_steps=500, - update_itr=3, - reward_scale=1., - explore_noise_scale=1.0, - eval_noise_scale=0.5, - train_episodes=100, - test_episodes=10, - save_interval=10, - ) - - return alg_params, learn_params - - -def dm_control(env, default_seed=True): - if default_seed: - seed = 2 - set_seed(seed, env) # reproducible - - alg_params = dict( - replay_buffer_capacity=5e5, - policy_target_update_interval=5, - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('TD3'): - with tf.name_scope('Q_Net1'): - q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Q_Net2'): - q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net1'): - target_q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net2'): - target_q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Policy'): - policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Policy'): - target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - net_list = [q_net1, q_net2, target_q_net1, target_q_net2, policy_net, target_policy_net] - alg_params['net_list'] = net_list - if alg_params.get('optimizers_list') is None: - q_lr, policy_lr = 
3e-4, 3e-4 # q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network - q_optimizer1 = tf.optimizers.Adam(q_lr) - q_optimizer2 = tf.optimizers.Adam(q_lr) - policy_optimizer = tf.optimizers.Adam(policy_lr) - optimizers_list = [q_optimizer1, q_optimizer2, policy_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=150, - batch_size=64, - explore_steps=500, - update_itr=3, - reward_scale=1., - explore_noise_scale=1.0, - eval_noise_scale=0.5, - train_episodes=100, - test_episodes=10, - save_interval=10, - ) - - return alg_params, learn_params - - -def rlbench(env, default_seed=True): - if default_seed: - seed = 2 - set_seed(seed, env) # reproducible - - alg_params = dict( - replay_buffer_capacity=5e5, - policy_target_update_interval=5, - ) - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('TD3'): - with tf.name_scope('Q_Net1'): - q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Q_Net2'): - q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net1'): - target_q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net2'): - target_q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Policy'): - policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Policy'): - target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - net_list = [q_net1, q_net2, target_q_net1, target_q_net2, policy_net, target_policy_net] - alg_params['net_list'] = net_list - if alg_params.get('optimizers_list') is None: - q_lr, policy_lr = 3e-4, 3e-4 # q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network - q_optimizer1 = tf.optimizers.Adam(q_lr) - q_optimizer2 = tf.optimizers.Adam(q_lr) - policy_optimizer = tf.optimizers.Adam(policy_lr) - optimizers_list = [q_optimizer1, q_optimizer2, policy_optimizer] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict( - max_steps=150, - batch_size=64, - explore_steps=500, - update_itr=3, - reward_scale=1., - explore_noise_scale=1.0, - eval_noise_scale=0.5, - train_episodes=100, - test_episodes=10, - save_interval=10, - ) - - return alg_params, learn_params +from rlzoo.common.policy_networks import * +from rlzoo.common.value_networks import * +from rlzoo.common.utils import set_seed + +""" +full list of algorithm parameters (alg_params) +----------------------------------------------- +net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization +optimizers_list: a list of optimizers for all networks and differentiable variables +replay_buffer_capacity: the size of buffer for storing explored samples +policy_target_update_interval: delayed interval for updating the target policy +----------------------------------------------- + +full list of learning parameters (learn_params) +----------------------------------------------- +train_episodes: total number of episodes 
for training +test_episodes: total number of episodes for testing +max_steps: maximum number of steps for one episode +batch_size: udpate batchsize +explore_steps: for random action sampling in the beginning of training +update_itr: repeated updates for single step +reward_scale: value range of reward +save_interval: timesteps for saving the weights and plotting the results +explore_noise_scale: range of action noise for exploration +eval_noise_scale: range of action noise for evaluation of action value +mode: 'train' or 'test' +render: if true, visualize the environment +----------------------------------------------- +""" + + +def classic_control(env, default_seed=True): + if default_seed: + seed = 2 + set_seed(seed, env) # reproducible + + alg_params = dict( + replay_buffer_capacity=5e5, + policy_target_update_interval=5, + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('TD3'): + with tf.name_scope('Q_Net1'): + q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Q_Net2'): + q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net1'): + target_q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net2'): + target_q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Policy'): + policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Policy'): + target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + net_list = [q_net1, q_net2, target_q_net1, target_q_net2, policy_net, target_policy_net] + alg_params['net_list'] = net_list + if alg_params.get('optimizers_list') is None: + q_lr, policy_lr = 3e-4, 3e-4 # q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network + q_optimizer1 = tf.optimizers.Adam(q_lr) + q_optimizer2 = tf.optimizers.Adam(q_lr) + policy_optimizer = tf.optimizers.Adam(policy_lr) + optimizers_list = [q_optimizer1, q_optimizer2, policy_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=150, + batch_size=64, + explore_steps=500, + update_itr=3, + reward_scale=1., + explore_noise_scale=1.0, + eval_noise_scale=0.5, + train_episodes=100, + test_episodes=10, + save_interval=10, + ) + + return alg_params, learn_params + + +def box2d(env, default_seed=True): + if default_seed: + seed = 2 + set_seed(seed, env) # reproducible + + alg_params = dict( + replay_buffer_capacity=5e5, + policy_target_update_interval=5, + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('TD3'): + with tf.name_scope('Q_Net1'): + q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Q_Net2'): + q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net1'): + target_q_net1 = 
QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net2'): + target_q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Policy'): + policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Policy'): + target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + net_list = [q_net1, q_net2, target_q_net1, target_q_net2, policy_net, target_policy_net] + alg_params['net_list'] = net_list + if alg_params.get('optimizers_list') is None: + q_lr, policy_lr = 3e-4, 3e-4 # q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network + q_optimizer1 = tf.optimizers.Adam(q_lr) + q_optimizer2 = tf.optimizers.Adam(q_lr) + policy_optimizer = tf.optimizers.Adam(policy_lr) + optimizers_list = [q_optimizer1, q_optimizer2, policy_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=150, + batch_size=64, + explore_steps=500, + update_itr=3, + reward_scale=1., + explore_noise_scale=1.0, + eval_noise_scale=0.5, + train_episodes=100, + test_episodes=10, + save_interval=10, + ) + + return alg_params, learn_params + + +def mujoco(env, default_seed=True): + if default_seed: + seed = 2 + set_seed(seed, env) # reproducible + + alg_params = dict( + replay_buffer_capacity=5e5, + policy_target_update_interval=5, + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('TD3'): + with tf.name_scope('Q_Net1'): + q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Q_Net2'): + q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net1'): + target_q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net2'): + target_q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Policy'): + policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Policy'): + target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + net_list = [q_net1, q_net2, target_q_net1, target_q_net2, policy_net, target_policy_net] + alg_params['net_list'] = net_list + if alg_params.get('optimizers_list') is None: + q_lr, policy_lr = 3e-4, 3e-4 # q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network + q_optimizer1 = tf.optimizers.Adam(q_lr) + q_optimizer2 = tf.optimizers.Adam(q_lr) + policy_optimizer = tf.optimizers.Adam(policy_lr) + optimizers_list = [q_optimizer1, q_optimizer2, policy_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=150, + batch_size=64, + explore_steps=500, + update_itr=3, + reward_scale=1., + explore_noise_scale=1.0, + eval_noise_scale=0.5, + train_episodes=100, + test_episodes=10, + save_interval=10, + ) + + 
return alg_params, learn_params + + +def robotics(env, default_seed=True): + if default_seed: + seed = 2 + set_seed(seed, env) # reproducible + + alg_params = dict( + replay_buffer_capacity=5e5, + policy_target_update_interval=5, + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('TD3'): + with tf.name_scope('Q_Net1'): + q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Q_Net2'): + q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net1'): + target_q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net2'): + target_q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Policy'): + policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Policy'): + target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + net_list = [q_net1, q_net2, target_q_net1, target_q_net2, policy_net, target_policy_net] + alg_params['net_list'] = net_list + if alg_params.get('optimizers_list') is None: + q_lr, policy_lr = 3e-4, 3e-4 # q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network + q_optimizer1 = tf.optimizers.Adam(q_lr) + q_optimizer2 = tf.optimizers.Adam(q_lr) + policy_optimizer = tf.optimizers.Adam(policy_lr) + optimizers_list = [q_optimizer1, q_optimizer2, policy_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=150, + batch_size=64, + explore_steps=500, + update_itr=3, + reward_scale=1., + explore_noise_scale=1.0, + eval_noise_scale=0.5, + train_episodes=100, + test_episodes=10, + save_interval=10, + ) + + return alg_params, learn_params + + +def dm_control(env, default_seed=True): + if default_seed: + seed = 2 + set_seed(seed, env) # reproducible + + alg_params = dict( + replay_buffer_capacity=5e5, + policy_target_update_interval=5, + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('TD3'): + with tf.name_scope('Q_Net1'): + q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Q_Net2'): + q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net1'): + target_q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net2'): + target_q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Policy'): + policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Policy'): + target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + 
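+        # Note: TD3.__init__ unpacks net_list in exactly this order:
+        # [q_net1, q_net2, target_q_net1, target_q_net2, policy_net, target_policy_net]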
net_list = [q_net1, q_net2, target_q_net1, target_q_net2, policy_net, target_policy_net] + alg_params['net_list'] = net_list + if alg_params.get('optimizers_list') is None: + q_lr, policy_lr = 3e-4, 3e-4 # q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network + q_optimizer1 = tf.optimizers.Adam(q_lr) + q_optimizer2 = tf.optimizers.Adam(q_lr) + policy_optimizer = tf.optimizers.Adam(policy_lr) + optimizers_list = [q_optimizer1, q_optimizer2, policy_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=150, + batch_size=64, + explore_steps=500, + update_itr=3, + reward_scale=1., + explore_noise_scale=1.0, + eval_noise_scale=0.5, + train_episodes=100, + test_episodes=10, + save_interval=10, + ) + + return alg_params, learn_params + + +def rlbench(env, default_seed=True): + if default_seed: + seed = 2 + set_seed(seed, env) # reproducible + + alg_params = dict( + replay_buffer_capacity=5e5, + policy_target_update_interval=5, + ) + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('TD3'): + with tf.name_scope('Q_Net1'): + q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Q_Net2'): + q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net1'): + target_q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net2'): + target_q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Policy'): + policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Policy'): + target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + net_list = [q_net1, q_net2, target_q_net1, target_q_net2, policy_net, target_policy_net] + alg_params['net_list'] = net_list + if alg_params.get('optimizers_list') is None: + q_lr, policy_lr = 3e-4, 3e-4 # q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network + q_optimizer1 = tf.optimizers.Adam(q_lr) + q_optimizer2 = tf.optimizers.Adam(q_lr) + policy_optimizer = tf.optimizers.Adam(policy_lr) + optimizers_list = [q_optimizer1, q_optimizer2, policy_optimizer] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict( + max_steps=150, + batch_size=64, + explore_steps=500, + update_itr=3, + reward_scale=1., + explore_noise_scale=1.0, + eval_noise_scale=0.5, + train_episodes=100, + test_episodes=10, + save_interval=10, + ) + + return alg_params, learn_params diff --git a/rlzoo/algorithms/td3/run_td3.py b/rlzoo/algorithms/td3/run_td3.py old mode 100644 new mode 100755 index 3dbd84c..c38bf8f --- a/rlzoo/algorithms/td3/run_td3.py +++ b/rlzoo/algorithms/td3/run_td3.py @@ -1,83 +1,83 @@ -from rlzoo.algorithms.td3.td3 import TD3 -from rlzoo.common.policy_networks import * -from rlzoo.common.value_networks import * -import gym - -""" load environment """ -env = gym.make('Pendulum-v0').unwrapped -# env = DummyVecEnv([lambda: env]) # The algorithms require a vectorized/wrapped environment to run -action_shape = env.action_space.shape 
-state_shape = env.observation_space.shape -# reproducible -seed = 2 -np.random.seed(seed) -tf.random.set_seed(seed) -env.seed(seed) - -""" build networks for the algorithm """ -num_hidden_layer = 2 # number of hidden layers for the networks -hidden_dim = 64 # dimension of hidden layers for the networks -with tf.name_scope('TD3'): - with tf.name_scope('Q_Net1'): - q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Q_Net2'): - q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net1'): - target_q_net1 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Q_Net2'): - target_q_net2 = QNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Policy'): - policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) - with tf.name_scope('Target_Policy'): - target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, - hidden_dim_list=num_hidden_layer * [hidden_dim]) -net_list = [q_net1, q_net2, target_q_net1, target_q_net2, policy_net, target_policy_net] - -""" choose optimizers """ -q_lr, policy_lr = 3e-4, 3e-4 # q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network -q_optimizer1 = tf.optimizers.Adam(q_lr) -q_optimizer2 = tf.optimizers.Adam(q_lr) -policy_optimizer = tf.optimizers.Adam(policy_lr) -optimizers_list = [q_optimizer1, q_optimizer2, policy_optimizer] - -model = TD3(net_list, optimizers_list) -""" -full list of arguments for the algorithm ----------------------------------------- -net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization -optimizers_list: a list of optimizers for all networks and differentiable variables -state_dim: dimension of state for the environment -action_dim: dimension of action for the environment -replay_buffer_capacity: the size of buffer for storing explored samples -policy_target_update_interval: delayed interval for updating the target policy -action_range: value of each action in [-action_range, action_range] -""" - -model.learn(env, train_episodes=100, max_steps=150, batch_size=64, explore_steps=500, update_itr=3, - reward_scale=1., save_interval=10, explore_noise_scale=1.0, eval_noise_scale=0.5, mode='train', - render=False) -""" -full list of parameters for training ---------------------------------------- -env: learning environment -train_episodes: total number of episodes for training -test_episodes: total number of episodes for testing -max_steps: maximum number of steps for one episode -batch_size: udpate batchsize -explore_steps: for random action sampling in the beginning of training -update_itr: repeated updates for single step -reward_scale: value range of reward -save_interval: timesteps for saving the weights and plotting the results -explore_noise_scale: range of action noise for exploration -eval_noise_scale: range of action noise for evaluation of action value -mode: 'train' or 'test' -render: if true, visualize the environment - -""" -# test -model.learn(env, test_episodes=10, max_steps=150, mode='test', render=True) +from rlzoo.algorithms.td3.td3 import TD3 +from rlzoo.common.policy_networks import * +from rlzoo.common.value_networks import * +import 
gym + +""" load environment """ +env = gym.make('Pendulum-v0').unwrapped +# env = DummyVecEnv([lambda: env]) # The algorithms require a vectorized/wrapped environment to run +action_shape = env.action_space.shape +state_shape = env.observation_space.shape +# reproducible +seed = 2 +np.random.seed(seed) +tf.random.set_seed(seed) +env.seed(seed) + +""" build networks for the algorithm """ +num_hidden_layer = 2 # number of hidden layers for the networks +hidden_dim = 64 # dimension of hidden layers for the networks +with tf.name_scope('TD3'): + with tf.name_scope('Q_Net1'): + q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Q_Net2'): + q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net1'): + target_q_net1 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Q_Net2'): + target_q_net2 = QNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Policy'): + policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) + with tf.name_scope('Target_Policy'): + target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, + hidden_dim_list=num_hidden_layer * [hidden_dim]) +net_list = [q_net1, q_net2, target_q_net1, target_q_net2, policy_net, target_policy_net] + +""" choose optimizers """ +q_lr, policy_lr = 3e-4, 3e-4 # q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network +q_optimizer1 = tf.optimizers.Adam(q_lr) +q_optimizer2 = tf.optimizers.Adam(q_lr) +policy_optimizer = tf.optimizers.Adam(policy_lr) +optimizers_list = [q_optimizer1, q_optimizer2, policy_optimizer] + +model = TD3(net_list, optimizers_list) +""" +full list of arguments for the algorithm +---------------------------------------- +net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization +optimizers_list: a list of optimizers for all networks and differentiable variables +state_dim: dimension of state for the environment +action_dim: dimension of action for the environment +replay_buffer_capacity: the size of buffer for storing explored samples +policy_target_update_interval: delayed interval for updating the target policy +action_range: value of each action in [-action_range, action_range] +""" + +model.learn(env, train_episodes=100, max_steps=150, batch_size=64, explore_steps=500, update_itr=3, + reward_scale=1., save_interval=10, explore_noise_scale=1.0, eval_noise_scale=0.5, mode='train', + render=False) +""" +full list of parameters for training +--------------------------------------- +env: learning environment +train_episodes: total number of episodes for training +test_episodes: total number of episodes for testing +max_steps: maximum number of steps for one episode +batch_size: udpate batchsize +explore_steps: for random action sampling in the beginning of training +update_itr: repeated updates for single step +reward_scale: value range of reward +save_interval: timesteps for saving the weights and plotting the results +explore_noise_scale: range of action noise for exploration +eval_noise_scale: range of action noise for evaluation of action value +mode: 'train' or 'test' +render: if true, visualize the environment + +""" +# test 
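+# In test mode, learn() reloads the checkpoints saved during training (load_ckpt),
+# switches all six networks to eval mode and rolls out the greedy, noise-free policy;
+# render=True visualizes each episode.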
+model.learn(env, test_episodes=10, max_steps=150, mode='test', render=True) diff --git a/rlzoo/algorithms/td3/td3.py b/rlzoo/algorithms/td3/td3.py old mode 100644 new mode 100755 index 7c3deb9..637a6ac --- a/rlzoo/algorithms/td3/td3.py +++ b/rlzoo/algorithms/td3/td3.py @@ -1,314 +1,314 @@ -""" -Twin Delayed DDPG (TD3) ------------------------- -DDPG suffers from problems like overestimate of Q-values and sensitivity to hyper-parameters. -Twin Delayed DDPG (TD3) is a variant of DDPG with several tricks: -* Trick One: Clipped Double-Q Learning. TD3 learns two Q-functions instead of one (hence “twin”), -and uses the smaller of the two Q-values to form the targets in the Bellman error loss functions. - -* Trick Two: “Delayed” Policy Updates. TD3 updates the policy (and target networks) less frequently -than the Q-function. - -* Trick Three: Target Policy Smoothing. TD3 adds noise to the target action, to make it harder for -the policy to exploit Q-function errors by smoothing out Q along changes in action. - -The implementation of TD3 includes 6 networks: 2 Q-net, 2 target Q-net, 1 policy net, 1 target policy net -Actor policy in TD3 is deterministic, with Gaussian exploration noise. - -Reference ---------- -original paper: https://arxiv.org/pdf/1802.09477.pdf - - -Environment ---- -Openai Gym Pendulum-v0, continuous action space -https://gym.openai.com/envs/Pendulum-v0/ - -Prerequisites ---- -tensorflow >=2.0.0a0 -tensorflow-probability 0.6.0 -tensorlayer >=2.0.0 - -&& -pip install box2d box2d-kengz --user - - -""" -import time - -import tensorflow_probability as tfp -import tensorlayer as tl -from rlzoo.common.utils import * -from rlzoo.common.buffer import * -from rlzoo.common.policy_networks import * -from rlzoo.common.value_networks import * - -tfd = tfp.distributions -Normal = tfd.Normal - -tl.logging.set_verbosity(tl.logging.DEBUG) - - -############################### TD3 #################################### - - -class TD3(): - """ twin-delayed ddpg """ - - def __init__(self, net_list, optimizers_list, replay_buffer_capacity=5e5, policy_target_update_interval=5): - self.name = 'TD3' - self.replay_buffer = ReplayBuffer(replay_buffer_capacity) - - # get all networks - [self.q_net1, self.q_net2, self.target_q_net1, self.target_q_net2, self.policy_net, - self.target_policy_net] = net_list - - assert isinstance(self.q_net1, QNetwork) - assert isinstance(self.q_net2, QNetwork) - assert isinstance(self.target_q_net1, QNetwork) - assert isinstance(self.target_q_net2, QNetwork) - assert isinstance(self.policy_net, DeterministicPolicyNetwork) - assert isinstance(self.target_policy_net, DeterministicPolicyNetwork) - assert isinstance(self.policy_net.action_space, gym.spaces.Box) - - print('Q Network (1,2): ', self.q_net1) - print('Policy Network: ', self.policy_net) - - # initialize weights of target networks - self.target_q_net1 = self.target_ini(self.q_net1, self.target_q_net1) - self.target_q_net2 = self.target_ini(self.q_net2, self.target_q_net2) - self.target_policy_net = self.target_ini(self.policy_net, self.target_policy_net) - - self.update_cnt = 0 - self.policy_target_update_interval = policy_target_update_interval - - [self.q_optimizer1, self.q_optimizer2, self.policy_optimizer] = optimizers_list - - def evaluate(self, state, eval_noise_scale, target=False): - """ - generate action with state for calculating gradients; - - :param eval_noise_scale: as the trick of target policy smoothing, for generating noisy actions. 
- """ - if target: - action = self.target_policy_net(state) - else: - action = self.policy_net(state) - # add noise - normal = Normal(0, 1) - eval_noise_clip = 2 * eval_noise_scale - noise = normal.sample(action.shape) * eval_noise_scale - noise = tf.clip_by_value(noise, -eval_noise_clip, eval_noise_clip) - action = action + noise - - return action - - def get_action(self, state, explore_noise_scale): - """ generate action with state for interaction with envronment """ - action = self.policy_net(np.array([state])) - action = action.numpy()[0] - - # add noise - normal = Normal(0, 1) - noise = normal.sample(action.shape) * explore_noise_scale - action = action + noise - - return action.numpy() - - def get_action_greedy(self, state): - """ generate action with state for interaction with envronment """ - return self.policy_net(np.array([state])).numpy()[0] - - def sample_action(self): - """ generate random actions for exploration """ - return self.policy_net.random_sample() - - def target_ini(self, net, target_net): - """ hard-copy update for initializing target networks """ - for target_param, param in zip(target_net.trainable_weights, net.trainable_weights): - target_param.assign(param) - return target_net - - def target_soft_update(self, net, target_net, soft_tau): - """ soft update the target net with Polyak averaging """ - for target_param, param in zip(target_net.trainable_weights, net.trainable_weights): - target_param.assign( # copy weight value into target parameters - target_param * (1.0 - soft_tau) + param * soft_tau - ) - return target_net - - def update(self, batch_size, eval_noise_scale, reward_scale=1., gamma=0.9, soft_tau=1e-2): - """ update all networks in TD3 """ - self.update_cnt += 1 - state, action, reward, next_state, done = self.replay_buffer.sample(batch_size) - - reward = reward[:, np.newaxis] # expand dim - done = done[:, np.newaxis] - - new_next_action = self.evaluate( - next_state, eval_noise_scale=eval_noise_scale, target=True - ) # clipped normal noise - reward = reward_scale * (reward - - np.mean(reward, axis=0)) / (np.std(reward, - axis=0) + 1e-6) # normalize with batch mean and std; plus a small number to prevent numerical problem - - # Training Q Function - target_q_min = tf.minimum(self.target_q_net1([next_state, new_next_action]), - self.target_q_net2([next_state, new_next_action])) - - target_q_value = reward + (1 - done) * gamma * target_q_min # if done==1, only reward - - with tf.GradientTape() as q1_tape: - predicted_q_value1 = self.q_net1([state, action]) - q_value_loss1 = tf.reduce_mean(tf.square(predicted_q_value1 - target_q_value)) - q1_grad = q1_tape.gradient(q_value_loss1, self.q_net1.trainable_weights) - self.q_optimizer1.apply_gradients(zip(q1_grad, self.q_net1.trainable_weights)) - - with tf.GradientTape() as q2_tape: - predicted_q_value2 = self.q_net2([state, action]) - q_value_loss2 = tf.reduce_mean(tf.square(predicted_q_value2 - target_q_value)) - q2_grad = q2_tape.gradient(q_value_loss2, self.q_net2.trainable_weights) - self.q_optimizer2.apply_gradients(zip(q2_grad, self.q_net2.trainable_weights)) - - # Training Policy Function - if self.update_cnt % self.policy_target_update_interval == 0: - with tf.GradientTape() as p_tape: - new_action = self.evaluate( - state, eval_noise_scale=0.0, target=False - ) # no noise, deterministic policy gradients - # """ implementation 1 """ - # predicted_new_q_value = tf.minimum(self.q_net1([state, new_action]),self.q_net2([state, new_action])) - """ implementation 2 """ - predicted_new_q_value = 
self.q_net1([state, new_action]) - policy_loss = -tf.reduce_mean(predicted_new_q_value) - p_grad = p_tape.gradient(policy_loss, self.policy_net.trainable_weights) - self.policy_optimizer.apply_gradients(zip(p_grad, self.policy_net.trainable_weights)) - - # Soft update the target nets - self.target_q_net1 = self.target_soft_update(self.q_net1, self.target_q_net1, soft_tau) - self.target_q_net2 = self.target_soft_update(self.q_net2, self.target_q_net2, soft_tau) - self.target_policy_net = self.target_soft_update(self.policy_net, self.target_policy_net, soft_tau) - - def save_ckpt(self, env_name): # save trained weights - save_model(self.q_net1, 'model_q_net1', self.name, env_name) - save_model(self.q_net2, 'model_q_net2', self.name, env_name) - save_model(self.target_q_net1, 'model_target_q_net1', self.name, env_name) - save_model(self.target_q_net2, 'model_target_q_net2', self.name, env_name) - save_model(self.policy_net, 'model_policy_net', self.name, env_name) - save_model(self.target_policy_net, 'model_target_policy_net', self.name, env_name) - - def load_ckpt(self, env_name): # load trained weights - load_model(self.q_net1, 'model_q_net1', self.name, env_name) - load_model(self.q_net2, 'model_q_net2', self.name, env_name) - load_model(self.target_q_net1, 'model_target_q_net1', self.name, env_name) - load_model(self.target_q_net2, 'model_target_q_net2', self.name, env_name) - load_model(self.policy_net, 'model_policy_net', self.name, env_name) - load_model(self.target_policy_net, 'model_target_policy_net', self.name, env_name) - - def learn(self, env, train_episodes=1000, test_episodes=1000, max_steps=150, batch_size=64, explore_steps=500, - update_itr=3, - reward_scale=1., save_interval=10, explore_noise_scale=1.0, eval_noise_scale=0.5, mode='train', - render=False, plot_func=None): - """ - :param env: learning environment - :param train_episodes: total number of episodes for training - :param test_episodes: total number of episodes for testing - :param max_steps: maximum number of steps for one episode - :param batch_size: udpate batchsize - :param explore_steps: for random action sampling in the beginning of training - :param update_itr: repeated updates for single step - :param reward_scale: value range of reward - :param save_interval: timesteps for saving the weights and plotting the results - :param explore_noise_scale: range of action noise for exploration - :param eval_noise_scale: range of action noise for evaluation of action value - :param mode: 'train' or 'test' - :param render: if true, visualize the environment - :param plot_func: additional function for interactive module - """ - - # training loop - if mode == 'train': - print('Training... 
| Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) - frame_idx = 0 - rewards = [] - t0 = time.time() - for eps in range(train_episodes): - state = env.reset() - episode_reward = 0 - - for step in range(max_steps): - if frame_idx > explore_steps: - action = self.get_action(state, explore_noise_scale=explore_noise_scale) - else: - action = self.sample_action() - - next_state, reward, done, _ = env.step(action) - if render: env.render() - done = 1 if done == True else 0 - - self.replay_buffer.push(state, action, reward, next_state, done) - - state = next_state - episode_reward += reward - frame_idx += 1 - - if len(self.replay_buffer) > batch_size: - for i in range(update_itr): - self.update(batch_size, eval_noise_scale=eval_noise_scale, reward_scale=reward_scale) - - if done: - break - - if eps % int(save_interval) == 0: - plot_save_log(rewards, algorithm_name=self.name, env_name=env.spec.id) - self.save_ckpt(env_name=env.spec.id) - - print('Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}' \ - .format(eps, train_episodes, episode_reward, time.time() - t0)) - rewards.append(episode_reward) - if plot_func is not None: - plot_func(rewards) - plot_save_log(rewards, algorithm_name=self.name, env_name=env.spec.id) - self.save_ckpt(env_name=env.spec.id) - - elif mode == 'test': - frame_idx = 0 - rewards = [] - t0 = time.time() - - self.load_ckpt(env_name=env.spec.id) - print('Testing... | Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) - # set test mode - self.q_net1.eval() - self.q_net2.eval() - self.target_q_net1.eval() - self.target_q_net2.eval() - self.policy_net.eval() - self.target_policy_net.eval() - - for eps in range(test_episodes): - state = env.reset() - episode_reward = 0 - - for step in range(max_steps): - action = self.get_action_greedy(state) - next_state, reward, done, _ = env.step(action) - if render: env.render() - done = 1 if done == True else 0 - - state = next_state - episode_reward += reward - frame_idx += 1 - - if done: - break - print('Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}' \ - .format(eps, test_episodes, episode_reward, time.time() - t0)) - rewards.append(episode_reward) - if plot_func is not None: - plot_func(rewards) - - else: - print('unknow mode type, activate test mode as default') +""" +Twin Delayed DDPG (TD3) +------------------------ +DDPG suffers from problems like overestimate of Q-values and sensitivity to hyper-parameters. +Twin Delayed DDPG (TD3) is a variant of DDPG with several tricks: +* Trick One: Clipped Double-Q Learning. TD3 learns two Q-functions instead of one (hence “twin”), +and uses the smaller of the two Q-values to form the targets in the Bellman error loss functions. + +* Trick Two: “Delayed” Policy Updates. TD3 updates the policy (and target networks) less frequently +than the Q-function. + +* Trick Three: Target Policy Smoothing. TD3 adds noise to the target action, to make it harder for +the policy to exploit Q-function errors by smoothing out Q along changes in action. + +The implementation of TD3 includes 6 networks: 2 Q-net, 2 target Q-net, 1 policy net, 1 target policy net +Actor policy in TD3 is deterministic, with Gaussian exploration noise. 
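+
+Schematically (notation illustrative), the critic target computed in update() is
+    a' = target_policy(s') + clip(eps, -c, c),  eps ~ N(0, eval_noise_scale^2),  c = 2 * eval_noise_scale
+    y  = r + gamma * (1 - done) * min(target_Q1(s', a'), target_Q2(s', a'))
+and both Q networks are regressed onto y with an MSE loss, while the (delayed) policy update maximizes Q1(s, pi(s)).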
+ +Reference +--------- +original paper: https://arxiv.org/pdf/1802.09477.pdf + + +Environment +--- +Openai Gym Pendulum-v0, continuous action space +https://gym.openai.com/envs/Pendulum-v0/ + +Prerequisites +--- +tensorflow >=2.0.0a0 +tensorflow-probability 0.6.0 +tensorlayer >=2.0.0 + +&& +pip install box2d box2d-kengz --user + + +""" +import time + +import tensorflow_probability as tfp +import tensorlayer as tl +from rlzoo.common.utils import * +from rlzoo.common.buffer import * +from rlzoo.common.policy_networks import * +from rlzoo.common.value_networks import * + +tfd = tfp.distributions +Normal = tfd.Normal + +tl.logging.set_verbosity(tl.logging.DEBUG) + + +############################### TD3 #################################### + + +class TD3(): + """ twin-delayed ddpg """ + + def __init__(self, net_list, optimizers_list, replay_buffer_capacity=5e5, policy_target_update_interval=5): + self.name = 'TD3' + self.replay_buffer = ReplayBuffer(replay_buffer_capacity) + + # get all networks + [self.q_net1, self.q_net2, self.target_q_net1, self.target_q_net2, self.policy_net, + self.target_policy_net] = net_list + + assert isinstance(self.q_net1, QNetwork) + assert isinstance(self.q_net2, QNetwork) + assert isinstance(self.target_q_net1, QNetwork) + assert isinstance(self.target_q_net2, QNetwork) + assert isinstance(self.policy_net, DeterministicPolicyNetwork) + assert isinstance(self.target_policy_net, DeterministicPolicyNetwork) + assert isinstance(self.policy_net.action_space, gym.spaces.Box) + + print('Q Network (1,2): ', self.q_net1) + print('Policy Network: ', self.policy_net) + + # initialize weights of target networks + self.target_q_net1 = self.target_ini(self.q_net1, self.target_q_net1) + self.target_q_net2 = self.target_ini(self.q_net2, self.target_q_net2) + self.target_policy_net = self.target_ini(self.policy_net, self.target_policy_net) + + self.update_cnt = 0 + self.policy_target_update_interval = policy_target_update_interval + + [self.q_optimizer1, self.q_optimizer2, self.policy_optimizer] = optimizers_list + + def evaluate(self, state, eval_noise_scale, target=False): + """ + generate action with state for calculating gradients; + + :param eval_noise_scale: as the trick of target policy smoothing, for generating noisy actions. 
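+        :param state: batch of states to act on
+        :param target: if True, use the target policy network; otherwise use the online policy network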
+ """ + if target: + action = self.target_policy_net(state) + else: + action = self.policy_net(state) + # add noise + normal = Normal(0, 1) + eval_noise_clip = 2 * eval_noise_scale + noise = normal.sample(action.shape) * eval_noise_scale + noise = tf.clip_by_value(noise, -eval_noise_clip, eval_noise_clip) + action = action + noise + + return action + + def get_action(self, state, explore_noise_scale): + """ generate action with state for interaction with envronment """ + action = self.policy_net(np.array([state])) + action = action.numpy()[0] + + # add noise + normal = Normal(0, 1) + noise = normal.sample(action.shape) * explore_noise_scale + action = action + noise + + return action.numpy() + + def get_action_greedy(self, state): + """ generate action with state for interaction with envronment """ + return self.policy_net(np.array([state])).numpy()[0] + + def sample_action(self): + """ generate random actions for exploration """ + return self.policy_net.random_sample() + + def target_ini(self, net, target_net): + """ hard-copy update for initializing target networks """ + for target_param, param in zip(target_net.trainable_weights, net.trainable_weights): + target_param.assign(param) + return target_net + + def target_soft_update(self, net, target_net, soft_tau): + """ soft update the target net with Polyak averaging """ + for target_param, param in zip(target_net.trainable_weights, net.trainable_weights): + target_param.assign( # copy weight value into target parameters + target_param * (1.0 - soft_tau) + param * soft_tau + ) + return target_net + + def update(self, batch_size, eval_noise_scale, reward_scale=1., gamma=0.9, soft_tau=1e-2): + """ update all networks in TD3 """ + self.update_cnt += 1 + state, action, reward, next_state, done = self.replay_buffer.sample(batch_size) + + reward = reward[:, np.newaxis] # expand dim + done = done[:, np.newaxis] + + new_next_action = self.evaluate( + next_state, eval_noise_scale=eval_noise_scale, target=True + ) # clipped normal noise + reward = reward_scale * (reward - + np.mean(reward, axis=0)) / (np.std(reward, + axis=0) + 1e-6) # normalize with batch mean and std; plus a small number to prevent numerical problem + + # Training Q Function + target_q_min = tf.minimum(self.target_q_net1([next_state, new_next_action]), + self.target_q_net2([next_state, new_next_action])) + + target_q_value = reward + (1 - done) * gamma * target_q_min # if done==1, only reward + + with tf.GradientTape() as q1_tape: + predicted_q_value1 = self.q_net1([state, action]) + q_value_loss1 = tf.reduce_mean(tf.square(predicted_q_value1 - target_q_value)) + q1_grad = q1_tape.gradient(q_value_loss1, self.q_net1.trainable_weights) + self.q_optimizer1.apply_gradients(zip(q1_grad, self.q_net1.trainable_weights)) + + with tf.GradientTape() as q2_tape: + predicted_q_value2 = self.q_net2([state, action]) + q_value_loss2 = tf.reduce_mean(tf.square(predicted_q_value2 - target_q_value)) + q2_grad = q2_tape.gradient(q_value_loss2, self.q_net2.trainable_weights) + self.q_optimizer2.apply_gradients(zip(q2_grad, self.q_net2.trainable_weights)) + + # Training Policy Function + if self.update_cnt % self.policy_target_update_interval == 0: + with tf.GradientTape() as p_tape: + new_action = self.evaluate( + state, eval_noise_scale=0.0, target=False + ) # no noise, deterministic policy gradients + # """ implementation 1 """ + # predicted_new_q_value = tf.minimum(self.q_net1([state, new_action]),self.q_net2([state, new_action])) + """ implementation 2 """ + predicted_new_q_value = 
self.q_net1([state, new_action]) + policy_loss = -tf.reduce_mean(predicted_new_q_value) + p_grad = p_tape.gradient(policy_loss, self.policy_net.trainable_weights) + self.policy_optimizer.apply_gradients(zip(p_grad, self.policy_net.trainable_weights)) + + # Soft update the target nets + self.target_q_net1 = self.target_soft_update(self.q_net1, self.target_q_net1, soft_tau) + self.target_q_net2 = self.target_soft_update(self.q_net2, self.target_q_net2, soft_tau) + self.target_policy_net = self.target_soft_update(self.policy_net, self.target_policy_net, soft_tau) + + def save_ckpt(self, env_name): # save trained weights + save_model(self.q_net1, 'model_q_net1', self.name, env_name) + save_model(self.q_net2, 'model_q_net2', self.name, env_name) + save_model(self.target_q_net1, 'model_target_q_net1', self.name, env_name) + save_model(self.target_q_net2, 'model_target_q_net2', self.name, env_name) + save_model(self.policy_net, 'model_policy_net', self.name, env_name) + save_model(self.target_policy_net, 'model_target_policy_net', self.name, env_name) + + def load_ckpt(self, env_name): # load trained weights + load_model(self.q_net1, 'model_q_net1', self.name, env_name) + load_model(self.q_net2, 'model_q_net2', self.name, env_name) + load_model(self.target_q_net1, 'model_target_q_net1', self.name, env_name) + load_model(self.target_q_net2, 'model_target_q_net2', self.name, env_name) + load_model(self.policy_net, 'model_policy_net', self.name, env_name) + load_model(self.target_policy_net, 'model_target_policy_net', self.name, env_name) + + def learn(self, env, train_episodes=1000, test_episodes=1000, max_steps=150, batch_size=64, explore_steps=500, + update_itr=3, + reward_scale=1., save_interval=10, explore_noise_scale=1.0, eval_noise_scale=0.5, mode='train', + render=False, plot_func=None): + """ + :param env: learning environment + :param train_episodes: total number of episodes for training + :param test_episodes: total number of episodes for testing + :param max_steps: maximum number of steps for one episode + :param batch_size: udpate batchsize + :param explore_steps: for random action sampling in the beginning of training + :param update_itr: repeated updates for single step + :param reward_scale: value range of reward + :param save_interval: timesteps for saving the weights and plotting the results + :param explore_noise_scale: range of action noise for exploration + :param eval_noise_scale: range of action noise for evaluation of action value + :param mode: 'train' or 'test' + :param render: if true, visualize the environment + :param plot_func: additional function for interactive module + """ + + # training loop + if mode == 'train': + print('Training... 
| Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) + frame_idx = 0 + rewards = [] + t0 = time.time() + for eps in range(train_episodes): + state = env.reset() + episode_reward = 0 + + for step in range(max_steps): + if frame_idx > explore_steps: + action = self.get_action(state, explore_noise_scale=explore_noise_scale) + else: + action = self.sample_action() + + next_state, reward, done, _ = env.step(action) + if render: env.render() + done = 1 if done == True else 0 + + self.replay_buffer.push(state, action, reward, next_state, done) + + state = next_state + episode_reward += reward + frame_idx += 1 + + if len(self.replay_buffer) > batch_size: + for i in range(update_itr): + self.update(batch_size, eval_noise_scale=eval_noise_scale, reward_scale=reward_scale) + + if done: + break + + if eps % int(save_interval) == 0: + plot_save_log(rewards, algorithm_name=self.name, env_name=env.spec.id) + self.save_ckpt(env_name=env.spec.id) + + print('Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}' \ + .format(eps, train_episodes, episode_reward, time.time() - t0)) + rewards.append(episode_reward) + if plot_func is not None: + plot_func(rewards) + plot_save_log(rewards, algorithm_name=self.name, env_name=env.spec.id) + self.save_ckpt(env_name=env.spec.id) + + elif mode == 'test': + frame_idx = 0 + rewards = [] + t0 = time.time() + + self.load_ckpt(env_name=env.spec.id) + print('Testing... | Algorithm: {} | Environment: {}'.format(self.name, env.spec.id)) + # set test mode + self.q_net1.eval() + self.q_net2.eval() + self.target_q_net1.eval() + self.target_q_net2.eval() + self.policy_net.eval() + self.target_policy_net.eval() + + for eps in range(test_episodes): + state = env.reset() + episode_reward = 0 + + for step in range(max_steps): + action = self.get_action_greedy(state) + next_state, reward, done, _ = env.step(action) + if render: env.render() + done = 1 if done == True else 0 + + state = next_state + episode_reward += reward + frame_idx += 1 + + if done: + break + print('Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}' \ + .format(eps, test_episodes, episode_reward, time.time() - t0)) + rewards.append(episode_reward) + if plot_func is not None: + plot_func(rewards) + + else: + print('unknow mode type, activate test mode as default') diff --git a/rlzoo/algorithms/trpo/__init__.py b/rlzoo/algorithms/trpo/__init__.py old mode 100644 new mode 100755 diff --git a/rlzoo/algorithms/trpo/default.py b/rlzoo/algorithms/trpo/default.py old mode 100644 new mode 100755 index 6f71615..a1365bb --- a/rlzoo/algorithms/trpo/default.py +++ b/rlzoo/algorithms/trpo/default.py @@ -1,330 +1,330 @@ -from rlzoo.common.policy_networks import * -from rlzoo.common.value_networks import * -from rlzoo.common.utils import set_seed - -""" -full list of algorithm parameters (alg_params) ------------------------------------------------ -net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization -optimizers_list: a list of optimizers for all networks and differentiable variables -damping_coeff: Artifact for numerical stability -cg_iters: Number of iterations of conjugate gradient to perform -delta: KL-divergence limit for TRPO update. 
------------------------------------------------ - -full list of learning parameters (learn_params) ------------------------------------------------ -train_episodes: total number of episodes for training -test_episodes: total number of episodes for testing -max_steps: maximum number of steps for one episode -save_interval: time steps for saving -gamma: reward discount factor -mode: train or test -render: render each step -batch_size: update batch size -backtrack_iters: Maximum number of steps allowed in the backtracking line search -backtrack_coeff: How far back to step during backtracking line search -train_critic_iters: critic update iteration steps ------------------------------------------------ -""" - - -def atari(env, default_seed=True): - if default_seed: - # reproducible - seed = 2 - set_seed(seed, env) - - alg_params = dict( - damping_coeff=0.1, - cg_iters=10, - delta=0.01 - ) - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('TRPO'): - with tf.name_scope('V_Net'): - v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - [hidden_dim] * num_hidden_layer, output_activation=tf.nn.tanh) - - net_list = [v_net, policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - critic_lr = 1e-3 - optimizers_list = [tf.optimizers.Adam(critic_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict(train_episodes=2000, - test_episodes=100, - max_steps=200, - save_interval=100, - gamma=0.9, - batch_size=256, - backtrack_iters=10, - backtrack_coeff=0.8, - train_critic_iters=80) - - return alg_params, learn_params - - -def classic_control(env, default_seed=True): - if default_seed: - # reproducible - seed = 2 - set_seed(seed, env) - - alg_params = dict( - damping_coeff=0.1, - cg_iters=10, - delta=0.01 - ) - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('TRPO'): - with tf.name_scope('V_Net'): - v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - [hidden_dim] * num_hidden_layer, output_activation=tf.nn.tanh) - - net_list = [v_net, policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - critic_lr = 1e-3 - optimizers_list = [tf.optimizers.Adam(critic_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict(train_episodes=2000, - test_episodes=100, - max_steps=200, - save_interval=100, - gamma=0.9, - batch_size=256, - backtrack_iters=10, - backtrack_coeff=0.8, - train_critic_iters=80) - - return alg_params, learn_params - - -def box2d(env, default_seed=True): - if default_seed: - # reproducible - seed = 2 - set_seed(seed, env) - - alg_params = dict( - damping_coeff=0.1, - cg_iters=10, - delta=0.01 - ) - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('TRPO'): - with tf.name_scope('V_Net'): - v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) - with 
tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - [hidden_dim] * num_hidden_layer, output_activation=tf.nn.tanh) - - net_list = [v_net, policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - critic_lr = 1e-3 - optimizers_list = [tf.optimizers.Adam(critic_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict(train_episodes=2000, - test_episodes=100, - max_steps=200, - save_interval=100, - gamma=0.9, - batch_size=256, - backtrack_iters=10, - backtrack_coeff=0.8, - train_critic_iters=80) - - return alg_params, learn_params - - -def mujoco(env, default_seed=True): - if default_seed: - # reproducible - seed = 2 - set_seed(seed, env) - - alg_params = dict( - damping_coeff=0.1, - cg_iters=10, - delta=0.01 - ) - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('TRPO'): - with tf.name_scope('V_Net'): - v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - [hidden_dim] * num_hidden_layer, output_activation=tf.nn.tanh) - - net_list = [v_net, policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - critic_lr = 1e-3 - optimizers_list = [tf.optimizers.Adam(critic_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict(train_episodes=2000, - test_episodes=100, - max_steps=200, - save_interval=100, - gamma=0.9, - batch_size=256, - backtrack_iters=10, - backtrack_coeff=0.8, - train_critic_iters=80) - - return alg_params, learn_params - - -def robotics(env, default_seed=True): - if default_seed: - # reproducible - seed = 2 - set_seed(seed, env) - - alg_params = dict( - damping_coeff=0.1, - cg_iters=10, - delta=0.01 - ) - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('TRPO'): - with tf.name_scope('V_Net'): - v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - [hidden_dim] * num_hidden_layer, output_activation=tf.nn.tanh) - - net_list = [v_net, policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - critic_lr = 1e-3 - optimizers_list = [tf.optimizers.Adam(critic_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict(train_episodes=2000, - test_episodes=100, - max_steps=200, - save_interval=100, - gamma=0.9, - batch_size=256, - backtrack_iters=10, - backtrack_coeff=0.8, - train_critic_iters=80) - - return alg_params, learn_params - - -def dm_control(env, default_seed=True): - if default_seed: - # reproducible - seed = 2 - set_seed(seed, env) - - alg_params = dict( - damping_coeff=0.1, - cg_iters=10, - delta=0.01 - ) - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('TRPO'): - with tf.name_scope('V_Net'): - v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) - with tf.name_scope('Policy'): - policy_net = 
StochasticPolicyNetwork(env.observation_space, env.action_space, - [hidden_dim] * num_hidden_layer, output_activation=tf.nn.tanh) - - net_list = [v_net, policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - critic_lr = 1e-3 - optimizers_list = [tf.optimizers.Adam(critic_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict(train_episodes=2000, - test_episodes=100, - max_steps=200, - save_interval=100, - gamma=0.9, - batch_size=256, - backtrack_iters=10, - backtrack_coeff=0.8, - train_critic_iters=80) - - return alg_params, learn_params - - -def rlbench(env, default_seed=True): - if default_seed: - # reproducible - seed = 2 - set_seed(seed, env) - - alg_params = dict( - damping_coeff=0.1, - cg_iters=10, - delta=0.01 - ) - - if alg_params.get('net_list') is None: - num_hidden_layer = 2 # number of hidden layers for the networks - hidden_dim = 64 # dimension of hidden layers for the networks - with tf.name_scope('TRPO'): - with tf.name_scope('V_Net'): - v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) - with tf.name_scope('Policy'): - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, - [hidden_dim] * num_hidden_layer, output_activation=tf.nn.tanh) - - net_list = [v_net, policy_net] - alg_params['net_list'] = net_list - - if alg_params.get('optimizers_list') is None: - critic_lr = 1e-3 - optimizers_list = [tf.optimizers.Adam(critic_lr)] - alg_params['optimizers_list'] = optimizers_list - - learn_params = dict(train_episodes=2000, - test_episodes=100, - max_steps=200, - save_interval=100, - gamma=0.9, - batch_size=256, - backtrack_iters=10, - backtrack_coeff=0.8, - train_critic_iters=80) - - return alg_params, learn_params +from rlzoo.common.policy_networks import * +from rlzoo.common.value_networks import * +from rlzoo.common.utils import set_seed + +""" +full list of algorithm parameters (alg_params) +----------------------------------------------- +net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization +optimizers_list: a list of optimizers for all networks and differentiable variables +damping_coeff: Artifact for numerical stability +cg_iters: Number of iterations of conjugate gradient to perform +delta: KL-divergence limit for TRPO update. 
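+(damping_coeff is conventionally folded into the conjugate-gradient solve as Hv -> Hv + damping_coeff * v
+to keep the Fisher-vector products well conditioned; see rlzoo/algorithms/trpo/trpo.py for the exact usage)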
+----------------------------------------------- + +full list of learning parameters (learn_params) +----------------------------------------------- +train_episodes: total number of episodes for training +test_episodes: total number of episodes for testing +max_steps: maximum number of steps for one episode +save_interval: time steps for saving +gamma: reward discount factor +mode: train or test +render: render each step +batch_size: update batch size +backtrack_iters: Maximum number of steps allowed in the backtracking line search +backtrack_coeff: How far back to step during backtracking line search +train_critic_iters: critic update iteration steps +----------------------------------------------- +""" + + +def atari(env, default_seed=True): + if default_seed: + # reproducible + seed = 2 + set_seed(seed, env) + + alg_params = dict( + damping_coeff=0.1, + cg_iters=10, + delta=0.01 + ) + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('TRPO'): + with tf.name_scope('V_Net'): + v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + [hidden_dim] * num_hidden_layer, output_activation=tf.nn.tanh) + + net_list = [v_net, policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + critic_lr = 1e-3 + optimizers_list = [tf.optimizers.Adam(critic_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict(train_episodes=2000, + test_episodes=100, + max_steps=200, + save_interval=100, + gamma=0.9, + batch_size=256, + backtrack_iters=10, + backtrack_coeff=0.8, + train_critic_iters=80) + + return alg_params, learn_params + + +def classic_control(env, default_seed=True): + if default_seed: + # reproducible + seed = 2 + set_seed(seed, env) + + alg_params = dict( + damping_coeff=0.1, + cg_iters=10, + delta=0.01 + ) + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('TRPO'): + with tf.name_scope('V_Net'): + v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + [hidden_dim] * num_hidden_layer, output_activation=tf.nn.tanh) + + net_list = [v_net, policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + critic_lr = 1e-3 + optimizers_list = [tf.optimizers.Adam(critic_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict(train_episodes=2000, + test_episodes=100, + max_steps=200, + save_interval=100, + gamma=0.9, + batch_size=256, + backtrack_iters=10, + backtrack_coeff=0.8, + train_critic_iters=80) + + return alg_params, learn_params + + +def box2d(env, default_seed=True): + if default_seed: + # reproducible + seed = 2 + set_seed(seed, env) + + alg_params = dict( + damping_coeff=0.1, + cg_iters=10, + delta=0.01 + ) + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('TRPO'): + with tf.name_scope('V_Net'): + v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) + with 
tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + [hidden_dim] * num_hidden_layer, output_activation=tf.nn.tanh) + + net_list = [v_net, policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + critic_lr = 1e-3 + optimizers_list = [tf.optimizers.Adam(critic_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict(train_episodes=2000, + test_episodes=100, + max_steps=200, + save_interval=100, + gamma=0.9, + batch_size=256, + backtrack_iters=10, + backtrack_coeff=0.8, + train_critic_iters=80) + + return alg_params, learn_params + + +def mujoco(env, default_seed=True): + if default_seed: + # reproducible + seed = 2 + set_seed(seed, env) + + alg_params = dict( + damping_coeff=0.1, + cg_iters=10, + delta=0.01 + ) + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('TRPO'): + with tf.name_scope('V_Net'): + v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + [hidden_dim] * num_hidden_layer, output_activation=tf.nn.tanh) + + net_list = [v_net, policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + critic_lr = 1e-3 + optimizers_list = [tf.optimizers.Adam(critic_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict(train_episodes=2000, + test_episodes=100, + max_steps=200, + save_interval=100, + gamma=0.9, + batch_size=256, + backtrack_iters=10, + backtrack_coeff=0.8, + train_critic_iters=80) + + return alg_params, learn_params + + +def robotics(env, default_seed=True): + if default_seed: + # reproducible + seed = 2 + set_seed(seed, env) + + alg_params = dict( + damping_coeff=0.1, + cg_iters=10, + delta=0.01 + ) + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('TRPO'): + with tf.name_scope('V_Net'): + v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + [hidden_dim] * num_hidden_layer, output_activation=tf.nn.tanh) + + net_list = [v_net, policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + critic_lr = 1e-3 + optimizers_list = [tf.optimizers.Adam(critic_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict(train_episodes=2000, + test_episodes=100, + max_steps=200, + save_interval=100, + gamma=0.9, + batch_size=256, + backtrack_iters=10, + backtrack_coeff=0.8, + train_critic_iters=80) + + return alg_params, learn_params + + +def dm_control(env, default_seed=True): + if default_seed: + # reproducible + seed = 2 + set_seed(seed, env) + + alg_params = dict( + damping_coeff=0.1, + cg_iters=10, + delta=0.01 + ) + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('TRPO'): + with tf.name_scope('V_Net'): + v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) + with tf.name_scope('Policy'): + policy_net = 
StochasticPolicyNetwork(env.observation_space, env.action_space, + [hidden_dim] * num_hidden_layer, output_activation=tf.nn.tanh) + + net_list = [v_net, policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + critic_lr = 1e-3 + optimizers_list = [tf.optimizers.Adam(critic_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict(train_episodes=2000, + test_episodes=100, + max_steps=200, + save_interval=100, + gamma=0.9, + batch_size=256, + backtrack_iters=10, + backtrack_coeff=0.8, + train_critic_iters=80) + + return alg_params, learn_params + + +def rlbench(env, default_seed=True): + if default_seed: + # reproducible + seed = 2 + set_seed(seed, env) + + alg_params = dict( + damping_coeff=0.1, + cg_iters=10, + delta=0.01 + ) + + if alg_params.get('net_list') is None: + num_hidden_layer = 2 # number of hidden layers for the networks + hidden_dim = 64 # dimension of hidden layers for the networks + with tf.name_scope('TRPO'): + with tf.name_scope('V_Net'): + v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer) + with tf.name_scope('Policy'): + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, + [hidden_dim] * num_hidden_layer, output_activation=tf.nn.tanh) + + net_list = [v_net, policy_net] + alg_params['net_list'] = net_list + + if alg_params.get('optimizers_list') is None: + critic_lr = 1e-3 + optimizers_list = [tf.optimizers.Adam(critic_lr)] + alg_params['optimizers_list'] = optimizers_list + + learn_params = dict(train_episodes=2000, + test_episodes=100, + max_steps=200, + save_interval=100, + gamma=0.9, + batch_size=256, + backtrack_iters=10, + backtrack_coeff=0.8, + train_critic_iters=80) + + return alg_params, learn_params diff --git a/rlzoo/algorithms/trpo/run_trpo.py b/rlzoo/algorithms/trpo/run_trpo.py old mode 100644 new mode 100755 index 9bfd2cf..37de726 --- a/rlzoo/algorithms/trpo/run_trpo.py +++ b/rlzoo/algorithms/trpo/run_trpo.py @@ -1,58 +1,58 @@ -from rlzoo.common.utils import set_seed -from rlzoo.algorithms.trpo.trpo import TRPO -from rlzoo.common.policy_networks import * -from rlzoo.common.value_networks import * -import gym - -""" load environment """ -env = gym.make('Pendulum-v0').unwrapped - -# reproducible -seed = 2 -set_seed(seed, env) - -""" build networks for the algorithm """ -name = 'TRPO' -hidden_dim = 64 -num_hidden_layer = 2 -critic = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer, name=name + '_value') - -actor = StochasticPolicyNetwork(env.observation_space, env.action_space, [hidden_dim] * num_hidden_layer, - output_activation=tf.nn.tanh, name=name + '_policy') -net_list = critic, actor - -critic_lr = 1e-3 -optimizers_list = [tf.optimizers.Adam(critic_lr)] - -""" create model """ -model = TRPO(net_list, optimizers_list, damping_coeff=0.1, cg_iters=10, delta=0.01) -""" -full list of arguments for the algorithm ----------------------------------------- -net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization -optimizers_list: a list of optimizers for all networks and differentiable variables -damping_coeff: Artifact for numerical stability -cg_iters: Number of iterations of conjugate gradient to perform -delta: KL-divergence limit for TRPO update. 
-""" - -model.learn(env, mode='train', render=False, train_episodes=2000, max_steps=200, save_interval=100, - gamma=0.9, batch_size=256, backtrack_iters=10, backtrack_coeff=0.8, train_critic_iters=80) -""" -full list of parameters for training ---------------------------------------- -env: learning environment -train_episodes: total number of episodes for training -test_episodes: total number of episodes for testing -max_steps: maximum number of steps for one episode -save_interval: time steps for saving -gamma: reward discount factor -mode: train or test -render: render each step -batch_size: update batch size -backtrack_iters: Maximum number of steps allowed in the backtracking line search -backtrack_coeff: How far back to step during backtracking line search -train_critic_iters: critic update iteration steps -""" - -model.learn(env, test_episodes=100, max_steps=200, mode='test', render=True) +from rlzoo.common.utils import set_seed +from rlzoo.algorithms.trpo.trpo import TRPO +from rlzoo.common.policy_networks import * +from rlzoo.common.value_networks import * +import gym + +""" load environment """ +env = gym.make('Pendulum-v0').unwrapped + +# reproducible +seed = 2 +set_seed(seed, env) + +""" build networks for the algorithm """ +name = 'TRPO' +hidden_dim = 64 +num_hidden_layer = 2 +critic = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer, name=name + '_value') + +actor = StochasticPolicyNetwork(env.observation_space, env.action_space, [hidden_dim] * num_hidden_layer, + output_activation=tf.nn.tanh, name=name + '_policy') +net_list = critic, actor + +critic_lr = 1e-3 +optimizers_list = [tf.optimizers.Adam(critic_lr)] + +""" create model """ +model = TRPO(net_list, optimizers_list, damping_coeff=0.1, cg_iters=10, delta=0.01) +""" +full list of arguments for the algorithm +---------------------------------------- +net_list: a list of networks (value and policy) used in the algorithm, from common functions or customization +optimizers_list: a list of optimizers for all networks and differentiable variables +damping_coeff: Artifact for numerical stability +cg_iters: Number of iterations of conjugate gradient to perform +delta: KL-divergence limit for TRPO update. 
+""" + +model.learn(env, mode='train', render=False, train_episodes=2000, max_steps=200, save_interval=100, + gamma=0.9, batch_size=256, backtrack_iters=10, backtrack_coeff=0.8, train_critic_iters=80) +""" +full list of parameters for training +--------------------------------------- +env: learning environment +train_episodes: total number of episodes for training +test_episodes: total number of episodes for testing +max_steps: maximum number of steps for one episode +save_interval: time steps for saving +gamma: reward discount factor +mode: train or test +render: render each step +batch_size: update batch size +backtrack_iters: Maximum number of steps allowed in the backtracking line search +backtrack_coeff: How far back to step during backtracking line search +train_critic_iters: critic update iteration steps +""" + +model.learn(env, test_episodes=100, max_steps=200, mode='test', render=True) diff --git a/rlzoo/algorithms/trpo/trpo.py b/rlzoo/algorithms/trpo/trpo.py old mode 100644 new mode 100755 diff --git a/rlzoo/common/__init__.py b/rlzoo/common/__init__.py old mode 100644 new mode 100755 diff --git a/rlzoo/common/basic_nets.py b/rlzoo/common/basic_nets.py old mode 100644 new mode 100755 index 4a9a272..d6b9510 --- a/rlzoo/common/basic_nets.py +++ b/rlzoo/common/basic_nets.py @@ -1,149 +1,149 @@ -"""Basic neural networks""" -import tensorflow as tf -import tensorlayer as tl -from tensorlayer.layers import Dense, Input -from gym import spaces -from collections import OrderedDict - - -def MLP(input_dim, hidden_dim_list, w_init=tf.initializers.Orthogonal(0.2), - activation=tf.nn.relu, *args, **kwargs): - """Multiple fully-connected layers for approximation - - :param input_dim: (int) size of input tensor - :param hidden_dim_list: (list[int]) a list of dimensions of hidden layers - :param w_init: (callable) initialization method for weights - :param activation: (callable) activation function of hidden layers - - Return: - input tensor, output tensor - """ - - l = inputs = Input([None, input_dim]) - for i in range(len(hidden_dim_list)): - l = Dense(n_units=hidden_dim_list[i], act=activation, W_init=w_init)(l) - outputs = l - - return inputs, outputs - - -def MLPModel(input_dim, hidden_dim_list, w_init=tf.initializers.Orthogonal(0.2), - activation=tf.nn.relu, *args, **kwargs): - """Multiple fully-connected layers for approximation - - :param input_dim: (int) size of input tensor - :param hidden_dim_list: (list[int]) a list of dimensions of hidden layers - :param w_init: (callable) initialization method for weights - :param activation: (callable) activation function of hidden layers - - Return: - input tensor, output tensor - """ - l = inputs = Input([None, input_dim], name='Input_Layer') - for i in range(len(hidden_dim_list)): - l = Dense(n_units=hidden_dim_list[i], act=activation, W_init=w_init, name='Hidden_Layer%d' % (i + 1))(l) - outputs = l - - return tl.models.Model(inputs=inputs, outputs=outputs) - - -def CNN(input_shape, conv_kwargs=None): - """Multiple convolutional layers for approximation - Default setting is equal to architecture used in DQN - - :param input_shape: (tuple[int]) (H, W, C) - :param conv_kwargs: (list[param]) list of conv parameters for tl.layers.Conv2d - - Return: - input tensor, output tensor - """ - if not conv_kwargs: - in_channels = input_shape[-1] - conv_kwargs = [ - { - 'in_channels': in_channels, 'n_filter': 32, 'act': tf.nn.relu, - 'filter_size': (8, 8), 'strides': (4, 4), 'padding': 'VALID', - 'W_init': tf.initializers.GlorotUniform() - }, - { - 
'in_channels': 32, 'n_filter': 64, 'act': tf.nn.relu, - 'filter_size': (4, 4), 'strides': (2, 2), 'padding': 'VALID', - 'W_init': tf.initializers.GlorotUniform() - }, - { - 'in_channels': 64, 'n_filter': 64, 'act': tf.nn.relu, - 'filter_size': (3, 3), 'strides': (1, 1), 'padding': 'VALID', - 'W_init': tf.initializers.GlorotUniform() - } - ] - l = inputs = tl.layers.Input((1,) + input_shape) - - for i, kwargs in enumerate(conv_kwargs): - # kwargs['name'] = kwargs.get('name', 'cnn_layer{}'.format(i + 1)) - l = tl.layers.Conv2d(**kwargs)(l) - outputs = tl.layers.Flatten()(l) - - return inputs, outputs - - -def CNNModel(input_shape, conv_kwargs=None): - """Multiple convolutional layers for approximation - Default setting is equal to architecture used in DQN - - :param input_shape: (tuple[int]) (H, W, C) - :param conv_kwargs: (list[param]) list of conv parameters for tl.layers.Conv2d - - Return: - tl.model.Model - """ - if not conv_kwargs: - in_channels = input_shape[-1] - conv_kwargs = [ - { - 'in_channels': in_channels, 'n_filter': 32, 'act': tf.nn.relu, - 'filter_size': (8, 8), 'strides': (4, 4), 'padding': 'VALID', - 'W_init': tf.initializers.GlorotUniform() - }, - { - 'in_channels': 32, 'n_filter': 64, 'act': tf.nn.relu, - 'filter_size': (4, 4), 'strides': (2, 2), 'padding': 'VALID', - 'W_init': tf.initializers.GlorotUniform() - }, - { - 'in_channels': 64, 'n_filter': 64, 'act': tf.nn.relu, - 'filter_size': (3, 3), 'strides': (1, 1), 'padding': 'VALID', - 'W_init': tf.initializers.GlorotUniform() - } - ] - - ni = tl.layers.Input((1,) + input_shape, name='CNN_Input') - hi = ni - - for i, kwargs in enumerate(conv_kwargs): - kwargs['name'] = kwargs.get('name', 'CNN_Layer{}'.format(i + 1)) - hi = tl.layers.Conv2d(**kwargs)(hi) - no = tl.layers.Flatten(name='Flatten_Layer')(hi) - - return tl.models.Model(inputs=ni, outputs=no) - - -def CreateInputLayer(state_space, conv_kwargs=None): - def CreateSingleInput(single_state_space): - single_state_shape = single_state_space.shape - # build structure - if len(single_state_shape) == 1: - l = inputs = Input((None,) + single_state_shape, name='input_layer') - else: - with tf.name_scope('CNN'): - inputs, l = CNN(single_state_shape, conv_kwargs=conv_kwargs) - return inputs, l, single_state_shape - - if isinstance(state_space, spaces.Dict): - input_dict, layer_dict, shape_dict = OrderedDict(), OrderedDict(), OrderedDict() - for k, v in state_space.spaces.items(): - input_dict[k], layer_dict[k], shape_dict[k] = CreateSingleInput(v) - return input_dict, layer_dict, shape_dict - if isinstance(state_space, spaces.Space): - return CreateSingleInput(state_space) - else: - raise ValueError('state space error') +"""Basic neural networks""" +import tensorflow as tf +import tensorlayer as tl +from tensorlayer.layers import Dense, Input +from gym import spaces +from collections import OrderedDict + + +def MLP(input_dim, hidden_dim_list, w_init=tf.initializers.Orthogonal(0.2), + activation=tf.nn.relu, *args, **kwargs): + """Multiple fully-connected layers for approximation + + :param input_dim: (int) size of input tensor + :param hidden_dim_list: (list[int]) a list of dimensions of hidden layers + :param w_init: (callable) initialization method for weights + :param activation: (callable) activation function of hidden layers + + Return: + input tensor, output tensor + """ + + l = inputs = Input([None, input_dim]) + for i in range(len(hidden_dim_list)): + l = Dense(n_units=hidden_dim_list[i], act=activation, W_init=w_init)(l) + outputs = l + + return inputs, outputs + + 
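# --- Editorial usage sketch (annotation, not part of the patch) ---------------------------
# Minimal example of wiring the MLP builder above into a TensorLayer model, mirroring what
# MLPModel does next. The input size, hidden sizes and the extra scalar head are illustrative
# assumptions; tf, tl and Dense refer to this file's module-level imports.
if __name__ == '__main__':
    ins, outs = MLP(input_dim=4, hidden_dim_list=[64, 64], activation=tf.nn.relu)
    value_head = Dense(n_units=1, W_init=tf.initializers.Orthogonal(0.2))(outs)  # scalar output head
    demo_net = tl.models.Model(inputs=ins, outputs=value_head)
    demo_net.eval()   # switch to inference mode (TensorLayer 2.x)
    print(demo_net)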
+def MLPModel(input_dim, hidden_dim_list, w_init=tf.initializers.Orthogonal(0.2), + activation=tf.nn.relu, *args, **kwargs): + """Multiple fully-connected layers for approximation + + :param input_dim: (int) size of input tensor + :param hidden_dim_list: (list[int]) a list of dimensions of hidden layers + :param w_init: (callable) initialization method for weights + :param activation: (callable) activation function of hidden layers + + Return: + input tensor, output tensor + """ + l = inputs = Input([None, input_dim], name='Input_Layer') + for i in range(len(hidden_dim_list)): + l = Dense(n_units=hidden_dim_list[i], act=activation, W_init=w_init, name='Hidden_Layer%d' % (i + 1))(l) + outputs = l + + return tl.models.Model(inputs=inputs, outputs=outputs) + + +def CNN(input_shape, conv_kwargs=None): + """Multiple convolutional layers for approximation + Default setting is equal to architecture used in DQN + + :param input_shape: (tuple[int]) (H, W, C) + :param conv_kwargs: (list[param]) list of conv parameters for tl.layers.Conv2d + + Return: + input tensor, output tensor + """ + if not conv_kwargs: + in_channels = input_shape[-1] + conv_kwargs = [ + { + 'in_channels': in_channels, 'n_filter': 32, 'act': tf.nn.relu, + 'filter_size': (8, 8), 'strides': (4, 4), 'padding': 'VALID', + 'W_init': tf.initializers.GlorotUniform() + }, + { + 'in_channels': 32, 'n_filter': 64, 'act': tf.nn.relu, + 'filter_size': (4, 4), 'strides': (2, 2), 'padding': 'VALID', + 'W_init': tf.initializers.GlorotUniform() + }, + { + 'in_channels': 64, 'n_filter': 64, 'act': tf.nn.relu, + 'filter_size': (3, 3), 'strides': (1, 1), 'padding': 'VALID', + 'W_init': tf.initializers.GlorotUniform() + } + ] + l = inputs = tl.layers.Input((1,) + input_shape) + + for i, kwargs in enumerate(conv_kwargs): + # kwargs['name'] = kwargs.get('name', 'cnn_layer{}'.format(i + 1)) + l = tl.layers.Conv2d(**kwargs)(l) + outputs = tl.layers.Flatten()(l) + + return inputs, outputs + + +def CNNModel(input_shape, conv_kwargs=None): + """Multiple convolutional layers for approximation + Default setting is equal to architecture used in DQN + + :param input_shape: (tuple[int]) (H, W, C) + :param conv_kwargs: (list[param]) list of conv parameters for tl.layers.Conv2d + + Return: + tl.model.Model + """ + if not conv_kwargs: + in_channels = input_shape[-1] + conv_kwargs = [ + { + 'in_channels': in_channels, 'n_filter': 32, 'act': tf.nn.relu, + 'filter_size': (8, 8), 'strides': (4, 4), 'padding': 'VALID', + 'W_init': tf.initializers.GlorotUniform() + }, + { + 'in_channels': 32, 'n_filter': 64, 'act': tf.nn.relu, + 'filter_size': (4, 4), 'strides': (2, 2), 'padding': 'VALID', + 'W_init': tf.initializers.GlorotUniform() + }, + { + 'in_channels': 64, 'n_filter': 64, 'act': tf.nn.relu, + 'filter_size': (3, 3), 'strides': (1, 1), 'padding': 'VALID', + 'W_init': tf.initializers.GlorotUniform() + } + ] + + ni = tl.layers.Input((1,) + input_shape, name='CNN_Input') + hi = ni + + for i, kwargs in enumerate(conv_kwargs): + kwargs['name'] = kwargs.get('name', 'CNN_Layer{}'.format(i + 1)) + hi = tl.layers.Conv2d(**kwargs)(hi) + no = tl.layers.Flatten(name='Flatten_Layer')(hi) + + return tl.models.Model(inputs=ni, outputs=no) + + +def CreateInputLayer(state_space, conv_kwargs=None): + def CreateSingleInput(single_state_space): + single_state_shape = single_state_space.shape + # build structure + if len(single_state_shape) == 1: + l = inputs = Input((None,) + single_state_shape, name='input_layer') + else: + with tf.name_scope('CNN'): + inputs, l = 
CNN(single_state_shape, conv_kwargs=conv_kwargs) + return inputs, l, single_state_shape + + if isinstance(state_space, spaces.Dict): + input_dict, layer_dict, shape_dict = OrderedDict(), OrderedDict(), OrderedDict() + for k, v in state_space.spaces.items(): + input_dict[k], layer_dict[k], shape_dict[k] = CreateSingleInput(v) + return input_dict, layer_dict, shape_dict + if isinstance(state_space, spaces.Space): + return CreateSingleInput(state_space) + else: + raise ValueError('state space error') diff --git a/rlzoo/common/buffer.py b/rlzoo/common/buffer.py old mode 100644 new mode 100755 index 6455a5d..88f7a7c --- a/rlzoo/common/buffer.py +++ b/rlzoo/common/buffer.py @@ -1,306 +1,306 @@ -""" -Functions for utilization. - -# Requirements -tensorflow==2.0.0a0 -tensorlayer==2.0.1 - -""" -import inspect -import operator -import random - -import numpy as np - - -class ReplayBuffer(object): - """A standard ring buffer for storing transitions and sampling for training""" - def __init__(self, capacity): - self.capacity = capacity # mamimum number of samples - self.buffer = [] - self.position = 0 # pointer - - def push(self, state, action, reward, next_state, done): - if len(self.buffer) < self.capacity: - self.buffer.append(None) - self.buffer[self.position] = (state, action, reward, next_state, done) - self.position = int((self.position + 1) % self.capacity) # as a ring buffer - - def sample(self, batch_size): - indexes = range(len(self)) - # sample with replacement - idxes = [random.choice(indexes) for _ in range(batch_size)] - return self._encode_sample(idxes) - - def _encode_sample(self, idxes): - states, actions, rewards, next_states, dones = [], [], [], [], [] - for i in idxes: - state, action, reward, next_state, done = self.buffer[i] - states.append(state) - actions.append(action) - rewards.append(reward) - next_states.append(next_state) - dones.append(done) - return ( - np.stack(states), - np.stack(actions), - np.stack(rewards), - np.stack(next_states), - np.stack(dones), - ) - - def __len__(self): - return len(self.buffer) - - -class SegmentTree(object): - def __init__(self, capacity, operation, neutral_element): - """Build a Segment Tree data structure. - - https://en.wikipedia.org/wiki/Segment_tree - - Can be used as regular array, but with two - important differences: - - a) setting item's value is slightly slower. - It is O(lg capacity) instead of O(1). - b) user has access to an efficient ( O(log segment size) ) - `reduce` operation which reduces `operation` over - a contiguous subsequence of items in the array. - - :param apacity: (int) - Total size of the array - must be a power of two. - :param operation: (lambda obj, obj -> obj) - and operation for combining elements (eg. sum, max) - must form a mathematical group together with the set of - possible values for array elements (i.e. be associative) - :param neutral_element: (obj) - neutral element for the operation above. eg. float('-inf') - for max and 0 for sum. - """ - assert capacity > 0 and capacity & (capacity - 1) == 0, \ - "capacity must be positive and a power of 2." 
- self._capacity = capacity - self._value = [neutral_element for _ in range(2 * capacity)] - self._operation = operation - - def _reduce_helper(self, start, end, node, node_start, node_end): - if start == node_start and end == node_end: - return self._value[node] - mid = (node_start + node_end) // 2 - if end <= mid: - return self._reduce_helper(start, end, 2 * node, node_start, mid) - else: - if mid + 1 <= start: - return self._reduce_helper(start, end, 2 * node + 1, mid + 1, node_end) - else: - return self._operation( - self._reduce_helper(start, mid, 2 * node, node_start, mid), - self._reduce_helper(mid + 1, end, 2 * node + 1, mid + 1, node_end) - ) - - def reduce(self, start=0, end=None): - """Returns result of applying `self.operation` - to a contiguous subsequence of the array. - - :param start: (int) beginning of the subsequence - :param end: (int) end of the subsequences - - Returns: - reduced: (obj) result of reducing self.operation over the specified range of array. - """ - if end is None: - end = self._capacity - if end < 0: - end += self._capacity - end -= 1 - return self._reduce_helper(start, end, 1, 0, self._capacity - 1) - - def __setitem__(self, idx, val): - # index of the leaf - idx += self._capacity - self._value[idx] = val - idx //= 2 - while idx >= 1: - self._value[idx] = self._operation(self._value[2 * idx], self._value[2 * idx + 1]) - idx //= 2 - - def __getitem__(self, idx): - assert 0 <= idx < self._capacity - return self._value[self._capacity + idx] - - -class SumSegmentTree(SegmentTree): - - def __init__(self, capacity): - super(SumSegmentTree, self).__init__(capacity=capacity, operation=operator.add, neutral_element=0.0) - - def sum(self, start=0, end=None): - """Returns arr[start] + ... + arr[end]""" - return super(SumSegmentTree, self).reduce(start, end) - - def find_prefixsum_idx(self, prefixsum): - """Find the highest index `i` in the array such that - sum(arr[0] + arr[1] + ... + arr[i - i]) <= prefixsum - - if array values are probabilities, this function - allows to sample indexes according to the discrete - probability efficiently. - - :param perfixsum: (float) - upperbound on the sum of array prefix - - Returns: - idx: (int) - highest index satisfying the prefixsum constraint - """ - assert 0 <= prefixsum <= self.sum() + 1e-5 - idx = 1 - while idx < self._capacity: # while non-leaf - if self._value[2 * idx] > prefixsum: - idx = 2 * idx - else: - prefixsum -= self._value[2 * idx] - idx = 2 * idx + 1 - return idx - self._capacity - - -class MinSegmentTree(SegmentTree): - - def __init__(self, capacity): - super(MinSegmentTree, self).__init__(capacity=capacity, operation=min, neutral_element=float('inf')) - - def min(self, start=0, end=None): - """Returns min(arr[start], ..., arr[end])""" - - return super(MinSegmentTree, self).reduce(start, end) - - -class PrioritizedReplayBuffer(ReplayBuffer): # is it succeed from the ReplayBuffer above? - def __init__(self, capacity, alpha, beta): - """Create Prioritized Replay buffer. - - :param capacity: (int) - Max number of transitions to store in the buffer. When the buffer - overflows the old memories are dropped. 
- :param alpha: (float) - how much prioritization is used - (0 - no prioritization, 1 - full prioritization) - - See Also: - ReplayBuffer.__init__ - """ - super(PrioritizedReplayBuffer, self).__init__(capacity) - assert alpha >= 0 - self._alpha = alpha - - it_capacity = 1 - while it_capacity < capacity: - it_capacity *= 2 - - self._it_sum = SumSegmentTree(it_capacity) - self._it_min = MinSegmentTree(it_capacity) - self._max_priority = 1.0 - self.beta = beta - - def push(self, *args): - """See ReplayBuffer.store_effect""" - idx = self.position - super().push(*args) - self._it_sum[idx] = self._max_priority ** self._alpha - self._it_min[idx] = self._max_priority ** self._alpha - - def _sample_proportional(self, batch_size): - res = [] - p_total = self._it_sum.sum(0, len(self.buffer) - 1) - every_range_len = p_total / batch_size - for i in range(batch_size): - mass = random.random() * every_range_len + i * every_range_len - idx = self._it_sum.find_prefixsum_idx(mass) - res.append(idx) - return res - - def sample(self, batch_size): - """Sample a batch of experiences""" - idxes = self._sample_proportional(batch_size) - - it_sum = self._it_sum.sum() - p_min = self._it_min.min() / it_sum - max_weight = (p_min * len(self.buffer))**(-self.beta) - - p_samples = np.asarray([self._it_sum[idx] for idx in idxes]) / it_sum - weights = (p_samples * len(self.buffer)) ** (-self.beta) / max_weight - encoded_sample = self._encode_sample(idxes) - return encoded_sample + (weights, idxes) - - def update_priorities(self, idxes, priorities): - """Update priorities of sampled transitions""" - assert len(idxes) == len(priorities) - for idx, priority in zip(idxes, priorities): - assert priority > 0 - assert 0 <= idx < len(self.buffer) - self._it_sum[idx] = priority ** self._alpha - self._it_min[idx] = priority ** self._alpha - - self._max_priority = max(self._max_priority, priority) - - -class HindsightReplayBuffer(ReplayBuffer): - """Hindsight Experience Replay - In this buffer, state is a tuple consists of (observation, goal) - """ - GOAL_FUTURE = 'future' - GOAL_EPISODE = 'episode' - GOAL_RANDOM = 'random' - - def __init__(self, capacity, hindsight_freq, goal_type, reward_func, done_func): - """ - :param hindsight_freq (int): How many hindsight transitions will be generated for each real transition - :param goal_type (str): The generatation method of hindsight goals. 
Should be HER_GOAL_* - :param reward_func (callable): goal (np.array) X next_state (np.array) -> reward (float) - :param done_func (callable): goal (np.array) X next_state (np.array) -> done_flag (bool) - """ - super().__init__(capacity) - self.hindsight_freq = hindsight_freq - self.goal_type = goal_type - self.reward_func = reward_func - self.done_func = done_func - - def _sample_goals(self, episode, t): - goals = [] - episode_len = len(episode) - for _ in range(self.hindsight_freq): - if self.goal_type == HindsightReplayBuffer.GOAL_FUTURE: - index = random.choice(range(t + 1, episode_len)) - source = episode - elif self.goal_type == HindsightReplayBuffer.GOAL_EPISODE: - index = random.choice(range(episode_len)) - source = episode - elif self.goal_type == HindsightReplayBuffer.GOAL_RANDOM: - index = random.choice(range(len(self))) - source = self.buffer - else: - raise ValueError("Invalid goal type %s" % self.goal_type) - goals.append(source[index][0][0]) # return the observation - return goals - - def push(self, *args, **kwargs): - if inspect.stack()[1][3] != 'push_episode': - raise ValueError("Please use `push_episode` methods in HER") - else: - super().push(*args, **kwargs) - - def push_episode(self, states, actions, rewards, next_states, dones): - episode = list(zip(states, actions, rewards, next_states, dones)) - episode_len = len(states) - for t, (state, action, reward, next_state, done) in enumerate(episode): - self.push(state, action, reward, next_state, done) - if self.goal_type == HindsightReplayBuffer.GOAL_FUTURE and t == episode_len - 1: - break - for goal in self._sample_goals(episode, t): - s = (state[0], goal) - a = action - r = self.reward_func(goal, next_state[0]) - s_ = (next_state[0], goal) - d = self.done_func(goal, next_state[0]) - self.push(s, a, r, s_, d) +""" +Functions for utilization. + +# Requirements +tensorflow==2.0.0a0 +tensorlayer==2.0.1 + +""" +import inspect +import operator +import random + +import numpy as np + + +class ReplayBuffer(object): + """A standard ring buffer for storing transitions and sampling for training""" + def __init__(self, capacity): + self.capacity = capacity # mamimum number of samples + self.buffer = [] + self.position = 0 # pointer + + def push(self, state, action, reward, next_state, done): + if len(self.buffer) < self.capacity: + self.buffer.append(None) + self.buffer[self.position] = (state, action, reward, next_state, done) + self.position = int((self.position + 1) % self.capacity) # as a ring buffer + + def sample(self, batch_size): + indexes = range(len(self)) + # sample with replacement + idxes = [random.choice(indexes) for _ in range(batch_size)] + return self._encode_sample(idxes) + + def _encode_sample(self, idxes): + states, actions, rewards, next_states, dones = [], [], [], [], [] + for i in idxes: + state, action, reward, next_state, done = self.buffer[i] + states.append(state) + actions.append(action) + rewards.append(reward) + next_states.append(next_state) + dones.append(done) + return ( + np.stack(states), + np.stack(actions), + np.stack(rewards), + np.stack(next_states), + np.stack(dones), + ) + + def __len__(self): + return len(self.buffer) + + +class SegmentTree(object): + def __init__(self, capacity, operation, neutral_element): + """Build a Segment Tree data structure. + + https://en.wikipedia.org/wiki/Segment_tree + + Can be used as regular array, but with two + important differences: + + a) setting item's value is slightly slower. + It is O(lg capacity) instead of O(1). 
+ b) user has access to an efficient ( O(log segment size) ) + `reduce` operation which reduces `operation` over + a contiguous subsequence of items in the array. + + :param apacity: (int) + Total size of the array - must be a power of two. + :param operation: (lambda obj, obj -> obj) + and operation for combining elements (eg. sum, max) + must form a mathematical group together with the set of + possible values for array elements (i.e. be associative) + :param neutral_element: (obj) + neutral element for the operation above. eg. float('-inf') + for max and 0 for sum. + """ + assert capacity > 0 and capacity & (capacity - 1) == 0, \ + "capacity must be positive and a power of 2." + self._capacity = capacity + self._value = [neutral_element for _ in range(2 * capacity)] + self._operation = operation + + def _reduce_helper(self, start, end, node, node_start, node_end): + if start == node_start and end == node_end: + return self._value[node] + mid = (node_start + node_end) // 2 + if end <= mid: + return self._reduce_helper(start, end, 2 * node, node_start, mid) + else: + if mid + 1 <= start: + return self._reduce_helper(start, end, 2 * node + 1, mid + 1, node_end) + else: + return self._operation( + self._reduce_helper(start, mid, 2 * node, node_start, mid), + self._reduce_helper(mid + 1, end, 2 * node + 1, mid + 1, node_end) + ) + + def reduce(self, start=0, end=None): + """Returns result of applying `self.operation` + to a contiguous subsequence of the array. + + :param start: (int) beginning of the subsequence + :param end: (int) end of the subsequences + + Returns: + reduced: (obj) result of reducing self.operation over the specified range of array. + """ + if end is None: + end = self._capacity + if end < 0: + end += self._capacity + end -= 1 + return self._reduce_helper(start, end, 1, 0, self._capacity - 1) + + def __setitem__(self, idx, val): + # index of the leaf + idx += self._capacity + self._value[idx] = val + idx //= 2 + while idx >= 1: + self._value[idx] = self._operation(self._value[2 * idx], self._value[2 * idx + 1]) + idx //= 2 + + def __getitem__(self, idx): + assert 0 <= idx < self._capacity + return self._value[self._capacity + idx] + + +class SumSegmentTree(SegmentTree): + + def __init__(self, capacity): + super(SumSegmentTree, self).__init__(capacity=capacity, operation=operator.add, neutral_element=0.0) + + def sum(self, start=0, end=None): + """Returns arr[start] + ... + arr[end]""" + return super(SumSegmentTree, self).reduce(start, end) + + def find_prefixsum_idx(self, prefixsum): + """Find the highest index `i` in the array such that + sum(arr[0] + arr[1] + ... + arr[i - i]) <= prefixsum + + if array values are probabilities, this function + allows to sample indexes according to the discrete + probability efficiently. 
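Example (editorial sketch, not part of the patch; the capacity and priorities below are
illustrative): with priorities stored in the tree, drawing a uniform mass in [0, sum)
selects each index with probability proportional to its value, which is how
PrioritizedReplayBuffer._sample_proportional uses this method further down (there with
stratified masses):

    tree = SumSegmentTree(capacity=4)
    for i, p in enumerate([0.1, 0.4, 0.3, 0.2]):
        tree[i] = p
    mass = random.random() * tree.sum()      # `random` is this file's module-level import
    idx = tree.find_prefixsum_idx(mass)      # idx == 1 on roughly 40% of draws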
+ + :param perfixsum: (float) + upperbound on the sum of array prefix + + Returns: + idx: (int) + highest index satisfying the prefixsum constraint + """ + assert 0 <= prefixsum <= self.sum() + 1e-5 + idx = 1 + while idx < self._capacity: # while non-leaf + if self._value[2 * idx] > prefixsum: + idx = 2 * idx + else: + prefixsum -= self._value[2 * idx] + idx = 2 * idx + 1 + return idx - self._capacity + + +class MinSegmentTree(SegmentTree): + + def __init__(self, capacity): + super(MinSegmentTree, self).__init__(capacity=capacity, operation=min, neutral_element=float('inf')) + + def min(self, start=0, end=None): + """Returns min(arr[start], ..., arr[end])""" + + return super(MinSegmentTree, self).reduce(start, end) + + +class PrioritizedReplayBuffer(ReplayBuffer): # is it succeed from the ReplayBuffer above? + def __init__(self, capacity, alpha, beta): + """Create Prioritized Replay buffer. + + :param capacity: (int) + Max number of transitions to store in the buffer. When the buffer + overflows the old memories are dropped. + :param alpha: (float) + how much prioritization is used + (0 - no prioritization, 1 - full prioritization) + + See Also: + ReplayBuffer.__init__ + """ + super(PrioritizedReplayBuffer, self).__init__(capacity) + assert alpha >= 0 + self._alpha = alpha + + it_capacity = 1 + while it_capacity < capacity: + it_capacity *= 2 + + self._it_sum = SumSegmentTree(it_capacity) + self._it_min = MinSegmentTree(it_capacity) + self._max_priority = 1.0 + self.beta = beta + + def push(self, *args): + """See ReplayBuffer.store_effect""" + idx = self.position + super().push(*args) + self._it_sum[idx] = self._max_priority ** self._alpha + self._it_min[idx] = self._max_priority ** self._alpha + + def _sample_proportional(self, batch_size): + res = [] + p_total = self._it_sum.sum(0, len(self.buffer) - 1) + every_range_len = p_total / batch_size + for i in range(batch_size): + mass = random.random() * every_range_len + i * every_range_len + idx = self._it_sum.find_prefixsum_idx(mass) + res.append(idx) + return res + + def sample(self, batch_size): + """Sample a batch of experiences""" + idxes = self._sample_proportional(batch_size) + + it_sum = self._it_sum.sum() + p_min = self._it_min.min() / it_sum + max_weight = (p_min * len(self.buffer))**(-self.beta) + + p_samples = np.asarray([self._it_sum[idx] for idx in idxes]) / it_sum + weights = (p_samples * len(self.buffer)) ** (-self.beta) / max_weight + encoded_sample = self._encode_sample(idxes) + return encoded_sample + (weights, idxes) + + def update_priorities(self, idxes, priorities): + """Update priorities of sampled transitions""" + assert len(idxes) == len(priorities) + for idx, priority in zip(idxes, priorities): + assert priority > 0 + assert 0 <= idx < len(self.buffer) + self._it_sum[idx] = priority ** self._alpha + self._it_min[idx] = priority ** self._alpha + + self._max_priority = max(self._max_priority, priority) + + +class HindsightReplayBuffer(ReplayBuffer): + """Hindsight Experience Replay + In this buffer, state is a tuple consists of (observation, goal) + """ + GOAL_FUTURE = 'future' + GOAL_EPISODE = 'episode' + GOAL_RANDOM = 'random' + + def __init__(self, capacity, hindsight_freq, goal_type, reward_func, done_func): + """ + :param hindsight_freq (int): How many hindsight transitions will be generated for each real transition + :param goal_type (str): The generatation method of hindsight goals. 
Should be HER_GOAL_* + :param reward_func (callable): goal (np.array) X next_state (np.array) -> reward (float) + :param done_func (callable): goal (np.array) X next_state (np.array) -> done_flag (bool) + """ + super().__init__(capacity) + self.hindsight_freq = hindsight_freq + self.goal_type = goal_type + self.reward_func = reward_func + self.done_func = done_func + + def _sample_goals(self, episode, t): + goals = [] + episode_len = len(episode) + for _ in range(self.hindsight_freq): + if self.goal_type == HindsightReplayBuffer.GOAL_FUTURE: + index = random.choice(range(t + 1, episode_len)) + source = episode + elif self.goal_type == HindsightReplayBuffer.GOAL_EPISODE: + index = random.choice(range(episode_len)) + source = episode + elif self.goal_type == HindsightReplayBuffer.GOAL_RANDOM: + index = random.choice(range(len(self))) + source = self.buffer + else: + raise ValueError("Invalid goal type %s" % self.goal_type) + goals.append(source[index][0][0]) # return the observation + return goals + + def push(self, *args, **kwargs): + if inspect.stack()[1][3] != 'push_episode': + raise ValueError("Please use `push_episode` methods in HER") + else: + super().push(*args, **kwargs) + + def push_episode(self, states, actions, rewards, next_states, dones): + episode = list(zip(states, actions, rewards, next_states, dones)) + episode_len = len(states) + for t, (state, action, reward, next_state, done) in enumerate(episode): + self.push(state, action, reward, next_state, done) + if self.goal_type == HindsightReplayBuffer.GOAL_FUTURE and t == episode_len - 1: + break + for goal in self._sample_goals(episode, t): + s = (state[0], goal) + a = action + r = self.reward_func(goal, next_state[0]) + s_ = (next_state[0], goal) + d = self.done_func(goal, next_state[0]) + self.push(s, a, r, s_, d) diff --git a/rlzoo/common/build_rlbench_env.py b/rlzoo/common/build_rlbench_env.py old mode 100644 new mode 100755 index 19f6c84..ac1aafb --- a/rlzoo/common/build_rlbench_env.py +++ b/rlzoo/common/build_rlbench_env.py @@ -1,162 +1,162 @@ -import sys -from collections import OrderedDict - -import numpy as np -from gym import spaces - -from pyrep.const import RenderMode -from pyrep.objects.dummy import Dummy -from pyrep.objects.vision_sensor import VisionSensor -from rlbench.environment import Environment -from rlbench.action_modes import ArmActionMode, ActionMode -from rlbench.observation_config import ObservationConfig -from rlbench.tasks import * - - -# Don't forget to add: export PYTHONPATH=PATH_TO_YOUR_LOCAL_RLBENCH_REPO - -# list of state types -state_types = ['left_shoulder_rgb', - 'left_shoulder_depth', - 'left_shoulder_mask', - 'right_shoulder_rgb', - 'right_shoulder_depth', - 'right_shoulder_mask', - 'wrist_rgb', - 'wrist_depth', - 'wrist_mask', - 'joint_velocities', - 'joint_velocities_noise', - 'joint_positions', - 'joint_positions_noise', - 'joint_forces', - 'joint_forces_noise', - 'gripper_pose', - 'gripper_touch_forces', - 'task_low_dim_state'] - - -class RLBenchEnv(): - """ make RLBench env to have same interfaces as openai.gym """ - - def __init__(self, task_name: str, state_type: list = 'state', ): - # render_mode=None): - """ - create RL Bench environment - :param task_name: task names can be found in rlbench.tasks - :param state_type: state or vision or a sub list of state_types list like ['left_shoulder_rgb'] - """ - if state_type == 'state' or state_type == 'vision' or isinstance(state_type, list): - self._state_type = state_type - else: - raise ValueError('State type value error, your value is 
{}'.format(state_type)) - # self._render_mode = render_mode - self._render_mode = None - obs_config = ObservationConfig() - obs_config.set_all(True) - action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) - self.env = Environment( - action_mode, obs_config=obs_config, headless=True) - self.env.launch() - try: - self.task = self.env.get_task(getattr(sys.modules[__name__], task_name)) - except: - raise NotImplementedError - - _, obs = self.task.reset() - self.spec = Spec(task_name) - - if self._state_type == 'state': - self.observation_space = spaces.Box( - low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape) - elif self._state_type == 'vision': - space_dict = OrderedDict() - space_dict["state"] = spaces.Box( - low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape) - for i in ["left_shoulder_rgb", "right_shoulder_rgb", "wrist_rgb", "front_rgb"]: - space_dict[i] = spaces.Box( - low=0, high=1, shape=getattr(obs, i).shape) - self.observation_space = spaces.Dict(space_dict) - else: - space_dict = OrderedDict() - for name in self._state_type: - if name.split('_')[-1] in ('rgb', 'depth', 'mask'): - space_dict[name] = spaces.Box( - low=0, high=1, shape=getattr(obs, name).shape) - else: - space_dict[name] = spaces.Box( - low=-np.inf, high=np.inf, - shape=getattr(obs, name).shape) - self.observation_space = spaces.Dict(space_dict) - self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(self.env.action_size,), dtype=np.float32) - - # if render_mode is not None: - # # Add the camera to the scene - # cam_placeholder = Dummy('cam_cinematic_placeholder') - # self._gym_cam = VisionSensor.create([640, 360]) - # self._gym_cam.set_pose(cam_placeholder.get_pose()) - # if render_mode == 'human': - # self._gym_cam.set_render_mode(RenderMode.OPENGL3_WINDOWED) - # else: - # self._gym_cam.set_render_mode(RenderMode.OPENGL3) - - def _extract_obs(self, obs): - if self._state_type == 'state': - return np.array(obs.get_low_dim_data(), np.float32) - elif self._state_type == 'vision': - return np.array([np.array(obs.get_low_dim_data(), np.float32), - np.array(obs.left_shoulder_rgb, np.float32), - np.array(obs.right_shoulder_rgb, np.float32), - np.array(obs.wrist_rgb, np.float32), - np.array(obs.front_rgb, np.float32), ]) - else: - result = ['tag'] - for name in self._state_type: - result.append(np.array(getattr(obs, name), np.float32)) - return np.delete(np.array(result,), 0, 0) - - def seed(self, seed_value): - # set seed as in openai.gym env - pass - - def render(self, mode='human'): - # todo render available at any time - if self._render_mode is None: - self._render_mode = mode - # Add the camera to the scene - cam_placeholder = Dummy('cam_cinematic_placeholder') - self._gym_cam = VisionSensor.create([640, 360]) - self._gym_cam.set_pose(cam_placeholder.get_pose()) - if mode == 'human': - self._gym_cam.set_render_mode(RenderMode.OPENGL3_WINDOWED) - else: - self._gym_cam.set_render_mode(RenderMode.OPENGL3) - - if mode != self._render_mode: - raise ValueError( - 'The render mode must match the render mode selected in the ' - 'constructor. \nI.e. if you want "human" render mode, then ' - 'create the env by calling: ' - 'gym.make("reach_target-state-v0", render_mode="human").\n' - 'You passed in mode %s, but expected %s.' 
% ( - mode, self._render_mode)) - if mode == 'rgb_array': - return self._gym_cam.capture_rgb() - - def reset(self): - descriptions, obs = self.task.reset() - return self._extract_obs(obs) - - def step(self, action): - obs, reward, terminate = self.task.step(action) - return self._extract_obs(obs), reward, terminate, None - - def close(self): - self.env.shutdown() - - -class Spec(): - """ a fake spec """ - - def __init__(self, id_name): - self.id = id_name +import sys +from collections import OrderedDict + +import numpy as np +from gym import spaces + +from pyrep.const import RenderMode +from pyrep.objects.dummy import Dummy +from pyrep.objects.vision_sensor import VisionSensor +from rlbench.environment import Environment +from rlbench.action_modes import ArmActionMode, ActionMode +from rlbench.observation_config import ObservationConfig +from rlbench.tasks import * + + +# Don't forget to add: export PYTHONPATH=PATH_TO_YOUR_LOCAL_RLBENCH_REPO + +# list of state types +state_types = ['left_shoulder_rgb', + 'left_shoulder_depth', + 'left_shoulder_mask', + 'right_shoulder_rgb', + 'right_shoulder_depth', + 'right_shoulder_mask', + 'wrist_rgb', + 'wrist_depth', + 'wrist_mask', + 'joint_velocities', + 'joint_velocities_noise', + 'joint_positions', + 'joint_positions_noise', + 'joint_forces', + 'joint_forces_noise', + 'gripper_pose', + 'gripper_touch_forces', + 'task_low_dim_state'] + + +class RLBenchEnv(): + """ make RLBench env to have same interfaces as openai.gym """ + + def __init__(self, task_name: str, state_type: list = 'state', ): + # render_mode=None): + """ + create RL Bench environment + :param task_name: task names can be found in rlbench.tasks + :param state_type: state or vision or a sub list of state_types list like ['left_shoulder_rgb'] + """ + if state_type == 'state' or state_type == 'vision' or isinstance(state_type, list): + self._state_type = state_type + else: + raise ValueError('State type value error, your value is {}'.format(state_type)) + # self._render_mode = render_mode + self._render_mode = None + obs_config = ObservationConfig() + obs_config.set_all(True) + action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) + self.env = Environment( + action_mode, obs_config=obs_config, headless=True) + self.env.launch() + try: + self.task = self.env.get_task(getattr(sys.modules[__name__], task_name)) + except: + raise NotImplementedError + + _, obs = self.task.reset() + self.spec = Spec(task_name) + + if self._state_type == 'state': + self.observation_space = spaces.Box( + low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape) + elif self._state_type == 'vision': + space_dict = OrderedDict() + space_dict["state"] = spaces.Box( + low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape) + for i in ["left_shoulder_rgb", "right_shoulder_rgb", "wrist_rgb", "front_rgb"]: + space_dict[i] = spaces.Box( + low=0, high=1, shape=getattr(obs, i).shape) + self.observation_space = spaces.Dict(space_dict) + else: + space_dict = OrderedDict() + for name in self._state_type: + if name.split('_')[-1] in ('rgb', 'depth', 'mask'): + space_dict[name] = spaces.Box( + low=0, high=1, shape=getattr(obs, name).shape) + else: + space_dict[name] = spaces.Box( + low=-np.inf, high=np.inf, + shape=getattr(obs, name).shape) + self.observation_space = spaces.Dict(space_dict) + self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(self.env.action_size,), dtype=np.float32) + + # if render_mode is not None: + # # Add the camera to the scene + # cam_placeholder = 
Dummy('cam_cinematic_placeholder') + # self._gym_cam = VisionSensor.create([640, 360]) + # self._gym_cam.set_pose(cam_placeholder.get_pose()) + # if render_mode == 'human': + # self._gym_cam.set_render_mode(RenderMode.OPENGL3_WINDOWED) + # else: + # self._gym_cam.set_render_mode(RenderMode.OPENGL3) + + def _extract_obs(self, obs): + if self._state_type == 'state': + return np.array(obs.get_low_dim_data(), np.float32) + elif self._state_type == 'vision': + return np.array([np.array(obs.get_low_dim_data(), np.float32), + np.array(obs.left_shoulder_rgb, np.float32), + np.array(obs.right_shoulder_rgb, np.float32), + np.array(obs.wrist_rgb, np.float32), + np.array(obs.front_rgb, np.float32), ]) + else: + result = ['tag'] + for name in self._state_type: + result.append(np.array(getattr(obs, name), np.float32)) + return np.delete(np.array(result,), 0, 0) + + def seed(self, seed_value): + # set seed as in openai.gym env + pass + + def render(self, mode='human'): + # todo render available at any time + if self._render_mode is None: + self._render_mode = mode + # Add the camera to the scene + cam_placeholder = Dummy('cam_cinematic_placeholder') + self._gym_cam = VisionSensor.create([640, 360]) + self._gym_cam.set_pose(cam_placeholder.get_pose()) + if mode == 'human': + self._gym_cam.set_render_mode(RenderMode.OPENGL3_WINDOWED) + else: + self._gym_cam.set_render_mode(RenderMode.OPENGL3) + + if mode != self._render_mode: + raise ValueError( + 'The render mode must match the render mode selected in the ' + 'constructor. \nI.e. if you want "human" render mode, then ' + 'create the env by calling: ' + 'gym.make("reach_target-state-v0", render_mode="human").\n' + 'You passed in mode %s, but expected %s.' % ( + mode, self._render_mode)) + if mode == 'rgb_array': + return self._gym_cam.capture_rgb() + + def reset(self): + descriptions, obs = self.task.reset() + return self._extract_obs(obs) + + def step(self, action): + obs, reward, terminate = self.task.step(action) + return self._extract_obs(obs), reward, terminate, None + + def close(self): + self.env.shutdown() + + +class Spec(): + """ a fake spec """ + + def __init__(self, id_name): + self.id = id_name diff --git a/rlzoo/common/distributions.py b/rlzoo/common/distributions.py old mode 100644 new mode 100755 index 8c95036..b191290 --- a/rlzoo/common/distributions.py +++ b/rlzoo/common/distributions.py @@ -1,207 +1,207 @@ -"""Definition of parametrized distributions. Adapted from openai/baselines""" -import copy -from functools import wraps - -import numpy as np -import tensorflow as tf -from gym import spaces - - -def expand_dims(func): - @wraps(func) - def wrapper(*args, **kwargs): - result = func(*args, **kwargs) - result = tf.expand_dims(result, axis=-1) - return result - - return wrapper - - -class Distribution(object): - """A particular probability distribution""" - - def set_param(self, *args, **kwargs): - raise NotImplementedError - - def sample(self, *args, **kwargs): - """Sampling from distribution. 
Allow explore parameters.""" - raise NotImplementedError - - def logp(self, x): - """Calculate log probability of a sample.""" - return -self.neglogp(x) - - def neglogp(self, x): - """Calculate negative log probability of a sample.""" - raise NotImplementedError - - def kl(self, *parameters): - """Calculate Kullback–Leibler divergence""" - raise NotImplementedError - - def entropy(self): - """Calculate the entropy of distribution.""" - raise NotImplementedError - - -class Categorical(Distribution): - """Creates a categorical distribution""" - - def __init__(self, ndim, logits=None): - """ - Args: - ndim (int): total number of actions - logits (tensor): logits variables - """ - self._ndim = ndim - self._logits = logits - self.param = self._logits - - @property - def ndim(self): - return copy.copy(self._ndim) - - def set_param(self, logits): - """ - Args: - logits (tensor): logits variables to set - """ - self._logits = logits - self.param = self._logits - - def get_param(self): - return copy.deepcopy(self._logits) - - def sample(self): - """ Sample actions from distribution, using the Gumbel-Softmax trick """ - u = np.array(np.random.uniform(0, 1, size=np.shape(self._logits)), dtype=np.float32) - res = tf.argmax(self._logits - tf.math.log(-tf.math.log(u)), axis=-1) - return res - - def greedy_sample(self): - """ Get actions greedily """ - _probs = tf.nn.softmax(self._logits) - return tf.argmax(_probs, axis=-1) - - def logp(self, x): - return -self.neglogp(x) - - @expand_dims - def neglogp(self, x): - x = np.array(x) - if np.any(x % 1): - raise ValueError('Input float actions in discrete action space') - x = tf.convert_to_tensor(x, tf.int32) - x = tf.one_hot(x, self._ndim, axis=-1) - return tf.nn.softmax_cross_entropy_with_logits(x, self._logits) - - @expand_dims - def kl(self, logits): - """ - Args: - logits (tensor): logits variables of another distribution - """ - a0 = self._logits - tf.reduce_max(self._logits, axis=-1, keepdims=True) - a1 = logits - tf.reduce_max(logits, axis=-1, keepdims=True) - ea0 = tf.exp(a0) - ea1 = tf.exp(a1) - z0 = tf.reduce_sum(ea0, axis=-1, keepdims=True) - z1 = tf.reduce_sum(ea1, axis=-1, keepdims=True) - p0 = ea0 / z0 - return tf.reduce_sum( - p0 * (a0 - tf.math.log(z0) - a1 + tf.math.log(z1)), axis=-1) - - @expand_dims - def entropy(self): - a0 = self._logits - tf.reduce_max(self._logits, axis=-1, keepdims=True) - ea0 = tf.exp(a0) - z0 = tf.reduce_sum(ea0, axis=-1, keepdims=True) - p0 = ea0 / z0 - return tf.reduce_sum(p0 * (tf.math.log(z0) - a0), axis=-1) - - -class DiagGaussian(Distribution): - """Creates a diagonal Gaussian distribution """ - - def __init__(self, ndim, mean_logstd=None): - """ - Args: - ndim (int): the dimenstion of actions - mean_logstd (tensor): mean and logstd stacked on the last axis - """ - self._ndim = ndim - self.mean = None - self.logstd = None - self.std = None - self.action_mean = None - self.action_scale = None - self.param = self.mean, self.logstd - if mean_logstd is not None: - self.set_param(mean_logstd) - - @property - def ndim(self): - return copy.copy(self._ndim) - - def set_param(self, mean_logstd): - """ - Args: - mean_logstd (tensor): mean and log std - """ - self.mean, self.logstd = mean_logstd - self.std = tf.math.exp(self.logstd) - self.param = self.mean, self.logstd - - def get_param(self): - """ Get parameters """ - return copy.deepcopy(self.mean), copy.deepcopy(self.logstd) - - def sample(self): - """ Get actions in deterministic or stochastic manner """ - return self.mean, self.std * np.random.normal(0, 1, 
np.shape(self.mean)) - - def greedy_sample(self): - """ Get actions greedily/deterministically """ - return self.mean - - def logp(self, x): - return -self.neglogp(x) - - @expand_dims - def neglogp(self, x): - # here we reverse the action normalization to make the computation of negative log probability correct - x = (x - self.action_mean)/self.action_scale - - return 0.5 * tf.reduce_sum(tf.square((x - self.mean) / self.std), axis=-1) \ - + 0.5 * np.log(2.0 * np.pi) * float(self._ndim) + tf.reduce_sum(self.logstd, axis=-1) - - @expand_dims - def kl(self, mean_logstd): - """ - Args: - mean_logstd (tensor): mean and logstd of another distribution - """ - mean, logstd = mean_logstd - return tf.reduce_sum( - logstd - self.logstd + - (tf.square(self.std) + tf.square(self.mean - mean)) - / (2.0 * tf.square(tf.math.exp(logstd))) - 0.5, axis=-1) - - @expand_dims - def entropy(self): - return tf.reduce_sum( - self.logstd + 0.5 * np.log(2.0 * np.pi * np.e), axis=-1) - - -def make_dist(ac_space): - """Get distribution based on action space - - :param ac_space: gym.spaces.Space - """ - if isinstance(ac_space, spaces.Discrete): - return Categorical(ac_space.n) - elif isinstance(ac_space, spaces.Box): - assert len(ac_space.shape) == 1 - return DiagGaussian(ac_space.shape[0]) - else: - raise NotImplementedError +"""Definition of parametrized distributions. Adapted from openai/baselines""" +import copy +from functools import wraps + +import numpy as np +import tensorflow as tf +from gym import spaces + + +def expand_dims(func): + @wraps(func) + def wrapper(*args, **kwargs): + result = func(*args, **kwargs) + result = tf.expand_dims(result, axis=-1) + return result + + return wrapper + + +class Distribution(object): + """A particular probability distribution""" + + def set_param(self, *args, **kwargs): + raise NotImplementedError + + def sample(self, *args, **kwargs): + """Sampling from distribution. 
Allow explore parameters.""" + raise NotImplementedError + + def logp(self, x): + """Calculate log probability of a sample.""" + return -self.neglogp(x) + + def neglogp(self, x): + """Calculate negative log probability of a sample.""" + raise NotImplementedError + + def kl(self, *parameters): + """Calculate Kullback–Leibler divergence""" + raise NotImplementedError + + def entropy(self): + """Calculate the entropy of distribution.""" + raise NotImplementedError + + +class Categorical(Distribution): + """Creates a categorical distribution""" + + def __init__(self, ndim, logits=None): + """ + Args: + ndim (int): total number of actions + logits (tensor): logits variables + """ + self._ndim = ndim + self._logits = logits + self.param = self._logits + + @property + def ndim(self): + return copy.copy(self._ndim) + + def set_param(self, logits): + """ + Args: + logits (tensor): logits variables to set + """ + self._logits = logits + self.param = self._logits + + def get_param(self): + return copy.deepcopy(self._logits) + + def sample(self): + """ Sample actions from distribution, using the Gumbel-Softmax trick """ + u = np.array(np.random.uniform(0, 1, size=np.shape(self._logits)), dtype=np.float32) + res = tf.argmax(self._logits - tf.math.log(-tf.math.log(u)), axis=-1) + return res + + def greedy_sample(self): + """ Get actions greedily """ + _probs = tf.nn.softmax(self._logits) + return tf.argmax(_probs, axis=-1) + + def logp(self, x): + return -self.neglogp(x) + + @expand_dims + def neglogp(self, x): + x = np.array(x) + if np.any(x % 1): + raise ValueError('Input float actions in discrete action space') + x = tf.convert_to_tensor(x, tf.int32) + x = tf.one_hot(x, self._ndim, axis=-1) + return tf.nn.softmax_cross_entropy_with_logits(x, self._logits) + + @expand_dims + def kl(self, logits): + """ + Args: + logits (tensor): logits variables of another distribution + """ + a0 = self._logits - tf.reduce_max(self._logits, axis=-1, keepdims=True) + a1 = logits - tf.reduce_max(logits, axis=-1, keepdims=True) + ea0 = tf.exp(a0) + ea1 = tf.exp(a1) + z0 = tf.reduce_sum(ea0, axis=-1, keepdims=True) + z1 = tf.reduce_sum(ea1, axis=-1, keepdims=True) + p0 = ea0 / z0 + return tf.reduce_sum( + p0 * (a0 - tf.math.log(z0) - a1 + tf.math.log(z1)), axis=-1) + + @expand_dims + def entropy(self): + a0 = self._logits - tf.reduce_max(self._logits, axis=-1, keepdims=True) + ea0 = tf.exp(a0) + z0 = tf.reduce_sum(ea0, axis=-1, keepdims=True) + p0 = ea0 / z0 + return tf.reduce_sum(p0 * (tf.math.log(z0) - a0), axis=-1) + + +class DiagGaussian(Distribution): + """Creates a diagonal Gaussian distribution """ + + def __init__(self, ndim, mean_logstd=None): + """ + Args: + ndim (int): the dimenstion of actions + mean_logstd (tensor): mean and logstd stacked on the last axis + """ + self._ndim = ndim + self.mean = None + self.logstd = None + self.std = None + self.action_mean = None + self.action_scale = None + self.param = self.mean, self.logstd + if mean_logstd is not None: + self.set_param(mean_logstd) + + @property + def ndim(self): + return copy.copy(self._ndim) + + def set_param(self, mean_logstd): + """ + Args: + mean_logstd (tensor): mean and log std + """ + self.mean, self.logstd = mean_logstd + self.std = tf.math.exp(self.logstd) + self.param = self.mean, self.logstd + + def get_param(self): + """ Get parameters """ + return copy.deepcopy(self.mean), copy.deepcopy(self.logstd) + + def sample(self): + """ Get actions in deterministic or stochastic manner """ + return self.mean, self.std * np.random.normal(0, 1, 
np.shape(self.mean)) + + def greedy_sample(self): + """ Get actions greedily/deterministically """ + return self.mean + + def logp(self, x): + return -self.neglogp(x) + + @expand_dims + def neglogp(self, x): + # here we reverse the action normalization to make the computation of negative log probability correct + x = (x - self.action_mean)/self.action_scale + + return 0.5 * tf.reduce_sum(tf.square((x - self.mean) / self.std), axis=-1) \ + + 0.5 * np.log(2.0 * np.pi) * float(self._ndim) + tf.reduce_sum(self.logstd, axis=-1) + + @expand_dims + def kl(self, mean_logstd): + """ + Args: + mean_logstd (tensor): mean and logstd of another distribution + """ + mean, logstd = mean_logstd + return tf.reduce_sum( + logstd - self.logstd + + (tf.square(self.std) + tf.square(self.mean - mean)) + / (2.0 * tf.square(tf.math.exp(logstd))) - 0.5, axis=-1) + + @expand_dims + def entropy(self): + return tf.reduce_sum( + self.logstd + 0.5 * np.log(2.0 * np.pi * np.e), axis=-1) + + +def make_dist(ac_space): + """Get distribution based on action space + + :param ac_space: gym.spaces.Space + """ + if isinstance(ac_space, spaces.Discrete): + return Categorical(ac_space.n) + elif isinstance(ac_space, spaces.Box): + assert len(ac_space.shape) == 1 + return DiagGaussian(ac_space.shape[0]) + else: + raise NotImplementedError diff --git a/rlzoo/common/env_list.py b/rlzoo/common/env_list.py old mode 100644 new mode 100755 index 6be577b..540c343 --- a/rlzoo/common/env_list.py +++ b/rlzoo/common/env_list.py @@ -1,902 +1,902 @@ -def get_envlist(env_type): - """ get list of env names wrt the type of env """ - try: - l = all_env_list[env_type] - except: - print('Env Type {:s} Not Found!'.format(env_type)) - return l - - -all_env_list = { - ## Gym - # Atari - 'atari': ['AirRaid-v0', - 'AirRaid-v4', - 'AirRaidDeterministic-v0', - 'AirRaidDeterministic-v4', - 'AirRaidNoFrameskip-v0', - 'AirRaidNoFrameskip-v4', - 'AirRaid-ram-v0', - 'AirRaid-ram-v4', - 'AirRaid-ramDeterministic-v0', - 'AirRaid-ramDeterministic-v4', - 'AirRaid-ramNoFrameskip-v0', - 'AirRaid-ramNoFrameskip-v4', - 'Alien-v0', - 'Alien-v4', - 'AlienDeterministic-v0', - 'AlienDeterministic-v4', - 'AlienNoFrameskip-v0', - 'AlienNoFrameskip-v4', - 'Alien-ram-v0', - 'Alien-ram-v4', - 'Alien-ramDeterministic-v0', - 'Alien-ramDeterministic-v4', - 'Alien-ramNoFrameskip-v0', - 'Alien-ramNoFrameskip-v4', - 'Amidar-v0', - 'Amidar-v4', - 'AmidarDeterministic-v0', - 'AmidarDeterministic-v4', - 'AmidarNoFrameskip-v0', - 'AmidarNoFrameskip-v4', - 'Amidar-ram-v0', - 'Amidar-ram-v4', - 'Amidar-ramDeterministic-v0', - 'Amidar-ramDeterministic-v4', - 'Amidar-ramNoFrameskip-v0', - 'Amidar-ramNoFrameskip-v4', - 'Assault-v0', - 'Assault-v4', - 'AssaultDeterministic-v0', - 'AssaultDeterministic-v4', - 'AssaultNoFrameskip-v0', - 'AssaultNoFrameskip-v4', - 'Assault-ram-v0', - 'Assault-ram-v4', - 'Assault-ramDeterministic-v0', - 'Assault-ramDeterministic-v4', - 'Assault-ramNoFrameskip-v0', - 'Assault-ramNoFrameskip-v4', - 'Asterix-v0', - 'Asterix-v4', - 'AsterixDeterministic-v0', - 'AsterixDeterministic-v4', - 'AsterixNoFrameskip-v0', - 'AsterixNoFrameskip-v4', - 'Asterix-ram-v0', - 'Asterix-ram-v4', - 'Asterix-ramDeterministic-v0', - 'Asterix-ramDeterministic-v4', - 'Asterix-ramNoFrameskip-v0', - 'Asterix-ramNoFrameskip-v4', - 'Asteroids-v0', - 'Asteroids-v4', - 'AsteroidsDeterministic-v0', - 'AsteroidsDeterministic-v4', - 'AsteroidsNoFrameskip-v0', - 'AsteroidsNoFrameskip-v4', - 'Asteroids-ram-v0', - 'Asteroids-ram-v4', - 'Asteroids-ramDeterministic-v0', - 'Asteroids-ramDeterministic-v4', 
- 'Asteroids-ramNoFrameskip-v0', - 'Asteroids-ramNoFrameskip-v4', - 'Atlantis-v0', - 'Atlantis-v4', - 'AtlantisDeterministic-v0', - 'AtlantisDeterministic-v4', - 'AtlantisNoFrameskip-v0', - 'AtlantisNoFrameskip-v4', - 'Atlantis-ram-v0', - 'Atlantis-ram-v4', - 'Atlantis-ramDeterministic-v0', - 'Atlantis-ramDeterministic-v4', - 'Atlantis-ramNoFrameskip-v0', - 'Atlantis-ramNoFrameskip-v4', - 'BankHeist-v0', - 'BankHeist-v4', - 'BankHeistDeterministic-v0', - 'BankHeistDeterministic-v4', - 'BankHeistNoFrameskip-v0', - 'BankHeistNoFrameskip-v4', - 'BankHeist-ram-v0', - 'BankHeist-ram-v4', - 'BankHeist-ramDeterministic-v0', - 'BankHeist-ramDeterministic-v4', - 'BankHeist-ramNoFrameskip-v0', - 'BankHeist-ramNoFrameskip-v4', - 'BattleZone-v0', - 'BattleZone-v4', - 'BattleZoneDeterministic-v0', - 'BattleZoneDeterministic-v4', - 'BattleZoneNoFrameskip-v0', - 'BattleZoneNoFrameskip-v4', - 'BattleZone-ram-v0', - 'BattleZone-ram-v4', - 'BattleZone-ramDeterministic-v0', - 'BattleZone-ramDeterministic-v4', - 'BattleZone-ramNoFrameskip-v0', - 'BattleZone-ramNoFrameskip-v4', - 'BeamRider-v0', - 'BeamRider-v4', - 'BeamRiderDeterministic-v0', - 'BeamRiderDeterministic-v4', - 'BeamRiderNoFrameskip-v0', - 'BeamRiderNoFrameskip-v4', - 'BeamRider-ram-v0', - 'BeamRider-ram-v4', - 'BeamRider-ramDeterministic-v0', - 'BeamRider-ramDeterministic-v4', - 'BeamRider-ramNoFrameskip-v0', - 'BeamRider-ramNoFrameskip-v4', - 'Berzerk-v0', - 'Berzerk-v4', - 'BerzerkDeterministic-v0', - 'BerzerkDeterministic-v4', - 'BerzerkNoFrameskip-v0', - 'BerzerkNoFrameskip-v4', - 'Berzerk-ram-v0', - 'Berzerk-ram-v4', - 'Berzerk-ramDeterministic-v0', - 'Berzerk-ramDeterministic-v4', - 'Berzerk-ramNoFrameskip-v0', - 'Berzerk-ramNoFrameskip-v4', - 'Bowling-v0', - 'Bowling-v4', - 'BowlingDeterministic-v0', - 'BowlingDeterministic-v4', - 'BowlingNoFrameskip-v0', - 'BowlingNoFrameskip-v4', - 'Bowling-ram-v0', - 'Bowling-ram-v4', - 'Bowling-ramDeterministic-v0', - 'Bowling-ramDeterministic-v4', - 'Bowling-ramNoFrameskip-v0', - 'Bowling-ramNoFrameskip-v4', - 'Boxing-v0', - 'Boxing-v4', - 'BoxingDeterministic-v0', - 'BoxingDeterministic-v4', - 'BoxingNoFrameskip-v0', - 'BoxingNoFrameskip-v4', - 'Boxing-ram-v0', - 'Boxing-ram-v4', - 'Boxing-ramDeterministic-v0', - 'Boxing-ramDeterministic-v4', - 'Boxing-ramNoFrameskip-v0', - 'Boxing-ramNoFrameskip-v4', - 'Breakout-v0', - 'Breakout-v4', - 'BreakoutDeterministic-v0', - 'BreakoutDeterministic-v4', - 'BreakoutNoFrameskip-v0', - 'BreakoutNoFrameskip-v4', - 'Breakout-ram-v0', - 'Breakout-ram-v4', - 'Breakout-ramDeterministic-v0', - 'Breakout-ramDeterministic-v4', - 'Breakout-ramNoFrameskip-v0', - 'Breakout-ramNoFrameskip-v4', - 'Carnival-v0', - 'Carnival-v4', - 'CarnivalDeterministic-v0', - 'CarnivalDeterministic-v4', - 'CarnivalNoFrameskip-v0', - 'CarnivalNoFrameskip-v4', - 'Carnival-ram-v0', - 'Carnival-ram-v4', - 'Carnival-ramDeterministic-v0', - 'Carnival-ramDeterministic-v4', - 'Carnival-ramNoFrameskip-v0', - 'Carnival-ramNoFrameskip-v4', - 'Centipede-v0', - 'Centipede-v4', - 'CentipedeDeterministic-v0', - 'CentipedeDeterministic-v4', - 'CentipedeNoFrameskip-v0', - 'CentipedeNoFrameskip-v4', - 'Centipede-ram-v0', - 'Centipede-ram-v4', - 'Centipede-ramDeterministic-v0', - 'Centipede-ramDeterministic-v4', - 'Centipede-ramNoFrameskip-v0', - 'Centipede-ramNoFrameskip-v4', - 'ChopperCommand-v0', - 'ChopperCommand-v4', - 'ChopperCommandDeterministic-v0', - 'ChopperCommandDeterministic-v4', - 'ChopperCommandNoFrameskip-v0', - 'ChopperCommandNoFrameskip-v4', - 'ChopperCommand-ram-v0', - 
'ChopperCommand-ram-v4', - 'ChopperCommand-ramDeterministic-v0', - 'ChopperCommand-ramDeterministic-v4', - 'ChopperCommand-ramNoFrameskip-v0', - 'ChopperCommand-ramNoFrameskip-v4', - 'CrazyClimber-v0', - 'CrazyClimber-v4', - 'CrazyClimberDeterministic-v0', - 'CrazyClimberDeterministic-v4', - 'CrazyClimberNoFrameskip-v0', - 'CrazyClimberNoFrameskip-v4', - 'CrazyClimber-ram-v0', - 'CrazyClimber-ram-v4', - 'CrazyClimber-ramDeterministic-v0', - 'CrazyClimber-ramDeterministic-v4', - 'CrazyClimber-ramNoFrameskip-v0', - 'CrazyClimber-ramNoFrameskip-v4', - 'DemonAttack-v0', - 'DemonAttack-v4', - 'DemonAttackDeterministic-v0', - 'DemonAttackDeterministic-v4', - 'DemonAttackNoFrameskip-v0', - 'DemonAttackNoFrameskip-v4', - 'DemonAttack-ram-v0', - 'DemonAttack-ram-v4', - 'DemonAttack-ramDeterministic-v0', - 'DemonAttack-ramDeterministic-v4', - 'DemonAttack-ramNoFrameskip-v0', - 'DemonAttack-ramNoFrameskip-v4', - 'DoubleDunk-v0', - 'DoubleDunk-v4', - 'DoubleDunkDeterministic-v0', - 'DoubleDunkDeterministic-v4', - 'DoubleDunkNoFrameskip-v0', - 'DoubleDunkNoFrameskip-v4', - 'DoubleDunk-ram-v0', - 'DoubleDunk-ram-v4', - 'DoubleDunk-ramDeterministic-v0', - 'DoubleDunk-ramDeterministic-v4', - 'DoubleDunk-ramNoFrameskip-v0', - 'DoubleDunk-ramNoFrameskip-v4', - 'ElevatorAction-v0', - 'ElevatorAction-v4', - 'ElevatorActionDeterministic-v0', - 'ElevatorActionDeterministic-v4', - 'ElevatorActionNoFrameskip-v0', - 'ElevatorActionNoFrameskip-v4', - 'ElevatorAction-ram-v0', - 'ElevatorAction-ram-v4', - 'ElevatorAction-ramDeterministic-v0', - 'ElevatorAction-ramDeterministic-v4', - 'ElevatorAction-ramNoFrameskip-v0', - 'ElevatorAction-ramNoFrameskip-v4', - 'Enduro-v0', - 'Enduro-v4', - 'EnduroDeterministic-v0', - 'EnduroDeterministic-v4', - 'EnduroNoFrameskip-v0', - 'EnduroNoFrameskip-v4', - 'Enduro-ram-v0', - 'Enduro-ram-v4', - 'Enduro-ramDeterministic-v0', - 'Enduro-ramDeterministic-v4', - 'Enduro-ramNoFrameskip-v0', - 'Enduro-ramNoFrameskip-v4', - 'FishingDerby-v0', - 'FishingDerby-v4', - 'FishingDerbyDeterministic-v0', - 'FishingDerbyDeterministic-v4', - 'FishingDerbyNoFrameskip-v0', - 'FishingDerbyNoFrameskip-v4', - 'FishingDerby-ram-v0', - 'FishingDerby-ram-v4', - 'FishingDerby-ramDeterministic-v0', - 'FishingDerby-ramDeterministic-v4', - 'FishingDerby-ramNoFrameskip-v0', - 'FishingDerby-ramNoFrameskip-v4', - 'Freeway-v0', - 'Freeway-v4', - 'FreewayDeterministic-v0', - 'FreewayDeterministic-v4', - 'FreewayNoFrameskip-v0', - 'FreewayNoFrameskip-v4', - 'Freeway-ram-v0', - 'Freeway-ram-v4', - 'Freeway-ramDeterministic-v0', - 'Freeway-ramDeterministic-v4', - 'Freeway-ramNoFrameskip-v0', - 'Freeway-ramNoFrameskip-v4', - 'Frostbite-v0', - 'Frostbite-v4', - 'FrostbiteDeterministic-v0', - 'FrostbiteDeterministic-v4', - 'FrostbiteNoFrameskip-v0', - 'FrostbiteNoFrameskip-v4', - 'Frostbite-ram-v0', - 'Frostbite-ram-v4', - 'Frostbite-ramDeterministic-v0', - 'Frostbite-ramDeterministic-v4', - 'Frostbite-ramNoFrameskip-v0', - 'Frostbite-ramNoFrameskip-v4', - 'Gopher-v0', - 'Gopher-v4', - 'GopherDeterministic-v0', - 'GopherDeterministic-v4', - 'GopherNoFrameskip-v0', - 'GopherNoFrameskip-v4', - 'Gopher-ram-v0', - 'Gopher-ram-v4', - 'Gopher-ramDeterministic-v0', - 'Gopher-ramDeterministic-v4', - 'Gopher-ramNoFrameskip-v0', - 'Gopher-ramNoFrameskip-v4', - 'Gravitar-v0', - 'Gravitar-v4', - 'GravitarDeterministic-v0', - 'GravitarDeterministic-v4', - 'GravitarNoFrameskip-v0', - 'GravitarNoFrameskip-v4', - 'Gravitar-ram-v0', - 'Gravitar-ram-v4', - 'Gravitar-ramDeterministic-v0', - 'Gravitar-ramDeterministic-v4', - 
'Gravitar-ramNoFrameskip-v0', - 'Gravitar-ramNoFrameskip-v4', - 'Hero-v0', - 'Hero-v4', - 'HeroDeterministic-v0', - 'HeroDeterministic-v4', - 'HeroNoFrameskip-v0', - 'HeroNoFrameskip-v4', - 'Hero-ram-v0', - 'Hero-ram-v4', - 'Hero-ramDeterministic-v0', - 'Hero-ramDeterministic-v4', - 'Hero-ramNoFrameskip-v0', - 'Hero-ramNoFrameskip-v4', - 'IceHockey-v0', - 'IceHockey-v4', - 'IceHockeyDeterministic-v0', - 'IceHockeyDeterministic-v4', - 'IceHockeyNoFrameskip-v0', - 'IceHockeyNoFrameskip-v4', - 'IceHockey-ram-v0', - 'IceHockey-ram-v4', - 'IceHockey-ramDeterministic-v0', - 'IceHockey-ramDeterministic-v4', - 'IceHockey-ramNoFrameskip-v0', - 'IceHockey-ramNoFrameskip-v4', - 'Jamesbond-v0', - 'Jamesbond-v4', - 'JamesbondDeterministic-v0', - 'JamesbondDeterministic-v4', - 'JamesbondNoFrameskip-v0', - 'JamesbondNoFrameskip-v4', - 'Jamesbond-ram-v0', - 'Jamesbond-ram-v4', - 'Jamesbond-ramDeterministic-v0', - 'Jamesbond-ramDeterministic-v4', - 'Jamesbond-ramNoFrameskip-v0', - 'Jamesbond-ramNoFrameskip-v4', - 'JourneyEscape-v0', - 'JourneyEscape-v4', - 'JourneyEscapeDeterministic-v0', - 'JourneyEscapeDeterministic-v4', - 'JourneyEscapeNoFrameskip-v0', - 'JourneyEscapeNoFrameskip-v4', - 'JourneyEscape-ram-v0', - 'JourneyEscape-ram-v4', - 'JourneyEscape-ramDeterministic-v0', - 'JourneyEscape-ramDeterministic-v4', - 'JourneyEscape-ramNoFrameskip-v0', - 'JourneyEscape-ramNoFrameskip-v4', - 'Kangaroo-v0', - 'Kangaroo-v4', - 'KangarooDeterministic-v0', - 'KangarooDeterministic-v4', - 'KangarooNoFrameskip-v0', - 'KangarooNoFrameskip-v4', - 'Kangaroo-ram-v0', - 'Kangaroo-ram-v4', - 'Kangaroo-ramDeterministic-v0', - 'Kangaroo-ramDeterministic-v4', - 'Kangaroo-ramNoFrameskip-v0', - 'Kangaroo-ramNoFrameskip-v4', - 'Krull-v0', - 'Krull-v4', - 'KrullDeterministic-v0', - 'KrullDeterministic-v4', - 'KrullNoFrameskip-v0', - 'KrullNoFrameskip-v4', - 'Krull-ram-v0', - 'Krull-ram-v4', - 'Krull-ramDeterministic-v0', - 'Krull-ramDeterministic-v4', - 'Krull-ramNoFrameskip-v0', - 'Krull-ramNoFrameskip-v4', - 'KungFuMaster-v0', - 'KungFuMaster-v4', - 'KungFuMasterDeterministic-v0', - 'KungFuMasterDeterministic-v4', - 'KungFuMasterNoFrameskip-v0', - 'KungFuMasterNoFrameskip-v4', - 'KungFuMaster-ram-v0', - 'KungFuMaster-ram-v4', - 'KungFuMaster-ramDeterministic-v0', - 'KungFuMaster-ramDeterministic-v4', - 'KungFuMaster-ramNoFrameskip-v0', - 'KungFuMaster-ramNoFrameskip-v4', - 'MontezumaRevenge-v0', - 'MontezumaRevenge-v4', - 'MontezumaRevengeDeterministic-v0', - 'MontezumaRevengeDeterministic-v4', - 'MontezumaRevengeNoFrameskip-v0', - 'MontezumaRevengeNoFrameskip-v4', - 'MontezumaRevenge-ram-v0', - 'MontezumaRevenge-ram-v4', - 'MontezumaRevenge-ramDeterministic-v0', - 'MontezumaRevenge-ramDeterministic-v4', - 'MontezumaRevenge-ramNoFrameskip-v0', - 'MontezumaRevenge-ramNoFrameskip-v4', - 'MsPacman-v0', - 'MsPacman-v4', - 'MsPacmanDeterministic-v0', - 'MsPacmanDeterministic-v4', - 'MsPacmanNoFrameskip-v0', - 'MsPacmanNoFrameskip-v4', - 'MsPacman-ram-v0', - 'MsPacman-ram-v4', - 'MsPacman-ramDeterministic-v0', - 'MsPacman-ramDeterministic-v4', - 'MsPacman-ramNoFrameskip-v0', - 'MsPacman-ramNoFrameskip-v4', - 'NameThisGame-v0', - 'NameThisGame-v4', - 'NameThisGameDeterministic-v0', - 'NameThisGameDeterministic-v4', - 'NameThisGameNoFrameskip-v0', - 'NameThisGameNoFrameskip-v4', - 'NameThisGame-ram-v0', - 'NameThisGame-ram-v4', - 'NameThisGame-ramDeterministic-v0', - 'NameThisGame-ramDeterministic-v4', - 'NameThisGame-ramNoFrameskip-v0', - 'NameThisGame-ramNoFrameskip-v4', - 'Phoenix-v0', - 'Phoenix-v4', - 'PhoenixDeterministic-v0', 
- 'PhoenixDeterministic-v4', - 'PhoenixNoFrameskip-v0', - 'PhoenixNoFrameskip-v4', - 'Phoenix-ram-v0', - 'Phoenix-ram-v4', - 'Phoenix-ramDeterministic-v0', - 'Phoenix-ramDeterministic-v4', - 'Phoenix-ramNoFrameskip-v0', - 'Phoenix-ramNoFrameskip-v4', - 'Pitfall-v0', - 'Pitfall-v4', - 'PitfallDeterministic-v0', - 'PitfallDeterministic-v4', - 'PitfallNoFrameskip-v0', - 'PitfallNoFrameskip-v4', - 'Pitfall-ram-v0', - 'Pitfall-ram-v4', - 'Pitfall-ramDeterministic-v0', - 'Pitfall-ramDeterministic-v4', - 'Pitfall-ramNoFrameskip-v0', - 'Pitfall-ramNoFrameskip-v4', - 'Pong-v0', - 'Pong-v4', - 'PongDeterministic-v0', - 'PongDeterministic-v4', - 'PongNoFrameskip-v0', - 'PongNoFrameskip-v4', - 'Pong-ram-v0', - 'Pong-ram-v4', - 'Pong-ramDeterministic-v0', - 'Pong-ramDeterministic-v4', - 'Pong-ramNoFrameskip-v0', - 'Pong-ramNoFrameskip-v4', - 'Pooyan-v0', - 'Pooyan-v4', - 'PooyanDeterministic-v0', - 'PooyanDeterministic-v4', - 'PooyanNoFrameskip-v0', - 'PooyanNoFrameskip-v4', - 'Pooyan-ram-v0', - 'Pooyan-ram-v4', - 'Pooyan-ramDeterministic-v0', - 'Pooyan-ramDeterministic-v4', - 'Pooyan-ramNoFrameskip-v0', - 'Pooyan-ramNoFrameskip-v4', - 'PrivateEye-v0', - 'PrivateEye-v4', - 'PrivateEyeDeterministic-v0', - 'PrivateEyeDeterministic-v4', - 'PrivateEyeNoFrameskip-v0', - 'PrivateEyeNoFrameskip-v4', - 'PrivateEye-ram-v0', - 'PrivateEye-ram-v4', - 'PrivateEye-ramDeterministic-v0', - 'PrivateEye-ramDeterministic-v4', - 'PrivateEye-ramNoFrameskip-v0', - 'PrivateEye-ramNoFrameskip-v4', - 'Qbert-v0', - 'Qbert-v4', - 'QbertDeterministic-v0', - 'QbertDeterministic-v4', - 'QbertNoFrameskip-v0', - 'QbertNoFrameskip-v4', - 'Qbert-ram-v0', - 'Qbert-ram-v4', - 'Qbert-ramDeterministic-v0', - 'Qbert-ramDeterministic-v4', - 'Qbert-ramNoFrameskip-v0', - 'Qbert-ramNoFrameskip-v4', - 'Riverraid-v0', - 'Riverraid-v4', - 'RiverraidDeterministic-v0', - 'RiverraidDeterministic-v4', - 'RiverraidNoFrameskip-v0', - 'RiverraidNoFrameskip-v4', - 'Riverraid-ram-v0', - 'Riverraid-ram-v4', - 'Riverraid-ramDeterministic-v0', - 'Riverraid-ramDeterministic-v4', - 'Riverraid-ramNoFrameskip-v0', - 'Riverraid-ramNoFrameskip-v4', - 'RoadRunner-v0', - 'RoadRunner-v4', - 'RoadRunnerDeterministic-v0', - 'RoadRunnerDeterministic-v4', - 'RoadRunnerNoFrameskip-v0', - 'RoadRunnerNoFrameskip-v4', - 'RoadRunner-ram-v0', - 'RoadRunner-ram-v4', - 'RoadRunner-ramDeterministic-v0', - 'RoadRunner-ramDeterministic-v4', - 'RoadRunner-ramNoFrameskip-v0', - 'RoadRunner-ramNoFrameskip-v4', - 'Robotank-v0', - 'Robotank-v4', - 'RobotankDeterministic-v0', - 'RobotankDeterministic-v4', - 'RobotankNoFrameskip-v0', - 'RobotankNoFrameskip-v4', - 'Robotank-ram-v0', - 'Robotank-ram-v4', - 'Robotank-ramDeterministic-v0', - 'Robotank-ramDeterministic-v4', - 'Robotank-ramNoFrameskip-v0', - 'Robotank-ramNoFrameskip-v4', - 'Seaquest-v0', - 'Seaquest-v4', - 'SeaquestDeterministic-v0', - 'SeaquestDeterministic-v4', - 'SeaquestNoFrameskip-v0', - 'SeaquestNoFrameskip-v4', - 'Seaquest-ram-v0', - 'Seaquest-ram-v4', - 'Seaquest-ramDeterministic-v0', - 'Seaquest-ramDeterministic-v4', - 'Seaquest-ramNoFrameskip-v0', - 'Seaquest-ramNoFrameskip-v4', - 'Skiing-v0', - 'Skiing-v4', - 'SkiingDeterministic-v0', - 'SkiingDeterministic-v4', - 'SkiingNoFrameskip-v0', - 'SkiingNoFrameskip-v4', - 'Skiing-ram-v0', - 'Skiing-ram-v4', - 'Skiing-ramDeterministic-v0', - 'Skiing-ramDeterministic-v4', - 'Skiing-ramNoFrameskip-v0', - 'Skiing-ramNoFrameskip-v4', - 'Solaris-v0', - 'Solaris-v4', - 'SolarisDeterministic-v0', - 'SolarisDeterministic-v4', - 'SolarisNoFrameskip-v0', - 'SolarisNoFrameskip-v4', - 
'Solaris-ram-v0', - 'Solaris-ram-v4', - 'Solaris-ramDeterministic-v0', - 'Solaris-ramDeterministic-v4', - 'Solaris-ramNoFrameskip-v0', - 'Solaris-ramNoFrameskip-v4', - 'SpaceInvaders-v0', - 'SpaceInvaders-v4', - 'SpaceInvadersDeterministic-v0', - 'SpaceInvadersDeterministic-v4', - 'SpaceInvadersNoFrameskip-v0', - 'SpaceInvadersNoFrameskip-v4', - 'SpaceInvaders-ram-v0', - 'SpaceInvaders-ram-v4', - 'SpaceInvaders-ramDeterministic-v0', - 'SpaceInvaders-ramDeterministic-v4', - 'SpaceInvaders-ramNoFrameskip-v0', - 'SpaceInvaders-ramNoFrameskip-v4', - 'StarGunner-v0', - 'StarGunner-v4', - 'StarGunnerDeterministic-v0', - 'StarGunnerDeterministic-v4', - 'StarGunnerNoFrameskip-v0', - 'StarGunnerNoFrameskip-v4', - 'StarGunner-ram-v0', - 'StarGunner-ram-v4', - 'StarGunner-ramDeterministic-v0', - 'StarGunner-ramDeterministic-v4', - 'StarGunner-ramNoFrameskip-v0', - 'StarGunner-ramNoFrameskip-v4', - 'Tennis-v0', - 'Tennis-v4', - 'TennisDeterministic-v0', - 'TennisDeterministic-v4', - 'TennisNoFrameskip-v0', - 'TennisNoFrameskip-v4', - 'Tennis-ram-v0', - 'Tennis-ram-v4', - 'Tennis-ramDeterministic-v0', - 'Tennis-ramDeterministic-v4', - 'Tennis-ramNoFrameskip-v0', - 'Tennis-ramNoFrameskip-v4', - 'TimePilot-v0', - 'TimePilot-v4', - 'TimePilotDeterministic-v0', - 'TimePilotDeterministic-v4', - 'TimePilotNoFrameskip-v0', - 'TimePilotNoFrameskip-v4', - 'TimePilot-ram-v0', - 'TimePilot-ram-v4', - 'TimePilot-ramDeterministic-v0', - 'TimePilot-ramDeterministic-v4', - 'TimePilot-ramNoFrameskip-v0', - 'TimePilot-ramNoFrameskip-v4', - 'Tutankham-v0', - 'Tutankham-v4', - 'TutankhamDeterministic-v0', - 'TutankhamDeterministic-v4', - 'TutankhamNoFrameskip-v0', - 'TutankhamNoFrameskip-v4', - 'Tutankham-ram-v0', - 'Tutankham-ram-v4', - 'Tutankham-ramDeterministic-v0', - 'Tutankham-ramDeterministic-v4', - 'Tutankham-ramNoFrameskip-v0', - 'Tutankham-ramNoFrameskip-v4', - 'UpNDown-v0', - 'UpNDown-v4', - 'UpNDownDeterministic-v0', - 'UpNDownDeterministic-v4', - 'UpNDownNoFrameskip-v0', - 'UpNDownNoFrameskip-v4', - 'UpNDown-ram-v0', - 'UpNDown-ram-v4', - 'UpNDown-ramDeterministic-v0', - 'UpNDown-ramDeterministic-v4', - 'UpNDown-ramNoFrameskip-v0', - 'UpNDown-ramNoFrameskip-v4', - 'Venture-v0', - 'Venture-v4', - 'VentureDeterministic-v0', - 'VentureDeterministic-v4', - 'VentureNoFrameskip-v0', - 'VentureNoFrameskip-v4', - 'Venture-ram-v0', - 'Venture-ram-v4', - 'Venture-ramDeterministic-v0', - 'Venture-ramDeterministic-v4', - 'Venture-ramNoFrameskip-v0', - 'Venture-ramNoFrameskip-v4', - 'VideoPinball-v0', - 'VideoPinball-v4', - 'VideoPinballDeterministic-v0', - 'VideoPinballDeterministic-v4', - 'VideoPinballNoFrameskip-v0', - 'VideoPinballNoFrameskip-v4', - 'VideoPinball-ram-v0', - 'VideoPinball-ram-v4', - 'VideoPinball-ramDeterministic-v0', - 'VideoPinball-ramDeterministic-v4', - 'VideoPinball-ramNoFrameskip-v0', - 'VideoPinball-ramNoFrameskip-v4', - 'WizardOfWor-v0', - 'WizardOfWor-v4', - 'WizardOfWorDeterministic-v0', - 'WizardOfWorDeterministic-v4', - 'WizardOfWorNoFrameskip-v0', - 'WizardOfWorNoFrameskip-v4', - 'WizardOfWor-ram-v0', - 'WizardOfWor-ram-v4', - 'WizardOfWor-ramDeterministic-v0', - 'WizardOfWor-ramDeterministic-v4', - 'WizardOfWor-ramNoFrameskip-v0', - 'WizardOfWor-ramNoFrameskip-v4', - 'YarsRevenge-v0', - 'YarsRevenge-v4', - 'YarsRevengeDeterministic-v0', - 'YarsRevengeDeterministic-v4', - 'YarsRevengeNoFrameskip-v0', - 'YarsRevengeNoFrameskip-v4', - 'YarsRevenge-ram-v0', - 'YarsRevenge-ram-v4', - 'YarsRevenge-ramDeterministic-v0', - 'YarsRevenge-ramDeterministic-v4', - 'YarsRevenge-ramNoFrameskip-v0', - 
'YarsRevenge-ramNoFrameskip-v4', - 'Zaxxon-v0', - 'Zaxxon-v4', - 'ZaxxonDeterministic-v0', - 'ZaxxonDeterministic-v4', - 'ZaxxonNoFrameskip-v0', - 'ZaxxonNoFrameskip-v4', - 'Zaxxon-ram-v0', - 'Zaxxon-ram-v4', - 'Zaxxon-ramDeterministic-v0', - 'Zaxxon-ramDeterministic-v4', - 'Zaxxon-ramNoFrameskip-v0', - 'Zaxxon-ramNoFrameskip-v4'], - - # Classic control - 'classic_control': [ - 'Acrobot-v1', - 'CartPole-v1', - 'CartPole-v0', - 'MountainCar-v0', - 'MountainCarContinuous-v0', - 'Pendulum-v0' - ], - - # Box2D - 'box2d': [ - 'BipedalWalker-v2', - 'BipedalWalkerHardcore-v2', - 'CarRacing-v0', - 'LunarLander-v2', - 'LunarLanderContinuous-v2' - ], - - # MuJoCo - 'mujoco': [ - 'Ant-v2', - 'HalfCheetah-v2', - 'Hopper-v2', - 'Humanoid-v2', - 'HumanoidStandup-v2', - 'InvertedDoublePendulum-v2', - 'InvertedPendulum-v2', - 'Reacher-v2', - 'Swimmer-v2', - 'Walker2d-v2' - ], - - # Robotics - 'robotics': [ - 'FetchPickAndPlace-v1', - 'FetchPush-v1', - 'FetchReach-v1', - 'FetchSlide-v1', - 'HandManipulateBlock-v0', - 'HandManipulateEgg-v0', - 'HandManipulatePen-v0', - 'HandReach-v0' - ], - - ## Deepmind Control Suite (need check!) - 'dm_control': [ - 'AcrobotSparse-v0', - 'BallincupCatch-v0', - 'CartpoleSwingup-v0', - 'FingerTurn-v0', - 'FishSwim-v0', - 'CheetahRun-v0', - 'HopperHop-v0', - 'HumanoidStand-v0', - 'HumanoidWalk-v0', - 'HumanoidRun-v0', - 'ManipulatorBringball-v0', - 'PendulumSwingup-v0', - 'Pointmass-v0', - 'ReacherHard-v0', - 'Swimmer-v0', - 'WalkerRun-v0' - ], - - ## RLBench - 'rlbench': [ - 'BeatTheBuzz', - 'BlockPyramid', - 'ChangeChannel', - 'ChangeClock', - 'CloseBox', - 'CloseDoor', - 'CloseDrawer', - 'CloseFridge', - 'CloseGrill', - 'CloseJar', - 'CloseLaptopLid', - 'CloseMicrowave', - 'EmptyContainer', - 'EmptyDishwasher', - 'GetIceFromFridge', - 'HangFrameOnHanger', - 'HannoiSquare', - 'HitBallWithQueue', - 'Hockey', - 'InsertUsbInComputer', - 'LampOff', - 'LampOn', - 'LightBulbIn', - 'LightBulbOut', - 'MeatOffGrill', - 'MeatOnGrill', - 'MoveHanger', - 'OpenBox', - 'OpenDoor', - 'OpenDrawer', - 'OpenFridge', - 'OpenGrill', - 'OpenJar', - 'OpenMicrowave', - 'OpenOven', - 'OpenWindow', - 'OpenWineBottle', - 'PhoneOnBase', - 'PickAndLift', - 'PickUpCup', - 'PlaceCups', - 'PlaceHangerOnRack', - 'PlaceShapeInShapeSorter', - 'PlayJenga', - 'PlugChargerInPowerSupply', - 'PourFromCupToCup', - 'PressSwitch', - 'PushButton', - 'PushButtons', - 'PutBooksOnBookshelf', - 'PutBottleInFridge', - 'PutGroceriesInCupboard', - 'PutItemInDrawer', - 'PutKnifeInKnifeBlock', - 'PutKnifeOnChoppingBoard', - 'PutMoneyInSafe', - 'PutPlateInColoredDishRack', - 'PutRubbishInBin', - 'PutShoesInBox', - 'PutToiletRollOnStand', - 'PutTrayInOven', - 'PutUmbrellaInUmbrellaStand', - 'ReachAndDrag', - 'ReachTarget', - 'RemoveCups', - 'ScoopWithSpatula', - 'ScrewNail', - 'SetTheTable', - 'SetupCheckers', - 'SlideBlockToTarget', - 'SlideCabinetOpen', - 'SlideCabinetOpenAndPlaceCups', - 'SolvePuzzle', - 'StackBlocks', - 'StackCups', - 'StackWine', - 'StraightenRope', - 'SweepToDustpan', - 'TakeCupOutFromCabinet', - 'TakeFrameOffHanger', - 'TakeItemOutOfDrawer', - 'TakeLidOffSaucepan', - 'TakeMoneyOutSafe', - 'TakeOffWeighingScales', - 'TakePlateOffColoredDishRack', - 'TakeShoesOutOfBox', - 'TakeToiletRollOffStand', - 'TakeTrayOutOfOven', - 'TakeUmbrellaOutOfUmbrellaStand', - 'TakeUsbOutOfComputer', - 'ToiletSeatDown', - 'ToiletSeatUp', - 'TurnOvenOn', - 'TurnTap', - 'TvOff', - 'TvOn', - 'UnplugCharger', - 'WaterPlants', - 'WeighingScales', - 'WipeDesk' - ] -} +def get_envlist(env_type): + """ get list of env names wrt the 
type of env """ + try: + l = all_env_list[env_type] + except: + print('Env Type {:s} Not Found!'.format(env_type)) + return l + + +all_env_list = { + ## Gym + # Atari + 'atari': ['AirRaid-v0', + 'AirRaid-v4', + 'AirRaidDeterministic-v0', + 'AirRaidDeterministic-v4', + 'AirRaidNoFrameskip-v0', + 'AirRaidNoFrameskip-v4', + 'AirRaid-ram-v0', + 'AirRaid-ram-v4', + 'AirRaid-ramDeterministic-v0', + 'AirRaid-ramDeterministic-v4', + 'AirRaid-ramNoFrameskip-v0', + 'AirRaid-ramNoFrameskip-v4', + 'Alien-v0', + 'Alien-v4', + 'AlienDeterministic-v0', + 'AlienDeterministic-v4', + 'AlienNoFrameskip-v0', + 'AlienNoFrameskip-v4', + 'Alien-ram-v0', + 'Alien-ram-v4', + 'Alien-ramDeterministic-v0', + 'Alien-ramDeterministic-v4', + 'Alien-ramNoFrameskip-v0', + 'Alien-ramNoFrameskip-v4', + 'Amidar-v0', + 'Amidar-v4', + 'AmidarDeterministic-v0', + 'AmidarDeterministic-v4', + 'AmidarNoFrameskip-v0', + 'AmidarNoFrameskip-v4', + 'Amidar-ram-v0', + 'Amidar-ram-v4', + 'Amidar-ramDeterministic-v0', + 'Amidar-ramDeterministic-v4', + 'Amidar-ramNoFrameskip-v0', + 'Amidar-ramNoFrameskip-v4', + 'Assault-v0', + 'Assault-v4', + 'AssaultDeterministic-v0', + 'AssaultDeterministic-v4', + 'AssaultNoFrameskip-v0', + 'AssaultNoFrameskip-v4', + 'Assault-ram-v0', + 'Assault-ram-v4', + 'Assault-ramDeterministic-v0', + 'Assault-ramDeterministic-v4', + 'Assault-ramNoFrameskip-v0', + 'Assault-ramNoFrameskip-v4', + 'Asterix-v0', + 'Asterix-v4', + 'AsterixDeterministic-v0', + 'AsterixDeterministic-v4', + 'AsterixNoFrameskip-v0', + 'AsterixNoFrameskip-v4', + 'Asterix-ram-v0', + 'Asterix-ram-v4', + 'Asterix-ramDeterministic-v0', + 'Asterix-ramDeterministic-v4', + 'Asterix-ramNoFrameskip-v0', + 'Asterix-ramNoFrameskip-v4', + 'Asteroids-v0', + 'Asteroids-v4', + 'AsteroidsDeterministic-v0', + 'AsteroidsDeterministic-v4', + 'AsteroidsNoFrameskip-v0', + 'AsteroidsNoFrameskip-v4', + 'Asteroids-ram-v0', + 'Asteroids-ram-v4', + 'Asteroids-ramDeterministic-v0', + 'Asteroids-ramDeterministic-v4', + 'Asteroids-ramNoFrameskip-v0', + 'Asteroids-ramNoFrameskip-v4', + 'Atlantis-v0', + 'Atlantis-v4', + 'AtlantisDeterministic-v0', + 'AtlantisDeterministic-v4', + 'AtlantisNoFrameskip-v0', + 'AtlantisNoFrameskip-v4', + 'Atlantis-ram-v0', + 'Atlantis-ram-v4', + 'Atlantis-ramDeterministic-v0', + 'Atlantis-ramDeterministic-v4', + 'Atlantis-ramNoFrameskip-v0', + 'Atlantis-ramNoFrameskip-v4', + 'BankHeist-v0', + 'BankHeist-v4', + 'BankHeistDeterministic-v0', + 'BankHeistDeterministic-v4', + 'BankHeistNoFrameskip-v0', + 'BankHeistNoFrameskip-v4', + 'BankHeist-ram-v0', + 'BankHeist-ram-v4', + 'BankHeist-ramDeterministic-v0', + 'BankHeist-ramDeterministic-v4', + 'BankHeist-ramNoFrameskip-v0', + 'BankHeist-ramNoFrameskip-v4', + 'BattleZone-v0', + 'BattleZone-v4', + 'BattleZoneDeterministic-v0', + 'BattleZoneDeterministic-v4', + 'BattleZoneNoFrameskip-v0', + 'BattleZoneNoFrameskip-v4', + 'BattleZone-ram-v0', + 'BattleZone-ram-v4', + 'BattleZone-ramDeterministic-v0', + 'BattleZone-ramDeterministic-v4', + 'BattleZone-ramNoFrameskip-v0', + 'BattleZone-ramNoFrameskip-v4', + 'BeamRider-v0', + 'BeamRider-v4', + 'BeamRiderDeterministic-v0', + 'BeamRiderDeterministic-v4', + 'BeamRiderNoFrameskip-v0', + 'BeamRiderNoFrameskip-v4', + 'BeamRider-ram-v0', + 'BeamRider-ram-v4', + 'BeamRider-ramDeterministic-v0', + 'BeamRider-ramDeterministic-v4', + 'BeamRider-ramNoFrameskip-v0', + 'BeamRider-ramNoFrameskip-v4', + 'Berzerk-v0', + 'Berzerk-v4', + 'BerzerkDeterministic-v0', + 'BerzerkDeterministic-v4', + 'BerzerkNoFrameskip-v0', + 'BerzerkNoFrameskip-v4', + 'Berzerk-ram-v0', + 
'Berzerk-ram-v4', + 'Berzerk-ramDeterministic-v0', + 'Berzerk-ramDeterministic-v4', + 'Berzerk-ramNoFrameskip-v0', + 'Berzerk-ramNoFrameskip-v4', + 'Bowling-v0', + 'Bowling-v4', + 'BowlingDeterministic-v0', + 'BowlingDeterministic-v4', + 'BowlingNoFrameskip-v0', + 'BowlingNoFrameskip-v4', + 'Bowling-ram-v0', + 'Bowling-ram-v4', + 'Bowling-ramDeterministic-v0', + 'Bowling-ramDeterministic-v4', + 'Bowling-ramNoFrameskip-v0', + 'Bowling-ramNoFrameskip-v4', + 'Boxing-v0', + 'Boxing-v4', + 'BoxingDeterministic-v0', + 'BoxingDeterministic-v4', + 'BoxingNoFrameskip-v0', + 'BoxingNoFrameskip-v4', + 'Boxing-ram-v0', + 'Boxing-ram-v4', + 'Boxing-ramDeterministic-v0', + 'Boxing-ramDeterministic-v4', + 'Boxing-ramNoFrameskip-v0', + 'Boxing-ramNoFrameskip-v4', + 'Breakout-v0', + 'Breakout-v4', + 'BreakoutDeterministic-v0', + 'BreakoutDeterministic-v4', + 'BreakoutNoFrameskip-v0', + 'BreakoutNoFrameskip-v4', + 'Breakout-ram-v0', + 'Breakout-ram-v4', + 'Breakout-ramDeterministic-v0', + 'Breakout-ramDeterministic-v4', + 'Breakout-ramNoFrameskip-v0', + 'Breakout-ramNoFrameskip-v4', + 'Carnival-v0', + 'Carnival-v4', + 'CarnivalDeterministic-v0', + 'CarnivalDeterministic-v4', + 'CarnivalNoFrameskip-v0', + 'CarnivalNoFrameskip-v4', + 'Carnival-ram-v0', + 'Carnival-ram-v4', + 'Carnival-ramDeterministic-v0', + 'Carnival-ramDeterministic-v4', + 'Carnival-ramNoFrameskip-v0', + 'Carnival-ramNoFrameskip-v4', + 'Centipede-v0', + 'Centipede-v4', + 'CentipedeDeterministic-v0', + 'CentipedeDeterministic-v4', + 'CentipedeNoFrameskip-v0', + 'CentipedeNoFrameskip-v4', + 'Centipede-ram-v0', + 'Centipede-ram-v4', + 'Centipede-ramDeterministic-v0', + 'Centipede-ramDeterministic-v4', + 'Centipede-ramNoFrameskip-v0', + 'Centipede-ramNoFrameskip-v4', + 'ChopperCommand-v0', + 'ChopperCommand-v4', + 'ChopperCommandDeterministic-v0', + 'ChopperCommandDeterministic-v4', + 'ChopperCommandNoFrameskip-v0', + 'ChopperCommandNoFrameskip-v4', + 'ChopperCommand-ram-v0', + 'ChopperCommand-ram-v4', + 'ChopperCommand-ramDeterministic-v0', + 'ChopperCommand-ramDeterministic-v4', + 'ChopperCommand-ramNoFrameskip-v0', + 'ChopperCommand-ramNoFrameskip-v4', + 'CrazyClimber-v0', + 'CrazyClimber-v4', + 'CrazyClimberDeterministic-v0', + 'CrazyClimberDeterministic-v4', + 'CrazyClimberNoFrameskip-v0', + 'CrazyClimberNoFrameskip-v4', + 'CrazyClimber-ram-v0', + 'CrazyClimber-ram-v4', + 'CrazyClimber-ramDeterministic-v0', + 'CrazyClimber-ramDeterministic-v4', + 'CrazyClimber-ramNoFrameskip-v0', + 'CrazyClimber-ramNoFrameskip-v4', + 'DemonAttack-v0', + 'DemonAttack-v4', + 'DemonAttackDeterministic-v0', + 'DemonAttackDeterministic-v4', + 'DemonAttackNoFrameskip-v0', + 'DemonAttackNoFrameskip-v4', + 'DemonAttack-ram-v0', + 'DemonAttack-ram-v4', + 'DemonAttack-ramDeterministic-v0', + 'DemonAttack-ramDeterministic-v4', + 'DemonAttack-ramNoFrameskip-v0', + 'DemonAttack-ramNoFrameskip-v4', + 'DoubleDunk-v0', + 'DoubleDunk-v4', + 'DoubleDunkDeterministic-v0', + 'DoubleDunkDeterministic-v4', + 'DoubleDunkNoFrameskip-v0', + 'DoubleDunkNoFrameskip-v4', + 'DoubleDunk-ram-v0', + 'DoubleDunk-ram-v4', + 'DoubleDunk-ramDeterministic-v0', + 'DoubleDunk-ramDeterministic-v4', + 'DoubleDunk-ramNoFrameskip-v0', + 'DoubleDunk-ramNoFrameskip-v4', + 'ElevatorAction-v0', + 'ElevatorAction-v4', + 'ElevatorActionDeterministic-v0', + 'ElevatorActionDeterministic-v4', + 'ElevatorActionNoFrameskip-v0', + 'ElevatorActionNoFrameskip-v4', + 'ElevatorAction-ram-v0', + 'ElevatorAction-ram-v4', + 'ElevatorAction-ramDeterministic-v0', + 'ElevatorAction-ramDeterministic-v4', + 
'ElevatorAction-ramNoFrameskip-v0', + 'ElevatorAction-ramNoFrameskip-v4', + 'Enduro-v0', + 'Enduro-v4', + 'EnduroDeterministic-v0', + 'EnduroDeterministic-v4', + 'EnduroNoFrameskip-v0', + 'EnduroNoFrameskip-v4', + 'Enduro-ram-v0', + 'Enduro-ram-v4', + 'Enduro-ramDeterministic-v0', + 'Enduro-ramDeterministic-v4', + 'Enduro-ramNoFrameskip-v0', + 'Enduro-ramNoFrameskip-v4', + 'FishingDerby-v0', + 'FishingDerby-v4', + 'FishingDerbyDeterministic-v0', + 'FishingDerbyDeterministic-v4', + 'FishingDerbyNoFrameskip-v0', + 'FishingDerbyNoFrameskip-v4', + 'FishingDerby-ram-v0', + 'FishingDerby-ram-v4', + 'FishingDerby-ramDeterministic-v0', + 'FishingDerby-ramDeterministic-v4', + 'FishingDerby-ramNoFrameskip-v0', + 'FishingDerby-ramNoFrameskip-v4', + 'Freeway-v0', + 'Freeway-v4', + 'FreewayDeterministic-v0', + 'FreewayDeterministic-v4', + 'FreewayNoFrameskip-v0', + 'FreewayNoFrameskip-v4', + 'Freeway-ram-v0', + 'Freeway-ram-v4', + 'Freeway-ramDeterministic-v0', + 'Freeway-ramDeterministic-v4', + 'Freeway-ramNoFrameskip-v0', + 'Freeway-ramNoFrameskip-v4', + 'Frostbite-v0', + 'Frostbite-v4', + 'FrostbiteDeterministic-v0', + 'FrostbiteDeterministic-v4', + 'FrostbiteNoFrameskip-v0', + 'FrostbiteNoFrameskip-v4', + 'Frostbite-ram-v0', + 'Frostbite-ram-v4', + 'Frostbite-ramDeterministic-v0', + 'Frostbite-ramDeterministic-v4', + 'Frostbite-ramNoFrameskip-v0', + 'Frostbite-ramNoFrameskip-v4', + 'Gopher-v0', + 'Gopher-v4', + 'GopherDeterministic-v0', + 'GopherDeterministic-v4', + 'GopherNoFrameskip-v0', + 'GopherNoFrameskip-v4', + 'Gopher-ram-v0', + 'Gopher-ram-v4', + 'Gopher-ramDeterministic-v0', + 'Gopher-ramDeterministic-v4', + 'Gopher-ramNoFrameskip-v0', + 'Gopher-ramNoFrameskip-v4', + 'Gravitar-v0', + 'Gravitar-v4', + 'GravitarDeterministic-v0', + 'GravitarDeterministic-v4', + 'GravitarNoFrameskip-v0', + 'GravitarNoFrameskip-v4', + 'Gravitar-ram-v0', + 'Gravitar-ram-v4', + 'Gravitar-ramDeterministic-v0', + 'Gravitar-ramDeterministic-v4', + 'Gravitar-ramNoFrameskip-v0', + 'Gravitar-ramNoFrameskip-v4', + 'Hero-v0', + 'Hero-v4', + 'HeroDeterministic-v0', + 'HeroDeterministic-v4', + 'HeroNoFrameskip-v0', + 'HeroNoFrameskip-v4', + 'Hero-ram-v0', + 'Hero-ram-v4', + 'Hero-ramDeterministic-v0', + 'Hero-ramDeterministic-v4', + 'Hero-ramNoFrameskip-v0', + 'Hero-ramNoFrameskip-v4', + 'IceHockey-v0', + 'IceHockey-v4', + 'IceHockeyDeterministic-v0', + 'IceHockeyDeterministic-v4', + 'IceHockeyNoFrameskip-v0', + 'IceHockeyNoFrameskip-v4', + 'IceHockey-ram-v0', + 'IceHockey-ram-v4', + 'IceHockey-ramDeterministic-v0', + 'IceHockey-ramDeterministic-v4', + 'IceHockey-ramNoFrameskip-v0', + 'IceHockey-ramNoFrameskip-v4', + 'Jamesbond-v0', + 'Jamesbond-v4', + 'JamesbondDeterministic-v0', + 'JamesbondDeterministic-v4', + 'JamesbondNoFrameskip-v0', + 'JamesbondNoFrameskip-v4', + 'Jamesbond-ram-v0', + 'Jamesbond-ram-v4', + 'Jamesbond-ramDeterministic-v0', + 'Jamesbond-ramDeterministic-v4', + 'Jamesbond-ramNoFrameskip-v0', + 'Jamesbond-ramNoFrameskip-v4', + 'JourneyEscape-v0', + 'JourneyEscape-v4', + 'JourneyEscapeDeterministic-v0', + 'JourneyEscapeDeterministic-v4', + 'JourneyEscapeNoFrameskip-v0', + 'JourneyEscapeNoFrameskip-v4', + 'JourneyEscape-ram-v0', + 'JourneyEscape-ram-v4', + 'JourneyEscape-ramDeterministic-v0', + 'JourneyEscape-ramDeterministic-v4', + 'JourneyEscape-ramNoFrameskip-v0', + 'JourneyEscape-ramNoFrameskip-v4', + 'Kangaroo-v0', + 'Kangaroo-v4', + 'KangarooDeterministic-v0', + 'KangarooDeterministic-v4', + 'KangarooNoFrameskip-v0', + 'KangarooNoFrameskip-v4', + 'Kangaroo-ram-v0', + 'Kangaroo-ram-v4', + 
'Kangaroo-ramDeterministic-v0', + 'Kangaroo-ramDeterministic-v4', + 'Kangaroo-ramNoFrameskip-v0', + 'Kangaroo-ramNoFrameskip-v4', + 'Krull-v0', + 'Krull-v4', + 'KrullDeterministic-v0', + 'KrullDeterministic-v4', + 'KrullNoFrameskip-v0', + 'KrullNoFrameskip-v4', + 'Krull-ram-v0', + 'Krull-ram-v4', + 'Krull-ramDeterministic-v0', + 'Krull-ramDeterministic-v4', + 'Krull-ramNoFrameskip-v0', + 'Krull-ramNoFrameskip-v4', + 'KungFuMaster-v0', + 'KungFuMaster-v4', + 'KungFuMasterDeterministic-v0', + 'KungFuMasterDeterministic-v4', + 'KungFuMasterNoFrameskip-v0', + 'KungFuMasterNoFrameskip-v4', + 'KungFuMaster-ram-v0', + 'KungFuMaster-ram-v4', + 'KungFuMaster-ramDeterministic-v0', + 'KungFuMaster-ramDeterministic-v4', + 'KungFuMaster-ramNoFrameskip-v0', + 'KungFuMaster-ramNoFrameskip-v4', + 'MontezumaRevenge-v0', + 'MontezumaRevenge-v4', + 'MontezumaRevengeDeterministic-v0', + 'MontezumaRevengeDeterministic-v4', + 'MontezumaRevengeNoFrameskip-v0', + 'MontezumaRevengeNoFrameskip-v4', + 'MontezumaRevenge-ram-v0', + 'MontezumaRevenge-ram-v4', + 'MontezumaRevenge-ramDeterministic-v0', + 'MontezumaRevenge-ramDeterministic-v4', + 'MontezumaRevenge-ramNoFrameskip-v0', + 'MontezumaRevenge-ramNoFrameskip-v4', + 'MsPacman-v0', + 'MsPacman-v4', + 'MsPacmanDeterministic-v0', + 'MsPacmanDeterministic-v4', + 'MsPacmanNoFrameskip-v0', + 'MsPacmanNoFrameskip-v4', + 'MsPacman-ram-v0', + 'MsPacman-ram-v4', + 'MsPacman-ramDeterministic-v0', + 'MsPacman-ramDeterministic-v4', + 'MsPacman-ramNoFrameskip-v0', + 'MsPacman-ramNoFrameskip-v4', + 'NameThisGame-v0', + 'NameThisGame-v4', + 'NameThisGameDeterministic-v0', + 'NameThisGameDeterministic-v4', + 'NameThisGameNoFrameskip-v0', + 'NameThisGameNoFrameskip-v4', + 'NameThisGame-ram-v0', + 'NameThisGame-ram-v4', + 'NameThisGame-ramDeterministic-v0', + 'NameThisGame-ramDeterministic-v4', + 'NameThisGame-ramNoFrameskip-v0', + 'NameThisGame-ramNoFrameskip-v4', + 'Phoenix-v0', + 'Phoenix-v4', + 'PhoenixDeterministic-v0', + 'PhoenixDeterministic-v4', + 'PhoenixNoFrameskip-v0', + 'PhoenixNoFrameskip-v4', + 'Phoenix-ram-v0', + 'Phoenix-ram-v4', + 'Phoenix-ramDeterministic-v0', + 'Phoenix-ramDeterministic-v4', + 'Phoenix-ramNoFrameskip-v0', + 'Phoenix-ramNoFrameskip-v4', + 'Pitfall-v0', + 'Pitfall-v4', + 'PitfallDeterministic-v0', + 'PitfallDeterministic-v4', + 'PitfallNoFrameskip-v0', + 'PitfallNoFrameskip-v4', + 'Pitfall-ram-v0', + 'Pitfall-ram-v4', + 'Pitfall-ramDeterministic-v0', + 'Pitfall-ramDeterministic-v4', + 'Pitfall-ramNoFrameskip-v0', + 'Pitfall-ramNoFrameskip-v4', + 'Pong-v0', + 'Pong-v4', + 'PongDeterministic-v0', + 'PongDeterministic-v4', + 'PongNoFrameskip-v0', + 'PongNoFrameskip-v4', + 'Pong-ram-v0', + 'Pong-ram-v4', + 'Pong-ramDeterministic-v0', + 'Pong-ramDeterministic-v4', + 'Pong-ramNoFrameskip-v0', + 'Pong-ramNoFrameskip-v4', + 'Pooyan-v0', + 'Pooyan-v4', + 'PooyanDeterministic-v0', + 'PooyanDeterministic-v4', + 'PooyanNoFrameskip-v0', + 'PooyanNoFrameskip-v4', + 'Pooyan-ram-v0', + 'Pooyan-ram-v4', + 'Pooyan-ramDeterministic-v0', + 'Pooyan-ramDeterministic-v4', + 'Pooyan-ramNoFrameskip-v0', + 'Pooyan-ramNoFrameskip-v4', + 'PrivateEye-v0', + 'PrivateEye-v4', + 'PrivateEyeDeterministic-v0', + 'PrivateEyeDeterministic-v4', + 'PrivateEyeNoFrameskip-v0', + 'PrivateEyeNoFrameskip-v4', + 'PrivateEye-ram-v0', + 'PrivateEye-ram-v4', + 'PrivateEye-ramDeterministic-v0', + 'PrivateEye-ramDeterministic-v4', + 'PrivateEye-ramNoFrameskip-v0', + 'PrivateEye-ramNoFrameskip-v4', + 'Qbert-v0', + 'Qbert-v4', + 'QbertDeterministic-v0', + 'QbertDeterministic-v4', + 
'QbertNoFrameskip-v0', + 'QbertNoFrameskip-v4', + 'Qbert-ram-v0', + 'Qbert-ram-v4', + 'Qbert-ramDeterministic-v0', + 'Qbert-ramDeterministic-v4', + 'Qbert-ramNoFrameskip-v0', + 'Qbert-ramNoFrameskip-v4', + 'Riverraid-v0', + 'Riverraid-v4', + 'RiverraidDeterministic-v0', + 'RiverraidDeterministic-v4', + 'RiverraidNoFrameskip-v0', + 'RiverraidNoFrameskip-v4', + 'Riverraid-ram-v0', + 'Riverraid-ram-v4', + 'Riverraid-ramDeterministic-v0', + 'Riverraid-ramDeterministic-v4', + 'Riverraid-ramNoFrameskip-v0', + 'Riverraid-ramNoFrameskip-v4', + 'RoadRunner-v0', + 'RoadRunner-v4', + 'RoadRunnerDeterministic-v0', + 'RoadRunnerDeterministic-v4', + 'RoadRunnerNoFrameskip-v0', + 'RoadRunnerNoFrameskip-v4', + 'RoadRunner-ram-v0', + 'RoadRunner-ram-v4', + 'RoadRunner-ramDeterministic-v0', + 'RoadRunner-ramDeterministic-v4', + 'RoadRunner-ramNoFrameskip-v0', + 'RoadRunner-ramNoFrameskip-v4', + 'Robotank-v0', + 'Robotank-v4', + 'RobotankDeterministic-v0', + 'RobotankDeterministic-v4', + 'RobotankNoFrameskip-v0', + 'RobotankNoFrameskip-v4', + 'Robotank-ram-v0', + 'Robotank-ram-v4', + 'Robotank-ramDeterministic-v0', + 'Robotank-ramDeterministic-v4', + 'Robotank-ramNoFrameskip-v0', + 'Robotank-ramNoFrameskip-v4', + 'Seaquest-v0', + 'Seaquest-v4', + 'SeaquestDeterministic-v0', + 'SeaquestDeterministic-v4', + 'SeaquestNoFrameskip-v0', + 'SeaquestNoFrameskip-v4', + 'Seaquest-ram-v0', + 'Seaquest-ram-v4', + 'Seaquest-ramDeterministic-v0', + 'Seaquest-ramDeterministic-v4', + 'Seaquest-ramNoFrameskip-v0', + 'Seaquest-ramNoFrameskip-v4', + 'Skiing-v0', + 'Skiing-v4', + 'SkiingDeterministic-v0', + 'SkiingDeterministic-v4', + 'SkiingNoFrameskip-v0', + 'SkiingNoFrameskip-v4', + 'Skiing-ram-v0', + 'Skiing-ram-v4', + 'Skiing-ramDeterministic-v0', + 'Skiing-ramDeterministic-v4', + 'Skiing-ramNoFrameskip-v0', + 'Skiing-ramNoFrameskip-v4', + 'Solaris-v0', + 'Solaris-v4', + 'SolarisDeterministic-v0', + 'SolarisDeterministic-v4', + 'SolarisNoFrameskip-v0', + 'SolarisNoFrameskip-v4', + 'Solaris-ram-v0', + 'Solaris-ram-v4', + 'Solaris-ramDeterministic-v0', + 'Solaris-ramDeterministic-v4', + 'Solaris-ramNoFrameskip-v0', + 'Solaris-ramNoFrameskip-v4', + 'SpaceInvaders-v0', + 'SpaceInvaders-v4', + 'SpaceInvadersDeterministic-v0', + 'SpaceInvadersDeterministic-v4', + 'SpaceInvadersNoFrameskip-v0', + 'SpaceInvadersNoFrameskip-v4', + 'SpaceInvaders-ram-v0', + 'SpaceInvaders-ram-v4', + 'SpaceInvaders-ramDeterministic-v0', + 'SpaceInvaders-ramDeterministic-v4', + 'SpaceInvaders-ramNoFrameskip-v0', + 'SpaceInvaders-ramNoFrameskip-v4', + 'StarGunner-v0', + 'StarGunner-v4', + 'StarGunnerDeterministic-v0', + 'StarGunnerDeterministic-v4', + 'StarGunnerNoFrameskip-v0', + 'StarGunnerNoFrameskip-v4', + 'StarGunner-ram-v0', + 'StarGunner-ram-v4', + 'StarGunner-ramDeterministic-v0', + 'StarGunner-ramDeterministic-v4', + 'StarGunner-ramNoFrameskip-v0', + 'StarGunner-ramNoFrameskip-v4', + 'Tennis-v0', + 'Tennis-v4', + 'TennisDeterministic-v0', + 'TennisDeterministic-v4', + 'TennisNoFrameskip-v0', + 'TennisNoFrameskip-v4', + 'Tennis-ram-v0', + 'Tennis-ram-v4', + 'Tennis-ramDeterministic-v0', + 'Tennis-ramDeterministic-v4', + 'Tennis-ramNoFrameskip-v0', + 'Tennis-ramNoFrameskip-v4', + 'TimePilot-v0', + 'TimePilot-v4', + 'TimePilotDeterministic-v0', + 'TimePilotDeterministic-v4', + 'TimePilotNoFrameskip-v0', + 'TimePilotNoFrameskip-v4', + 'TimePilot-ram-v0', + 'TimePilot-ram-v4', + 'TimePilot-ramDeterministic-v0', + 'TimePilot-ramDeterministic-v4', + 'TimePilot-ramNoFrameskip-v0', + 'TimePilot-ramNoFrameskip-v4', + 'Tutankham-v0', + 'Tutankham-v4', + 
'TutankhamDeterministic-v0', + 'TutankhamDeterministic-v4', + 'TutankhamNoFrameskip-v0', + 'TutankhamNoFrameskip-v4', + 'Tutankham-ram-v0', + 'Tutankham-ram-v4', + 'Tutankham-ramDeterministic-v0', + 'Tutankham-ramDeterministic-v4', + 'Tutankham-ramNoFrameskip-v0', + 'Tutankham-ramNoFrameskip-v4', + 'UpNDown-v0', + 'UpNDown-v4', + 'UpNDownDeterministic-v0', + 'UpNDownDeterministic-v4', + 'UpNDownNoFrameskip-v0', + 'UpNDownNoFrameskip-v4', + 'UpNDown-ram-v0', + 'UpNDown-ram-v4', + 'UpNDown-ramDeterministic-v0', + 'UpNDown-ramDeterministic-v4', + 'UpNDown-ramNoFrameskip-v0', + 'UpNDown-ramNoFrameskip-v4', + 'Venture-v0', + 'Venture-v4', + 'VentureDeterministic-v0', + 'VentureDeterministic-v4', + 'VentureNoFrameskip-v0', + 'VentureNoFrameskip-v4', + 'Venture-ram-v0', + 'Venture-ram-v4', + 'Venture-ramDeterministic-v0', + 'Venture-ramDeterministic-v4', + 'Venture-ramNoFrameskip-v0', + 'Venture-ramNoFrameskip-v4', + 'VideoPinball-v0', + 'VideoPinball-v4', + 'VideoPinballDeterministic-v0', + 'VideoPinballDeterministic-v4', + 'VideoPinballNoFrameskip-v0', + 'VideoPinballNoFrameskip-v4', + 'VideoPinball-ram-v0', + 'VideoPinball-ram-v4', + 'VideoPinball-ramDeterministic-v0', + 'VideoPinball-ramDeterministic-v4', + 'VideoPinball-ramNoFrameskip-v0', + 'VideoPinball-ramNoFrameskip-v4', + 'WizardOfWor-v0', + 'WizardOfWor-v4', + 'WizardOfWorDeterministic-v0', + 'WizardOfWorDeterministic-v4', + 'WizardOfWorNoFrameskip-v0', + 'WizardOfWorNoFrameskip-v4', + 'WizardOfWor-ram-v0', + 'WizardOfWor-ram-v4', + 'WizardOfWor-ramDeterministic-v0', + 'WizardOfWor-ramDeterministic-v4', + 'WizardOfWor-ramNoFrameskip-v0', + 'WizardOfWor-ramNoFrameskip-v4', + 'YarsRevenge-v0', + 'YarsRevenge-v4', + 'YarsRevengeDeterministic-v0', + 'YarsRevengeDeterministic-v4', + 'YarsRevengeNoFrameskip-v0', + 'YarsRevengeNoFrameskip-v4', + 'YarsRevenge-ram-v0', + 'YarsRevenge-ram-v4', + 'YarsRevenge-ramDeterministic-v0', + 'YarsRevenge-ramDeterministic-v4', + 'YarsRevenge-ramNoFrameskip-v0', + 'YarsRevenge-ramNoFrameskip-v4', + 'Zaxxon-v0', + 'Zaxxon-v4', + 'ZaxxonDeterministic-v0', + 'ZaxxonDeterministic-v4', + 'ZaxxonNoFrameskip-v0', + 'ZaxxonNoFrameskip-v4', + 'Zaxxon-ram-v0', + 'Zaxxon-ram-v4', + 'Zaxxon-ramDeterministic-v0', + 'Zaxxon-ramDeterministic-v4', + 'Zaxxon-ramNoFrameskip-v0', + 'Zaxxon-ramNoFrameskip-v4'], + + # Classic control + 'classic_control': [ + 'Acrobot-v1', + 'CartPole-v1', + 'CartPole-v0', + 'MountainCar-v0', + 'MountainCarContinuous-v0', + 'Pendulum-v0' + ], + + # Box2D + 'box2d': [ + 'BipedalWalker-v2', + 'BipedalWalkerHardcore-v2', + 'CarRacing-v0', + 'LunarLander-v2', + 'LunarLanderContinuous-v2' + ], + + # MuJoCo + 'mujoco': [ + 'Ant-v2', + 'HalfCheetah-v2', + 'Hopper-v2', + 'Humanoid-v2', + 'HumanoidStandup-v2', + 'InvertedDoublePendulum-v2', + 'InvertedPendulum-v2', + 'Reacher-v2', + 'Swimmer-v2', + 'Walker2d-v2' + ], + + # Robotics + 'robotics': [ + 'FetchPickAndPlace-v1', + 'FetchPush-v1', + 'FetchReach-v1', + 'FetchSlide-v1', + 'HandManipulateBlock-v0', + 'HandManipulateEgg-v0', + 'HandManipulatePen-v0', + 'HandReach-v0' + ], + + ## Deepmind Control Suite (need check!) 
+ 'dm_control': [ + 'AcrobotSparse-v0', + 'BallincupCatch-v0', + 'CartpoleSwingup-v0', + 'FingerTurn-v0', + 'FishSwim-v0', + 'CheetahRun-v0', + 'HopperHop-v0', + 'HumanoidStand-v0', + 'HumanoidWalk-v0', + 'HumanoidRun-v0', + 'ManipulatorBringball-v0', + 'PendulumSwingup-v0', + 'Pointmass-v0', + 'ReacherHard-v0', + 'Swimmer-v0', + 'WalkerRun-v0' + ], + + ## RLBench + 'rlbench': [ + 'BeatTheBuzz', + 'BlockPyramid', + 'ChangeChannel', + 'ChangeClock', + 'CloseBox', + 'CloseDoor', + 'CloseDrawer', + 'CloseFridge', + 'CloseGrill', + 'CloseJar', + 'CloseLaptopLid', + 'CloseMicrowave', + 'EmptyContainer', + 'EmptyDishwasher', + 'GetIceFromFridge', + 'HangFrameOnHanger', + 'HannoiSquare', + 'HitBallWithQueue', + 'Hockey', + 'InsertUsbInComputer', + 'LampOff', + 'LampOn', + 'LightBulbIn', + 'LightBulbOut', + 'MeatOffGrill', + 'MeatOnGrill', + 'MoveHanger', + 'OpenBox', + 'OpenDoor', + 'OpenDrawer', + 'OpenFridge', + 'OpenGrill', + 'OpenJar', + 'OpenMicrowave', + 'OpenOven', + 'OpenWindow', + 'OpenWineBottle', + 'PhoneOnBase', + 'PickAndLift', + 'PickUpCup', + 'PlaceCups', + 'PlaceHangerOnRack', + 'PlaceShapeInShapeSorter', + 'PlayJenga', + 'PlugChargerInPowerSupply', + 'PourFromCupToCup', + 'PressSwitch', + 'PushButton', + 'PushButtons', + 'PutBooksOnBookshelf', + 'PutBottleInFridge', + 'PutGroceriesInCupboard', + 'PutItemInDrawer', + 'PutKnifeInKnifeBlock', + 'PutKnifeOnChoppingBoard', + 'PutMoneyInSafe', + 'PutPlateInColoredDishRack', + 'PutRubbishInBin', + 'PutShoesInBox', + 'PutToiletRollOnStand', + 'PutTrayInOven', + 'PutUmbrellaInUmbrellaStand', + 'ReachAndDrag', + 'ReachTarget', + 'RemoveCups', + 'ScoopWithSpatula', + 'ScrewNail', + 'SetTheTable', + 'SetupCheckers', + 'SlideBlockToTarget', + 'SlideCabinetOpen', + 'SlideCabinetOpenAndPlaceCups', + 'SolvePuzzle', + 'StackBlocks', + 'StackCups', + 'StackWine', + 'StraightenRope', + 'SweepToDustpan', + 'TakeCupOutFromCabinet', + 'TakeFrameOffHanger', + 'TakeItemOutOfDrawer', + 'TakeLidOffSaucepan', + 'TakeMoneyOutSafe', + 'TakeOffWeighingScales', + 'TakePlateOffColoredDishRack', + 'TakeShoesOutOfBox', + 'TakeToiletRollOffStand', + 'TakeTrayOutOfOven', + 'TakeUmbrellaOutOfUmbrellaStand', + 'TakeUsbOutOfComputer', + 'ToiletSeatDown', + 'ToiletSeatUp', + 'TurnOvenOn', + 'TurnTap', + 'TvOff', + 'TvOn', + 'UnplugCharger', + 'WaterPlants', + 'WeighingScales', + 'WipeDesk' + ] +} diff --git a/rlzoo/common/env_wrappers.py b/rlzoo/common/env_wrappers.py old mode 100644 new mode 100755 index c7f92cf..04c0345 --- a/rlzoo/common/env_wrappers.py +++ b/rlzoo/common/env_wrappers.py @@ -1,637 +1,637 @@ -"""Env wrappers -Most common wrappers can be checked from following links for usage: - -`https://pypi.org/project/gym-vec-env` - -`https://github.com/openai/baselines/blob/master/baselines/common/wrappers.py` -""" -from collections import deque -from functools import partial -from multiprocessing import Pipe, Process, cpu_count -from sys import platform - -import cv2 -import gym -import numpy as np -from gym import spaces -from gym.wrappers import FlattenDictWrapper - -from rlzoo.common.env_list import get_envlist - -__all__ = ( - 'build_env', # build env - 'TimeLimit', # Time limit wrapper - 'NoopResetEnv', # Run random number of no-ops on reset - 'FireResetEnv', # Reset wrapper for envs with fire action - 'EpisodicLifeEnv', # end-of-life == end-of-episode wrapper - 'MaxAndSkipEnv', # skip frame wrapper - 'ClipRewardEnv', # clip reward wrapper - 'WarpFrame', # warp observation wrapper - 'FrameStack', # stack frame wrapper - 'LazyFrames', # lazy store wrapper - 
'RewardShaping', # reward shaping - 'SubprocVecEnv', # vectorized env wrapper - 'VecFrameStack', # stack frames in vectorized env - 'Monitor', # Episode reward and length monitor - 'NormalizedActions', # normalized action to actual space - 'DmObsTrans', # translate observations in dm_control environments -) -cv2.ocl.setUseOpenCL(False) - - -def build_env(env_id, env_type, vectorized=False, - seed=0, reward_shaping=None, nenv=1, **kwargs): - """ - Build env based on options - - :param env_id: (str) environment id - :param env_type: (str) atari, classic_control, box2d - :param vectorized: (bool) whether sampling parrallel - :param seed: (int) random seed for env - :param reward_shaping: (callable) callable function for reward shaping - :param nenv: (int) how many processes will be used in sampling - :param kwargs: (dict) - :param max_episode_steps: (int) the maximum episode steps - """ - nenv = nenv or cpu_count() // (1 + (platform == 'darwin')) - stack = env_type == 'atari' - if nenv > 1: - if vectorized: - env = _make_vec_env(env_id, env_type, nenv, seed, - reward_shaping, stack, **kwargs) - else: - env = [] - for _ in range(nenv): - single_env = _make_env(env_id, env_type, seed, - reward_shaping, stack, **kwargs) - env.append(single_env) # get env as a list of same single env - - else: - env = _make_env(env_id, env_type, seed, - reward_shaping, stack, **kwargs) - - return env - - -def check_name_in_list(env_id, env_type): - """ Check if env_id exists in the env_type list """ - env_list = get_envlist(env_type) - if env_id not in env_list: - print('Env ID {:s} Not Found In {:s}!'.format(env_id, env_type)) - else: - print('Env ID {:s} Exists!'.format(env_id)) - - -def _make_env(env_id, env_type, seed, reward_shaping, frame_stack, **kwargs): - """Make single env""" - check_name_in_list(env_id, env_type) # check existence of env_id in env_type - if env_type == 'atari': - env = gym.make(env_id) - env = NoopResetEnv(env, noop_max=30) - if 'NoFrameskip' in env.spec.id: - env = MaxAndSkipEnv(env, skip=4) - env = Monitor(env) - # deepmind wrap - env = EpisodicLifeEnv(env) - if 'FIRE' in env.unwrapped.get_action_meanings(): - env = FireResetEnv(env) - env = WarpFrame(env) - env = ClipRewardEnv(env) - if frame_stack: - env = FrameStack(env, 4) - elif env_type in ['classic_control', 'box2d', 'mujoco']: - env = gym.make(env_id).unwrapped - max_episode_steps = kwargs.get('max_episode_steps') - if max_episode_steps is not None: - env = TimeLimit(env.unwrapped, max_episode_steps) - env = Monitor(env) - elif env_type == 'robotics': - env = gym.make(env_id) - env = FlattenDictWrapper(env, ['observation', 'desired_goal']) - env = Monitor(env, info_keywords=('is_success',)) - elif env_type == 'dm_control': - env = gym.make('dm2gym:' + env_id, environment_kwargs={'flat_observation': True}) - env = DmObsTrans(env) - elif env_type == 'rlbench': - from rlzoo.common.build_rlbench_env import RLBenchEnv - state_type = kwargs.get('state_type') - env = RLBenchEnv(env_id) if state_type is None else RLBenchEnv(env_id, state_type) - else: - raise NotImplementedError - - if reward_shaping is not None: - if callable(reward_shaping): - env = RewardShaping(env, reward_shaping) - else: - raise ValueError('reward_shaping parameter must be callable') - env.seed(seed) - return env - - -def _make_vec_env(env_id, env_type, nenv, seed, - reward_shaping, frame_stack, **kwargs): - """Make vectorized env""" - env = SubprocVecEnv([partial( - _make_env, env_id, env_type, seed + i, reward_shaping, False, **kwargs - ) for i in 
range(nenv)]) - if frame_stack: - env = VecFrameStack(env, 4) - return env - - -class DmObsTrans(gym.Wrapper): - """ Observation process for DeepMind Control Suite environments """ - - def __init__(self, env): - self.env = env - super(DmObsTrans, self).__init__(env) - self.__need_trans = False - if isinstance(self.observation_space, gym.spaces.dict.Dict): - self.observation_space = self.observation_space['observations'] - self.__need_trans = True - - def step(self, ac): - observation, reward, done, info = self.env.step(ac) - if self.__need_trans: - observation = observation['observations'] - return observation, reward, done, info - - def reset(self, **kwargs): - observation = self.env.reset(**kwargs) - if self.__need_trans: - observation = observation['observations'] - return observation - - -class TimeLimit(gym.Wrapper): - - def __init__(self, env, max_episode_steps=None): - self.env = env - super(TimeLimit, self).__init__(env) - self._max_episode_steps = max_episode_steps - self._elapsed_steps = 0 - - def step(self, ac): - observation, reward, done, info = self.env.step(ac) - self._elapsed_steps += 1 - if self._elapsed_steps >= self._max_episode_steps: - done = True - info['TimeLimit.truncated'] = True - return observation, reward, done, info - - def reset(self, **kwargs): - self._elapsed_steps = 0 - return self.env.reset(**kwargs) - - -class NoopResetEnv(gym.Wrapper): - - def __init__(self, env, noop_max=30): - """Sample initial states by taking random number of no-ops on reset. - No-op is assumed to be action 0. - """ - super(NoopResetEnv, self).__init__(env) - self.noop_max = noop_max - self.override_num_noops = None - self.noop_action = 0 - assert env.unwrapped.get_action_meanings()[0] == 'NOOP' - - def reset(self, **kwargs): - """ Do no-op action for a number of steps in [1, noop_max].""" - self.env.reset(**kwargs) - if self.override_num_noops is not None: - noops = self.override_num_noops - else: - noops = self.unwrapped.np_random.randint(1, self.noop_max + 1) - assert noops > 0 - obs = None - for _ in range(noops): - obs, _, done, _ = self.env.step(self.noop_action) - if done: - obs = self.env.reset(**kwargs) - return obs - - def step(self, ac): - return self.env.step(ac) - - -class FireResetEnv(gym.Wrapper): - - def __init__(self, env): - """Take action on reset for environments that are fixed until firing.""" - super(FireResetEnv, self).__init__(env) - assert env.unwrapped.get_action_meanings()[1] == 'FIRE' - assert len(env.unwrapped.get_action_meanings()) >= 3 - - def reset(self, **kwargs): - self.env.reset(**kwargs) - obs, _, done, _ = self.env.step(1) - if done: - self.env.reset(**kwargs) - obs, _, done, _ = self.env.step(2) - if done: - self.env.reset(**kwargs) - return obs - - def step(self, ac): - return self.env.step(ac) - - -class EpisodicLifeEnv(gym.Wrapper): - - def __init__(self, env): - """Make end-of-life == end-of-episode, but only reset on true game over. - Done by DeepMind for the DQN and co. since it helps value estimation. - """ - super(EpisodicLifeEnv, self).__init__(env) - self.lives = 0 - self.was_real_done = True - - def step(self, action): - obs, reward, done, info = self.env.step(action) - self.was_real_done = done - # check current lives, make loss of life terminal, - # then update lives to handle bonus lives - lives = self.env.unwrapped.ale.lives() - if 0 < lives < self.lives: - # for Qbert sometimes we stay in lives == 0 condition for a few - # frames so it's important to keep lives > 0, so that we only reset - # once the environment advertises done. 
- done = True - self.lives = lives - return obs, reward, done, info - - def reset(self, **kwargs): - """Reset only when lives are exhausted. - This way all states are still reachable even though lives are episodic, - and the learner need not know about any of this behind-the-scenes. - """ - if self.was_real_done: - obs = self.env.reset(**kwargs) - else: - # no-op step to advance from terminal/lost life state - obs, _, _, _ = self.env.step(0) - self.lives = self.env.unwrapped.ale.lives() - return obs - - -class MaxAndSkipEnv(gym.Wrapper): - - def __init__(self, env, skip=4): - """Return only every `skip`-th frame""" - super(MaxAndSkipEnv, self).__init__(env) - # most recent raw observations (for max pooling across time steps) - shape = (2,) + env.observation_space.shape - self._obs_buffer = np.zeros(shape, dtype=np.uint8) - self._skip = skip - - def step(self, action): - """Repeat action, sum reward, and max over last observations.""" - total_reward = 0.0 - done = info = None - for i in range(self._skip): - obs, reward, done, info = self.env.step(action) - if i == self._skip - 2: - self._obs_buffer[0] = obs - if i == self._skip - 1: - self._obs_buffer[1] = obs - total_reward += reward - if done: - break - # Note that the observation on the done=True frame doesn't matter - max_frame = self._obs_buffer.max(axis=0) - - return max_frame, total_reward, done, info - - def reset(self, **kwargs): - return self.env.reset(**kwargs) - - -class ClipRewardEnv(gym.RewardWrapper): - - def __init__(self, env): - super(ClipRewardEnv, self).__init__(env) - - def reward(self, reward): - """Bin reward to {+1, 0, -1} by its sign.""" - return np.sign(reward) - - -class WarpFrame(gym.ObservationWrapper): - - def __init__(self, env, width=84, height=84, grayscale=True): - """Warp frames to 84x84 as done in the Nature paper and later work.""" - super(WarpFrame, self).__init__(env) - self.width = width - self.height = height - self.grayscale = grayscale - shape = (self.height, self.width, 1 if self.grayscale else 3) - self.observation_space = spaces.Box(low=0, high=255, shape=shape, dtype=np.uint8) - - def observation(self, frame): - if self.grayscale: - frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY) - size = (self.width, self.height) - frame = cv2.resize(frame, size, interpolation=cv2.INTER_AREA) - if self.grayscale: - frame = np.expand_dims(frame, -1) - return frame - - -class FrameStack(gym.Wrapper): - - def __init__(self, env, k): - """Stack k last frames. - Returns lazy array, which is much more memory efficient. - See Also `LazyFrames` - """ - super(FrameStack, self).__init__(env) - self.k = k - self.frames = deque([], maxlen=k) - shp = env.observation_space.shape - shape = shp[:-1] + (shp[-1] * k,) - self.observation_space = spaces.Box(low=0, high=255, shape=shape, dtype=env.observation_space.dtype) - - def reset(self): - ob = self.env.reset() - for _ in range(self.k): - self.frames.append(ob) - return np.asarray(self._get_ob()) - - def step(self, action): - ob, reward, done, info = self.env.step(action) - self.frames.append(ob) - return np.asarray(self._get_ob()), reward, done, info - - def _get_ob(self): - assert len(self.frames) == self.k - return LazyFrames(list(self.frames)) - - -class LazyFrames(object): - - def __init__(self, frames): - """This object ensures that common frames between the observations are - only stored once. It exists purely to optimize memory usage which can be - huge for DQN's 1M frames replay buffers. 
- - This object should only be converted to numpy array before being passed - to the model. You'd not believe how complex the previous solution was. - """ - self._frames = frames - self._out = None - - def _force(self): - if self._out is None: - self._out = np.concatenate(self._frames, axis=-1) - self._frames = None - return self._out - - def __array__(self, dtype=None): - out = self._force() - if dtype is not None: - out = out.astype(dtype) - return out - - def __len__(self): - return len(self._force()) - - def __getitem__(self, i): - return self._force()[i] - - -class RewardShaping(gym.RewardWrapper): - """Shaping the reward - For reward scale, func can be `lambda r: r * scale` - """ - - def __init__(self, env, func): - super(RewardShaping, self).__init__(env) - self.func = func - - def reward(self, reward): - return self.func(reward) - - -class VecFrameStack(object): - - def __init__(self, env, k): - self.env = env - self.k = k - self.action_space = env.action_space - self.frames = deque([], maxlen=k) - shp = env.observation_space.shape - shape = shp[:-1] + (shp[-1] * k,) - self.observation_space = spaces.Box(low=0, high=255, shape=shape, dtype=env.observation_space.dtype) - - def reset(self): - ob = self.env.reset() - for _ in range(self.k): - self.frames.append(ob) - return np.asarray(self._get_ob()) - - def step(self, action): - ob, reward, done, info = self.env.step(action) - self.frames.append(ob) - return np.asarray(self._get_ob()), reward, done, info - - def _get_ob(self): - assert len(self.frames) == self.k - return LazyFrames(list(self.frames)) - - -def _worker(remote, parent_remote, env_fn_wrapper): - parent_remote.close() - env = env_fn_wrapper.x() - while True: - cmd, data = remote.recv() - if cmd == 'step': - ob, reward, done, info = env.step(data) - if done: - ob = env.reset() - remote.send((ob, reward, done, info)) - elif cmd == 'reset': - ob = env.reset() - remote.send(ob) - elif cmd == 'reset_task': - ob = env._reset_task() - remote.send(ob) - elif cmd == 'close': - remote.close() - break - elif cmd == 'get_spaces': - remote.send((env.observation_space, env.action_space)) - else: - raise NotImplementedError - - -class CloudpickleWrapper(object): - """ - Uses cloudpickle to serialize contents - """ - - def __init__(self, x): - self.x = x - - def __getstate__(self): - import cloudpickle - return cloudpickle.dumps(self.x) - - def __setstate__(self, ob): - import pickle - self.x = pickle.loads(ob) - - -class SubprocVecEnv(object): - - def __init__(self, env_fns): - """ - envs: list of gym environments to run in subprocesses - """ - self.num_envs = len(env_fns) - - self.waiting = False - self.closed = False - nenvs = len(env_fns) - self.nenvs = nenvs - self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)]) - zipped_args = zip(self.work_remotes, self.remotes, env_fns) - self.ps = [ - Process(target=_worker, args=(work_remote, remote, CloudpickleWrapper(env_fn))) - for (work_remote, remote, env_fn) in zipped_args - ] - - for p in self.ps: - # if the main process crashes, we should not cause things to hang - p.daemon = True - p.start() - for remote in self.work_remotes: - remote.close() - - self.remotes[0].send(('get_spaces', None)) - observation_space, action_space = self.remotes[0].recv() - self.observation_space = observation_space - self.action_space = action_space - - def _step_async(self, actions): - """ - Tell all the environments to start taking a step - with the given actions. - Call step_wait() to get the results of the step. 
- You should not call this if a step_async run is - already pending. - """ - for remote, action in zip(self.remotes, actions): - remote.send(('step', action)) - self.waiting = True - - def _step_wait(self): - """ - Wait for the step taken with step_async(). - Returns (obs, rews, dones, infos): - - obs: an array of observations, or a tuple of - arrays of observations. - - rews: an array of rewards - - dones: an array of "episode done" booleans - - infos: a sequence of info objects - """ - results = [remote.recv() for remote in self.remotes] - self.waiting = False - obs, rews, dones, infos = zip(*results) - return np.stack(obs), np.stack(rews), np.stack(dones), infos - - def reset(self): - """ - Reset all the environments and return an array of - observations, or a tuple of observation arrays. - If step_async is still doing work, that work will - be cancelled and step_wait() should not be called - until step_async() is invoked again. - """ - for remote in self.remotes: - remote.send(('reset', None)) - return np.stack([remote.recv() for remote in self.remotes]) - - def _reset_task(self): - for remote in self.remotes: - remote.send(('reset_task', None)) - return np.stack([remote.recv() for remote in self.remotes]) - - def close(self): - if self.closed: - return - if self.waiting: - for remote in self.remotes: - remote.recv() - for remote in self.remotes: - remote.send(('close', None)) - for p in self.ps: - p.join() - self.closed = True - - def __len__(self): - return self.nenvs - - def step(self, actions): - self._step_async(actions) - return self._step_wait() - - -class Monitor(gym.Wrapper): - - def __init__(self, env, info_keywords=None): - super(Monitor, self).__init__(env) - self._monitor_rewards = None - self._info_keywords = info_keywords or [] - - def reset(self, **kwargs): - self._monitor_rewards = [] - return self.env.reset(**kwargs) - - def step(self, action): - o_, r, done, info = self.env.step(action) - self._monitor_rewards.append(r) - if done: - info['episode'] = { - 'r': sum(self._monitor_rewards), - 'l': len(self._monitor_rewards) - } - for keyword in self._info_keywords: - info['episode'][keyword] = info[keyword] - return o_, r, done, info - - -class NormalizedActions(gym.ActionWrapper): - - def _action(self, action): - low = self.action_space.low - high = self.action_space.high - - action = low + (action + 1.0) * 0.5 * (high - low) - action = np.clip(action, low, high) - - return action - - def _reverse_action(self, action): - low = self.action_space.low - high = self.action_space.high - - action = 2 * (action - low) / (high - low) - 1 - action = np.clip(action, low, high) - - return action - - -def close_env(env): - """ - close environment or environment list - """ - try: - env.close() - except: - pass - try: - for e in env: - e.close() - except: - pass +"""Env wrappers +Most common wrappers can be checked from following links for usage: + +`https://pypi.org/project/gym-vec-env` + +`https://github.com/openai/baselines/blob/master/baselines/common/wrappers.py` +""" +from collections import deque +from functools import partial +from multiprocessing import Pipe, Process, cpu_count +from sys import platform + +import cv2 +import gym +import numpy as np +from gym import spaces +from gym.wrappers import FlattenDictWrapper + +from rlzoo.common.env_list import get_envlist + +__all__ = ( + 'build_env', # build env + 'TimeLimit', # Time limit wrapper + 'NoopResetEnv', # Run random number of no-ops on reset + 'FireResetEnv', # Reset wrapper for envs with fire action + 
'EpisodicLifeEnv', # end-of-life == end-of-episode wrapper + 'MaxAndSkipEnv', # skip frame wrapper + 'ClipRewardEnv', # clip reward wrapper + 'WarpFrame', # warp observation wrapper + 'FrameStack', # stack frame wrapper + 'LazyFrames', # lazy store wrapper + 'RewardShaping', # reward shaping + 'SubprocVecEnv', # vectorized env wrapper + 'VecFrameStack', # stack frames in vectorized env + 'Monitor', # Episode reward and length monitor + 'NormalizedActions', # normalized action to actual space + 'DmObsTrans', # translate observations in dm_control environments +) +cv2.ocl.setUseOpenCL(False) + + +def build_env(env_id, env_type, vectorized=False, + seed=0, reward_shaping=None, nenv=1, **kwargs): + """ + Build env based on options + + :param env_id: (str) environment id + :param env_type: (str) atari, classic_control, box2d + :param vectorized: (bool) whether sampling parrallel + :param seed: (int) random seed for env + :param reward_shaping: (callable) callable function for reward shaping + :param nenv: (int) how many processes will be used in sampling + :param kwargs: (dict) + :param max_episode_steps: (int) the maximum episode steps + """ + nenv = nenv or cpu_count() // (1 + (platform == 'darwin')) + stack = env_type == 'atari' + if nenv > 1: + if vectorized: + env = _make_vec_env(env_id, env_type, nenv, seed, + reward_shaping, stack, **kwargs) + else: + env = [] + for _ in range(nenv): + single_env = _make_env(env_id, env_type, seed, + reward_shaping, stack, **kwargs) + env.append(single_env) # get env as a list of same single env + + else: + env = _make_env(env_id, env_type, seed, + reward_shaping, stack, **kwargs) + + return env + + +def check_name_in_list(env_id, env_type): + """ Check if env_id exists in the env_type list """ + env_list = get_envlist(env_type) + if env_id not in env_list: + print('Env ID {:s} Not Found In {:s}!'.format(env_id, env_type)) + else: + print('Env ID {:s} Exists!'.format(env_id)) + + +def _make_env(env_id, env_type, seed, reward_shaping, frame_stack, **kwargs): + """Make single env""" + check_name_in_list(env_id, env_type) # check existence of env_id in env_type + if env_type == 'atari': + env = gym.make(env_id) + env = NoopResetEnv(env, noop_max=30) + if 'NoFrameskip' in env.spec.id: + env = MaxAndSkipEnv(env, skip=4) + env = Monitor(env) + # deepmind wrap + env = EpisodicLifeEnv(env) + if 'FIRE' in env.unwrapped.get_action_meanings(): + env = FireResetEnv(env) + env = WarpFrame(env) + env = ClipRewardEnv(env) + if frame_stack: + env = FrameStack(env, 4) + elif env_type in ['classic_control', 'box2d', 'mujoco']: + env = gym.make(env_id).unwrapped + max_episode_steps = kwargs.get('max_episode_steps') + if max_episode_steps is not None: + env = TimeLimit(env.unwrapped, max_episode_steps) + env = Monitor(env) + elif env_type == 'robotics': + env = gym.make(env_id) + env = FlattenDictWrapper(env, ['observation', 'desired_goal']) + env = Monitor(env, info_keywords=('is_success',)) + elif env_type == 'dm_control': + env = gym.make('dm2gym:' + env_id, environment_kwargs={'flat_observation': True}) + env = DmObsTrans(env) + elif env_type == 'rlbench': + from rlzoo.common.build_rlbench_env import RLBenchEnv + state_type = kwargs.get('state_type') + env = RLBenchEnv(env_id) if state_type is None else RLBenchEnv(env_id, state_type) + else: + raise NotImplementedError + + if reward_shaping is not None: + if callable(reward_shaping): + env = RewardShaping(env, reward_shaping) + else: + raise ValueError('reward_shaping parameter must be callable') + env.seed(seed) + 
return env + + +def _make_vec_env(env_id, env_type, nenv, seed, + reward_shaping, frame_stack, **kwargs): + """Make vectorized env""" + env = SubprocVecEnv([partial( + _make_env, env_id, env_type, seed + i, reward_shaping, False, **kwargs + ) for i in range(nenv)]) + if frame_stack: + env = VecFrameStack(env, 4) + return env + + +class DmObsTrans(gym.Wrapper): + """ Observation process for DeepMind Control Suite environments """ + + def __init__(self, env): + self.env = env + super(DmObsTrans, self).__init__(env) + self.__need_trans = False + if isinstance(self.observation_space, gym.spaces.dict.Dict): + self.observation_space = self.observation_space['observations'] + self.__need_trans = True + + def step(self, ac): + observation, reward, done, info = self.env.step(ac) + if self.__need_trans: + observation = observation['observations'] + return observation, reward, done, info + + def reset(self, **kwargs): + observation = self.env.reset(**kwargs) + if self.__need_trans: + observation = observation['observations'] + return observation + + +class TimeLimit(gym.Wrapper): + + def __init__(self, env, max_episode_steps=None): + self.env = env + super(TimeLimit, self).__init__(env) + self._max_episode_steps = max_episode_steps + self._elapsed_steps = 0 + + def step(self, ac): + observation, reward, done, info = self.env.step(ac) + self._elapsed_steps += 1 + if self._elapsed_steps >= self._max_episode_steps: + done = True + info['TimeLimit.truncated'] = True + return observation, reward, done, info + + def reset(self, **kwargs): + self._elapsed_steps = 0 + return self.env.reset(**kwargs) + + +class NoopResetEnv(gym.Wrapper): + + def __init__(self, env, noop_max=30): + """Sample initial states by taking random number of no-ops on reset. + No-op is assumed to be action 0. + """ + super(NoopResetEnv, self).__init__(env) + self.noop_max = noop_max + self.override_num_noops = None + self.noop_action = 0 + assert env.unwrapped.get_action_meanings()[0] == 'NOOP' + + def reset(self, **kwargs): + """ Do no-op action for a number of steps in [1, noop_max].""" + self.env.reset(**kwargs) + if self.override_num_noops is not None: + noops = self.override_num_noops + else: + noops = self.unwrapped.np_random.randint(1, self.noop_max + 1) + assert noops > 0 + obs = None + for _ in range(noops): + obs, _, done, _ = self.env.step(self.noop_action) + if done: + obs = self.env.reset(**kwargs) + return obs + + def step(self, ac): + return self.env.step(ac) + + +class FireResetEnv(gym.Wrapper): + + def __init__(self, env): + """Take action on reset for environments that are fixed until firing.""" + super(FireResetEnv, self).__init__(env) + assert env.unwrapped.get_action_meanings()[1] == 'FIRE' + assert len(env.unwrapped.get_action_meanings()) >= 3 + + def reset(self, **kwargs): + self.env.reset(**kwargs) + obs, _, done, _ = self.env.step(1) + if done: + self.env.reset(**kwargs) + obs, _, done, _ = self.env.step(2) + if done: + self.env.reset(**kwargs) + return obs + + def step(self, ac): + return self.env.step(ac) + + +class EpisodicLifeEnv(gym.Wrapper): + + def __init__(self, env): + """Make end-of-life == end-of-episode, but only reset on true game over. + Done by DeepMind for the DQN and co. since it helps value estimation. 
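For reference, a minimal usage sketch of the build_env helper added above. It is illustrative only and not part of the diff: the environment ids, nenv=4 and the 0.1 reward scale are arbitrary example values, and the Atari path assumes atari-py and opencv-python are installed.

# Illustrative sketch of the build_env API introduced in this patch.
from rlzoo.common.env_wrappers import build_env, close_env

# Single classic-control env: TimeLimit + Monitor + reward scaling.
env = build_env('CartPole-v0', 'classic_control',
                reward_shaping=lambda r: 0.1 * r, max_episode_steps=200)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
close_env(env)

# Vectorized Atari sampling: 4 subprocess workers, 4 stacked frames.
venv = build_env('PongNoFrameskip-v4', 'atari', vectorized=True, nenv=4)
obs_batch = venv.reset()                       # (4, 84, 84, 4) uint8 observations
actions = [venv.action_space.sample() for _ in range(4)]
obs_batch, rewards, dones, infos = venv.step(actions)
venv.env.close()                               # the underlying SubprocVecEnv owns the worker processes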
+ """ + super(EpisodicLifeEnv, self).__init__(env) + self.lives = 0 + self.was_real_done = True + + def step(self, action): + obs, reward, done, info = self.env.step(action) + self.was_real_done = done + # check current lives, make loss of life terminal, + # then update lives to handle bonus lives + lives = self.env.unwrapped.ale.lives() + if 0 < lives < self.lives: + # for Qbert sometimes we stay in lives == 0 condition for a few + # frames so it's important to keep lives > 0, so that we only reset + # once the environment advertises done. + done = True + self.lives = lives + return obs, reward, done, info + + def reset(self, **kwargs): + """Reset only when lives are exhausted. + This way all states are still reachable even though lives are episodic, + and the learner need not know about any of this behind-the-scenes. + """ + if self.was_real_done: + obs = self.env.reset(**kwargs) + else: + # no-op step to advance from terminal/lost life state + obs, _, _, _ = self.env.step(0) + self.lives = self.env.unwrapped.ale.lives() + return obs + + +class MaxAndSkipEnv(gym.Wrapper): + + def __init__(self, env, skip=4): + """Return only every `skip`-th frame""" + super(MaxAndSkipEnv, self).__init__(env) + # most recent raw observations (for max pooling across time steps) + shape = (2,) + env.observation_space.shape + self._obs_buffer = np.zeros(shape, dtype=np.uint8) + self._skip = skip + + def step(self, action): + """Repeat action, sum reward, and max over last observations.""" + total_reward = 0.0 + done = info = None + for i in range(self._skip): + obs, reward, done, info = self.env.step(action) + if i == self._skip - 2: + self._obs_buffer[0] = obs + if i == self._skip - 1: + self._obs_buffer[1] = obs + total_reward += reward + if done: + break + # Note that the observation on the done=True frame doesn't matter + max_frame = self._obs_buffer.max(axis=0) + + return max_frame, total_reward, done, info + + def reset(self, **kwargs): + return self.env.reset(**kwargs) + + +class ClipRewardEnv(gym.RewardWrapper): + + def __init__(self, env): + super(ClipRewardEnv, self).__init__(env) + + def reward(self, reward): + """Bin reward to {+1, 0, -1} by its sign.""" + return np.sign(reward) + + +class WarpFrame(gym.ObservationWrapper): + + def __init__(self, env, width=84, height=84, grayscale=True): + """Warp frames to 84x84 as done in the Nature paper and later work.""" + super(WarpFrame, self).__init__(env) + self.width = width + self.height = height + self.grayscale = grayscale + shape = (self.height, self.width, 1 if self.grayscale else 3) + self.observation_space = spaces.Box(low=0, high=255, shape=shape, dtype=np.uint8) + + def observation(self, frame): + if self.grayscale: + frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY) + size = (self.width, self.height) + frame = cv2.resize(frame, size, interpolation=cv2.INTER_AREA) + if self.grayscale: + frame = np.expand_dims(frame, -1) + return frame + + +class FrameStack(gym.Wrapper): + + def __init__(self, env, k): + """Stack k last frames. + Returns lazy array, which is much more memory efficient. 
+ See Also `LazyFrames` + """ + super(FrameStack, self).__init__(env) + self.k = k + self.frames = deque([], maxlen=k) + shp = env.observation_space.shape + shape = shp[:-1] + (shp[-1] * k,) + self.observation_space = spaces.Box(low=0, high=255, shape=shape, dtype=env.observation_space.dtype) + + def reset(self): + ob = self.env.reset() + for _ in range(self.k): + self.frames.append(ob) + return np.asarray(self._get_ob()) + + def step(self, action): + ob, reward, done, info = self.env.step(action) + self.frames.append(ob) + return np.asarray(self._get_ob()), reward, done, info + + def _get_ob(self): + assert len(self.frames) == self.k + return LazyFrames(list(self.frames)) + + +class LazyFrames(object): + + def __init__(self, frames): + """This object ensures that common frames between the observations are + only stored once. It exists purely to optimize memory usage which can be + huge for DQN's 1M frames replay buffers. + + This object should only be converted to numpy array before being passed + to the model. You'd not believe how complex the previous solution was. + """ + self._frames = frames + self._out = None + + def _force(self): + if self._out is None: + self._out = np.concatenate(self._frames, axis=-1) + self._frames = None + return self._out + + def __array__(self, dtype=None): + out = self._force() + if dtype is not None: + out = out.astype(dtype) + return out + + def __len__(self): + return len(self._force()) + + def __getitem__(self, i): + return self._force()[i] + + +class RewardShaping(gym.RewardWrapper): + """Shaping the reward + For reward scale, func can be `lambda r: r * scale` + """ + + def __init__(self, env, func): + super(RewardShaping, self).__init__(env) + self.func = func + + def reward(self, reward): + return self.func(reward) + + +class VecFrameStack(object): + + def __init__(self, env, k): + self.env = env + self.k = k + self.action_space = env.action_space + self.frames = deque([], maxlen=k) + shp = env.observation_space.shape + shape = shp[:-1] + (shp[-1] * k,) + self.observation_space = spaces.Box(low=0, high=255, shape=shape, dtype=env.observation_space.dtype) + + def reset(self): + ob = self.env.reset() + for _ in range(self.k): + self.frames.append(ob) + return np.asarray(self._get_ob()) + + def step(self, action): + ob, reward, done, info = self.env.step(action) + self.frames.append(ob) + return np.asarray(self._get_ob()), reward, done, info + + def _get_ob(self): + assert len(self.frames) == self.k + return LazyFrames(list(self.frames)) + + +def _worker(remote, parent_remote, env_fn_wrapper): + parent_remote.close() + env = env_fn_wrapper.x() + while True: + cmd, data = remote.recv() + if cmd == 'step': + ob, reward, done, info = env.step(data) + if done: + ob = env.reset() + remote.send((ob, reward, done, info)) + elif cmd == 'reset': + ob = env.reset() + remote.send(ob) + elif cmd == 'reset_task': + ob = env._reset_task() + remote.send(ob) + elif cmd == 'close': + remote.close() + break + elif cmd == 'get_spaces': + remote.send((env.observation_space, env.action_space)) + else: + raise NotImplementedError + + +class CloudpickleWrapper(object): + """ + Uses cloudpickle to serialize contents + """ + + def __init__(self, x): + self.x = x + + def __getstate__(self): + import cloudpickle + return cloudpickle.dumps(self.x) + + def __setstate__(self, ob): + import pickle + self.x = pickle.loads(ob) + + +class SubprocVecEnv(object): + + def __init__(self, env_fns): + """ + envs: list of gym environments to run in subprocesses + """ + self.num_envs = 
len(env_fns) + + self.waiting = False + self.closed = False + nenvs = len(env_fns) + self.nenvs = nenvs + self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)]) + zipped_args = zip(self.work_remotes, self.remotes, env_fns) + self.ps = [ + Process(target=_worker, args=(work_remote, remote, CloudpickleWrapper(env_fn))) + for (work_remote, remote, env_fn) in zipped_args + ] + + for p in self.ps: + # if the main process crashes, we should not cause things to hang + p.daemon = True + p.start() + for remote in self.work_remotes: + remote.close() + + self.remotes[0].send(('get_spaces', None)) + observation_space, action_space = self.remotes[0].recv() + self.observation_space = observation_space + self.action_space = action_space + + def _step_async(self, actions): + """ + Tell all the environments to start taking a step + with the given actions. + Call step_wait() to get the results of the step. + You should not call this if a step_async run is + already pending. + """ + for remote, action in zip(self.remotes, actions): + remote.send(('step', action)) + self.waiting = True + + def _step_wait(self): + """ + Wait for the step taken with step_async(). + Returns (obs, rews, dones, infos): + - obs: an array of observations, or a tuple of + arrays of observations. + - rews: an array of rewards + - dones: an array of "episode done" booleans + - infos: a sequence of info objects + """ + results = [remote.recv() for remote in self.remotes] + self.waiting = False + obs, rews, dones, infos = zip(*results) + return np.stack(obs), np.stack(rews), np.stack(dones), infos + + def reset(self): + """ + Reset all the environments and return an array of + observations, or a tuple of observation arrays. + If step_async is still doing work, that work will + be cancelled and step_wait() should not be called + until step_async() is invoked again. 
+ """ + for remote in self.remotes: + remote.send(('reset', None)) + return np.stack([remote.recv() for remote in self.remotes]) + + def _reset_task(self): + for remote in self.remotes: + remote.send(('reset_task', None)) + return np.stack([remote.recv() for remote in self.remotes]) + + def close(self): + if self.closed: + return + if self.waiting: + for remote in self.remotes: + remote.recv() + for remote in self.remotes: + remote.send(('close', None)) + for p in self.ps: + p.join() + self.closed = True + + def __len__(self): + return self.nenvs + + def step(self, actions): + self._step_async(actions) + return self._step_wait() + + +class Monitor(gym.Wrapper): + + def __init__(self, env, info_keywords=None): + super(Monitor, self).__init__(env) + self._monitor_rewards = None + self._info_keywords = info_keywords or [] + + def reset(self, **kwargs): + self._monitor_rewards = [] + return self.env.reset(**kwargs) + + def step(self, action): + o_, r, done, info = self.env.step(action) + self._monitor_rewards.append(r) + if done: + info['episode'] = { + 'r': sum(self._monitor_rewards), + 'l': len(self._monitor_rewards) + } + for keyword in self._info_keywords: + info['episode'][keyword] = info[keyword] + return o_, r, done, info + + +class NormalizedActions(gym.ActionWrapper): + + def _action(self, action): + low = self.action_space.low + high = self.action_space.high + + action = low + (action + 1.0) * 0.5 * (high - low) + action = np.clip(action, low, high) + + return action + + def _reverse_action(self, action): + low = self.action_space.low + high = self.action_space.high + + action = 2 * (action - low) / (high - low) - 1 + action = np.clip(action, low, high) + + return action + + +def close_env(env): + """ + close environment or environment list + """ + try: + env.close() + except: + pass + try: + for e in env: + e.close() + except: + pass diff --git a/rlzoo/common/math_utils.py b/rlzoo/common/math_utils.py old mode 100644 new mode 100755 index 5fef326..c9fad08 --- a/rlzoo/common/math_utils.py +++ b/rlzoo/common/math_utils.py @@ -1,15 +1,15 @@ -""" -Functions for mathematics utilization. - -# Requirements -tensorflow==2.0.0a0 -tensorlayer==2.0.1 - -""" - - -def flatten_dims(shapes): # will be moved to common - dim = 1 - for s in shapes: - dim *= s - return dim +""" +Functions for mathematics utilization. + +# Requirements +tensorflow==2.0.0a0 +tensorlayer==2.0.1 + +""" + + +def flatten_dims(shapes): # will be moved to common + dim = 1 + for s in shapes: + dim *= s + return dim diff --git a/rlzoo/common/policy_networks.py b/rlzoo/common/policy_networks.py old mode 100644 new mode 100755 index c89642e..cc3d773 --- a/rlzoo/common/policy_networks.py +++ b/rlzoo/common/policy_networks.py @@ -1,341 +1,366 @@ -""" -Functions for utilization. 
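As a small, hedged sketch (not part of the patch) of how the Monitor wrapper defined in the env_wrappers diff above surfaces per-episode statistics: it attaches the summed return and episode length to the info dict of the terminal step only. The 500-step budget and the random policy below are arbitrary example choices.

# Illustrative only: reading the episode statistics that Monitor attaches on done.
import gym
from rlzoo.common.env_wrappers import Monitor

env = Monitor(gym.make('CartPole-v0'))
obs = env.reset()
for _ in range(500):                               # arbitrary step budget for the demo
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        # Monitor stores the summed return 'r' and episode length 'l'
        # in info['episode'] on the terminal step.
        print('return={r}, length={l}'.format(**info['episode']))
        obs = env.reset()
env.close()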
- -# Requirements -tensorflow==2.0.0a0 -tensorlayer==2.0.1 - -""" -import copy -import numpy as np -import tensorlayer as tl -from tensorlayer.models import Model - -from rlzoo.common.basic_nets import * -from rlzoo.common.distributions import make_dist - - -class StochasticContinuousPolicyNetwork(Model): - def __init__(self, state_shape, action_shape, hidden_dim_list, w_init=tf.keras.initializers.glorot_normal(), - activation=tf.nn.relu, output_activation=None, log_std_min=-20, log_std_max=2, trainable=True): - """ - Stochastic continuous policy network with multiple fully-connected layers or convolutional layers (according to state shape) - - :param state_shape: (tuple[int]) shape of the state, for example, (state_dim, ) for single-dimensional state - :param action_shape: (tuple[int]) shape of the action, for example, (action_dim, ) for single-dimensional action - :param hidden_dim_list: (list[int]) a list of dimensions of hidden layers - :param w_init: (callable) weights initialization - :param activation: (callable) activation function - :param output_activation: (callable or None) output activation function - :param log_std_min: (float) lower bound of standard deviation of action - :param log_std_max: (float) upper bound of standard deviation of action - :param trainable: (bool) set training and evaluation mode - """ - - action_dim = action_shape[0] - if len(state_shape) == 1: - with tf.name_scope('MLP'): - state_dim = state_shape[0] - inputs, l = MLP(state_dim, hidden_dim_list, w_init, activation) - else: - with tf.name_scope('CNN'): - inputs, l = CNN(state_shape, conv_kwargs=None) - with tf.name_scope('Output_Mean'): - mean_linear = Dense(n_units=action_dim, act=output_activation, W_init=w_init)(l) - with tf.name_scope('Output_Std'): - log_std_linear = Dense(n_units=action_dim, act=output_activation, W_init=w_init)(l) - log_std_linear = tl.layers.Lambda(lambda x: tf.clip_by_value(x, log_std_min, log_std_max), name='Lambda')( - log_std_linear) - - super().__init__(inputs=inputs, outputs=[mean_linear, log_std_linear]) - if trainable: - self.train() - else: - self.eval() - - -class DeterministicContinuousPolicyNetwork(Model): - def __init__(self, state_shape, action_shape, hidden_dim_list, w_init=tf.keras.initializers.glorot_normal(), \ - activation=tf.nn.relu, output_activation=tf.nn.tanh, trainable=True): - """ - Deterministic continuous policy network with multiple fully-connected layers or convolutional layers (according to state shape) - - :param state_shape: (tuple[int]) shape of the state, for example, (state_dim, ) for single-dimensional state - :param action_shape: (tuple[int]) shape of the action, for example, (action_dim, ) for single-dimensional action - :param hidden_dim_list: (list[int]) a list of dimensions of hidden layers - :param w_init: (callable) weights initialization - :param activation: (callable) activation function - :param output_activation: (callable or None) output activation function - :param trainable: (bool) set training and evaluation mode - """ - - action_dim = action_shape[0] - - if len(state_shape) == 1: - with tf.name_scope('MLP'): - state_dim = state_shape[0] - inputs, l = MLP(state_dim, hidden_dim_list, w_init, activation) - else: - with tf.name_scope('CNN'): - inputs, l = CNN(state_shape, conv_kwargs=None) - - with tf.name_scope('Output'): - outputs = Dense(n_units=action_dim, act=output_activation, W_init=w_init)(l) - - super().__init__(inputs=inputs, outputs=outputs) - if trainable: - self.train() - else: - self.eval() - - -class 
DeterministicPolicyNetwork(Model): - def __init__(self, state_space, action_space, hidden_dim_list, w_init=tf.keras.initializers.glorot_normal(), - activation=tf.nn.relu, output_activation=tf.nn.tanh, trainable=True, name=None): - """ - Deterministic continuous/discrete policy network with multiple fully-connected layers - - :param state_space: (gym.spaces) space of the state from gym environments - :param action_space: (gym.spaces) space of the action from gym environments - :param hidden_dim_list: (list[int]) a list of dimensions of hidden layers - :param w_init: (callable) weights initialization - :param activation: (callable) activation function - :param output_activation: (callable or None) output activation function - :param trainable: (bool) set training and evaluation mode - """ - self._state_space, self._action_space = state_space, action_space - - if isinstance(self._action_space, spaces.Discrete): - self._action_shape = self._action_space.n, - - elif isinstance(self._action_space, spaces.Box): - assert len(self._action_space.shape) == 1 - self._action_shape = self._action_space.shape - - assert all(self._action_space.low < self._action_space.high) - action_bounds = [self._action_space.low, self._action_space.high] - self._action_mean = np.mean(action_bounds, 0) - self._action_scale = action_bounds[1] - self._action_mean - else: - raise NotImplementedError - - obs_inputs, current_layer, self._state_shape = CreateInputLayer(state_space) - - if isinstance(state_space, spaces.Dict): - assert isinstance(obs_inputs, dict) - assert isinstance(current_layer, dict) - self.input_dict = obs_inputs - obs_inputs = list(obs_inputs.values()) - current_layer = tl.layers.Concat(-1)(list(current_layer.values())) - - with tf.name_scope('MLP'): - for i, dim in enumerate(hidden_dim_list): - current_layer = Dense(n_units=dim, act=activation, W_init=w_init, name='hidden_layer%d' % (i + 1))(current_layer) - - with tf.name_scope('Output'): - outputs = Dense(n_units=self._action_shape[0], act=output_activation, W_init=w_init, name='outputs')(current_layer) - - if isinstance(self._action_space, spaces.Discrete): - outputs = tl.layers.Lambda(lambda x: tf.argmax(tf.nn.softmax(x), axis=-1))(outputs) - elif isinstance(self._action_space, spaces.Box): - outputs = tl.layers.Lambda(lambda x: x * self._action_scale + self._action_mean)(outputs) - outputs = tl.layers.Lambda(lambda x: tf.clip_by_value(x, self._action_space.low, - self._action_space.high))(outputs) - - # make model - super().__init__(inputs=obs_inputs, outputs=outputs, name=name) - print('Policy network created') - if trainable: - self.train() - else: - self.eval() - - def __call__(self, states, *args, **kwargs): - if isinstance(self._state_space, spaces.Dict): - states = np.array(states).transpose([1, 0]).tolist() - else: - if np.shape(states)[1:] != self.state_shape: - raise ValueError( - 'Input state shape error. 
shape can be {} but your shape is {}'.format((None,) + self.state_shape, - np.shape(states))) - states = np.array(states, dtype=np.float32) - return super().__call__(states, *args, **kwargs) - - def random_sample(self): - """ generate random actions for exploration """ - - if isinstance(self._action_space, spaces.Discrete): - return np.random.choice(self._action_space.n, 1)[0] - else: - return np.random.uniform(self._action_space.low, self._action_space.high, self._action_shape) - - @property - def state_space(self): - return copy.deepcopy(self._state_space) - - @property - def action_space(self): - return copy.deepcopy(self._action_space) - - @property - def state_shape(self): - return copy.deepcopy(self._state_shape) - - @property - def action_shape(self): - return copy.deepcopy(self._action_shape) - - -class StochasticPolicyNetwork(Model): - def __init__(self, state_space, action_space, hidden_dim_list, w_init=tf.keras.initializers.glorot_normal(), - activation=tf.nn.relu, output_activation=tf.nn.tanh, log_std_min=-20, log_std_max=2, trainable=True, - name=None, state_conditioned=False): - """ - Stochastic continuous/discrete policy network with multiple fully-connected layers - - :param state_space: (gym.spaces) space of the state from gym environments - :param action_space: (gym.spaces) space of the action from gym environments - :param hidden_dim_list: (list[int]) a list of dimensions of hidden layers - :param w_init: (callable) weights initialization - :param activation: (callable) activation function - :param output_activation: (callable or None) output activation function - :param log_std_min: (float) lower bound of standard deviation of action - :param log_std_max: (float) upper bound of standard deviation of action - :param trainable: (bool) set training and evaluation mode - - Tips: We recommend to use tf.nn.tanh for output_activation, especially for continuous action space, \ - to ensure the final action range is exactly the same as declared in action space after action normalization. 
- """ - self._state_space, self._action_space = state_space, action_space - - if isinstance(self._action_space, spaces.Discrete): - self._action_shape = self._action_space.n, - self.policy_dist = make_dist(self._action_space) # create action distribution - elif isinstance(self._action_space, spaces.Box): # normalize action - assert len(self._action_space.shape) == 1 - self._action_shape = self._action_space.shape - - assert all(self._action_space.low < self._action_space.high) - action_bounds = [self._action_space.low, self._action_space.high] - self._action_mean = np.mean(action_bounds, 0) - self._action_scale = action_bounds[1] - self._action_mean - - self.policy_dist = make_dist(self._action_space) # create action distribution - self.policy_dist.action_mean = self._action_mean - self.policy_dist.action_scale = self._action_scale - else: - raise NotImplementedError - - self._state_conditioned = state_conditioned - - obs_inputs, current_layer, self._state_shape = CreateInputLayer(state_space) - - # build structure - if isinstance(state_space, spaces.Dict): - assert isinstance(obs_inputs, dict) - assert isinstance(current_layer, dict) - self.input_dict = obs_inputs - obs_inputs = list(obs_inputs.values()) - current_layer = tl.layers.Concat(-1)(list(current_layer.values())) - - with tf.name_scope('MLP'): - for i, dim in enumerate(hidden_dim_list): - current_layer = Dense(n_units=dim, act=activation, - W_init=w_init, name='hidden_layer%d' % (i + 1))(current_layer) - - with tf.name_scope('Output'): - if isinstance(action_space, spaces.Discrete): - outputs = Dense(n_units=self.policy_dist.ndim, act=output_activation, W_init=w_init)(current_layer) - elif isinstance(action_space, spaces.Box): - mu = Dense(n_units=self.policy_dist.ndim, act=output_activation, W_init=w_init)(current_layer) - - if self._state_conditioned: - log_sigma = Dense(n_units=self.policy_dist.ndim, act=None, W_init=w_init)(current_layer) - log_sigma = tl.layers.Lambda(lambda x: tf.clip_by_value(x, log_std_min, log_std_max))(log_sigma) - outputs = [mu, log_sigma] - else: - outputs = mu - self._log_sigma = tf.Variable(np.zeros(self.policy_dist.ndim, dtype=np.float32)) - else: - raise NotImplementedError - - # make model - super().__init__(inputs=obs_inputs, outputs=outputs, name=name) - if isinstance(self._action_space, spaces.Box) and not self._state_conditioned: - self.trainable_weights.append(self._log_sigma) - - if trainable: - self.train() - else: - self.eval() - - def __call__(self, states, *args, greedy=False, **kwargs): - if isinstance(self._state_space, spaces.Dict): - states = np.array(states).transpose([1, 0]).tolist() - else: - if np.shape(states)[1:] != self.state_shape: - raise ValueError( - 'Input state shape error. 
Shape should be {} but your shape is {}'.format((None,) + self.state_shape, - np.shape(states))) - states = np.array(states, dtype=np.float32) - params = super().__call__(states, *args, **kwargs) - if isinstance(self._action_space, spaces.Box) and not self._state_conditioned: - params = params, self._log_sigma - self.policy_dist.set_param(params) - if greedy: - result = self.policy_dist.greedy_sample() - else: - result = self.policy_dist.sample() - - if isinstance(self._action_space, spaces.Box): # normalize action - if greedy: - result = result * self._action_scale + self._action_mean - else: - result, explore = result - result = result * self._action_scale + self._action_mean + explore - - result = tf.clip_by_value(result, self._action_space.low, self._action_space.high) - return result - - def random_sample(self): - """ generate random actions for exploration """ - - if isinstance(self._action_space, spaces.Discrete): - return np.random.choice(self._action_space.n, 1)[0] - else: - return np.random.uniform(self._action_space.low, self._action_space.high, self._action_shape) - - @property - def state_space(self): - return copy.deepcopy(self._state_space) - - @property - def action_space(self): - return copy.deepcopy(self._action_space) - - @property - def state_shape(self): - return copy.deepcopy(self._state_shape) - - @property - def action_shape(self): - return copy.deepcopy(self._action_shape) - - -if __name__ == '__main__': - import gym - from rlzoo.common.env_wrappers import * - from rlzoo.common.value_networks import * - EnvName = 'CartPole-v0' - EnvType = 'classic_control' - - # env = build_env(EnvName, EnvType, state_type='vision', nenv=2) - # env = build_env(EnvName, EnvType, state_type='vision') - env = build_env(EnvName, EnvType) - s = env.reset() - print(s) - - # policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, [64, 64]) - policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, [64, 64]) - a = policy_net([s, s]) - print(a) +""" +Functions for utilization. 
+ +# Requirements +tensorflow==2.0.0a0 +tensorlayer==2.0.1 + +""" +import copy +import numpy as np +import tensorlayer as tl +from tensorlayer.models import Model + +from rlzoo.common.basic_nets import * +from rlzoo.common.distributions import make_dist + + +class StochasticContinuousPolicyNetwork(Model): + def __init__(self, state_shape, action_shape, hidden_dim_list, w_init=tf.keras.initializers.glorot_normal(), + activation=tf.nn.relu, output_activation=None, log_std_min=-20, log_std_max=2, trainable=True): + """ + Stochastic continuous policy network with multiple fully-connected layers or convolutional layers (according to state shape) + + :param state_shape: (tuple[int]) shape of the state, for example, (state_dim, ) for single-dimensional state + :param action_shape: (tuple[int]) shape of the action, for example, (action_dim, ) for single-dimensional action + :param hidden_dim_list: (list[int]) a list of dimensions of hidden layers + :param w_init: (callable) weights initialization + :param activation: (callable) activation function + :param output_activation: (callable or None) output activation function + :param log_std_min: (float) lower bound of standard deviation of action + :param log_std_max: (float) upper bound of standard deviation of action + :param trainable: (bool) set training and evaluation mode + """ + + action_dim = action_shape[0] + if len(state_shape) == 1: + with tf.name_scope('MLP'): + state_dim = state_shape[0] + inputs, l = MLP(state_dim, hidden_dim_list, w_init, activation) + else: + with tf.name_scope('CNN'): + inputs, l = CNN(state_shape, conv_kwargs=None) + with tf.name_scope('Output_Mean'): + mean_linear = Dense(n_units=action_dim, act=output_activation, W_init=w_init)(l) + with tf.name_scope('Output_Std'): + log_std_linear = Dense(n_units=action_dim, act=output_activation, W_init=w_init)(l) + log_std_linear = tl.layers.Lambda(lambda x: tf.clip_by_value(x, log_std_min, log_std_max), name='Lambda')( + log_std_linear) + + super().__init__(inputs=inputs, outputs=[mean_linear, log_std_linear]) + if trainable: + self.train() + else: + self.eval() + + +class DeterministicContinuousPolicyNetwork(Model): + def __init__(self, state_shape, action_shape, hidden_dim_list, w_init=tf.keras.initializers.glorot_normal(), \ + activation=tf.nn.relu, output_activation=tf.nn.tanh, trainable=True): + """ + Deterministic continuous policy network with multiple fully-connected layers or convolutional layers (according to state shape) + + :param state_shape: (tuple[int]) shape of the state, for example, (state_dim, ) for single-dimensional state + :param action_shape: (tuple[int]) shape of the action, for example, (action_dim, ) for single-dimensional action + :param hidden_dim_list: (list[int]) a list of dimensions of hidden layers + :param w_init: (callable) weights initialization + :param activation: (callable) activation function + :param output_activation: (callable or None) output activation function + :param trainable: (bool) set training and evaluation mode + """ + + action_dim = action_shape[0] + + if len(state_shape) == 1: + with tf.name_scope('MLP'): + state_dim = state_shape[0] + inputs, l = MLP(state_dim, hidden_dim_list, w_init, activation) + else: + with tf.name_scope('CNN'): + inputs, l = CNN(state_shape, conv_kwargs=None) + + with tf.name_scope('Output'): + outputs = Dense(n_units=action_dim, act=output_activation, W_init=w_init)(l) + + super().__init__(inputs=inputs, outputs=outputs) + if trainable: + self.train() + else: + self.eval() + + +class 
DeterministicPolicyNetwork(Model): + def __init__(self, state_space, action_space, hidden_dim_list, w_init=tf.keras.initializers.glorot_normal(), + activation=tf.nn.relu, output_activation=tf.nn.tanh, trainable=True, name=None): + """ + Deterministic continuous/discrete policy network with multiple fully-connected layers + + :param state_space: (gym.spaces) space of the state from gym environments + :param action_space: (gym.spaces) space of the action from gym environments + :param hidden_dim_list: (list[int]) a list of dimensions of hidden layers + :param w_init: (callable) weights initialization + :param activation: (callable) activation function + :param output_activation: (callable or None) output activation function + :param trainable: (bool) set training and evaluation mode + """ + self._state_space, self._action_space = state_space, action_space + + if isinstance(self._action_space, spaces.Discrete): + self._action_shape = self._action_space.n, + + elif isinstance(self._action_space, spaces.Box): + assert len(self._action_space.shape) == 1 + self._action_shape = self._action_space.shape + + assert all(self._action_space.low < self._action_space.high) + action_bounds = [self._action_space.low, self._action_space.high] + self._action_mean = np.mean(action_bounds, 0) + self._action_scale = action_bounds[1] - self._action_mean + else: + raise NotImplementedError + + obs_inputs, current_layer, self._state_shape = CreateInputLayer(state_space) + + if isinstance(state_space, spaces.Dict): + assert isinstance(obs_inputs, dict) + assert isinstance(current_layer, dict) + self.input_dict = obs_inputs + obs_inputs = list(obs_inputs.values()) + current_layer = tl.layers.Concat(-1)(list(current_layer.values())) + + with tf.name_scope('MLP'): + for i, dim in enumerate(hidden_dim_list): + current_layer = Dense(n_units=dim, act=activation, W_init=w_init, name='hidden_layer%d' % (i + 1))(current_layer) + + with tf.name_scope('Output'): + outputs = Dense(n_units=self._action_shape[0], act=output_activation, W_init=w_init, name='outputs')(current_layer) + + if isinstance(self._action_space, spaces.Discrete): + outputs = tl.layers.Lambda(lambda x: tf.argmax(tf.nn.softmax(x), axis=-1))(outputs) + elif isinstance(self._action_space, spaces.Box): + outputs = tl.layers.Lambda(lambda x: x * self._action_scale + self._action_mean)(outputs) + outputs = tl.layers.Lambda(lambda x: tf.clip_by_value(x, self._action_space.low, + self._action_space.high))(outputs) + + # make model + super().__init__(inputs=obs_inputs, outputs=outputs, name=name) + print('Policy network created') + if trainable: + self.train() + else: + self.eval() + + def __call__(self, states, *args, **kwargs): + if isinstance(self._state_space, spaces.Dict): + states = np.array(states).transpose([1, 0]).tolist() + else: + if np.shape(states)[1:] != self.state_shape: + raise ValueError( + 'Input state shape error. 
shape can be {} but your shape is {}'.format((None,) + self.state_shape, + np.shape(states))) + states = np.array(states, dtype=np.float32) + return super().__call__(states, *args, **kwargs) + + def random_sample(self): + """ generate random actions for exploration """ + + if isinstance(self._action_space, spaces.Discrete): + return np.random.choice(self._action_space.n, 1)[0] + else: + return np.random.uniform(self._action_space.low, self._action_space.high, self._action_shape) + + @property + def state_space(self): + return copy.deepcopy(self._state_space) + + @property + def action_space(self): + return copy.deepcopy(self._action_space) + + @property + def state_shape(self): + return copy.deepcopy(self._state_shape) + + @property + def action_shape(self): + return copy.deepcopy(self._action_shape) + + +class StochasticPolicyNetwork(Model): + def __init__(self, state_space, action_space, hidden_dim_list, w_init=tf.keras.initializers.glorot_normal(), + activation=tf.nn.relu, output_activation=tf.nn.tanh, log_std_min=-20, log_std_max=2, trainable=True, + name=None, state_conditioned=False): + """ + Stochastic continuous/discrete policy network with multiple fully-connected layers + + :param state_space: (gym.spaces) space of the state from gym environments + :param action_space: (gym.spaces) space of the action from gym environments + :param hidden_dim_list: (list[int]) a list of dimensions of hidden layers + :param w_init: (callable) weights initialization + :param activation: (callable) activation function + :param output_activation: (callable or None) output activation function + :param log_std_min: (float) lower bound of standard deviation of action + :param log_std_max: (float) upper bound of standard deviation of action + :param trainable: (bool) set training and evaluation mode + + Tips: We recommend to use tf.nn.tanh for output_activation, especially for continuous action space, \ + to ensure the final action range is exactly the same as declared in action space after action normalization. 
+ """ + self._state_space, self._action_space = state_space, action_space + + if isinstance(self._action_space, spaces.Discrete): + self._action_shape = self._action_space.n, + self.policy_dist = make_dist(self._action_space) # create action distribution + elif isinstance(self._action_space, spaces.Box): # normalize action + assert len(self._action_space.shape) == 1 + self._action_shape = self._action_space.shape + + assert all(self._action_space.low < self._action_space.high) + action_bounds = [self._action_space.low, self._action_space.high] + self._action_mean = np.mean(action_bounds, 0) + self._action_scale = action_bounds[1] - self._action_mean + + self.policy_dist = make_dist(self._action_space) # create action distribution + self.policy_dist.action_mean = self._action_mean + self.policy_dist.action_scale = self._action_scale + else: + raise NotImplementedError + + self._state_conditioned = state_conditioned + + obs_inputs, current_layer, self._state_shape = CreateInputLayer(state_space) + + # build structure + if isinstance(state_space, spaces.Dict): + assert isinstance(obs_inputs, dict) + assert isinstance(current_layer, dict) + self.input_dict = obs_inputs + obs_inputs = list(obs_inputs.values()) + current_layer = tl.layers.Concat(-1)(list(current_layer.values())) + + with tf.name_scope('MLP'): + for i, dim in enumerate(hidden_dim_list): + current_layer = Dense(n_units=dim, act=activation, + W_init=w_init, name='hidden_layer%d' % (i + 1))(current_layer) + + with tf.name_scope('Output'): + if isinstance(action_space, spaces.Discrete): + outputs = Dense(n_units=self.policy_dist.ndim, act=output_activation, W_init=w_init)(current_layer) + elif isinstance(action_space, spaces.Box): + mu = Dense(n_units=self.policy_dist.ndim, act=output_activation, W_init=w_init)(current_layer) + + if self._state_conditioned: + log_sigma = Dense(n_units=self.policy_dist.ndim, act=None, W_init=w_init)(current_layer) + log_sigma = tl.layers.Lambda(lambda x: tf.clip_by_value(x, log_std_min, log_std_max))(log_sigma) + outputs = [mu, log_sigma] + else: + outputs = mu + self._log_sigma = tf.Variable(np.zeros(self.policy_dist.ndim, dtype=np.float32)) + else: + raise NotImplementedError + + # make model + super().__init__(inputs=obs_inputs, outputs=outputs, name=name) + if isinstance(self._action_space, spaces.Box) and not self._state_conditioned: + self.trainable_weights.append(self._log_sigma) + + if trainable: + self.train() + else: + self.eval() + + def __call__(self, states, *args, greedy=False, **kwargs): + if isinstance(self._state_space, spaces.Dict): + states = np.array(states).transpose([1, 0]).tolist() + else: + if np.shape(states)[1:] != self.state_shape: + raise ValueError( + 'Input state shape error. 
Shape should be {} but your shape is {}'.format((None,) + self.state_shape, + np.shape(states))) + states = np.array(states, dtype=np.float32) + params = super().__call__(states, *args, **kwargs) + if isinstance(self._action_space, spaces.Box) and not self._state_conditioned: + params = params, self._log_sigma + self.policy_dist.set_param(params) + if greedy: + result = self.policy_dist.greedy_sample() + else: + result = self.policy_dist.sample() + + if isinstance(self._action_space, spaces.Box): # normalize action + if greedy: + result = result * self._action_scale + self._action_mean + else: + result, explore = result + result = result * self._action_scale + self._action_mean + explore + + result = tf.clip_by_value(result, self._action_space.low, self._action_space.high) + return result + + def random_sample(self): + """ generate random actions for exploration """ + + if isinstance(self._action_space, spaces.Discrete): + return np.random.choice(self._action_space.n, 1)[0] + else: + return np.random.uniform(self._action_space.low, self._action_space.high, self._action_shape) + + @property + def state_space(self): + return copy.deepcopy(self._state_space) + + @property + def action_space(self): + return copy.deepcopy(self._action_space) + + @property + def state_shape(self): + return copy.deepcopy(self._state_shape) + + @property + def action_shape(self): + return copy.deepcopy(self._action_shape) + + +if __name__ == '__main__': + import gym + from rlzoo.common.env_wrappers import * + from rlzoo.common.value_networks import * + # EnvName = 'PongNoFrameskip-v4' + # EnvName = 'Pong-v4' + # EnvType = 'atari' + + EnvName = 'CartPole-v0' + # EnvName = 'Pendulum-v0' + EnvType = 'classic_control' + + # EnvName = 'BipedalWalker-v2' + # EnvType = 'box2d' + + # EnvName = 'Ant-v2' + # EnvType = 'mujoco' + + # EnvName = 'FetchPush-v1' + # EnvType = 'robotics' + + # EnvName = 'FishSwim-v0' + # EnvType = 'dm_control' + + # EnvName = 'ReachTarget' + # EnvType = 'rlbench' + # env = build_env(EnvName, EnvType, nenv=2) + + # env = build_env(EnvName, EnvType, state_type='vision', nenv=2) + # env = build_env(EnvName, EnvType, state_type='vision') + env = build_env(EnvName, EnvType) + s = env.reset() + print(s) + + # policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space, [64, 64]) + policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space, [64, 64]) + a = policy_net([s, s]) + print(a) + # q_net = QNetwork(env.observation_space, env.action_space, [64, 64], state_only=False, dueling=False) + # q = q_net([[s], a]) + print('-'*100) + # print(q) diff --git a/rlzoo/common/utils.py b/rlzoo/common/utils.py old mode 100644 new mode 100755 index 9b1ad7b..fc68c4d --- a/rlzoo/common/utils.py +++ b/rlzoo/common/utils.py @@ -1,145 +1,145 @@ -""" -Functions for utilization. 
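The Box branch of StochasticPolicyNetwork.__call__ above maps tanh-activated network outputs in [-1, 1] back into the declared action bounds through a fixed affine transform, with the mean and scale computed in the constructor. A tiny numeric check of that arithmetic (greedy path), using [-2, 2] bounds purely as an example:

# Illustrative check of the action rescaling used by the policy networks above.
import numpy as np

low, high = np.array([-2.0]), np.array([2.0])      # example Box bounds
action_mean = np.mean([low, high], 0)              # -> [0.0], as in the constructor
action_scale = high - action_mean                  # -> [2.0]

for raw in (-1.0, 0.0, 0.5, 1.0):                  # tanh outputs span [-1, 1]
    action = np.clip(raw * action_scale + action_mean, low, high)
    print(raw, '->', action)                       # -2.0, 0.0, 1.0, 2.0

The non-greedy path in the code above adds the distribution's exploration term before the same clipping step.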
- -# Requirements -tensorflow==2.0.0a0 -tensorlayer==2.0.1 - -""" -import os -import re - -import gym -import matplotlib.pyplot as plt -import numpy as np -import tensorlayer as tl -import tensorflow as tf -from importlib import import_module - - -def plot(episode_rewards, algorithm_name, env_name): - """ - plot the learning curve, saved as ./img/algorithm_name-env_name.png - - :param episode_rewards: array of floats - :param algorithm_name: string - :param env_name: string - """ - path = os.path.join('.', 'img') - name = algorithm_name + '-' + env_name - plt.figure(figsize=(10, 5)) - plt.title(name) - plt.plot(np.arange(len(episode_rewards)), episode_rewards) - plt.xlabel('Episode') - plt.ylabel('Episode Reward') - if not os.path.exists(path): - os.makedirs(path) - plt.savefig(os.path.join(path, name + '.png')) - plt.close() - - -def plot_save_log(episode_rewards, algorithm_name, env_name): - """ - plot the learning curve, saved as ./img/algorithm_name-env_name.png, - and save the rewards log as ./log/algorithm_name-env_name.npy - - :param episode_rewards: array of floats - :param algorithm_name: string - :param env_name: string - """ - path = os.path.join('.', 'log') - name = algorithm_name + '-' + env_name - plot(episode_rewards, algorithm_name, env_name) - if not os.path.exists(path): - os.makedirs(path) - np.save(os.path.join(path, name), episode_rewards) - - -def save_model(model, model_name, algorithm_name, env_name): - """ - save trained neural network model - - :param model: tensorlayer.models.Model - :param model_name: string, e.g. 'model_sac_q1' - :param algorithm_name: string, e.g. 'SAC' - """ - name = algorithm_name + '-' + env_name - path = os.path.join('.', 'model', name) - if not os.path.exists(path): - os.makedirs(path) - tl.files.save_npz(model.trainable_weights, os.path.join(path, model_name)) - - -def load_model(model, model_name, algorithm_name, env_name): - """ - load saved neural network model - - :param model: tensorlayer.models.Model - :param model_name: string, e.g. 'model_sac_q1' - :param algorithm_name: string, e.g. 
'SAC' - """ - name = algorithm_name + '-' + env_name - path = os.path.join('.', 'model', name) - try: - param = tl.files.load_npz(path, model_name + '.npz') - for p0, p1 in zip(model.trainable_weights, param): - p0.assign(p1) - except Exception as e: - print('Load Model Fails!') - raise e - - -def parse_all_args(parser): - """ Parse known and unknown args """ - common_options, other_args = parser.parse_known_args() - other_options = dict() - index = 0 - n = len(other_args) - float_pattern = re.compile(r'^[-+]?[-0-9]\d*\.\d*|[-+]?\.?[0-9]\d*$') - while index < n: # only str, int and float type will be parsed - if other_args[index].startswith('--'): - if other_args[index].__contains__('='): - key, value = other_args[index].split('=') - index += 1 - else: - key, value = other_args[index:index + 2] - index += 2 - if re.match(float_pattern, value): - value = float(value) - if value.is_integer(): - value = int(value) - other_options[key[2:]] = value - return common_options, other_options - - -def make_env(env_id): - env = gym.make(env_id).unwrapped - """ add env wrappers here """ - return env - - -def get_algorithm_module(algorithm, submodule): - """ Get algorithm module in the corresponding folder """ - return import_module('.'.join(['rlzoo', 'algorithms', algorithm, submodule])) - - -def call_default_params(env, envtype, alg, default_seed=True): - """ Get the default parameters for training from the default script """ - alg = alg.lower() - default = import_module('.'.join(['rlzoo', 'algorithms', alg, 'default'])) - params = getattr(default, envtype)(env, - default_seed) # need manually set seed in the main script if default_seed = False - return params - - -def set_seed(seed, env=None): - """ set random seed for reproduciblity """ - if isinstance(env, list): - assert isinstance(seed, list) - for i in range(len(env)): - env[i].seed(seed[i]) - seed = seed[0] # pick one seed for np and tf - elif env is not None: - env.seed(seed) - np.random.seed(seed) - tf.random.set_seed(seed) +""" +Functions for utilization. 
+ +# Requirements +tensorflow==2.0.0a0 +tensorlayer==2.0.1 + +""" +import os +import re + +import gym +import matplotlib.pyplot as plt +import numpy as np +import tensorlayer as tl +import tensorflow as tf +from importlib import import_module + + +def plot(episode_rewards, algorithm_name, env_name): + """ + plot the learning curve, saved as ./img/algorithm_name-env_name.png + + :param episode_rewards: array of floats + :param algorithm_name: string + :param env_name: string + """ + path = os.path.join('.', 'img') + name = algorithm_name + '-' + env_name + plt.figure(figsize=(10, 5)) + plt.title(name) + plt.plot(np.arange(len(episode_rewards)), episode_rewards) + plt.xlabel('Episode') + plt.ylabel('Episode Reward') + if not os.path.exists(path): + os.makedirs(path) + plt.savefig(os.path.join(path, name + '.png')) + plt.close() + + +def plot_save_log(episode_rewards, algorithm_name, env_name): + """ + plot the learning curve, saved as ./img/algorithm_name-env_name.png, + and save the rewards log as ./log/algorithm_name-env_name.npy + + :param episode_rewards: array of floats + :param algorithm_name: string + :param env_name: string + """ + path = os.path.join('.', 'log') + name = algorithm_name + '-' + env_name + plot(episode_rewards, algorithm_name, env_name) + if not os.path.exists(path): + os.makedirs(path) + np.save(os.path.join(path, name), episode_rewards) + + +def save_model(model, model_name, algorithm_name, env_name): + """ + save trained neural network model + + :param model: tensorlayer.models.Model + :param model_name: string, e.g. 'model_sac_q1' + :param algorithm_name: string, e.g. 'SAC' + """ + name = algorithm_name + '-' + env_name + path = os.path.join('.', 'model', name) + if not os.path.exists(path): + os.makedirs(path) + tl.files.save_npz(model.trainable_weights, os.path.join(path, model_name)) + + +def load_model(model, model_name, algorithm_name, env_name): + """ + load saved neural network model + + :param model: tensorlayer.models.Model + :param model_name: string, e.g. 'model_sac_q1' + :param algorithm_name: string, e.g. 
'SAC' + """ + name = algorithm_name + '-' + env_name + path = os.path.join('.', 'model', name) + try: + param = tl.files.load_npz(path, model_name + '.npz') + for p0, p1 in zip(model.trainable_weights, param): + p0.assign(p1) + except Exception as e: + print('Load Model Fails!') + raise e + + +def parse_all_args(parser): + """ Parse known and unknown args """ + common_options, other_args = parser.parse_known_args() + other_options = dict() + index = 0 + n = len(other_args) + float_pattern = re.compile(r'^[-+]?[-0-9]\d*\.\d*|[-+]?\.?[0-9]\d*$') + while index < n: # only str, int and float type will be parsed + if other_args[index].startswith('--'): + if other_args[index].__contains__('='): + key, value = other_args[index].split('=') + index += 1 + else: + key, value = other_args[index:index + 2] + index += 2 + if re.match(float_pattern, value): + value = float(value) + if value.is_integer(): + value = int(value) + other_options[key[2:]] = value + return common_options, other_options + + +def make_env(env_id): + env = gym.make(env_id).unwrapped + """ add env wrappers here """ + return env + + +def get_algorithm_module(algorithm, submodule): + """ Get algorithm module in the corresponding folder """ + return import_module('.'.join(['rlzoo', 'algorithms', algorithm, submodule])) + + +def call_default_params(env, envtype, alg, default_seed=True): + """ Get the default parameters for training from the default script """ + alg = alg.lower() + default = import_module('.'.join(['rlzoo', 'algorithms', alg, 'default'])) + params = getattr(default, envtype)(env, + default_seed) # need manually set seed in the main script if default_seed = False + return params + + +def set_seed(seed, env=None): + """ set random seed for reproduciblity """ + if isinstance(env, list): + assert isinstance(seed, list) + for i in range(len(env)): + env[i].seed(seed[i]) + seed = seed[0] # pick one seed for np and tf + elif env is not None: + env.seed(seed) + np.random.seed(seed) + tf.random.set_seed(seed) diff --git a/rlzoo/common/value_networks.py b/rlzoo/common/value_networks.py old mode 100644 new mode 100755 index 2e787c9..c73dde6 --- a/rlzoo/common/value_networks.py +++ b/rlzoo/common/value_networks.py @@ -1,386 +1,386 @@ -""" -Functions for utilization. 
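For reference, parse_all_args above pairs argparse's parse_known_args with a numeric regex so that any unknown '--key value' or '--key=value' pairs become a dict, with integer and float strings coerced to Python numbers. A hedged usage sketch; the option names below are invented for illustration and are not part of the patch.

# Illustrative only: splitting declared options from free-form overrides.
import argparse
from rlzoo.common.utils import parse_all_args

parser = argparse.ArgumentParser()
parser.add_argument('--env_id', type=str, default='CartPole-v0')
parser.add_argument('--algorithm', type=str, default='ppo')

# For a command line such as:
#   python run.py --algorithm sac --train_episodes 500 --gamma=0.99
# parse_known_args() consumes --algorithm, and parse_all_args turns the rest
# into {'train_episodes': 500, 'gamma': 0.99} (ints and floats coerced).
common, extra = parse_all_args(parser)
print(common.env_id, common.algorithm)
print(extra)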
- -# Requirements -tensorflow==2.0.0a0 -tensorlayer==2.0.1 - -""" -import copy - -import numpy as np -import tensorlayer as tl -from tensorlayer.layers import BatchNorm, Dense, Input -from tensorlayer.models import Model - -from rlzoo.common.basic_nets import * - - -class ValueNetwork(Model): - def __init__(self, state_space, hidden_dim_list, w_init=tf.keras.initializers.glorot_normal(), - activation=tf.nn.relu, output_activation=None, trainable=True, name=None): - """ - Value network with multiple fully-connected layers or convolutional layers (according to state shape) - - :param state_space: (gym.spaces) space of the state from gym environments - :param hidden_dim_list: (list[int]) a list of dimensions of hidden layers - :param w_init: (callable) weights initialization - :param activation: (callable) activation function - :param output_activation: (callable or None) output activation function - :param trainable: (bool) set training and evaluation mode - """ - self._state_space = state_space - - obs_inputs, current_layer, self._state_shape = CreateInputLayer(state_space) - - if isinstance(state_space, spaces.Dict): - assert isinstance(obs_inputs, OrderedDict) - assert isinstance(current_layer, OrderedDict) - self.input_dict = obs_inputs - obs_inputs = list(obs_inputs.values()) - current_layer = tl.layers.Concat(-1)(list(current_layer.values())) - - with tf.name_scope('MLP'): - for i, dim in enumerate(hidden_dim_list): - current_layer = Dense(n_units=dim, act=activation, W_init=w_init, name='hidden_layer%d' % (i + 1))( - current_layer) - - with tf.name_scope('Output'): - outputs = Dense(n_units=1, act=output_activation, W_init=w_init)(current_layer) - - super().__init__(inputs=obs_inputs, outputs=outputs, name=name) - if trainable: - self.train() - else: - self.eval() - - def __call__(self, states, *args, **kwargs): - if isinstance(self._state_space, spaces.Dict): - states = np.array(states).transpose([1, 0]).tolist() - else: - if np.shape(states)[1:] != self.state_shape: - raise ValueError( - 'Input state shape error. 
Shape can be {} but your shape is {}'.format((None,) + self.state_shape, - np.shape(states))) - states = np.array(states, dtype=np.float32) - return super().__call__(states, *args, **kwargs) - - @property - def state_space(self): - return copy.deepcopy(self._state_space) - - @property - def state_shape(self): - return copy.deepcopy(self._state_shape) - - -class MlpQNetwork(Model): - def __init__(self, state_shape, action_shape, hidden_dim_list, \ - w_init=tf.keras.initializers.glorot_normal(), activation=tf.nn.relu, output_activation=None, - trainable=True): - """ - Q-value network with multiple fully-connected layers - - Inputs: (state tensor, action tensor) - - :param state_shape: (tuple[int]) shape of the state, for example, (state_dim, ) for single-dimensional state - :param action_shape: (tuple[int]) shape of the action, for example, (action_dim, ) for single-dimensional action - :param hidden_dim_list: (list[int]) a list of dimensions of hidden layers - :param w_init: (callable) weights initialization - :param activation: (callable) activation function - :param output_activation: (callable or None) output activation function - :param trainable: (bool) set training and evaluation mode - """ - - input_shape = tuple(map(sum, zip(action_shape, state_shape))) - input_dim = input_shape[0] - - assert len(state_shape) == 1 - with tf.name_scope('MLP'): - inputs, l = MLP(input_dim, hidden_dim_list, w_init, activation) - - with tf.name_scope('Output'): - outputs = Dense(n_units=1, act=output_activation, W_init=w_init)(l) - - super().__init__(inputs=inputs, outputs=outputs) - if trainable: - self.train() - else: - self.eval() - - -class QNetwork(Model): - def __init__(self, state_space, action_space, hidden_dim_list, - w_init=tf.keras.initializers.glorot_normal(), activation=tf.nn.relu, output_activation=None, - trainable=True, name=None, state_only=False, dueling=False): - """ Q-value network with multiple fully-connected layers or convolutional layers (according to state shape) - - :param state_space: (gym.spaces) space of the state from gym environments - :param action_space: (gym.spaces) space of the action from gym environments - :param hidden_dim_list: (list[int]) a list of dimensions of hidden layers - :param w_init: (callable) weights initialization - :param activation: (callable) activation function - :param output_activation: (callable or None) output activation function - :param trainable: (bool) set training and evaluation mode - :param name: (str) name the model - :param state_only: (bool) only input state or not, available in discrete action space - :param dueling: (bool) whether use the dueling output or not, available in discrete action space - """ - self._state_space, self._action_space = state_space, action_space - self.state_only = state_only - self.dueling = dueling - - # create state input layer - obs_inputs, current_layer, self._state_shape = CreateInputLayer(state_space) - - # create action input layer - if isinstance(self._action_space, spaces.Discrete): - self._action_shape = self._action_space.n, - if not self.state_only: - act_inputs = Input((None,), name='Act_Input_Layer', dtype=tf.int64) - elif isinstance(self._action_space, spaces.Box): - self._action_shape = self._action_space.shape - assert len(self._action_shape) == 1 - act_inputs = Input((None,) + self._action_shape, name='Act_Input_Layer') - else: - raise NotImplementedError - - # concat multi-head state - if isinstance(state_space, spaces.Dict): - assert isinstance(obs_inputs, dict) - assert 
isinstance(current_layer, dict) - self.input_dict = obs_inputs - obs_inputs = list(obs_inputs.values()) - current_layer = tl.layers.Concat(-1)(list(current_layer.values())) - - if isinstance(self._action_space, spaces.Box): - current_layer = tl.layers.Concat(-1)([current_layer, act_inputs]) - - with tf.name_scope('QNet_MLP'): - for i, dim in enumerate(hidden_dim_list): - current_layer = Dense(n_units=dim, act=activation, W_init=w_init, - name='mlp_hidden_layer%d' % (i + 1))(current_layer) - - with tf.name_scope('Outputs'): - if isinstance(self._action_space, spaces.Discrete): - if self.dueling: - v = Dense(1, None, tf.initializers.Orthogonal(1.0))(current_layer) - q = Dense(n_units=self._action_shape[0], act=output_activation, W_init=w_init)( - current_layer) - mean_q = tl.layers.Lambda(lambda x: tf.reduce_mean(x, 1, True))(q) - current_layer = tl.layers.Lambda(lambda x: x[0] + x[1] - x[2])((v, q, mean_q)) - else: - current_layer = Dense(n_units=self._action_shape[0], act=output_activation, W_init=w_init)( - current_layer) - - if not self.state_only: - act_one_hot = tl.layers.OneHot(depth=self._action_shape[0], axis=1)( - act_inputs) # discrete action choice to one-hot vector - outputs = tl.layers.Lambda( - lambda x: tf.reduce_sum(tf.reduce_prod(x, axis=0), axis=1))((current_layer, act_one_hot)) - else: - outputs = current_layer - - elif isinstance(self._action_space, spaces.Box): - outputs = Dense(n_units=1, act=output_activation, W_init=w_init)(current_layer) - else: - raise ValueError("State Shape Not Accepted!") - - if isinstance(state_space, spaces.Dict): - if self.state_only: - super().__init__(inputs=obs_inputs, outputs=outputs, name=name) - else: - super().__init__(inputs=obs_inputs + [act_inputs], outputs=outputs, name=name) - else: - if self.state_only: - super().__init__(inputs=obs_inputs, outputs=outputs, name=name) - else: - super().__init__(inputs=[obs_inputs, act_inputs], outputs=outputs, name=name) - if trainable: - self.train() - else: - self.eval() - - def __call__(self, inputs, *args, **kwargs): - if self.state_only: - states = inputs - else: - states, actions = inputs - - # states and actions must have the same length - if not self.state_only and len(states) != len(actions): - raise ValueError( - 'Length of states and actions not match. States length is {} but actions length is {}'.format( - len(states), - len(actions))) - - if isinstance(self._state_space, spaces.Dict): - states = np.array(states).transpose([1, 0]).tolist() # batch states to multi-head - ssv = list(self._state_shape.values()) - # check state shape - for i, each_head in enumerate(states): - if np.shape(each_head)[1:] != ssv[i]: - raise ValueError('Input state shape error.') - - else: - if np.shape(states)[1:] != self.state_shape: - raise ValueError( - 'Input state shape error. 
Shape can be {} but your shape is {}'.format((None,) + self.state_shape, - np.shape(states))) - states = np.array(states, dtype=np.float32) - - if not self.state_only: - if isinstance(self._action_space, spaces.Discrete) and np.any(actions % 1): - raise ValueError('Input float actions in discrete action space') - if isinstance(self._action_space, spaces.Discrete): - actions = tf.convert_to_tensor(actions, dtype=tf.int64) - elif isinstance(self._action_space, spaces.Box): - actions = tf.convert_to_tensor(actions, dtype=tf.float32) - if isinstance(self._state_space, spaces.Dict): - return super().__call__(states + [actions], *args, **kwargs) - else: - return super().__call__([states, actions], *args, **kwargs) - else: - return super().__call__(states, *args, **kwargs) - - - @property - def state_space(self): - return copy.deepcopy(self._state_space) - - @property - def action_space(self): - return copy.deepcopy(self._action_space) - - @property - def state_shape(self): - return copy.deepcopy(self._state_shape) - - @property - def action_shape(self): - return copy.deepcopy(self._action_shape) - - -class NAFLayer(tl.layers.Layer): - def __init__(self, action_dim, name=None): - super(NAFLayer, self).__init__(name) - self.action_dim = action_dim - - def forward(self, inputs): - L, u, mu, value = inputs - pivot = 0 - rows = [] - for idx in range(self.action_dim): - offset = self.action_dim - idx - diag = tf.exp(tf.slice(L, (0, pivot), (-1, 1))) - nondiag = tf.slice(L, (0, pivot + 1), (-1, offset - 1)) - row = tf.pad(tf.concat([diag, nondiag], 1), ((0, 0), (idx, 0))) - pivot += offset - rows.append(row) - L_T = tf.stack(rows, axis=1) - P = tf.matmul(tf.transpose(L_T, (0, 2, 1)), L_T) # L L^T - temp = tf.expand_dims(u - mu, -1) - adv = tf.squeeze(-0.5 * tf.matmul(tf.transpose(temp, [0, 2, 1]), tf.matmul(P, temp)), -1) - return adv + value - - def build(self, inputs_shape=None): - pass - - -class NAFQNetwork(Model): - def __init__(self, state_space, action_space, hidden_dim_list, - w_init=tf.keras.initializers.glorot_normal(), activation=tf.nn.tanh, trainable=True, name=None): - """ NAF Q-value network with multiple fully-connected layers - - :param state_space: (gym.spaces) space of the state from gym environments - :param action_space: (gym.spaces) space of the action from gym environments - :param hidden_dim_list: (list[int]) a list of dimensions of hidden layers - :param w_init: (callable) weights initialization - :param activation: (callable) activation function - :param trainable: (bool) set training and evaluation mode - :param name: (str) name the model - """ - assert isinstance(action_space, spaces.Box) - self._state_space, self._action_space = state_space, action_space - self._action_shape = self._action_space.shape - assert len(self._action_shape) == 1 - act_inputs = Input((None,) + self._action_shape, name='Act_Input_Layer') - - # create state input layer - obs_inputs, current_layer, self._state_shape = CreateInputLayer(state_space) - - # concat multi-head state - if isinstance(state_space, spaces.Dict): - assert isinstance(obs_inputs, dict) - assert isinstance(current_layer, dict) - self.input_dict = obs_inputs - obs_inputs = list(obs_inputs.values()) - current_layer = tl.layers.Concat(-1)(list(current_layer.values())) - - # calculate value - current_layer = BatchNorm()(current_layer) - with tf.name_scope('NAF_VALUE_MLP'): - for i, dim in enumerate(hidden_dim_list): - current_layer = Dense(n_units=dim, act=activation, W_init=w_init, - name='mlp_hidden_layer%d' % (i + 1))(current_layer) 
- value = Dense(n_units=1, W_init=w_init, name='naf_value_mlp_output')(current_layer) - - # calculate advantange and Q-value - dim = self._action_shape[0] - with tf.name_scope('NAF_ADVANTAGE'): - mu = Dense(n_units=dim, act=activation, W_init=w_init, name='mu')(current_layer) - L = Dense(n_units=int((dim * (dim + 1)) / 2), W_init=w_init, name='L')(current_layer) - qvalue = NAFLayer(dim)([L, act_inputs, mu, value]) - - super().__init__(inputs=[obs_inputs, act_inputs], outputs=qvalue, name=name) - if trainable: - self.train() - else: - self.eval() - - def __call__(self, inputs, *args, **kwargs): - states, actions = inputs - - # states and actions must have the same length - if len(states) != len(actions): - raise ValueError( - 'Length of states and actions not match. States length is {} but actions length is {}'.format( - len(states), - len(actions))) - - if isinstance(self._state_space, spaces.Dict): - states = np.array(states).transpose([1, 0]).tolist() # batch states to multi-head - ssv = list(self._state_shape.values()) - # check state shape - for i, each_head in enumerate(states): - if np.shape(each_head)[1:] != ssv[i]: - raise ValueError('Input state shape error.') - - else: - if np.shape(states)[1:] != self.state_shape: - raise ValueError( - 'Input state shape error. Shape can be {} but your shape is {}'.format((None,) + self.state_shape, - np.shape(states))) - states = np.array(states, dtype=np.float32) - - actions = tf.convert_to_tensor(actions, dtype=tf.float32) - if isinstance(self._state_space, spaces.Dict): - return super().__call__(states + [actions], *args, **kwargs) - else: - return super().__call__([states, actions], *args, **kwargs) - - @property - def state_space(self): - return copy.deepcopy(self._state_space) - - @property - def action_space(self): - return copy.deepcopy(self._action_space) - - @property - def state_shape(self): - return copy.deepcopy(self._state_shape) - - @property - def action_shape(self): - return copy.deepcopy(self._action_shape) +""" +Functions for utilization. 
+ +# Requirements +tensorflow==2.0.0a0 +tensorlayer==2.0.1 + +""" +import copy + +import numpy as np +import tensorlayer as tl +from tensorlayer.layers import BatchNorm, Dense, Input +from tensorlayer.models import Model + +from rlzoo.common.basic_nets import * + + +class ValueNetwork(Model): + def __init__(self, state_space, hidden_dim_list, w_init=tf.keras.initializers.glorot_normal(), + activation=tf.nn.relu, output_activation=None, trainable=True, name=None): + """ + Value network with multiple fully-connected layers or convolutional layers (according to state shape) + + :param state_space: (gym.spaces) space of the state from gym environments + :param hidden_dim_list: (list[int]) a list of dimensions of hidden layers + :param w_init: (callable) weights initialization + :param activation: (callable) activation function + :param output_activation: (callable or None) output activation function + :param trainable: (bool) set training and evaluation mode + """ + self._state_space = state_space + + obs_inputs, current_layer, self._state_shape = CreateInputLayer(state_space) + + if isinstance(state_space, spaces.Dict): + assert isinstance(obs_inputs, OrderedDict) + assert isinstance(current_layer, OrderedDict) + self.input_dict = obs_inputs + obs_inputs = list(obs_inputs.values()) + current_layer = tl.layers.Concat(-1)(list(current_layer.values())) + + with tf.name_scope('MLP'): + for i, dim in enumerate(hidden_dim_list): + current_layer = Dense(n_units=dim, act=activation, W_init=w_init, name='hidden_layer%d' % (i + 1))( + current_layer) + + with tf.name_scope('Output'): + outputs = Dense(n_units=1, act=output_activation, W_init=w_init)(current_layer) + + super().__init__(inputs=obs_inputs, outputs=outputs, name=name) + if trainable: + self.train() + else: + self.eval() + + def __call__(self, states, *args, **kwargs): + if isinstance(self._state_space, spaces.Dict): + states = np.array(states).transpose([1, 0]).tolist() + else: + if np.shape(states)[1:] != self.state_shape: + raise ValueError( + 'Input state shape error. 
Shape can be {} but your shape is {}'.format((None,) + self.state_shape, + np.shape(states))) + states = np.array(states, dtype=np.float32) + return super().__call__(states, *args, **kwargs) + + @property + def state_space(self): + return copy.deepcopy(self._state_space) + + @property + def state_shape(self): + return copy.deepcopy(self._state_shape) + + +class MlpQNetwork(Model): + def __init__(self, state_shape, action_shape, hidden_dim_list, \ + w_init=tf.keras.initializers.glorot_normal(), activation=tf.nn.relu, output_activation=None, + trainable=True): + """ + Q-value network with multiple fully-connected layers + + Inputs: (state tensor, action tensor) + + :param state_shape: (tuple[int]) shape of the state, for example, (state_dim, ) for single-dimensional state + :param action_shape: (tuple[int]) shape of the action, for example, (action_dim, ) for single-dimensional action + :param hidden_dim_list: (list[int]) a list of dimensions of hidden layers + :param w_init: (callable) weights initialization + :param activation: (callable) activation function + :param output_activation: (callable or None) output activation function + :param trainable: (bool) set training and evaluation mode + """ + + input_shape = tuple(map(sum, zip(action_shape, state_shape))) + input_dim = input_shape[0] + + assert len(state_shape) == 1 + with tf.name_scope('MLP'): + inputs, l = MLP(input_dim, hidden_dim_list, w_init, activation) + + with tf.name_scope('Output'): + outputs = Dense(n_units=1, act=output_activation, W_init=w_init)(l) + + super().__init__(inputs=inputs, outputs=outputs) + if trainable: + self.train() + else: + self.eval() + + +class QNetwork(Model): + def __init__(self, state_space, action_space, hidden_dim_list, + w_init=tf.keras.initializers.glorot_normal(), activation=tf.nn.relu, output_activation=None, + trainable=True, name=None, state_only=False, dueling=False): + """ Q-value network with multiple fully-connected layers or convolutional layers (according to state shape) + + :param state_space: (gym.spaces) space of the state from gym environments + :param action_space: (gym.spaces) space of the action from gym environments + :param hidden_dim_list: (list[int]) a list of dimensions of hidden layers + :param w_init: (callable) weights initialization + :param activation: (callable) activation function + :param output_activation: (callable or None) output activation function + :param trainable: (bool) set training and evaluation mode + :param name: (str) name the model + :param state_only: (bool) only input state or not, available in discrete action space + :param dueling: (bool) whether use the dueling output or not, available in discrete action space + """ + self._state_space, self._action_space = state_space, action_space + self.state_only = state_only + self.dueling = dueling + + # create state input layer + obs_inputs, current_layer, self._state_shape = CreateInputLayer(state_space) + + # create action input layer + if isinstance(self._action_space, spaces.Discrete): + self._action_shape = self._action_space.n, + if not self.state_only: + act_inputs = Input((None,), name='Act_Input_Layer', dtype=tf.int64) + elif isinstance(self._action_space, spaces.Box): + self._action_shape = self._action_space.shape + assert len(self._action_shape) == 1 + act_inputs = Input((None,) + self._action_shape, name='Act_Input_Layer') + else: + raise NotImplementedError + + # concat multi-head state + if isinstance(state_space, spaces.Dict): + assert isinstance(obs_inputs, dict) + assert 
isinstance(current_layer, dict) + self.input_dict = obs_inputs + obs_inputs = list(obs_inputs.values()) + current_layer = tl.layers.Concat(-1)(list(current_layer.values())) + + if isinstance(self._action_space, spaces.Box): + current_layer = tl.layers.Concat(-1)([current_layer, act_inputs]) + + with tf.name_scope('QNet_MLP'): + for i, dim in enumerate(hidden_dim_list): + current_layer = Dense(n_units=dim, act=activation, W_init=w_init, + name='mlp_hidden_layer%d' % (i + 1))(current_layer) + + with tf.name_scope('Outputs'): + if isinstance(self._action_space, spaces.Discrete): + if self.dueling: + v = Dense(1, None, tf.initializers.Orthogonal(1.0))(current_layer) + q = Dense(n_units=self._action_shape[0], act=output_activation, W_init=w_init)( + current_layer) + mean_q = tl.layers.Lambda(lambda x: tf.reduce_mean(x, 1, True))(q) + current_layer = tl.layers.Lambda(lambda x: x[0] + x[1] - x[2])((v, q, mean_q)) + else: + current_layer = Dense(n_units=self._action_shape[0], act=output_activation, W_init=w_init)( + current_layer) + + if not self.state_only: + act_one_hot = tl.layers.OneHot(depth=self._action_shape[0], axis=1)( + act_inputs) # discrete action choice to one-hot vector + outputs = tl.layers.Lambda( + lambda x: tf.reduce_sum(tf.reduce_prod(x, axis=0), axis=1))((current_layer, act_one_hot)) + else: + outputs = current_layer + + elif isinstance(self._action_space, spaces.Box): + outputs = Dense(n_units=1, act=output_activation, W_init=w_init)(current_layer) + else: + raise ValueError("State Shape Not Accepted!") + + if isinstance(state_space, spaces.Dict): + if self.state_only: + super().__init__(inputs=obs_inputs, outputs=outputs, name=name) + else: + super().__init__(inputs=obs_inputs + [act_inputs], outputs=outputs, name=name) + else: + if self.state_only: + super().__init__(inputs=obs_inputs, outputs=outputs, name=name) + else: + super().__init__(inputs=[obs_inputs, act_inputs], outputs=outputs, name=name) + if trainable: + self.train() + else: + self.eval() + + def __call__(self, inputs, *args, **kwargs): + if self.state_only: + states = inputs + else: + states, actions = inputs + + # states and actions must have the same length + if not self.state_only and len(states) != len(actions): + raise ValueError( + 'Length of states and actions not match. States length is {} but actions length is {}'.format( + len(states), + len(actions))) + + if isinstance(self._state_space, spaces.Dict): + states = np.array(states).transpose([1, 0]).tolist() # batch states to multi-head + ssv = list(self._state_shape.values()) + # check state shape + for i, each_head in enumerate(states): + if np.shape(each_head)[1:] != ssv[i]: + raise ValueError('Input state shape error.') + + else: + if np.shape(states)[1:] != self.state_shape: + raise ValueError( + 'Input state shape error. 
Shape can be {} but your shape is {}'.format((None,) + self.state_shape, + np.shape(states))) + states = np.array(states, dtype=np.float32) + + if not self.state_only: + if isinstance(self._action_space, spaces.Discrete) and np.any(actions % 1): + raise ValueError('Input float actions in discrete action space') + if isinstance(self._action_space, spaces.Discrete): + actions = tf.convert_to_tensor(actions, dtype=tf.int64) + elif isinstance(self._action_space, spaces.Box): + actions = tf.convert_to_tensor(actions, dtype=tf.float32) + if isinstance(self._state_space, spaces.Dict): + return super().__call__(states + [actions], *args, **kwargs) + else: + return super().__call__([states, actions], *args, **kwargs) + else: + return super().__call__(states, *args, **kwargs) + + + @property + def state_space(self): + return copy.deepcopy(self._state_space) + + @property + def action_space(self): + return copy.deepcopy(self._action_space) + + @property + def state_shape(self): + return copy.deepcopy(self._state_shape) + + @property + def action_shape(self): + return copy.deepcopy(self._action_shape) + + +class NAFLayer(tl.layers.Layer): + def __init__(self, action_dim, name=None): + super(NAFLayer, self).__init__(name) + self.action_dim = action_dim + + def forward(self, inputs): + L, u, mu, value = inputs + pivot = 0 + rows = [] + for idx in range(self.action_dim): + offset = self.action_dim - idx + diag = tf.exp(tf.slice(L, (0, pivot), (-1, 1))) + nondiag = tf.slice(L, (0, pivot + 1), (-1, offset - 1)) + row = tf.pad(tf.concat([diag, nondiag], 1), ((0, 0), (idx, 0))) + pivot += offset + rows.append(row) + L_T = tf.stack(rows, axis=1) + P = tf.matmul(tf.transpose(L_T, (0, 2, 1)), L_T) # L L^T + temp = tf.expand_dims(u - mu, -1) + adv = tf.squeeze(-0.5 * tf.matmul(tf.transpose(temp, [0, 2, 1]), tf.matmul(P, temp)), -1) + return adv + value + + def build(self, inputs_shape=None): + pass + + +class NAFQNetwork(Model): + def __init__(self, state_space, action_space, hidden_dim_list, + w_init=tf.keras.initializers.glorot_normal(), activation=tf.nn.tanh, trainable=True, name=None): + """ NAF Q-value network with multiple fully-connected layers + + :param state_space: (gym.spaces) space of the state from gym environments + :param action_space: (gym.spaces) space of the action from gym environments + :param hidden_dim_list: (list[int]) a list of dimensions of hidden layers + :param w_init: (callable) weights initialization + :param activation: (callable) activation function + :param trainable: (bool) set training and evaluation mode + :param name: (str) name the model + """ + assert isinstance(action_space, spaces.Box) + self._state_space, self._action_space = state_space, action_space + self._action_shape = self._action_space.shape + assert len(self._action_shape) == 1 + act_inputs = Input((None,) + self._action_shape, name='Act_Input_Layer') + + # create state input layer + obs_inputs, current_layer, self._state_shape = CreateInputLayer(state_space) + + # concat multi-head state + if isinstance(state_space, spaces.Dict): + assert isinstance(obs_inputs, dict) + assert isinstance(current_layer, dict) + self.input_dict = obs_inputs + obs_inputs = list(obs_inputs.values()) + current_layer = tl.layers.Concat(-1)(list(current_layer.values())) + + # calculate value + current_layer = BatchNorm()(current_layer) + with tf.name_scope('NAF_VALUE_MLP'): + for i, dim in enumerate(hidden_dim_list): + current_layer = Dense(n_units=dim, act=activation, W_init=w_init, + name='mlp_hidden_layer%d' % (i + 1))(current_layer) 
+ value = Dense(n_units=1, W_init=w_init, name='naf_value_mlp_output')(current_layer) + + # calculate advantange and Q-value + dim = self._action_shape[0] + with tf.name_scope('NAF_ADVANTAGE'): + mu = Dense(n_units=dim, act=activation, W_init=w_init, name='mu')(current_layer) + L = Dense(n_units=int((dim * (dim + 1)) / 2), W_init=w_init, name='L')(current_layer) + qvalue = NAFLayer(dim)([L, act_inputs, mu, value]) + + super().__init__(inputs=[obs_inputs, act_inputs], outputs=qvalue, name=name) + if trainable: + self.train() + else: + self.eval() + + def __call__(self, inputs, *args, **kwargs): + states, actions = inputs + + # states and actions must have the same length + if len(states) != len(actions): + raise ValueError( + 'Length of states and actions not match. States length is {} but actions length is {}'.format( + len(states), + len(actions))) + + if isinstance(self._state_space, spaces.Dict): + states = np.array(states).transpose([1, 0]).tolist() # batch states to multi-head + ssv = list(self._state_shape.values()) + # check state shape + for i, each_head in enumerate(states): + if np.shape(each_head)[1:] != ssv[i]: + raise ValueError('Input state shape error.') + + else: + if np.shape(states)[1:] != self.state_shape: + raise ValueError( + 'Input state shape error. Shape can be {} but your shape is {}'.format((None,) + self.state_shape, + np.shape(states))) + states = np.array(states, dtype=np.float32) + + actions = tf.convert_to_tensor(actions, dtype=tf.float32) + if isinstance(self._state_space, spaces.Dict): + return super().__call__(states + [actions], *args, **kwargs) + else: + return super().__call__([states, actions], *args, **kwargs) + + @property + def state_space(self): + return copy.deepcopy(self._state_space) + + @property + def action_space(self): + return copy.deepcopy(self._action_space) + + @property + def state_shape(self): + return copy.deepcopy(self._state_shape) + + @property + def action_shape(self): + return copy.deepcopy(self._action_shape) diff --git a/rlzoo/distributed/__init__.py b/rlzoo/distributed/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/rlzoo/distributed/dis_components.py b/rlzoo/distributed/dis_components.py new file mode 100755 index 0000000..d8ee1dd --- /dev/null +++ b/rlzoo/distributed/dis_components.py @@ -0,0 +1,128 @@ +import enum + +import tensorflow as tf +from kungfu.python import current_cluster_size, current_rank +from kungfu.tensorflow.ops import (barrier, request_variable, + request_variable_with_template, + save_variable, subset_all_reduce) +from kungfu.tensorflow.ops.queue import new_queue + + +class Role(enum.Enum): + Learner = 1 + Actor = 2 + Server = 3 + + +def show_role_name(role): + return { + Role.Learner: 'learner', + Role.Actor: 'actor', + Role.Server: 'server', + }[role] + + +def _interval(n, offset=0): + return list(range(offset, offset + n)) + + +class Agent: + def __init__(self, n_learners=1, n_actors=1, n_servers=1): + rank = current_rank() + size = current_cluster_size() + if n_learners + n_actors + n_servers != size: + raise RuntimeError('invalid cluster size') + self._n_learners = n_learners + self._n_actors = n_actors + self._n_servers = n_servers + self._global_rank = rank + self._global_size = size + roles = [Role.Learner] * n_learners + [Role.Actor] * n_actors + [Role.Server] * n_servers + rank2role = dict(enumerate(roles)) + self._role = rank2role[rank] + self._roles = { + Role.Learner: _interval(n_learners), + Role.Actor: _interval(n_actors, n_learners), + Role.Server: _interval(n_servers, 
n_learners + n_actors), + } + self._role_sizes = { + Role.Learner: n_learners, + Role.Actor: n_actors, + Role.Server: n_servers, + } + self._role_offsets = { + Role.Learner: 0, + Role.Actor: n_learners, + Role.Server: n_learners + n_actors, + } + self._role_rank = self._global_rank - self._role_offsets[self._role] + self._role_size = self._role_sizes[self._role] + + def _to_global_rank(self, role, role_rank): + return int(self._role_offsets[role] + int(role_rank)) + + # metadata APIs + def role(self): + return self._role + + def role_rank(self): + return self._role_rank + + def role_size(self, role=None): + if role is None: + return self._role_size + else: + return self._role_sizes[role] + + # collective APIs + def barrier(self): + return barrier() + + def role_all_reduce(self, x): + role_ranks = self._roles[self._role] + topology = [i for i in range(self._global_size)] + for i in role_ranks: + topology[i] = role_ranks[0] + # TODO: generate subset topology + return subset_all_reduce(x, topology) + + # p2p APIs + def save(self, x, name=None): + return save_variable(x, name=name) + + def request(self, role: Role, role_rank, name, shape, dtype): + role_size = self._role_sizes[role] + assert (0 <= role_rank and role_rank < role_size) + target = self._to_global_rank(role, role_rank) + return request_variable( + target, + name=name, + shape=shape, + dtype=dtype, + ) + + def new_queue(self, src, dst): + """create a uni-direction queue.""" + role1, rank1 = src + role2, rank2 = dst + srcRank = self._to_global_rank(role1, rank1) + dstRank = self._to_global_rank(role2, rank2) + return new_queue(srcRank, dstRank) + + def new_queue_pair(self, a, b): + """create a pair of queues.""" + q1 = self.new_queue(a, b) + q2 = self.new_queue(b, a) + return q1, q2 + + +class LearnerExample: + pass + + +class ActorExample: + pass + + +class ServerExample: + pass diff --git a/rlzoo/distributed/run_dis_train.sh b/rlzoo/distributed/run_dis_train.sh new file mode 100755 index 0000000..bfab290 --- /dev/null +++ b/rlzoo/distributed/run_dis_train.sh @@ -0,0 +1,46 @@ +#!/bin/sh +set -e + +cd $(dirname $0) + +kungfu_flags() { + echo -q + echo -logdir logs + + local ip1=127.0.0.1 + local np1=$np + + local ip2=127.0.0.10 + local np2=$np + local H=$ip1:$np1,$ip2:$np2 + local m=cpu,gpu + + echo -H $ip1:$np1 +} + +prun() { + local np=$1 + shift + kungfu-run $(kungfu_flags) -np $np $@ +} + +n_learner=2 +n_actor=2 +n_server=1 + +flags() { + echo -l $n_learner + echo -a $n_actor + echo -s $n_server +} + +rl_run() { + local n=$((n_learner + n_actor + n_server)) + prun $n python3 training_components.py $(flags) +} + +main() { + rl_run +} + +main diff --git a/rlzoo/distributed/start_dis_role.py b/rlzoo/distributed/start_dis_role.py new file mode 100755 index 0000000..22c03bb --- /dev/null +++ b/rlzoo/distributed/start_dis_role.py @@ -0,0 +1,206 @@ +import argparse + +from rlzoo.distributed.dis_components import * +import tensorflow as tf +import numpy as np + + +def parse_args(): + p = argparse.ArgumentParser() + p.add_argument('-l', type=int, default=1) + p.add_argument('-a', type=int, default=1) + p.add_argument('-s', type=int, default=1) + p.add_argument('-f', type=str, default='') # config.json + + args = p.parse_args() + return args + + +def run_learner(agent, args, training_conf, env_conf, agent_conf): + agent_generator = agent_conf['agent_generator'] + total_step, traj_len, train_n_traj = training_conf['total_step'], training_conf['traj_len'], training_conf['train_n_traj'], + obs_shape, act_shape = env_conf['obs_shape'], 
env_conf['act_shape'] + + if agent.role_rank() == 0: + param_q = agent.new_queue((Role.Learner, 0), (Role.Server, 0)) + + traj_q = agent.new_queue((Role.Server, 0), (Role.Learner, agent.role_rank())) + + rl_agent = agent_generator() + rl_agent.init_components() + + # init model + rl_agent.update_model([agent.role_all_reduce(weights) for weights in rl_agent.all_weights]) + + if agent.role_rank() == 0: + for weight in rl_agent.all_weights: + param_q.put(tf.Variable(weight, dtype=tf.float32)) + + n_update = total_step // (traj_len * agent.role_size(Role.Learner) * train_n_traj) + for i in range(n_update): + traj_list = [[traj_q.get(dtype=tf.float32, shape=(traj_len, *shape)) for shape in [ + obs_shape, act_shape, (), (), obs_shape, (), (1,)]] for _ in range(train_n_traj)] + + rl_agent.train(traj_list, dis_agent=agent) + + # send weights to server + if agent.role_rank() == 0: + for weight in rl_agent.all_weights: + param_q.put(tf.Variable(weight, dtype=tf.float32)) + print('learner finished') + + +def run_actor(agent, args, training_conf, env_conf): # sampler + env_maker, total_step = env_conf['env_maker'], training_conf['total_step'] + + from gym import spaces + + env = env_maker() + action_q, step_data_q = agent.new_queue_pair((Role.Server, 0), (Role.Actor, agent.role_rank())) + + state, reward, done = env.reset(), 0, False + each_total_step = int(total_step/agent.role_size(Role.Actor)) + action_dtype = tf.int32 if isinstance(env.action_space, spaces.Discrete) else tf.float32 + for i in range(each_total_step): + step_data_q.put(tf.Variable(state, dtype=tf.float32)) + a = action_q.get(dtype=action_dtype, shape=env.action_space.shape).numpy() + next_state, reward, done, _ = env.step(a) + for data in (reward, done, next_state): + step_data_q.put(tf.Variable(data, dtype=tf.float32)) + if done: + state = env.reset() + else: + state = next_state + print('actor finished') + + +def run_server(agent, args, training_conf, env_conf, agent_conf): + total_step, traj_len, train_n_traj, save_interval = training_conf['total_step'], training_conf['traj_len'], \ + training_conf['train_n_traj'], training_conf['save_interval'], + obs_shape, env_name = env_conf['obs_shape'], env_conf['env_name'] + agent_generator = agent_conf['agent_generator'] + + from rlzoo.algorithms.dppo_clip_distributed.dppo_clip import DPPO_CLIP + from rlzoo.distributed.dis_components import Role + from gym import spaces + + learner_size = agent.role_size(Role.Learner) + rl_agent: DPPO_CLIP = agent_generator() + rl_agent.init_components() + + # queue to actor + q_list = [agent.new_queue_pair((Role.Server, 0), (Role.Actor, i)) for i in + range(agent.role_size(Role.Actor))] + action_q_list, step_data_q_list = zip(*q_list) + + # queue to learner + param_q = agent.new_queue((Role.Learner, 0), (Role.Server, 0)) + traj_q_list = [agent.new_queue((Role.Server, 0), (Role.Learner, i)) for i in + range(agent.role_size(Role.Learner))] + + # syn net weights from learner + all_weights = [param_q.get(dtype=weight.dtype, shape=weight.shape) for weight in rl_agent.all_weights] + rl_agent.update_model(all_weights) + + train_cnt = 0 + action_dtype = tf.int32 if isinstance(rl_agent.actor.action_space, spaces.Discrete) else tf.float32 + + curr_step = 0 + + total_reward_list = [] + curr_reward_list = [] + tmp_eps_reward = 0 + while curr_step < total_step: + tmp_eps_reward = 0 # todo env with no end + for _ in range(traj_len): + curr_step += agent.role_size(Role.Actor) + + state_list = [] + for step_data_q in step_data_q_list: + 
state_list.append(step_data_q.get(dtype=tf.float32, shape=obs_shape)) + + action_list, log_p_list = rl_agent.get_action(state_list, batch_data=True) + + for action_q, action in zip(action_q_list, action_list): + action_q.put(tf.Variable(action, dtype=action_dtype)) + reward_list, done_list, next_state_list = [], [], [], + for i, step_data_q in enumerate(step_data_q_list): + reward = step_data_q.get(dtype=tf.float32, shape=()) + if i == 0: + tmp_eps_reward += reward + reward_list.append(reward) + done = step_data_q.get(dtype=tf.float32, shape=()) + if i == 0 and done: + curr_reward_list.append(tmp_eps_reward) + tmp_eps_reward = 0 + done_list.append(done) + next_state_list.append(step_data_q.get(dtype=tf.float32, shape=obs_shape)) + rl_agent.collect_data(state_list, action_list, reward_list, done_list, next_state_list, log_p_list, True) + + rl_agent.update_traj_list() + + # send traj to each learner and update weight + learn_traj_len = learner_size * train_n_traj + if len(rl_agent.traj_list) >= learn_traj_len: + train_cnt += 1 + + # todo env with end + # avg_eps_reward = None + # if curr_reward_list: + # avg_eps_reward = np.mean(curr_reward_list) + # curr_reward_list.clear() + # total_reward_list.append(avg_eps_reward) + + # todo env with no end + avg_eps_reward = tmp_eps_reward + total_reward_list.append(np.array(avg_eps_reward)) + + print('Training iters: {}, steps so far: {}, average eps reward: {}'.format( + train_cnt, curr_step, np.array(avg_eps_reward))) + + rl_agent.plot_save_log(total_reward_list, env_name) + + traj_iter = iter(rl_agent.traj_list[:learn_traj_len]) + rl_agent.traj_list = rl_agent.traj_list[learn_traj_len:] + + # send traj data to each learner + for i, traj_q in enumerate(traj_q_list): + for _ in range(train_n_traj): + try: + traj_data = next(traj_iter) + except StopIteration: + break + for data in traj_data: + traj_q.put(tf.Variable(data, dtype=tf.float32)) + + # syn net weights from learner + all_weights = [param_q.get(dtype=weight.dtype, shape=weight.shape) for weight in rl_agent.all_weights] + rl_agent.update_model(all_weights) + + # save model + if not train_cnt % save_interval: + rl_agent.save_ckpt(env_name) + + # save the final model + rl_agent.save_ckpt(env_name) + print('server finished') + + +def main(training_conf, env_conf, agent_conf): + args = parse_args() + agent = Agent(n_learners=args.l, n_actors=args.a, n_servers=args.s) + + print('%s : %d/%d' % (agent.role(), agent.role_rank(), agent.role_size())) + + agent.barrier() + + if agent.role() == Role.Learner: + run_learner(agent, args, training_conf, env_conf, agent_conf) + elif agent.role() == Role.Actor: + run_actor(agent, args, training_conf, env_conf) + elif agent.role() == Role.Server: + run_server(agent, args, training_conf, env_conf, agent_conf) + else: + raise RuntimeError('invalid role') + + agent.barrier() diff --git a/rlzoo/distributed/training_components.py b/rlzoo/distributed/training_components.py new file mode 100755 index 0000000..3b5c447 --- /dev/null +++ b/rlzoo/distributed/training_components.py @@ -0,0 +1,62 @@ +from rlzoo.common.env_wrappers import build_env +from rlzoo.common.policy_networks import * +from rlzoo.common.value_networks import * +from rlzoo.algorithms.dppo_clip_distributed.dppo_clip import DPPO_CLIP +from functools import partial + +# constants +training_conf = { + 'total_step': int(1e7), + 'traj_len': 200, + 'train_n_traj': 2, + 'save_interval': 10, +} + +# launch env settings +env_name, env_type = 'CartPole-v0', 'classic_control' +env_maker = partial(build_env, 
env_name, env_type) +temp_env = env_maker() +obs_shape, act_shape = temp_env.observation_space.shape, temp_env.action_space.shape + +env_conf = { + 'env_name': env_name, + 'env_type': env_type, + 'env_maker': env_maker, + 'obs_shape': obs_shape, + 'act_shape': act_shape, +} + + +def build_network(observation_space, action_space, name='DPPO_CLIP'): + """ build networks for the algorithm """ + hidden_dim = 256 + num_hidden_layer = 2 + critic = ValueNetwork(observation_space, [hidden_dim] * num_hidden_layer, name=name + '_value') + + actor = StochasticPolicyNetwork(observation_space, action_space, + [hidden_dim] * num_hidden_layer, + trainable=True, + name=name + '_policy') + return critic, actor + + +def build_opt(actor_lr=1e-4, critic_lr=2e-4): + import tensorflow as tf + return [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] + + +net_builder = partial(build_network, temp_env.observation_space, temp_env.action_space) +opt_builder = partial(build_opt, ) + +agent_conf = { + 'net_builder': net_builder, + 'opt_builder': opt_builder, + 'agent_generator': partial(DPPO_CLIP, net_builder, opt_builder), +} +del temp_env + +from rlzoo.distributed.start_dis_role import main + +print('start training') +main(training_conf, env_conf, agent_conf) +print('finished') diff --git a/rlzoo/interactive/.gitignore b/rlzoo/interactive/.gitignore old mode 100644 new mode 100755 index bbb7590..bd325b2 --- a/rlzoo/interactive/.gitignore +++ b/rlzoo/interactive/.gitignore @@ -1,3 +1,3 @@ -img/ -log/ -model/ +img/ +log/ +model/ diff --git a/rlzoo/interactive/common.py b/rlzoo/interactive/common.py old mode 100644 new mode 100755 index be2d625..a4e0257 --- a/rlzoo/interactive/common.py +++ b/rlzoo/interactive/common.py @@ -1,124 +1,124 @@ -import decimal - -import ipywidgets as widgets -import numpy as np - -border_list = [None, 'hidden', 'dotted', 'dashed', 'solid', 'double', - 'groove', 'ridge', 'inset', 'outset', 'inherit'] - - -class NumInput(widgets.HBox): - - def __init__(self, init_value, step=None, range_min=None, range_max=None): - self.range = [range_min, range_max] - range_min = 0 if range_min is None else range_min - range_max = init_value * 2 if range_max is None else range_max - self.range_size = max([range_max - init_value, init_value - range_min]) - if step is None: - fs = decimal.Decimal(str(init_value)).as_tuple().exponent - self.decimals = -fs - step = np.round(np.power(0.1, self.decimals), self.decimals) - else: - fs = decimal.Decimal(str(step)).as_tuple().exponent - fv = decimal.Decimal(str(init_value)).as_tuple().exponent - self.decimals = -min(fs, fv) - - self.step = step - - self.slider = widgets.FloatSlider( - value=init_value, - min=range_min, - max=range_max, - step=step, - description='Slider input:', - disabled=False, - continuous_update=False, - orientation='horizontal', - readout=True, - readout_format='.' 
+ str(self.decimals) + 'f' - ) - - self.text = widgets.FloatText( - value=self.slider.value, - description='Manual input:', - disabled=False - ) - - def __extend_max(change): - num_new = np.around(change['new'], decimals=self.decimals) - num_old = change['old'] - if num_new > num_old: - if num_new - num_old > (self.slider.max - num_old) / 2: - self.range_size *= 2 - else: - self.range_size *= 0.5 - else: - if num_old - num_new > (num_old - self.slider.min) / 2: - self.range_size *= 2 - else: - self.range_size *= 0.5 - - if self.range_size < self.step * 10: - self.range_size = self.step * 10 - - self.slider.min = num_new - self.range_size if self.range[0] is None else self.range[0] - self.slider.max = num_new + self.range_size if self.range[1] is None else self.range[1] - self.slider.value = num_new - self.text.value = num_new - - self.slider.observe(__extend_max, names='value') - self.text.observe(__extend_max, names='value') - box_layout = widgets.Layout(display='flex', - align_items='stretch', - justify_content='center', ) - # self.frame = widgets.HBox([self.slider, self.text], layout=box_layout) - super().__init__([self.slider, self.text], layout=box_layout) - self._int_type = False - if (isinstance(init_value, int) or isinstance(init_value, np.int16) \ - or isinstance(init_value, np.int32) or isinstance(init_value, np.int64)) \ - and step % 1 == 0: - self._int_type = True - - @property - def value(self): - result = self.slider.value - if self._int_type: - result = int(result) - return result - - -class Border: - def __init__(self, element_list, description=None, size=5, style=0): - if not isinstance(element_list, list): - element_list = [element_list] - - box_layout = widgets.Layout(display='flex', - flex_flow='column', - align_items='flex-start', - align_content='flex-start', - # justify_content='center', - justify_content='space-around', - border=border_list[2] - ) - frame = widgets.Box(children=element_list, layout=box_layout) - - if description is not None: - caption = widgets.HTML(value=""+description+"") - children = [caption, frame] - else: - children = [frame] - - box_layout = widgets.Layout(display='flex', - flex_flow='column', - align_items='center', - justify_content='center', - border=border_list[style], ) - self.frame = widgets.Box(children=children, layout=box_layout) - - -class InfoDisplay: - def __init__(self, description, detail): - label = widgets.Label(description) - self.data = widgets.Label(detail) - self.frame = widgets.HBox([label, self.data], layout=widgets.Layout(justify_content='flex-start', )) -# border=border_list[2])) +import decimal + +import ipywidgets as widgets +import numpy as np + +border_list = [None, 'hidden', 'dotted', 'dashed', 'solid', 'double', + 'groove', 'ridge', 'inset', 'outset', 'inherit'] + + +class NumInput(widgets.HBox): + + def __init__(self, init_value, step=None, range_min=None, range_max=None): + self.range = [range_min, range_max] + range_min = 0 if range_min is None else range_min + range_max = init_value * 2 if range_max is None else range_max + self.range_size = max([range_max - init_value, init_value - range_min]) + if step is None: + fs = decimal.Decimal(str(init_value)).as_tuple().exponent + self.decimals = -fs + step = np.round(np.power(0.1, self.decimals), self.decimals) + else: + fs = decimal.Decimal(str(step)).as_tuple().exponent + fv = decimal.Decimal(str(init_value)).as_tuple().exponent + self.decimals = -min(fs, fv) + + self.step = step + + self.slider = widgets.FloatSlider( + value=init_value, + min=range_min, + 
max=range_max, + step=step, + description='Slider input:', + disabled=False, + continuous_update=False, + orientation='horizontal', + readout=True, + readout_format='.' + str(self.decimals) + 'f' + ) + + self.text = widgets.FloatText( + value=self.slider.value, + description='Manual input:', + disabled=False + ) + + def __extend_max(change): + num_new = np.around(change['new'], decimals=self.decimals) + num_old = change['old'] + if num_new > num_old: + if num_new - num_old > (self.slider.max - num_old) / 2: + self.range_size *= 2 + else: + self.range_size *= 0.5 + else: + if num_old - num_new > (num_old - self.slider.min) / 2: + self.range_size *= 2 + else: + self.range_size *= 0.5 + + if self.range_size < self.step * 10: + self.range_size = self.step * 10 + + self.slider.min = num_new - self.range_size if self.range[0] is None else self.range[0] + self.slider.max = num_new + self.range_size if self.range[1] is None else self.range[1] + self.slider.value = num_new + self.text.value = num_new + + self.slider.observe(__extend_max, names='value') + self.text.observe(__extend_max, names='value') + box_layout = widgets.Layout(display='flex', + align_items='stretch', + justify_content='center', ) + # self.frame = widgets.HBox([self.slider, self.text], layout=box_layout) + super().__init__([self.slider, self.text], layout=box_layout) + self._int_type = False + if (isinstance(init_value, int) or isinstance(init_value, np.int16) \ + or isinstance(init_value, np.int32) or isinstance(init_value, np.int64)) \ + and step % 1 == 0: + self._int_type = True + + @property + def value(self): + result = self.slider.value + if self._int_type: + result = int(result) + return result + + +class Border: + def __init__(self, element_list, description=None, size=5, style=0): + if not isinstance(element_list, list): + element_list = [element_list] + + box_layout = widgets.Layout(display='flex', + flex_flow='column', + align_items='flex-start', + align_content='flex-start', + # justify_content='center', + justify_content='space-around', + border=border_list[2] + ) + frame = widgets.Box(children=element_list, layout=box_layout) + + if description is not None: + caption = widgets.HTML(value=""+description+"") + children = [caption, frame] + else: + children = [frame] + + box_layout = widgets.Layout(display='flex', + flex_flow='column', + align_items='center', + justify_content='center', + border=border_list[style], ) + self.frame = widgets.Box(children=children, layout=box_layout) + + +class InfoDisplay: + def __init__(self, description, detail): + label = widgets.Label(description) + self.data = widgets.Label(detail) + self.frame = widgets.HBox([label, self.data], layout=widgets.Layout(justify_content='flex-start', )) +# border=border_list[2])) diff --git a/rlzoo/interactive/components.py b/rlzoo/interactive/components.py old mode 100644 new mode 100755 index d79993c..405609e --- a/rlzoo/interactive/components.py +++ b/rlzoo/interactive/components.py @@ -1,463 +1,463 @@ -from __future__ import print_function -import copy -from collections import OrderedDict - -from ipywidgets import Layout -from ipywidgets import GridspecLayout - -from IPython.display import clear_output -from IPython.core.interactiveshell import InteractiveShell -from gym import spaces - -from rlzoo.common.env_list import all_env_list -from rlzoo.common.utils import * -from rlzoo.interactive.common import * - -all_env_list = OrderedDict(sorted(all_env_list.items())) - - -class EnvironmentSelector(widgets.VBox): - def __init__(self): - env_list = 
all_env_list - # al = list(env_list.keys()) - al = ['atari', 'classic_control', 'box2d', 'mujoco', 'robotics', 'dm_control', 'rlbench'] - description = 'Environment Selector' - caption = widgets.HTML(value="" + description + "") - - text_0 = widgets.Label("Choose your environment") - - self.env_type = widgets.Dropdown( - options=al, - value=al[0], - description='env type:', - disabled=False, - ) - - self.env_name = widgets.Dropdown( - options=env_list[al[0]], - value=env_list[al[0]][0], - description='env name:', - disabled=False, - ) - env_select_box = widgets.VBox([text_0, self.env_type, self.env_name]) - - text_1 = widgets.Label(value="Environment settings") - - self.env_num = widgets.IntText( - value=1, - description='multi envs:', - disabled=False, - min=1, - # layout=Layout(width='150px') - ) - - self.env_state = widgets.Dropdown( - options=['default'], - value='default', - description='state type:', - disabled=False, - ) - - # self.create_button = widgets.Button( - # description='Create!', - # disabled=False, - # tooltip='Create', - # icon='check' - # ) - - # multi_box = widgets.HBox([self.env_multi, self.env_num], layout=Layout(justify_content='flex-start')) - env_setting_box = widgets.VBox([text_1, self.env_num, self.env_state]) - - select_box = widgets.HBox([env_select_box, env_setting_box], - layout=Layout(justify_content='Center')) - - # self.frame = widgets.VBox([select_box, widgets.Box([self.create_button], - # layout=Layout(justify_content='Center'))]) - # self.frame = widgets.AppLayout(left_sidebar=select_box, center=info_border.frame) - - def env_type_change(change): - d = env_list[self.env_type.value] - self.env_name.options = d - self.env_name.value = d[0] - if self.env_type.value == 'rlbench': - self.env_state.options = ['state', 'vision'] - self.env_state.value = 'state' - self.env_num.value = 1 - self.env_num.disabled = True - else: - self.env_state.options = ['default'] - self.env_state.value = 'default' - self.env_num.disabled = False - - # def create_env(c): # todo the program will be blocked if rlbench env is created here - # if self.env_type.value == 'rlbench': - # print(self.env_name.value, self.env_type.value, self.env_num.value, self.env_state.value) - # self._env = build_env(self.env_name.value, self.env_type.value, - # nenv=self.env_num.value, state_type=self.env_state.value) - # self._env = build_env(self.env_name.value, self.env_type.value, nenv=self.env_num.value) - # print('Environment created successfully!') - - def change_nenv(c): - if self.env_num.value < 1: - self.env_num.value = 1 - - self.env_num.observe(change_nenv, names='value') - self.env_type.observe(env_type_change, names='value') - - # self.create_button.on_click(create_env) - - super().__init__([caption, select_box], layout=widgets.Layout(align_items='center', )) - - @property - def value(self): - return {'env_id': self.env_name.value, - 'env_type': self.env_type.value, - 'nenv': self.env_num.value, - 'state_type': self.env_state.value} - - -# @property -# def env(self): -# return self._env - -class SpaceInfoViewer(widgets.Box): - def __init__(self, sp): - assert isinstance(sp, spaces.Space) - if isinstance(sp, spaces.Dict): - it = list(sp.spaces.items()) - info = GridspecLayout(len(it), 2) - for i, v in enumerate(it): - info[i, 0], info[i, 1] = widgets.Label(v[0]), widgets.Label(str(v[1])) - else: - info = widgets.Label(str(sp)) - super().__init__([info]) - - -class EnvInfoViewer(widgets.VBox): - def __init__(self, env): - if isinstance(env, list): - env = env[0] - env_obs = 
SpaceInfoViewer(env.observation_space) - env_act = SpaceInfoViewer(env.action_space) - tips = None - if isinstance(env.action_space, gym.spaces.Discrete): - tips = 'The action space is discrete.' - elif isinstance(env.action_space, gym.spaces.Box): - tips = 'The action space is continuous.' - - description = 'Environment Information' - caption = widgets.HTML(value="" + description + "") - - a00, a01 = widgets.Label('Environment name:'), widgets.Label(env.spec.id) - a10, a11 = widgets.Label('Observation space:'), env_obs - a20, a21 = widgets.Label('Action space:'), env_act - - if tips is None: - # use GirdBox instead of GridspecLayout to ensure each row has a different height - info = widgets.GridBox(children=[a00, a01, a10, a11, a20, a21], - layout=Layout(grid_template_areas=""" - "a00 a01" - "a10 a11" - "a20 a21" - """)) - else: - t0 = widgets.Label('Tips:') - t1 = widgets.Label(tips) - info = widgets.GridBox(children=[a00, a01, a10, a11, a20, a21, t0, t1], - layout=Layout(grid_template_areas=""" - "a00 a01" - "a10 a11" - "a20 a21" - "t0 t1" - """)) - - super().__init__([caption, info], layout=widgets.Layout(align_items='center', )) - - -all_alg_list = ['A3C', 'AC', 'DDPG', 'DPPO', 'DQN', 'PG', 'PPO', 'SAC', 'TD3', 'TRPO'] -all_alg_dict = {'discrete_action_space': ['AC', 'DQN', 'PG', 'PPO', 'TRPO'], - 'continuous_action_space': ['AC', 'DDPG', 'PG', 'PPO', 'SAC', 'TD3', 'TRPO'], - 'multi_env': ['A3C', 'DPPO'] - } - - -class AlgorithmSelector(widgets.VBox): - def __init__(self, env): - description = 'Algorithm Selector' - caption = widgets.HTML(value="" + description + "") - info = 'Supported algorithms are shown below' - if isinstance(env, list): - # info = 'Distributed algorithms are shown below' - table = all_alg_dict['multi_env'] - self.env_id = env[0].spec.id - elif isinstance(env.action_space, gym.spaces.Discrete): - # info = 'Algorithms which support discrete action space are shown below' - table = all_alg_dict['discrete_action_space'] - self.env_id = env.spec.id - elif isinstance(env.action_space, gym.spaces.Box): - # info = 'Algorithms which support continuous action space are shown below' - table = all_alg_dict['continuous_action_space'] - self.env_id = env.spec.id - else: - raise ValueError('Unsupported environment') - - self.algo_name = widgets.Dropdown( - options=table, - value=table[0], - description='Algorithms:', - disabled=False, - ) - - super().__init__([caption, widgets.Label(info), self.algo_name], - layout=widgets.Layout(align_items='center', )) - - @property - def value(self): - return self.algo_name.value - - -def TransInput(value): - if isinstance(value, bool): - return widgets.Checkbox(value=value, description='', disabled=False, indent=False) - elif isinstance(value, int) or isinstance(value, float) \ - or isinstance(value, np.int16) or isinstance(value, np.float16) \ - or isinstance(value, np.int32) or isinstance(value, np.float32) \ - or isinstance(value, np.int64) or isinstance(value, np.float64) \ - or isinstance(value, np.float128): - return NumInput(value) - else: - return widgets.Label(value) - - -class AlgoInfoViewer(widgets.VBox): - def __init__(self, alg_selector, org_alg_params, org_learn_params): - alg_params, learn_params = copy.deepcopy(org_alg_params), copy.deepcopy(org_learn_params) - - # ---------------- alg_params --------------- # - description = 'Algorithm Parameters' - alg_caption = widgets.HTML(value="" + description + "") - net_label = widgets.Label('Network information:') - show_net = lambda net: widgets.VBox([widgets.Label(str(layer)) for 
layer in net.all_layers]) - - n = np.ndim(alg_params['net_list']) - if n == 1: - model_net = alg_params['net_list'] - elif n == 2: - model_net = alg_params['net_list'][0] - - net_info = widgets.VBox([widgets.VBox([widgets.Label(str(net.__class__.__name__)), - show_net(net), ], - layout=widgets.Layout(border=border_list[2], - align_items='center', - align_content='center' - ) - ) for net in model_net]) - self._net_list = alg_params['net_list'] - del alg_params['net_list'] - - opt_label = widgets.Label('Optimizer information:') - - def show_params(params): - params = copy.deepcopy(params) - n = len(params) - frame = widgets.GridspecLayout(n, 2, layout=widgets.Layout(border=border_list[2], )) - show_info = lambda k: [widgets.Label(str(k)), widgets.Label(str(params[k]))] - frame[0, 0], frame[0, 1] = show_info('name') - frame[1, 0], frame[1, 1] = show_info('learning_rate') - del params['name'] - del params['learning_rate'] - for i, k in enumerate(sorted(params.keys())): - if k != 'name' and k != 'learning_rate': - frame[2 + i, 0], frame[2 + i, 1] = show_info(k) - return frame - - opt_info = widgets.VBox([show_params(n.get_config()) for n in alg_params['optimizers_list']]) - self._optimizers_list = alg_params['optimizers_list'] - del alg_params['optimizers_list'] - - stu_frame = widgets.GridBox(children=[net_label, net_info, opt_label, opt_info], - layout=Layout(grid_template_areas=""" - "net_label net_info" - "opt_label opt_info" - """)) - - alg_sel_dict = dict() - sk = sorted(alg_params.keys()) - n = len(sk) + 1 - alg_param_sel = widgets.GridspecLayout(n, 2) - b = 0 - if 'method' in sk: - module = widgets.RadioButtons(options=['penalty', 'clip'], disabled=False) - alg_param_sel[0, 0], alg_param_sel[0, 1] = widgets.Label('method'), module - alg_sel_dict['method'] = module - sk.remove('method') - b += 1 - - for i, k in enumerate(sk): - module = TransInput(alg_params[k]) - alg_sel_dict[k] = module - if k == 'dueling': - module.disabled = True - alg_param_sel[i + b, 0], alg_param_sel[i + b, 1] = widgets.Label(k), module - - alg_param_box = widgets.VBox([alg_caption, stu_frame, alg_param_sel], ) - name = alg_selector.value + '-' + alg_selector.env_id - path = os.path.join('.', 'model', name) - alg_param_sel[n - 1, 0] = widgets.Label('model save path') - alg_param_sel[n - 1, 1] = widgets.Label(path) - - self.alg_sel_dict = alg_sel_dict - # ================== alg_params ================= # - - # ----------------- learn_params ---------------- # - description = 'Learn Parameters' - learn_caption = widgets.HTML(value="" + description + "") - - learn_sel_dict = dict() - sk = sorted(learn_params.keys()) - - n = len(sk) - if 'mode' not in sk: n += 1 - if 'render' not in sk: n += 1 - learn_param_sel = widgets.GridspecLayout(n, 2) - - module = widgets.RadioButtons(options=['train', 'test'], disabled=False) - learn_param_sel[0, 0], learn_param_sel[0, 1] = widgets.Label('mode'), module - learn_sel_dict['mode'] = module - try: - sk.remove('mode') - except: - pass - - module = widgets.Checkbox(value=False, description='', disabled=False, indent=False) - learn_param_sel[1, 0], learn_param_sel[1, 1] = widgets.Label('render'), module - learn_sel_dict['render'] = module - try: - sk.remove('render') - except: - pass - - for i, k in enumerate(sk): - module = TransInput(learn_params[k]) - learn_sel_dict[k] = module - learn_param_sel[i + 2, 0], learn_param_sel[i + 2, 1] = widgets.Label(k), module - learn_param_box = widgets.VBox([learn_caption, learn_param_sel], - # layout=Layout(align_items='center',) - ) - 
self.learn_sel_dict = learn_sel_dict - # ================= learn_params ================ # - - b = widgets.Output(layout=widgets.Layout(border='solid')) - - self.smooth_factor_slider = widgets.FloatSlider( - value=0.8, - min=0, - max=1, - step=0.01, - description='learning curve smooth factor', - disabled=False, - continuous_update=False, - orientation='horizontal', - readout=True, - readout_format='.2f', - style={'description_width': 'initial'}, - ) - super().__init__([alg_param_box, b, learn_param_box, b, self.smooth_factor_slider]) - - @property - def alg_params(self): - result = {'net_list': self._net_list, 'optimizers_list': self._optimizers_list} - for k in self.alg_sel_dict.keys(): - result[k] = self.alg_sel_dict[k].value - return result - - @property - def smooth_factor(self): - return self.smooth_factor_slider.value - - @property - def learn_params(self): - result = dict() - for k in self.learn_sel_dict.keys(): - result[k] = self.learn_sel_dict[k].value - return result - - -class RevOutput(widgets.Output): - def _append_stream_output(self, text, stream_name): - """Append a stream output.""" - self.outputs = ( - {'output_type': 'stream', 'name': stream_name, 'text': text}, - ) + self.outputs - - def append_display_data(self, display_object): - """Append a display object as an output. - - Parameters - ---------- - display_object : IPython.core.display.DisplayObject - The object to display (e.g., an instance of - `IPython.display.Markdown` or `IPython.display.Image`). - """ - fmt = InteractiveShell.instance().display_formatter.format - data, metadata = fmt(display_object) - self.outputs = ( - { - 'output_type': 'display_data', - 'data': data, - 'metadata': metadata - }, - ) + self.outputs - - -class OutputMonitor(widgets.HBox): - def __init__(self, learn_params, smooth_factor): - max_num = learn_params['train_episodes'] if learn_params['mode'] == 'train' else learn_params['test_episodes'] - self.progress = widgets.FloatProgress(value=0.0, min=0.0, max=max_num, description='Progress') - - self.plot_out = widgets.Output(layout=widgets.Layout(width='350px', - height='250px', )) - self.smooth_factor = smooth_factor - # self.smooth_factor = widgets.FloatSlider( - # value=self.sf, - # min=0, - # max=1, - # step=0.01, - # description='smooth factor', - # disabled=False, - # continuous_update=False, - # orientation='horizontal', - # readout=True, - # readout_format='.2f' - # ) - - # def link(c): - # self.sf = self.smooth_factor.value - - # self.smooth_factor.observe(link, 'value') - # plot_out = widgets.VBox([widgets.Label('Learning curve'), self.plot_out, self.smooth_factor]) - plot_out = widgets.VBox([widgets.Label('Learning curve'), self.plot_out]) - - self.print_out = RevOutput(layout=widgets.Layout(overflow='scroll', - width='60%', - height='300px', - # display='flex', - # positioning='bottom', - border='1px solid black', - )) - self.plot_func([]) - super().__init__([widgets.VBox([plot_out, self.progress]), self.print_out]) - - def plot_func(self, datas): - # datas = signal.lfilter([1 - self.smooth_factor], [1, -self.smooth_factor], datas, axis=0) - if datas: - disD = [datas[0]] - for d in datas[1:]: - disD.append(disD[-1] * self.smooth_factor + d * (1 - self.smooth_factor)) - else: - disD = datas - with self.plot_out: - self.progress.value = len(disD) - plt.plot(disD) - clear_output(wait=True) - plt.show() +from __future__ import print_function +import copy +from collections import OrderedDict + +from ipywidgets import Layout +from ipywidgets import GridspecLayout + +from 
IPython.display import clear_output +from IPython.core.interactiveshell import InteractiveShell +from gym import spaces + +from rlzoo.common.env_list import all_env_list +from rlzoo.common.utils import * +from rlzoo.interactive.common import * + +all_env_list = OrderedDict(sorted(all_env_list.items())) + + +class EnvironmentSelector(widgets.VBox): + def __init__(self): + env_list = all_env_list + # al = list(env_list.keys()) + al = ['atari', 'classic_control', 'box2d', 'mujoco', 'robotics', 'dm_control', 'rlbench'] + description = 'Environment Selector' + caption = widgets.HTML(value="" + description + "") + + text_0 = widgets.Label("Choose your environment") + + self.env_type = widgets.Dropdown( + options=al, + value=al[0], + description='env type:', + disabled=False, + ) + + self.env_name = widgets.Dropdown( + options=env_list[al[0]], + value=env_list[al[0]][0], + description='env name:', + disabled=False, + ) + env_select_box = widgets.VBox([text_0, self.env_type, self.env_name]) + + text_1 = widgets.Label(value="Environment settings") + + self.env_num = widgets.IntText( + value=1, + description='multi envs:', + disabled=False, + min=1, + # layout=Layout(width='150px') + ) + + self.env_state = widgets.Dropdown( + options=['default'], + value='default', + description='state type:', + disabled=False, + ) + + # self.create_button = widgets.Button( + # description='Create!', + # disabled=False, + # tooltip='Create', + # icon='check' + # ) + + # multi_box = widgets.HBox([self.env_multi, self.env_num], layout=Layout(justify_content='flex-start')) + env_setting_box = widgets.VBox([text_1, self.env_num, self.env_state]) + + select_box = widgets.HBox([env_select_box, env_setting_box], + layout=Layout(justify_content='Center')) + + # self.frame = widgets.VBox([select_box, widgets.Box([self.create_button], + # layout=Layout(justify_content='Center'))]) + # self.frame = widgets.AppLayout(left_sidebar=select_box, center=info_border.frame) + + def env_type_change(change): + d = env_list[self.env_type.value] + self.env_name.options = d + self.env_name.value = d[0] + if self.env_type.value == 'rlbench': + self.env_state.options = ['state', 'vision'] + self.env_state.value = 'state' + self.env_num.value = 1 + self.env_num.disabled = True + else: + self.env_state.options = ['default'] + self.env_state.value = 'default' + self.env_num.disabled = False + + # def create_env(c): # todo the program will be blocked if rlbench env is created here + # if self.env_type.value == 'rlbench': + # print(self.env_name.value, self.env_type.value, self.env_num.value, self.env_state.value) + # self._env = build_env(self.env_name.value, self.env_type.value, + # nenv=self.env_num.value, state_type=self.env_state.value) + # self._env = build_env(self.env_name.value, self.env_type.value, nenv=self.env_num.value) + # print('Environment created successfully!') + + def change_nenv(c): + if self.env_num.value < 1: + self.env_num.value = 1 + + self.env_num.observe(change_nenv, names='value') + self.env_type.observe(env_type_change, names='value') + + # self.create_button.on_click(create_env) + + super().__init__([caption, select_box], layout=widgets.Layout(align_items='center', )) + + @property + def value(self): + return {'env_id': self.env_name.value, + 'env_type': self.env_type.value, + 'nenv': self.env_num.value, + 'state_type': self.env_state.value} + + +# @property +# def env(self): +# return self._env + +class SpaceInfoViewer(widgets.Box): + def __init__(self, sp): + assert isinstance(sp, spaces.Space) + if isinstance(sp, 
spaces.Dict): + it = list(sp.spaces.items()) + info = GridspecLayout(len(it), 2) + for i, v in enumerate(it): + info[i, 0], info[i, 1] = widgets.Label(v[0]), widgets.Label(str(v[1])) + else: + info = widgets.Label(str(sp)) + super().__init__([info]) + + +class EnvInfoViewer(widgets.VBox): + def __init__(self, env): + if isinstance(env, list): + env = env[0] + env_obs = SpaceInfoViewer(env.observation_space) + env_act = SpaceInfoViewer(env.action_space) + tips = None + if isinstance(env.action_space, gym.spaces.Discrete): + tips = 'The action space is discrete.' + elif isinstance(env.action_space, gym.spaces.Box): + tips = 'The action space is continuous.' + + description = 'Environment Information' + caption = widgets.HTML(value="" + description + "") + + a00, a01 = widgets.Label('Environment name:'), widgets.Label(env.spec.id) + a10, a11 = widgets.Label('Observation space:'), env_obs + a20, a21 = widgets.Label('Action space:'), env_act + + if tips is None: + # use GirdBox instead of GridspecLayout to ensure each row has a different height + info = widgets.GridBox(children=[a00, a01, a10, a11, a20, a21], + layout=Layout(grid_template_areas=""" + "a00 a01" + "a10 a11" + "a20 a21" + """)) + else: + t0 = widgets.Label('Tips:') + t1 = widgets.Label(tips) + info = widgets.GridBox(children=[a00, a01, a10, a11, a20, a21, t0, t1], + layout=Layout(grid_template_areas=""" + "a00 a01" + "a10 a11" + "a20 a21" + "t0 t1" + """)) + + super().__init__([caption, info], layout=widgets.Layout(align_items='center', )) + + +all_alg_list = ['A3C', 'AC', 'DDPG', 'DPPO', 'DQN', 'PG', 'PPO', 'SAC', 'TD3', 'TRPO'] +all_alg_dict = {'discrete_action_space': ['AC', 'DQN', 'PG', 'PPO', 'TRPO'], + 'continuous_action_space': ['AC', 'DDPG', 'PG', 'PPO', 'SAC', 'TD3', 'TRPO'], + 'multi_env': ['A3C', 'DPPO'] + } + + +class AlgorithmSelector(widgets.VBox): + def __init__(self, env): + description = 'Algorithm Selector' + caption = widgets.HTML(value="" + description + "") + info = 'Supported algorithms are shown below' + if isinstance(env, list): + # info = 'Distributed algorithms are shown below' + table = all_alg_dict['multi_env'] + self.env_id = env[0].spec.id + elif isinstance(env.action_space, gym.spaces.Discrete): + # info = 'Algorithms which support discrete action space are shown below' + table = all_alg_dict['discrete_action_space'] + self.env_id = env.spec.id + elif isinstance(env.action_space, gym.spaces.Box): + # info = 'Algorithms which support continuous action space are shown below' + table = all_alg_dict['continuous_action_space'] + self.env_id = env.spec.id + else: + raise ValueError('Unsupported environment') + + self.algo_name = widgets.Dropdown( + options=table, + value=table[0], + description='Algorithms:', + disabled=False, + ) + + super().__init__([caption, widgets.Label(info), self.algo_name], + layout=widgets.Layout(align_items='center', )) + + @property + def value(self): + return self.algo_name.value + + +def TransInput(value): + if isinstance(value, bool): + return widgets.Checkbox(value=value, description='', disabled=False, indent=False) + elif isinstance(value, int) or isinstance(value, float) \ + or isinstance(value, np.int16) or isinstance(value, np.float16) \ + or isinstance(value, np.int32) or isinstance(value, np.float32) \ + or isinstance(value, np.int64) or isinstance(value, np.float64) \ + or isinstance(value, np.float128): + return NumInput(value) + else: + return widgets.Label(value) + + +class AlgoInfoViewer(widgets.VBox): + def __init__(self, alg_selector, org_alg_params, 
org_learn_params): + alg_params, learn_params = copy.deepcopy(org_alg_params), copy.deepcopy(org_learn_params) + + # ---------------- alg_params --------------- # + description = 'Algorithm Parameters' + alg_caption = widgets.HTML(value="" + description + "") + net_label = widgets.Label('Network information:') + show_net = lambda net: widgets.VBox([widgets.Label(str(layer)) for layer in net.all_layers]) + + n = np.ndim(alg_params['net_list']) + if n == 1: + model_net = alg_params['net_list'] + elif n == 2: + model_net = alg_params['net_list'][0] + + net_info = widgets.VBox([widgets.VBox([widgets.Label(str(net.__class__.__name__)), + show_net(net), ], + layout=widgets.Layout(border=border_list[2], + align_items='center', + align_content='center' + ) + ) for net in model_net]) + self._net_list = alg_params['net_list'] + del alg_params['net_list'] + + opt_label = widgets.Label('Optimizer information:') + + def show_params(params): + params = copy.deepcopy(params) + n = len(params) + frame = widgets.GridspecLayout(n, 2, layout=widgets.Layout(border=border_list[2], )) + show_info = lambda k: [widgets.Label(str(k)), widgets.Label(str(params[k]))] + frame[0, 0], frame[0, 1] = show_info('name') + frame[1, 0], frame[1, 1] = show_info('learning_rate') + del params['name'] + del params['learning_rate'] + for i, k in enumerate(sorted(params.keys())): + if k != 'name' and k != 'learning_rate': + frame[2 + i, 0], frame[2 + i, 1] = show_info(k) + return frame + + opt_info = widgets.VBox([show_params(n.get_config()) for n in alg_params['optimizers_list']]) + self._optimizers_list = alg_params['optimizers_list'] + del alg_params['optimizers_list'] + + stu_frame = widgets.GridBox(children=[net_label, net_info, opt_label, opt_info], + layout=Layout(grid_template_areas=""" + "net_label net_info" + "opt_label opt_info" + """)) + + alg_sel_dict = dict() + sk = sorted(alg_params.keys()) + n = len(sk) + 1 + alg_param_sel = widgets.GridspecLayout(n, 2) + b = 0 + if 'method' in sk: + module = widgets.RadioButtons(options=['penalty', 'clip'], disabled=False) + alg_param_sel[0, 0], alg_param_sel[0, 1] = widgets.Label('method'), module + alg_sel_dict['method'] = module + sk.remove('method') + b += 1 + + for i, k in enumerate(sk): + module = TransInput(alg_params[k]) + alg_sel_dict[k] = module + if k == 'dueling': + module.disabled = True + alg_param_sel[i + b, 0], alg_param_sel[i + b, 1] = widgets.Label(k), module + + alg_param_box = widgets.VBox([alg_caption, stu_frame, alg_param_sel], ) + name = alg_selector.value + '-' + alg_selector.env_id + path = os.path.join('.', 'model', name) + alg_param_sel[n - 1, 0] = widgets.Label('model save path') + alg_param_sel[n - 1, 1] = widgets.Label(path) + + self.alg_sel_dict = alg_sel_dict + # ================== alg_params ================= # + + # ----------------- learn_params ---------------- # + description = 'Learn Parameters' + learn_caption = widgets.HTML(value="" + description + "") + + learn_sel_dict = dict() + sk = sorted(learn_params.keys()) + + n = len(sk) + if 'mode' not in sk: n += 1 + if 'render' not in sk: n += 1 + learn_param_sel = widgets.GridspecLayout(n, 2) + + module = widgets.RadioButtons(options=['train', 'test'], disabled=False) + learn_param_sel[0, 0], learn_param_sel[0, 1] = widgets.Label('mode'), module + learn_sel_dict['mode'] = module + try: + sk.remove('mode') + except: + pass + + module = widgets.Checkbox(value=False, description='', disabled=False, indent=False) + learn_param_sel[1, 0], learn_param_sel[1, 1] = widgets.Label('render'), module + 
learn_sel_dict['render'] = module + try: + sk.remove('render') + except: + pass + + for i, k in enumerate(sk): + module = TransInput(learn_params[k]) + learn_sel_dict[k] = module + learn_param_sel[i + 2, 0], learn_param_sel[i + 2, 1] = widgets.Label(k), module + learn_param_box = widgets.VBox([learn_caption, learn_param_sel], + # layout=Layout(align_items='center',) + ) + self.learn_sel_dict = learn_sel_dict + # ================= learn_params ================ # + + b = widgets.Output(layout=widgets.Layout(border='solid')) + + self.smooth_factor_slider = widgets.FloatSlider( + value=0.8, + min=0, + max=1, + step=0.01, + description='learning curve smooth factor', + disabled=False, + continuous_update=False, + orientation='horizontal', + readout=True, + readout_format='.2f', + style={'description_width': 'initial'}, + ) + super().__init__([alg_param_box, b, learn_param_box, b, self.smooth_factor_slider]) + + @property + def alg_params(self): + result = {'net_list': self._net_list, 'optimizers_list': self._optimizers_list} + for k in self.alg_sel_dict.keys(): + result[k] = self.alg_sel_dict[k].value + return result + + @property + def smooth_factor(self): + return self.smooth_factor_slider.value + + @property + def learn_params(self): + result = dict() + for k in self.learn_sel_dict.keys(): + result[k] = self.learn_sel_dict[k].value + return result + + +class RevOutput(widgets.Output): + def _append_stream_output(self, text, stream_name): + """Append a stream output.""" + self.outputs = ( + {'output_type': 'stream', 'name': stream_name, 'text': text}, + ) + self.outputs + + def append_display_data(self, display_object): + """Append a display object as an output. + + Parameters + ---------- + display_object : IPython.core.display.DisplayObject + The object to display (e.g., an instance of + `IPython.display.Markdown` or `IPython.display.Image`). 
+ """ + fmt = InteractiveShell.instance().display_formatter.format + data, metadata = fmt(display_object) + self.outputs = ( + { + 'output_type': 'display_data', + 'data': data, + 'metadata': metadata + }, + ) + self.outputs + + +class OutputMonitor(widgets.HBox): + def __init__(self, learn_params, smooth_factor): + max_num = learn_params['train_episodes'] if learn_params['mode'] == 'train' else learn_params['test_episodes'] + self.progress = widgets.FloatProgress(value=0.0, min=0.0, max=max_num, description='Progress') + + self.plot_out = widgets.Output(layout=widgets.Layout(width='350px', + height='250px', )) + self.smooth_factor = smooth_factor + # self.smooth_factor = widgets.FloatSlider( + # value=self.sf, + # min=0, + # max=1, + # step=0.01, + # description='smooth factor', + # disabled=False, + # continuous_update=False, + # orientation='horizontal', + # readout=True, + # readout_format='.2f' + # ) + + # def link(c): + # self.sf = self.smooth_factor.value + + # self.smooth_factor.observe(link, 'value') + # plot_out = widgets.VBox([widgets.Label('Learning curve'), self.plot_out, self.smooth_factor]) + plot_out = widgets.VBox([widgets.Label('Learning curve'), self.plot_out]) + + self.print_out = RevOutput(layout=widgets.Layout(overflow='scroll', + width='60%', + height='300px', + # display='flex', + # positioning='bottom', + border='1px solid black', + )) + self.plot_func([]) + super().__init__([widgets.VBox([plot_out, self.progress]), self.print_out]) + + def plot_func(self, datas): + # datas = signal.lfilter([1 - self.smooth_factor], [1, -self.smooth_factor], datas, axis=0) + if datas: + disD = [datas[0]] + for d in datas[1:]: + disD.append(disD[-1] * self.smooth_factor + d * (1 - self.smooth_factor)) + else: + disD = datas + with self.plot_out: + self.progress.value = len(disD) + plt.plot(disD) + clear_output(wait=True) + plt.show() diff --git a/rlzoo/interactive/main.ipynb b/rlzoo/interactive/main.ipynb old mode 100644 new mode 100755 index d5459a9..42c30a7 --- a/rlzoo/interactive/main.ipynb +++ b/rlzoo/interactive/main.ipynb @@ -1,6056 +1,6056 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Interactive Hyper-parameter Configuration\n", - "This is a use case provided by RLzoo to support an interactive hyper-parameter configuration process. It is built with *ipywidgets* package, so make sure you have the package installed:\n", - "\n", - "```! pip3 install ipywidgets==7.5.1```\n", - "\n", - "You just need to **run** each cell (Shift+Enter) and **select** the sliders or dropdown lists to configure the hyper-parameters for the learning process, for whichever algorithm and environment supported in RLzoo. \n", - "\n", - "It follows four steps:\n", - "1. Environment Configuration\n", - "2. Environment Information Display and Algorithm Configuration\n", - "3. Algorithm Parameters Display and Learning Parameters Configuration\n", - "4. Launch Learning with Visualization \n", - "\n", - "Tips:\n", - "To stop the learning process and start a new one, you needs to restart the kernel (always work) or interrupt the kernel (not always work). \n", - "\n", - "Have fun!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "1. 
Environment Configuration\n", - "-----------------------------\n", - "Run a environment selector and select a environment you like.\n", - "\n", - "Tips: no need to rerun after selection, directly go to next cell.\n", - "\"\"\"\n", - "\n", - "from rlzoo.interactive.common import *\n", - "from rlzoo.interactive.components import *\n", - "from rlzoo.algorithms import *\n", - "from rlzoo.common.env_wrappers import build_env, close_env\n", - "env_sel = EnvironmentSelector()\n", - "display(env_sel)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "2. Environment Information Display and Algorithm Configuration\n", - "--------------------------------------------------------------\n", - "Run this code to create the enivronment instance.\n", - "\n", - "Tips: need to rerun every time you want to create a new environment with above cell, \\\n", - "because this cell builds the environment.\n", - "\"\"\"\n", - "\n", - "try:\n", - " close_env(env) # close the previous environment\n", - "except:\n", - " pass\n", - "env = build_env(**env_sel.value)\n", - "print('Environment created!')\n", - "display(EnvInfoViewer(env))\n", - "\n", - "# run a algorithm selector and select a RL algorithm\n", - "alog_sel = AlgorithmSelector(env)\n", - "display(alog_sel)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "3. Algorithm Parameters Display and Learning Parameters Configuration\n", - "----------------------------------------------------------------------\n", - "Call the default parameters of the selected algorithm in our environment and display them, \\\n", - "then select learning parameters.\n", - "\n", - "Tips: need to rerun after you created a different algorithm or environment.\n", - "\"\"\"\n", - "\n", - "EnvType, AlgName = env_sel.value['env_type'], alog_sel.value\n", - "alg_params, learn_params = call_default_params(env, EnvType, AlgName)\n", - "print('Default parameters loaded!')\n", - "\n", - "# see the networks, optimizers and adjust other parameters\n", - "algiv = AlgoInfoViewer(alog_sel, alg_params, learn_params)\n", - "display(algiv)\n", - "\n", - "# run this to generate the algorithm instance with the algorithm parameter settings above\n", - "alg_params = algiv.alg_params\n", - "alg = eval(AlgName+'(**alg_params)')\n", - "print('Algorithm instance created!')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "4. 
Launch Learning with Visualization \n", - "---------------------------------------\n", - "Run the cell to train the algorithm with the configurations above.\n", - "\"\"\"\n", - "\n", - "learn_params = algiv.learn_params\n", - "om = OutputMonitor(learn_params, smooth_factor=algiv.smooth_factor)\n", - "display(om)\n", - "with om.print_out:\n", - " alg.learn(env=env, plot_func=om.plot_func, **learn_params)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# whenever leaving the page, please close the environment by the way\n", - "close_env(env)\n", - "print('Environment closed')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.10" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": { - "00663174be1342fbbd29bc99cdd6d3aa": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "00ead8f3c1ea4020930b11c3bde3dd48": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_543b543dd8bb4fcb9dc9f4a16ac4bd6e", - "style": "IPY_MODEL_f63f7fca433e4d32ad6252416895155b", - "value": "max_steps" - } - }, - "0106cced0fe54fbb9a3a261b11941cce": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "layout": "IPY_MODEL_593926166a704759992244f9732d0f8d", - "style": "IPY_MODEL_4a1bc5d7007848cb89e08eff1479ddf8", - "value": "Learn Parameters" - } - }, - "012eeb7c3bab46d9baa05356cd4ff0f6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "0143906a10054b1594675c3674642d83": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "014bf4270fea44b6aad4c80c7a5979b7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "019cd764de374cb382236f88a5d204af": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "01cece59d650454b9cf09d03e85a6a10": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_81d1f55272ef4977b06be173bdd59b8c", - "style": "IPY_MODEL_e62a214128d34799be2e1cc2cdb98b8c", - "value": "Network information:" - } - }, - "0201bde3e922471d9bb86857be61df95": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatTextModel", - "state": { - "description": "Manual input:", - "layout": "IPY_MODEL_8178676fb5e441ec92464938695643a8", - "step": null, - "style": "IPY_MODEL_0143906a10054b1594675c3674642d83", - "value": 24 - } - }, - "02904d8bc2d442deb3da0b5e6e0363a9": { - "model_module": 
"@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "04461564de8c45d6af4c6055f7b4c17f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "RadioButtonsModel", - "state": { - "_options_labels": [ - "train", - "test" - ], - "index": 0, - "layout": "IPY_MODEL_520b2e1af36547edbae1352d82099fda", - "style": "IPY_MODEL_2c9a721e0f084f8f8f437a5d4d875e3f" - } - }, - "04abdee05e514880bb74dfe64bca36ff": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_9384c24875c24e5b8be37d4c55e04820", - "style": "IPY_MODEL_bebb739676c74aacb396889de39592e6", - "value": "0.9" - } - }, - "0580852520e142a89d7b42c50bfef6a1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "066c122ea5f64991b7347279a79e8061": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "SliderStyleModel", - "state": { - "description_width": "" - } - }, - "06d5c4249f3d404793fe2defc8eb0051": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_5af1a3e17ac64264905701b109c013e2", - "IPY_MODEL_691c17934ca3435eb36a2d84d15ecdf7" - ], - "layout": "IPY_MODEL_7d163d682d5744d6ac7be041fb66c158" - } - }, - "070bc781a91449c6a7fb227586d347e6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_494deb5503e842b78948ed2c14e28e3e", - "style": "IPY_MODEL_2d1f0d1b81ee4e1f85ae2f777dcd0db9", - "value": "beta_2" - } - }, - "07377f1ec0e74dd4897d484914a44f99": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "077609b632e64492acbc9a009222e086": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "078c44ca72d24661bbeb9921196ddace": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "07b040199f664673b2cb1b45c5a5af34": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_47513573787c4ab1bfafee8a38450355", - "style": "IPY_MODEL_0abdf6aca8e44b2f96d9e278ce60a016", - "value": "Dense(n_units=1, tanh, in_channels='64', name='dense_2')" - } - }, - "07b0e1377c414989a1d7ce1bf1da1c4e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_0bd6e0b89391415fa6fc2c7f7fbf3bd3", - "style": "IPY_MODEL_da04b8e9a4464f7ea141e41904fa3b0f", - "value": "0.999" - } - }, - "080346c4f0ae457182549d3c68aaaaea": { - "model_module": "@jupyter-widgets/output", - "model_module_version": "1.0.0", - "model_name": "OutputModel", - "state": { - "layout": "IPY_MODEL_23d66d78336541bf8b3f863dc3e554d4", - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": "Training... 
| Algorithm: AC | Environment: Pendulum-v0\nEpisode: 0/10 | Episode Reward: -1730.5698 | Running Time: 1.6412\n[TL] [*] Saving TL weights into ./model/AC-Pendulum-v0/model_actor\n[TL] [*] Saved\n[TL] [*] Saving TL weights into ./model/AC-Pendulum-v0/model_critic\n[TL] [*] Saved\nEpisode: 1/10 | Episode Reward: -1738.3357 | Running Time: 3.3340\nEpisode: 2/10 | Episode Reward: -1744.1233 | Running Time: 4.9608\nEpisode: 3/10 | Episode Reward: -1854.8743 | Running Time: 6.5518\nEpisode: 4/10 | Episode Reward: -1678.3274 | Running Time: 8.1632\nEpisode: 5/10 | Episode Reward: -1833.9245 | Running Time: 9.7298\nEpisode: 6/10 | Episode Reward: -1805.7677 | Running Time: 11.3628\nEpisode: 7/10 | Episode Reward: -1822.8594 | Running Time: 12.9569\nEpisode: 8/10 | Episode Reward: -1409.2653 | Running Time: 14.5867\nEpisode: 9/10 | Episode Reward: -1752.4231 | Running Time: 16.2574\n[TL] [*] Saving TL weights into ./model/AC-Pendulum-v0/model_actor\n[TL] [*] Saved\n[TL] [*] Saving TL weights into ./model/AC-Pendulum-v0/model_critic\n[TL] [*] Saved\n" - } - ] - } - }, - "081136f1075542a3999ce83eba68fdb5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_1ef9aa26484548e99e94bb3d8aae3cce", - "IPY_MODEL_45847f561d154d999d93f170524e2bdf", - "IPY_MODEL_9ce0362f9fac4e45a87ebe7a085a24af" - ], - "layout": "IPY_MODEL_ab2e3b3dc5024debb0c00c3d27d48a8b" - } - }, - "08f5684d8e194916ac04ed379e2bf022": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatTextModel", - "state": { - "description": "Manual input:", - "layout": "IPY_MODEL_019cd764de374cb382236f88a5d204af", - "step": null, - "style": "IPY_MODEL_c4662ffdadef4c7d82aba5ddca1fbfda", - "value": 0.9 - } - }, - "093fd11986764d78ad5dcf1429a496c9": { - "model_module": "@jupyter-widgets/output", - "model_module_version": "1.0.0", - "model_name": "OutputModel", - "state": { - "layout": "IPY_MODEL_0b19536128d34993b9a3354b2a05e2dc", - "msg_id": "8f19b370e7f641249abb608a3c84b213", - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": "Training... 
| Algorithm: AC | Environment: Pendulum-v0\nEpisode: 0/100 | Episode Reward: -1730.5698 | Running Time: 1.6647\n[TL] [*] Saving TL weights into ./model/AC-Pendulum-v0/model_actor\n[TL] [*] Saved\n[TL] [*] Saving TL weights into ./model/AC-Pendulum-v0/model_critic\n[TL] [*] Saved\nEpisode: 1/100 | Episode Reward: -1738.3357 | Running Time: 3.3156\nEpisode: 2/100 | Episode Reward: -1744.1233 | Running Time: 4.9611\nEpisode: 3/100 | Episode Reward: -1854.8743 | Running Time: 6.5757\nEpisode: 4/100 | Episode Reward: -1678.3274 | Running Time: 8.2029\nEpisode: 5/100 | Episode Reward: -1833.9245 | Running Time: 9.7915\nEpisode: 6/100 | Episode Reward: -1805.7677 | Running Time: 11.3793\nEpisode: 7/100 | Episode Reward: -1822.8594 | Running Time: 12.9897\nEpisode: 8/100 | Episode Reward: -1409.2653 | Running Time: 14.5941\nEpisode: 9/100 | Episode Reward: -1752.4231 | Running Time: 16.2545\nEpisode: 10/100 | Episode Reward: -1595.9812 | Running Time: 17.8784\nEpisode: 11/100 | Episode Reward: -1750.5559 | Running Time: 19.4594\nEpisode: 12/100 | Episode Reward: -1780.9001 | Running Time: 21.0874\nEpisode: 13/100 | Episode Reward: -1645.4007 | Running Time: 22.7261\nEpisode: 14/100 | Episode Reward: -1684.3441 | Running Time: 24.3810\nEpisode: 15/100 | Episode Reward: -1764.5074 | Running Time: 25.9965\nEpisode: 16/100 | Episode Reward: -1688.8096 | Running Time: 27.6359\nEpisode: 17/100 | Episode Reward: -1582.7040 | Running Time: 29.2999\n" - } - ] - } - }, - "094d34956035446984a6cb8a6efc22a7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "09c74a8b5d1a43828034e148d2edfbfc": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "09eb8f946d00416dace2ee661ad55fbd": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget001" - } - }, - "0a179f0e33df4522b9286a546e181b60": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_f2ffc80dd5074916b1a69e9de91149f9", - "style": "IPY_MODEL_8784dbc322c7455aaef2b352bae2f205", - "value": "name" - } - }, - "0a21d0f35913467a9b266a75d2af8db0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "0a575cd57803474a9574922e07d3d316": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_5532430429754176a10d6ab53ba4b6d9", - "style": "IPY_MODEL_e35bce23c28f4af3b0d4dce2266ed2e8", - "value": "Learning curve" - } - }, - "0abdf6aca8e44b2f96d9e278ce60a016": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "0af6103ca9e44bb4a44c62b84b39415f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_4112e1653afc41a795418fc54377af6c", - "IPY_MODEL_10d4f1af65b0492594efc926d9976e59" - ], - "layout": "IPY_MODEL_1e197bc7d05a4518969ee7d3f97f211c" - } - }, - "0b081708649d446ab37f522f5a019e19": { - "model_module": "@jupyter-widgets/base", - "model_module_version": 
"1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "0b19536128d34993b9a3354b2a05e2dc": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "border": "1px solid black", - "height": "300px", - "overflow": "scroll", - "width": "60%" - } - }, - "0b1a53d081f547f8ab913cd15fe70058": { - "model_module": "@jupyter-widgets/output", - "model_module_version": "1.0.0", - "model_name": "OutputModel", - "state": { - "layout": "IPY_MODEL_d99dceda8ae6483f8df298525d45be82" - } - }, - "0bd6e0b89391415fa6fc2c7f7fbf3bd3": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget010" - } - }, - "0c0d922d9ed14199ab9b8f48b9e8ba1d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "IntTextModel", - "state": { - "description": "multi envs:", - "layout": "IPY_MODEL_f2db93e6094b47d0bfce3821b33d707a", - "step": 1, - "style": "IPY_MODEL_454f999c2ca44e7b86263594806f6191", - "value": 1 - } - }, - "0c64eb2046714b6c885261124bcb09f8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_167de1c7956c4ede9fa6a584404bc568", - "style": "IPY_MODEL_5469680f21e44e77b1092b8354d9aee0", - "value": "Dense(n_units=1, No Activation, in_channels='64', name='dense_1')" - } - }, - "0cabfd585d5d4421a05805698bc1c8ad": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "0d95601931d94f8cac55349f5886038a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "0dc03ae5db46484a85272ce1899e53c0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_8ca1f8992583484a8a0ff2f7f46afee2", - "IPY_MODEL_99ac959475eb4f75b586ed6599b99113", - "IPY_MODEL_2ab7b4c8b49a4163b5521127d8329674", - "IPY_MODEL_9689f9977c7f455282a9831bcd81905c" - ], - "layout": "IPY_MODEL_eb5fdb48aa1d483fa9acf05a229ef307" - } - }, - "0e74af77352a4b40b0f9e5163d92a836": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget007" - } - }, - "0eb34e6e2b07401dae9a2bfa4f1d49df": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_47ed36f4da904759bb9adcf9f1f1685b", - "style": "IPY_MODEL_7dc1333733194435934e6ca098ede1ad", - "value": "False" - } - }, - "0ec6f6b7c7c84bb4b54e92db8342ce85": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DropdownModel", - "state": { - "_options_labels": [ - "atari", - "classic_control", - "box2d", - "mujoco", - "robotics", - "dm_control", - "rlbench" - ], - "description": "env type:", - "index": 1, - "layout": "IPY_MODEL_bfdfc9d77a654743a9ebdfc08ab167da", - "style": "IPY_MODEL_ce5b0166c393435a840819472b761b8c" - } - }, - "0fb529fd883648edb15d72a94813126e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_a01f34500cfc486289f3334e3cd222df", - "IPY_MODEL_d6ddb43e654a421ead72beacfae7145e", - 
"IPY_MODEL_b106f6f6a7f047a4a11ec9f9a23804e2" - ], - "layout": "IPY_MODEL_ffce2434eb114cd1a7f6961dd71ff755" - } - }, - "1022056a831a477e91366a9deda960de": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_ecc6da99cf7944f5a5a6cfd1f0516aa6", - "style": "IPY_MODEL_ebff747fea3f4cf2abb9efcd9f998ddb", - "value": "Dense(n_units=64, relu, in_channels='3', name='hidden_layer1')" - } - }, - "10685777c5384041b62b4ce3aa26bf6e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "10b2a84971164564ac50d9f53bd98579": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "10d4f1af65b0492594efc926d9976e59": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "GridBoxModel", - "state": { - "children": [ - "IPY_MODEL_50ce374ed2fc4f2ebc2c156c16ba4f38", - "IPY_MODEL_11337137fc3b4e19b06d48508495d2ce", - "IPY_MODEL_fc6a2f4827034d64b99a15547f3d9f43", - "IPY_MODEL_1846a28797b64a7a8266f33f497550d4", - "IPY_MODEL_00ead8f3c1ea4020930b11c3bde3dd48", - "IPY_MODEL_89ae5379ee8b4e2d92f116a018b9420e", - "IPY_MODEL_d6a04d9b77b54ae89af21fa5551e205e", - "IPY_MODEL_b42c755dec514e6fa26ca97f3f0ef923", - "IPY_MODEL_d915d378018e4bd085cf4a0a935e2aaa", - "IPY_MODEL_162bfef08113403d82be4e50b362acb9", - "IPY_MODEL_30d87705b48648089aaa078817a89da2", - "IPY_MODEL_bdb404863da84bdf870e550898f54848" - ], - "layout": "IPY_MODEL_81a50427a5384feeaaee374a19ad5931" - } - }, - "11337137fc3b4e19b06d48508495d2ce": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "RadioButtonsModel", - "state": { - "_options_labels": [ - "train", - "test" - ], - "index": 0, - "layout": "IPY_MODEL_da5694fd870b41e79f41ebc7d7b8db5e", - "style": "IPY_MODEL_3a389cd3e9254722a3bef185d92c9ac4" - } - }, - "1202663af1bf4653bc967824c8574e1a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_98f2c9b34e884cada9e2eedac93e1912", - "style": "IPY_MODEL_67a79ba4cbf84418967857e237a5a1be", - "value": "Environment name:" - } - }, - "1222c8a942134f83aa262d9b321ee413": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "125f5c3fd35e49339e558a30a39a9f8a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_fc83fd9df36b4c0fa6ee544fe520cde7", - "style": "IPY_MODEL_3f7607f9884f482498bb28a91df5ab02", - "value": "beta_1" - } - }, - "12a0f20f2ecd423889594f36b15647f1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "SliderStyleModel", - "state": { - "description_width": "" - } - }, - "12e50eba7f3e4e9f888416f46172b60f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "145001c5826a41cd989997ea61244ca1": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "14a01344ad1b48b3becfe74fa709a0c6": { - "model_module": 
"@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_40848c8562dc485fa88be8cf89c7a5e2", - "style": "IPY_MODEL_a7d8b17ff9fd43298bc30e0471ade94f", - "value": "Input(shape=(None, 3), name='input_layer')" - } - }, - "1537ab75a9dd4f429ffb3812c485116f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "GridBoxModel", - "state": { - "children": [ - "IPY_MODEL_0a179f0e33df4522b9286a546e181b60", - "IPY_MODEL_91d86c9ddbfa4acdaf18e13d8adf3862", - "IPY_MODEL_b4d945e45eae41ceb40de345939615ad", - "IPY_MODEL_715b10d741354c8db506fb8ba945a074", - "IPY_MODEL_b92bc4065ee4473aa6e1b4051e044dee", - "IPY_MODEL_c2160078393b421d9f3a4343f37307e2", - "IPY_MODEL_125f5c3fd35e49339e558a30a39a9f8a", - "IPY_MODEL_04abdee05e514880bb74dfe64bca36ff", - "IPY_MODEL_070bc781a91449c6a7fb227586d347e6", - "IPY_MODEL_2bb83c7012914171b4b76d559b92034c", - "IPY_MODEL_fa3877a284354fd08f33d320314b6765", - "IPY_MODEL_5446746816dd4edf8dffb29995d15715", - "IPY_MODEL_3755df840c214a33941879b316489adf", - "IPY_MODEL_776cdbcecc004924a856eb45ec0a5699" - ], - "layout": "IPY_MODEL_4b5dc49fbc1743c8abe6cded3f9ed703" - } - }, - "159f94f25de5436aafa6fec3c88e3356": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_0106cced0fe54fbb9a3a261b11941cce", - "IPY_MODEL_cfb6b6bcedad4f61893206fb1eb28385" - ], - "layout": "IPY_MODEL_89880b2c3e03469da53b8a7e9e2e930b" - } - }, - "15ae64b32d794189a34bba91e2f7a15b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "162bfef08113403d82be4e50b362acb9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "children": [ - "IPY_MODEL_474e0de897334eb69236cc05ae69f164", - "IPY_MODEL_aafbebe0ec5b4425acf54f0ad9f6c80f" - ], - "layout": "IPY_MODEL_66bc7fd58a2743a0960e9dd5df378998" - } - }, - "167816e5912f4ea18d96b6e468d82ae7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "167de1c7956c4ede9fa6a584404bc568": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "1764805129704afcb7c170e877b81788": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "children": [ - "IPY_MODEL_fe547223f16e423fa8493d4c6ae577ba", - "IPY_MODEL_093fd11986764d78ad5dcf1429a496c9" - ], - "layout": "IPY_MODEL_2bea049f9ec74da0bcf2a7eeffce8720" - } - }, - "182107ee16aa4bfba497dd033e347d65": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_e6958eae462d43d8bdb9c6227deddcc7", - "style": "IPY_MODEL_f9a9a8529629435f926e28c9e2ff6d21", - "value": "Observation space:" - } - }, - "1826b147229c4a96b6603cc13978a090": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "182c5797541f4476bb02c95a710f1bca": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - 
"1846a28797b64a7a8266f33f497550d4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "CheckboxModel", - "state": { - "disabled": false, - "indent": false, - "layout": "IPY_MODEL_e8b87d816ccb409083b0c522ef0bd9dd", - "style": "IPY_MODEL_167816e5912f4ea18d96b6e468d82ae7", - "value": false - } - }, - "18470dca56a94ced8388c8eec402515f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "GridBoxModel", - "state": { - "children": [ - "IPY_MODEL_294896e2ec5f413e9e23d9ec81e6bbbf", - "IPY_MODEL_8c59866961674911b2157bded443e366", - "IPY_MODEL_261d86e673814c6b9c6ed7b921861867", - "IPY_MODEL_6d5b0a5b26874cfd874c4a0bdf307eff" - ], - "layout": "IPY_MODEL_b58381d8050044ee9df6c0857e3a06e4" - } - }, - "18a7121ba72e42af9a496a39fb8c6f6a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "18ea002dd43344a5864f8a8651ceeaeb": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget007" - } - }, - "19b0d8173d9141e0a0db8d0b2110c98c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_template_areas": "\"widget001 widget002\"\n\"widget003 widget004\"\n\"widget005 widget006\"\n\"widget007 widget008\"\n\"widget009 widget010\"\n\"widget011 widget012\"", - "grid_template_columns": "repeat(2, 1fr)", - "grid_template_rows": "repeat(6, 1fr)" - } - }, - "1a3aa6da2cad4cfd9696b32125ab645b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "1adbcde168d04bcdaed1c410feae74ac": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "1b48b0f90cef4077aa20b9ee8be52e9b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget002" - } - }, - "1c09f9523eb2469ab864ddcd5f15f417": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "1c75d4a07143476588ce4826116ea8ee": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "1cb1d8e98bef410e85502ad2edb46c45": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_template_areas": "\"widget001 widget002\"\n\"widget003 widget004\"", - "grid_template_columns": "repeat(2, 1fr)", - "grid_template_rows": "repeat(2, 1fr)" - } - }, - "1cb88e139a0642afb2f3c958dff539aa": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_9705108e9dd540fa8e02c1933e03eadd", - "style": "IPY_MODEL_2126fce329534e2b98f039a35e99344a", - "value": "Dense(n_units=64, relu, in_channels='3', name='hidden_layer1')" - } - }, - "1d03aaf95d45497ca74e337a82632cee": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_2ee89b46bdc146f9b9f4f48f5874a349", - "style": "IPY_MODEL_e0a1f12f4f0e4e31adc281b1fe6dee11", - "value": "0.9" - } - }, - 
"1db128fafd984258b040b5295b477f0d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "1dbbcf0744194117b3463d5ae8af00ef": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "1e197bc7d05a4518969ee7d3f97f211c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "1e327c8e4b844c2fbb017a5544fa678e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget004" - } - }, - "1e6d0c80ceaa4e58846e9f554371b363": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "1eec2203d3bf49c2876604c21291cc18": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_d20f2266d6fc44df988c78b63b202a81", - "style": "IPY_MODEL_5228a7a8160f421f846e2d7d06c9d159", - "value": "1e-07" - } - }, - "1ef9aa26484548e99e94bb3d8aae3cce": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_5ac9e6a121a3488ea93f85f5589429a0", - "style": "IPY_MODEL_698f9329e3754e7482dc32690ba58f4a", - "value": "Environment settings" - } - }, - "1f0e424278554da08fbb15138e571a62": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "1f37fdacb85646a1b5ff9a2b1d6ab38a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "2126fce329534e2b98f039a35e99344a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "214c87e57eb641bb89644c9f465889ca": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget007" - } - }, - "2205db5769754bf0948d81dde160eab4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "22126658c9d54cfab48b63029798c705": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "SliderStyleModel", - "state": { - "description_width": "" - } - }, - "223fd915d3a5472aabdde3b5dd47a5f1": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "22ff0e7129b04334b71044d77e3c9298": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "BoxModel", - "state": { - "children": [ - "IPY_MODEL_eef437964b4e4fa29ea42afc6b9a69ce" - ], - "layout": "IPY_MODEL_759c11789beb46f798f3b48c4cf88577" - } - }, - "23424247d797485dba0788eb6b7614aa": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "235af533ab1c41a6b82350c6f3a88426": { - "model_module": "@jupyter-widgets/base", - 
"model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget010" - } - }, - "23d66d78336541bf8b3f863dc3e554d4": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "border": "1px solid black", - "height": "300px", - "overflow": "scroll", - "width": "60%" - } - }, - "24f450d31f2d47a68aa2c58be28170fb": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_09eb8f946d00416dace2ee661ad55fbd", - "style": "IPY_MODEL_f8a20f2f4b8b4c03857bcd85bf96b136", - "value": "name" - } - }, - "254576dd293543d384c9e5620c3db225": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget005" - } - }, - "26036b1a064245a6a1cef60ec7d39376": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_b18ac7a05b7c4d58813a3e735173a3ca", - "IPY_MODEL_0ec6f6b7c7c84bb4b54e92db8342ce85", - "IPY_MODEL_467644544d33439284f04fe2a9883182" - ], - "layout": "IPY_MODEL_f9b983bef3a14087b6d1f966b8b041ed" - } - }, - "261d86e673814c6b9c6ed7b921861867": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_e9794b57be6c4c0e981a017d3fa82a36", - "style": "IPY_MODEL_946c2a2e7e8f4e36b0311e922520272f", - "value": "Optimizer information:" - } - }, - "266e10703ed340a78b259c7d3ddc8836": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "layout": "IPY_MODEL_6d6739242111448eaf1e80a8962f1aac", - "style": "IPY_MODEL_bf620c54949846b49135585c61101b19", - "value": "Environment Information" - } - }, - "26c0e699dae643b58817819a3d134e6f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_f29a7f4ff2a74bbf8d6485cbfb086152", - "IPY_MODEL_bb5d38052b40427585a8ec928bdef7b5" - ], - "layout": "IPY_MODEL_d02f0cd6f8f94156ac86605286a6ee78" - } - }, - "27fbf57b093b4444b8990601eaddca26": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "GridBoxModel", - "state": { - "children": [ - "IPY_MODEL_01cece59d650454b9cf09d03e85a6a10", - "IPY_MODEL_e09e0ff65ebf454b80a965aaa0f61d32", - "IPY_MODEL_83c18b3b4c374f70947e47230ffe4f82", - "IPY_MODEL_06d5c4249f3d404793fe2defc8eb0051" - ], - "layout": "IPY_MODEL_ff06931e66b544389c8f409734b472e3" - } - }, - "283080f17fcf4286b2e6e059bcda3370": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_4b23820dcff647a6ad204c7c4a596248", - "style": "IPY_MODEL_1826b147229c4a96b6603cc13978a090", - "value": "mode" - } - }, - "28ad6172b7f34ba9923847d24dd555b3": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "294896e2ec5f413e9e23d9ec81e6bbbf": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_98824ad5eda8475394e9fb13819502a9", - "style": "IPY_MODEL_79953b3e59c048548c96bb197d46a7ea", - "value": "Network information:" - } - }, - "2982ccca674f4bfc839557e06cde9993": { 
- "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_14a01344ad1b48b3becfe74fa709a0c6", - "IPY_MODEL_1022056a831a477e91366a9deda960de", - "IPY_MODEL_814eef7fa97a4fa2b4c5f1ed1b3728f3", - "IPY_MODEL_0c64eb2046714b6c885261124bcb09f8" - ], - "layout": "IPY_MODEL_223fd915d3a5472aabdde3b5dd47a5f1" - } - }, - "298f572cd2ec4a9ca5a6feafaf334040": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget002" - } - }, - "29a207365d934cc4a402ed72a19194ca": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "border": "solid" - } - }, - "2a9fb576ef6145abaf95398bf620cd8d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "align_items": "stretch", - "display": "flex", - "grid_area": "widget010", - "justify_content": "center" - } - }, - "2ab7b4c8b49a4163b5521127d8329674": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_7d70e416e925499f93e5837aabc6afc2", - "style": "IPY_MODEL_69268529fca5425e9f11506c968490e7", - "value": "Dense(n_units=64, relu, in_channels='64', name='hidden_layer2')" - } - }, - "2b0d8567d4aa4e53a5837284b315cc58": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "SliderStyleModel", - "state": { - "description_width": "" - } - }, - "2bb83c7012914171b4b76d559b92034c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_235af533ab1c41a6b82350c6f3a88426", - "style": "IPY_MODEL_75c167ca66774581880b2500d5176a36", - "value": "0.999" - } - }, - "2bea049f9ec74da0bcf2a7eeffce8720": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "2c0353597c114ba184977dac607510c3": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget006" - } - }, - "2c48650276864e79a7b82413ddd8c6fa": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_d34c7789bb974de1a36ef3cc45737b52", - "style": "IPY_MODEL_626ae439ee1f4ce4895764fb66f9c6d3", - "value": "0.999" - } - }, - "2c9a721e0f084f8f8f437a5d4d875e3f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "2d1f0d1b81ee4e1f85ae2f777dcd0db9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "2da2537f2e444e16ad634693e684af58": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "BoxModel", - "state": { - "children": [ - "IPY_MODEL_e4665eee9731436a839eaebea246f048" - ], - "layout": "IPY_MODEL_e944a76d793541058cf5f32563847fb3" - } - }, - "2dab24721ba34bd789afa55d1479464b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "2dece16eb4994e5082a1cbeeea4163d0": { - 
"model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget002" - } - }, - "2e65a763e5db40ca8969c36950c0d9bd": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_1c75d4a07143476588ce4826116ea8ee", - "style": "IPY_MODEL_15ae64b32d794189a34bba91e2f7a15b", - "value": "Supported algorithms are shown below" - } - }, - "2e6e71650a6a48878fce055c8e563538": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_f1985e262a7d401ea97c903091713789", - "style": "IPY_MODEL_2205db5769754bf0948d81dde160eab4", - "value": "Dense(n_units=64, relu, in_channels='64', name='hidden_layer2')" - } - }, - "2e8b3025623248e2a92daa5a7750997f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "2ece943ff83c48e8b69e0b2396b6064c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_aeecfc3325ec482ebd31ced3fc2e6839", - "style": "IPY_MODEL_b979276c5b584ebab1400eea707b2c39", - "value": "Pendulum-v0" - } - }, - "2ee89b46bdc146f9b9f4f48f5874a349": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget008" - } - }, - "2f93a27048a44beda22771c8249fba0d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "border": "dotted", - "grid_template_areas": "\"widget001 widget002\"\n\"widget003 widget004\"\n\"widget005 widget006\"\n\"widget007 widget008\"\n\"widget009 widget010\"\n\"widget011 widget012\"\n\"widget013 widget014\"", - "grid_template_columns": "repeat(2, 1fr)", - "grid_template_rows": "repeat(7, 1fr)" - } - }, - "3025ff51115247eebfcfe7e2a18e414e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget004" - } - }, - "3044da8a1f89485398f1ea9d4965bc55": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "align_items": "stretch", - "display": "flex", - "grid_area": "widget006", - "justify_content": "center" - } - }, - "304f4dcdb42b4bca91451ccfe7eba639": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "30d87705b48648089aaa078817a89da2": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_c3c09aa3ecea45eda2b142c857c5d7c5", - "style": "IPY_MODEL_e3adb676dd9b48a6bd4e895ac644b653", - "value": "train_episodes" - } - }, - "31276a604cf14bcd82297907c46c17f8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatSliderModel", - "state": { - "continuous_update": false, - "description": "Slider input:", - "layout": "IPY_MODEL_a899edcecbcf49d1a1f57b48bed97865", - "max": 400, - "readout_format": ".0f", - "style": "IPY_MODEL_4711e3b757ae4ba08ece2d994aa46c2a", - "value": 200 - } - }, - "31f3ea5f445a4342b1a4db664f61eb93": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": 
"1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "31fe17808d8e4f7ead5964af2e4f5894": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "border": "dotted", - "grid_template_areas": "\"widget001 widget002\"\n\"widget003 widget004\"\n\"widget005 widget006\"\n\"widget007 widget008\"\n\"widget009 widget010\"\n\"widget011 widget012\"\n\"widget013 widget014\"", - "grid_template_columns": "repeat(2, 1fr)", - "grid_template_rows": "repeat(7, 1fr)" - } - }, - "329f804132904f47a73d10b3ccba4b4d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DropdownModel", - "state": { - "_options_labels": [ - "atari", - "classic_control", - "box2d", - "mujoco", - "robotics", - "dm_control", - "rlbench" - ], - "description": "env type:", - "index": 1, - "layout": "IPY_MODEL_8ae2c037e98f420486a61a8570daf106", - "style": "IPY_MODEL_df84370f89e949518569f900854e2510" - } - }, - "334d1a726d2347db82e42df5760618b3": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "CheckboxModel", - "state": { - "disabled": false, - "indent": false, - "layout": "IPY_MODEL_c3d17e5a575344968f8b84a174b26ba9", - "style": "IPY_MODEL_31f3ea5f445a4342b1a4db664f61eb93", - "value": false - } - }, - "33ecf71f75a649a285ea6a8211b5acbd": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "3488ba4c7374447794395c4c315a1193": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "351ae05c16d040dab9a578c06a78858c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "35525c0fbffa497eb43f7d5bd081bb0b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "SliderStyleModel", - "state": { - "description_width": "initial" - } - }, - "3556d6d1fe0c4e558b21b70b8c7b9395": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget003" - } - }, - "3755df840c214a33941879b316489adf": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_e835260b70924edd959ac38cbdaa50d3", - "style": "IPY_MODEL_7aa2babe24dc4fab84bfbd511f0b5e98", - "value": "epsilon" - } - }, - "379d32750a8c4e88b3b6a8d76c3ee91b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget012" - } - }, - "383cf0cb101341d4bdfb65604a24a4d5": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget003" - } - }, - "38484ea61c3449a1b809d8526ead582d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget006" - } - }, - "389174ab87e24a48a23ad5f81a32da61": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_d2ba7f491ec94768be174bba323aff6d", - "style": 
"IPY_MODEL_a32e41356969452abe56558608109dc8", - "value": "test_episodes" - } - }, - "38f46c0b84c84233a228758c9b306a79": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "3909591203bd4321b62ed4e0aa575a3e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_a2bb633318304f79a811eb07e18da7f5", - "IPY_MODEL_4ab1ce52edf54c879f2ee002e94c98f1", - "IPY_MODEL_159f94f25de5436aafa6fec3c88e3356", - "IPY_MODEL_4ab1ce52edf54c879f2ee002e94c98f1", - "IPY_MODEL_88b977df9d82476298ff3c70d714afe0" - ], - "layout": "IPY_MODEL_886c73a1052a4a2da9ec06c958855a51" - } - }, - "39219af0b9a34c03a11682fdbaf85b04": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_e9d6d91ceda64a63b9fe358e90337820", - "IPY_MODEL_9694a75a41e543a3b2642aee3572857d" - ], - "layout": "IPY_MODEL_ed746bfae28741e9ae1d450dd1394423" - } - }, - "39c394badc7246fdb12032649f71a1b6": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget003" - } - }, - "3a389cd3e9254722a3bef185d92c9ac4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "3a3916bde1e849aeae0e2701258ddc34": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_fc20a5f1e967425c840960c1948f00c8", - "style": "IPY_MODEL_c75a9640bb26465785ca214520007519", - "value": "train_episodes" - } - }, - "3a96e3ae233940e18c75f004da9e0459": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_bf7a578fb6204ce694235598a0f00ea2", - "style": "IPY_MODEL_f2612900bd944258af3be77cacc7a46b", - "value": "name" - } - }, - "3b0358464a32494ea410b866646b79b1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_dd51349042bc4341b061da02df9f8be2", - "style": "IPY_MODEL_63c30e87411c45dd8d58dfa485850fc2", - "value": "learning_rate" - } - }, - "3c695e15ebbd4ecfb555b0fe5221ad10": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_9a247aedcd64492d9b4ddf9d76c13062", - "style": "IPY_MODEL_96fc368f69794e5baa9433c3a31b1ec1", - "value": "amsgrad" - } - }, - "3c77984eb49f4b3fbf5b78b313af8071": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget013" - } - }, - "3cfd11894b514078901081bddd35c83d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "3d9166fc4fcf43f3b930ebc7f996a5bf": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "3e9c9dcc814b47f8b2b392074c83d853": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": 
"LayoutModel", - "state": { - "grid_template_areas": "\n \"a00 a01\"\n \"a10 a11\"\n \"a20 a21\"\n \"t0 t1\"\n " - } - }, - "3f7607f9884f482498bb28a91df5ab02": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "40747ee3248e4cbca2b22e3201e7ae52": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "4080aa3475b94001b5324fd14d18816c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "SliderStyleModel", - "state": { - "description_width": "initial" - } - }, - "40848c8562dc485fa88be8cf89c7a5e2": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "40c1e5560977460b86028ca09ee94662": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget005" - } - }, - "4112e1653afc41a795418fc54377af6c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "layout": "IPY_MODEL_808fb0e5d6b940388d588196c927564d", - "style": "IPY_MODEL_9b276e72efa44a7e911ee209d08859b6", - "value": "Learn Parameters" - } - }, - "413fd706b68148a099ed9af1a952ec6d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "41425cf814dc44c49ac901aeec4c668f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "420cda5d7fd34a05b48fa845558987c4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatTextModel", - "state": { - "description": "Manual input:", - "layout": "IPY_MODEL_cfc4c351d9da4a2bbe36bb1288f74e82", - "step": null, - "style": "IPY_MODEL_9b5f3fd4ebd341ac91227f9ded9fab19", - "value": 200 - } - }, - "42f8297b00d240308e7403a004a1c6b4": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget003" - } - }, - "432a3a690b36409192aa3ee4dd5fedf8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatSliderModel", - "state": { - "continuous_update": false, - "description": "Slider input:", - "layout": "IPY_MODEL_45b014170b1e4c6b8efc9d245b587b48", - "max": 1.8, - "readout_format": ".1f", - "step": 0.1, - "style": "IPY_MODEL_4c528854314c4df18a84eafa4f1a7404", - "value": 0.9 - } - }, - "434eec441fb94a30bcb70bec50c60d78": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_681fa50d92ed4da0afda87805d2383ca", - "IPY_MODEL_18470dca56a94ced8388c8eec402515f", - "IPY_MODEL_da5536ed85464ee5a97c44660b985348" - ], - "layout": "IPY_MODEL_74dc8e60490943c8b9601232bf24f608" - } - }, - "43730220bf8e489cae588fcf375d08cf": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "children": [ - "IPY_MODEL_31276a604cf14bcd82297907c46c17f8", - "IPY_MODEL_420cda5d7fd34a05b48fa845558987c4" - ], - "layout": "IPY_MODEL_ddba268ea0db428898643ae0f9a259a3" - } - }, - "43ca75c41e054155b5ad51e493b3b990": { - "model_module": "@jupyter-widgets/controls", - 
"model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_e53d3b32848c4872a5e1254a2ed080f1", - "style": "IPY_MODEL_e467ed3285684035a013df63ebb6b422", - "value": "Tips:" - } - }, - "43f9446733e242f1977bbe394ddc479b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "452324b6d7cc4cf28d456787efc23b8f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "454021a337164bae8a96f5a5a7749b78": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "454f999c2ca44e7b86263594806f6191": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "455c6fed537d48b188edef0200ab0fb1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "layout": "IPY_MODEL_07377f1ec0e74dd4897d484914a44f99", - "style": "IPY_MODEL_a5d8986e9aad47b1ba7821ddf2850c7a", - "value": "Algorithm Selector" - } - }, - "45847f561d154d999d93f170524e2bdf": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "IntTextModel", - "state": { - "description": "multi envs:", - "layout": "IPY_MODEL_4cff6dcb31874722a4fcd9052bb1f9b6", - "step": 1, - "style": "IPY_MODEL_e41fe8ee1bf04764abe02428057a540a", - "value": 1 - } - }, - "45850b0512424834a6d4c70e60892ae8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "45b014170b1e4c6b8efc9d245b587b48": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "45e906bdfe7a464d848f9c972f536d31": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "467644544d33439284f04fe2a9883182": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DropdownModel", - "state": { - "_options_labels": [ - "Acrobot-v1", - "CartPole-v1", - "CartPole-v0", - "MountainCar-v0", - "MountainCarContinuous-v0", - "Pendulum-v0" - ], - "description": "env name:", - "index": 5, - "layout": "IPY_MODEL_e210fdbc53d246a2ae55da6a3689745b", - "style": "IPY_MODEL_f29ba87ee02f4fc38760b98a32e20581" - } - }, - "469da089cf804101a4cbc570975a1aed": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_ac4da45cf7d84d5fa0ea8963afbe5c12", - "style": "IPY_MODEL_dc12042cc1bb40c98a69bef90468797a", - "value": "gamma" - } - }, - "4711e3b757ae4ba08ece2d994aa46c2a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "SliderStyleModel", - "state": { - "description_width": "" - } - }, - "4749f46df2c4438e874ed6912a4d7ef1": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget009" - } - }, - "474e0de897334eb69236cc05ae69f164": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatSliderModel", - "state": { 
- "continuous_update": false, - "description": "Slider input:", - "layout": "IPY_MODEL_c234ed19a3204e1d9452d6686e014efb", - "max": 200, - "readout_format": ".0f", - "style": "IPY_MODEL_22126658c9d54cfab48b63029798c705", - "value": 100 - } - }, - "47513573787c4ab1bfafee8a38450355": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "47d275b36e704a74a22098c38f14f301": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "47ed36f4da904759bb9adcf9f1f1685b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget006" - } - }, - "48392da1f6c64d3fad859465d0d0095b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "align_items": "stretch", - "display": "flex", - "grid_area": "widget002", - "justify_content": "center" - } - }, - "48a97cf1c4a44a858c3376f962060321": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "48d65f9009904854b076047201074a2c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_template_areas": "\n \"a00 a01\"\n \"a10 a11\"\n \"a20 a21\"\n \"t0 t1\"\n " - } - }, - "494deb5503e842b78948ed2c14e28e3e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget009" - } - }, - "49c009585e524d98af99d984cf65a85b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "4a1bc5d7007848cb89e08eff1479ddf8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "4a2a0ec5e8f641f489d58e31f3f5fcef": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_d1b7a611e0ea474991c6034e7e7a9e98", - "style": "IPY_MODEL_60104c359482485eaa44f621628fb667", - "value": "Box(3,)" - } - }, - "4a88a99c974d47da993c8bde3faab362": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "justify_content": "center" - } - }, - "4ab1ce52edf54c879f2ee002e94c98f1": { - "model_module": "@jupyter-widgets/output", - "model_module_version": "1.0.0", - "model_name": "OutputModel", - "state": { - "layout": "IPY_MODEL_29a207365d934cc4a402ed72a19194ca" - } - }, - "4b23820dcff647a6ad204c7c4a596248": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget001" - } - }, - "4b5dc49fbc1743c8abe6cded3f9ed703": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "border": "dotted", - "grid_template_areas": "\"widget001 widget002\"\n\"widget003 widget004\"\n\"widget005 widget006\"\n\"widget007 widget008\"\n\"widget009 widget010\"\n\"widget011 widget012\"\n\"widget013 widget014\"", - "grid_template_columns": "repeat(2, 1fr)", - "grid_template_rows": "repeat(7, 1fr)" - } - }, - 
"4b9184b437ac441e8c485894889e7fd4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "GridBoxModel", - "state": { - "children": [ - "IPY_MODEL_8865f419c3a04323907d8e9d11f06c24", - "IPY_MODEL_c60dc42b295c47138b76205df9071217", - "IPY_MODEL_85165a2de0d64a2bb9baf9b64b3ffa38", - "IPY_MODEL_bffd75c7e90346ebb8214c6fe0ce2ab4" - ], - "layout": "IPY_MODEL_1cb1d8e98bef410e85502ad2edb46c45" - } - }, - "4bbe95c5e6b34795a2058cc7bf7416f9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_615934e58366458ea65a907cae98c64e", - "style": "IPY_MODEL_570c4f6867da492cafc6318dd145f87d", - "value": "Dense(n_units=64, relu, in_channels='3', name='hidden_layer1')" - } - }, - "4c528854314c4df18a84eafa4f1a7404": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "SliderStyleModel", - "state": { - "description_width": "" - } - }, - "4cff6dcb31874722a4fcd9052bb1f9b6": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "4d8d22e583c64179817ad9c514bd4490": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget004" - } - }, - "4e6414fcd34b454e94c982f7233402a7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "4ee9cbafcaad44de9f9e7453ee765047": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "children": [ - "IPY_MODEL_f74c2a3b52114bbc80056d7097731209", - "IPY_MODEL_7fbbe1851a944d69a568c06875de2b0f" - ], - "layout": "IPY_MODEL_2a9fb576ef6145abaf95398bf620cd8d" - } - }, - "4fa0861e758940d9b9c2775304ebb140": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "50ce374ed2fc4f2ebc2c156c16ba4f38": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_5ee808e0128f4e85921b2855f4ff3831", - "style": "IPY_MODEL_6a001a1bb11844d0b85857486c544879", - "value": "mode" - } - }, - "510e33d521264ac387af97dbbb46dd39": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "align_content": "center", - "align_items": "center", - "border": "dotted" - } - }, - "516cc7132ca94faab3023ffcd1ed4cd4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_5af150388cac4ebc96775a3696923399", - "style": "IPY_MODEL_81621cd1e69f47a1868bf499caac5824", - "value": "Choose your environment" - } - }, - "520b2e1af36547edbae1352d82099fda": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget002" - } - }, - "5228a7a8160f421f846e2d7d06c9d159": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "532ea00fd94045298f69a3917ced39c7": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": 
{ - "align_items": "stretch", - "display": "flex", - "grid_area": "widget008", - "justify_content": "center" - } - }, - "53c0481b6b294cf888f2b3abdc33a95c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "543b543dd8bb4fcb9dc9f4a16ac4bd6e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget005" - } - }, - "5446746816dd4edf8dffb29995d15715": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_6406ec864c1848d88b92c9b5248a9c9e", - "style": "IPY_MODEL_891e2bdcc12d4314affa4fd372ed7ade", - "value": "0.0" - } - }, - "5469680f21e44e77b1092b8354d9aee0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "547d2113aae04e20ba41d30deb33ec5f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget014" - } - }, - "54927f9f2cde4416bf0e3b782fbd5118": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "5526ed8ea7b4499eadc0bbb165d7bbc4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_965b9a99694b4227a43121ae2e974290", - "IPY_MODEL_e57f860aafca4775a03574208f4944b7" - ], - "layout": "IPY_MODEL_510e33d521264ac387af97dbbb46dd39" - } - }, - "5532430429754176a10d6ab53ba4b6d9": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "55790721852a4ac38f0bf04e1016c16a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_28ad6172b7f34ba9923847d24dd555b3", - "style": "IPY_MODEL_c35cf89d5b4c42c886c9c83fdc93c8e6", - "value": "Environment name:" - } - }, - "55abe6fb296b491ba2e2a09a492b5ae8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "570c4f6867da492cafc6318dd145f87d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "575f6d3a87c041e4a3005385d7ec75b4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_b20aaab10e6a49138d9cf0a414321c49", - "IPY_MODEL_c2aa94c81efc4f3f826adcb847fbdb89" - ], - "layout": "IPY_MODEL_8173f889450249d58f18acfe83d63ddd" - } - }, - "57f97e2ebec542f8b297365916bf571e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget011" - } - }, - "58201f662dc74741bcdeb0e7753843c4": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "593926166a704759992244f9732d0f8d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "595aeae2634948268510587998ec9587": { - 
"model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "children": [ - "IPY_MODEL_575f6d3a87c041e4a3005385d7ec75b4", - "IPY_MODEL_080346c4f0ae457182549d3c68aaaaea" - ], - "layout": "IPY_MODEL_b9743661bbd24d94969c463e1f77d6e8" - } - }, - "59da397a7faa43c79c633dd523b6f07b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "5ac9e6a121a3488ea93f85f5589429a0": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "5adceaf568da4a1d88d6bf7b379965c2": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_0e74af77352a4b40b0f9e5163d92a836", - "style": "IPY_MODEL_c7a9f23b553e43a78d5c0ced37526327", - "value": "beta_1" - } - }, - "5af150388cac4ebc96775a3696923399": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "5af1a3e17ac64264905701b109c013e2": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "GridBoxModel", - "state": { - "children": [ - "IPY_MODEL_24f450d31f2d47a68aa2c58be28170fb", - "IPY_MODEL_a7d002d3e5454965af1d9cdb2e54e7ca", - "IPY_MODEL_3b0358464a32494ea410b866646b79b1", - "IPY_MODEL_b4047180a5aa44479c358d8c12f0c5d5", - "IPY_MODEL_9fd6a74ce4e54ae38816e55d19327281", - "IPY_MODEL_0eb34e6e2b07401dae9a2bfa4f1d49df", - "IPY_MODEL_5fc0273b28ca4f42b441948986c98e99", - "IPY_MODEL_bd7afa2132154beebd89e4320ebcad26", - "IPY_MODEL_d48e8464b37c4f0099d42e59369dbab6", - "IPY_MODEL_07b0e1377c414989a1d7ce1bf1da1c4e", - "IPY_MODEL_b04b868ce504489c82bd8818501b3ac3", - "IPY_MODEL_d1ba6fbf21674589b3f585f6e0f9638b", - "IPY_MODEL_c083a4b8f36848ed9f277f423ae18084", - "IPY_MODEL_8c168f5c8ecc4d0ba203b60193856d1c" - ], - "layout": "IPY_MODEL_2f93a27048a44beda22771c8249fba0d" - } - }, - "5afcc13ec3d94e6299bd06fb87ed7885": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget007" - } - }, - "5b759ba6fc8f451c97ee15467069a6ed": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "5b87473fb6cc473a89998a285388f4da": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "5bced3d11d4a41a4b3e1c712f83b98e4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DropdownModel", - "state": { - "_options_labels": [ - "default" - ], - "description": "state type:", - "index": 0, - "layout": "IPY_MODEL_f4d0297192f5464bac7ab02b3dabed2c", - "style": "IPY_MODEL_7fea48aa29c24b4b94784890589e01e4" - } - }, - "5caab83d7d4d4658ac739d02b56e9fd6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "5daa3bcd6829495cb223328230f0f8e4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "5ee808e0128f4e85921b2855f4ff3831": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": 
"widget001" - } - }, - "5efb085669c2400a909ac37b5cb4e45e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "align_items": "stretch", - "display": "flex", - "grid_area": "widget008", - "justify_content": "center" - } - }, - "5f1fda7eb4ac4ce694f721e312e205ab": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "5fc0273b28ca4f42b441948986c98e99": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_18ea002dd43344a5864f8a8651ceeaeb", - "style": "IPY_MODEL_e14f5611fa9242af879512207669394f", - "value": "beta_1" - } - }, - "60104c359482485eaa44f621628fb667": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "611b3bc2e8e749a38fe77bbdab064670": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_3c77984eb49f4b3fbf5b78b313af8071", - "style": "IPY_MODEL_b64d5e345cb5482595aa92662c8f162c", - "value": "epsilon" - } - }, - "615934e58366458ea65a907cae98c64e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "6187b72c80f64272a6c33c90cb582c4c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "align_items": "center" - } - }, - "626ae439ee1f4ce4895764fb66f9c6d3": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "629ece3b43ac4c8a8c2f83733a180978": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget005" - } - }, - "62a5e4f04f554e6580d63bb32f36b3be": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "align_items": "stretch", - "display": "flex", - "grid_area": "widget012", - "justify_content": "center" - } - }, - "63c30e87411c45dd8d58dfa485850fc2": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "63d55c74d6ed493abe58361958b23046": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "6406ec864c1848d88b92c9b5248a9c9e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget012" - } - }, - "64750206fa3a48119aa85e75f5ff2de8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "GridBoxModel", - "state": { - "children": [ - "IPY_MODEL_55790721852a4ac38f0bf04e1016c16a", - "IPY_MODEL_2ece943ff83c48e8b69e0b2396b6064c", - "IPY_MODEL_7a5d99612efa45acb82149814a4a7e82", - "IPY_MODEL_87b22017505c4d14a335692f09abd816", - "IPY_MODEL_8f5e2c19238240c38947f1a5d8e72792", - "IPY_MODEL_2da2537f2e444e16ad634693e684af58", - "IPY_MODEL_6e144126a66b48f9a22641284932ad73", - 
"IPY_MODEL_ef95b43fb5cd436cb6f737f2defc8e38" - ], - "layout": "IPY_MODEL_48d65f9009904854b076047201074a2c" - } - }, - "660e8c250f974ff685128c61b3d57fe3": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "661fd55473c0431aa9dffd6876d1d559": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "66bc7fd58a2743a0960e9dd5df378998": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "align_items": "stretch", - "display": "flex", - "grid_area": "widget010", - "justify_content": "center" - } - }, - "677e2010d7ce45eb9adc6f26a8977636": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_3556d6d1fe0c4e558b21b70b8c7b9395", - "style": "IPY_MODEL_0580852520e142a89d7b42c50bfef6a1", - "value": "learning_rate" - } - }, - "67a79ba4cbf84418967857e237a5a1be": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "681fa50d92ed4da0afda87805d2383ca": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "layout": "IPY_MODEL_fb19638e8a38465f844aaf06c6378b29", - "style": "IPY_MODEL_47d275b36e704a74a22098c38f14f301", - "value": "Algorithm Parameters" - } - }, - "683e3afa65604f1b85604a79ec228a2b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "689e8f05af2f4f159239a896e7e9843a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "68d4eab6f1cf4e2fa0e229ecdce8d392": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget005" - } - }, - "68fcf5652dd14e5fad220fcbe777ddbb": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "description_width": "" - } - }, - "691c17934ca3435eb36a2d84d15ecdf7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "GridBoxModel", - "state": { - "children": [ - "IPY_MODEL_3a96e3ae233940e18c75f004da9e0459", - "IPY_MODEL_8d18e0fa10b94372a3edf64edb4814bc", - "IPY_MODEL_677e2010d7ce45eb9adc6f26a8977636", - "IPY_MODEL_e224793bc1524f0c91ce3d7ef0e98f8e", - "IPY_MODEL_c34d5f3024f24951b4f478bca62dd7c7", - "IPY_MODEL_6bb0b7ee0cdf49ca97bb0c3b528131e8", - "IPY_MODEL_5adceaf568da4a1d88d6bf7b379965c2", - "IPY_MODEL_6c1a4850cad844f4bd144b78177e6d31", - "IPY_MODEL_c12ffb6b4533460bbdfc7404ff89d807", - "IPY_MODEL_e6c798aa900740009741c67dfccb0d92", - "IPY_MODEL_75b1aa83fa184214aecc8ea858858cd3", - "IPY_MODEL_e1f03c622ff64b3bb4e59fc54e7898a6", - "IPY_MODEL_611b3bc2e8e749a38fe77bbdab064670", - "IPY_MODEL_eb54eb7b3c674e67b10610ce2aaf309a" - ], - "layout": "IPY_MODEL_fb06877af7ae451baefc12dfd27d9348" - } - }, - "6923c73eeac747fdbe41b2062e257a58": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": 
"widget007" - } - }, - "69268529fca5425e9f11506c968490e7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "698f9329e3754e7482dc32690ba58f4a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "6a001a1bb11844d0b85857486c544879": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "6ab9513a615a4551a596a3d2e637d181": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "6bb0b7ee0cdf49ca97bb0c3b528131e8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_2c0353597c114ba184977dac607510c3", - "style": "IPY_MODEL_82c3b758724944d0b02d17ecfdd05698", - "value": "False" - } - }, - "6c1a4850cad844f4bd144b78177e6d31": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_e255dc6e7af7487e8a2729f670bffd8a", - "style": "IPY_MODEL_012eeb7c3bab46d9baa05356cd4ff0f6", - "value": "0.9" - } - }, - "6c751fa2c2aa415ea57d3c9b0e11b22d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_254576dd293543d384c9e5620c3db225", - "style": "IPY_MODEL_304f4dcdb42b4bca91451ccfe7eba639", - "value": "max_steps" - } - }, - "6caef128e4df40ebb76ef90ad9a40d41": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_a496bd2aabab465fbcf0022dc1acd19f", - "IPY_MODEL_2982ccca674f4bfc839557e06cde9993" - ], - "layout": "IPY_MODEL_fbd450c8b01f4ab9ae7ea1caa129bd66" - } - }, - "6cb628f08ae2469db2ee42e38ca4de74": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "BoxModel", - "state": { - "children": [ - "IPY_MODEL_90d52d8b63c342f087384246a76680d7" - ], - "layout": "IPY_MODEL_759fddd650134c46bbbbd4b4c6f8c744" - } - }, - "6d5b0a5b26874cfd874c4a0bdf307eff": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_1537ab75a9dd4f429ffb3812c485116f", - "IPY_MODEL_a18265de326b4d399e760f9d2e5bb238" - ], - "layout": "IPY_MODEL_7208b8f21c77462dad67124eb0fd8164" - } - }, - "6d6739242111448eaf1e80a8962f1aac": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "6db9105409df4485909f169fc6e6d696": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget003" - } - }, - "6dc0399123f94dd1831a2b2cfb6c3078": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "6e144126a66b48f9a22641284932ad73": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": 
"IPY_MODEL_53c0481b6b294cf888f2b3abdc33a95c", - "style": "IPY_MODEL_a8e550f371f94677a29e238776be2cdb", - "value": "Tips:" - } - }, - "6efa143c4b9d43aa94ed8cfe56824583": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "6f0bd8ffadf44461a70b1031b3f65064": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatSliderModel", - "state": { - "continuous_update": false, - "description": "learning curve smooth factor", - "layout": "IPY_MODEL_145001c5826a41cd989997ea61244ca1", - "max": 1, - "step": 0.01, - "style": "IPY_MODEL_4080aa3475b94001b5324fd14d18816c", - "value": 0.8 - } - }, - "6f525160109d45299758550c08196bd9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "description_width": "" - } - }, - "70c300868924433094e74b74d260a4a2": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "justify_content": "center" - } - }, - "715b10d741354c8db506fb8ba945a074": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_1e327c8e4b844c2fbb017a5544fa678e", - "style": "IPY_MODEL_6ab9513a615a4551a596a3d2e637d181", - "value": "0.0001" - } - }, - "7208b8f21c77462dad67124eb0fd8164": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "731d299fb9dd45c1a41a5d4df4f41f94": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "SliderStyleModel", - "state": { - "description_width": "" - } - }, - "747e88ebfefc4efb95f60f63e725dcc1": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "74d03d1491d4451d879384ab357f33a9": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "align_items": "center" - } - }, - "74dc8e60490943c8b9601232bf24f608": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "7532b84aea3a4f4290efa4b0369e846a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "759c11789beb46f798f3b48c4cf88577": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "759fddd650134c46bbbbd4b4c6f8c744": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "75b1aa83fa184214aecc8ea858858cd3": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_7cdb0eb01b9b434ca4c08fd25f243f09", - "style": "IPY_MODEL_3cfd11894b514078901081bddd35c83d", - "value": "decay" - } - }, - "75c167ca66774581880b2500d5176a36": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "76c7ceb7a42e44048e694b71f27f56eb": { - "model_module": "@jupyter-widgets/base", - 
"model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "76d1b335a0134c19852090005ae135c4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_e8260cb1f55049a49bdaf024528d43c4", - "style": "IPY_MODEL_def02ee29d9a44b19a1fd20f8a4be1a0", - "value": "name" - } - }, - "76dec90334724f3ba9e51ba05856ff79": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "776cdbcecc004924a856eb45ec0a5699": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_98eeb6cc7ac643ac882d54fab647de04", - "style": "IPY_MODEL_a02320673c484c46848d7aeb6fda6e18", - "value": "1e-07" - } - }, - "78f5897896d144fe839fafd65e76816e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "79611f87c64c431794f17eccbbd60f38": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget012" - } - }, - "79953b3e59c048548c96bb197d46a7ea": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "7a4be7c4229640b18c29d60d30cc0e70": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "7a5d99612efa45acb82149814a4a7e82": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_7e40917d81264ee9986d07bae8291022", - "style": "IPY_MODEL_1e6d0c80ceaa4e58846e9f554371b363", - "value": "Observation space:" - } - }, - "7a6c0819e1344119aae9ef136830ad44": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "7a7ebee6dcf34f36b1d55d2cb443e387": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "7a807eea55d14bae96d792b1e475adcb": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "7aa2babe24dc4fab84bfbd511f0b5e98": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "7aba7921241e41af9a32cbe042699485": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "7af9623e94c64555b01efa581f338e60": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "children": [ - "IPY_MODEL_841b7f5d915e4f639784140b23610d75", - "IPY_MODEL_e904337542fd4e5d8187b9b9190b7522" - ], - "layout": "IPY_MODEL_532ea00fd94045298f69a3917ced39c7" - } - }, - "7b48f1fae96e40519787018ed628b99b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - 
"7cc3bf6293494425b70569d1eca3af03": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "7cdb0eb01b9b434ca4c08fd25f243f09": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget011" - } - }, - "7d163d682d5744d6ac7be041fb66c158": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "7d64c7c8f2dc4d4eb6218e55ae44bfbe": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "7d70e416e925499f93e5837aabc6afc2": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "7dc1333733194435934e6ca098ede1ad": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "7df23ef826fb4c568071b0667bafcd3b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_6db9105409df4485909f169fc6e6d696", - "style": "IPY_MODEL_84111028e0ea4937a6fea8f96b279bec", - "value": "model save path" - } - }, - "7e128d275e3c4e88829167514cec3bc6": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "7e40917d81264ee9986d07bae8291022": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "7f3f44cbaac94755810c0e589d048490": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "7f9233b831cc448a97a909e398122bb9": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "7f94bb571172453a920e7bd6d7a9050f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatSliderModel", - "state": { - "continuous_update": false, - "description": "Slider input:", - "layout": "IPY_MODEL_58201f662dc74741bcdeb0e7753843c4", - "max": 600, - "min": -400, - "readout_format": ".0f", - "style": "IPY_MODEL_b5dd447dec9c48bc8b1bb664c9553912", - "value": 100 - } - }, - "7fbbe1851a944d69a568c06875de2b0f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatTextModel", - "state": { - "description": "Manual input:", - "layout": "IPY_MODEL_12e50eba7f3e4e9f888416f46172b60f", - "step": null, - "style": "IPY_MODEL_18a7121ba72e42af9a496a39fb8c6f6a", - "value": 100 - } - }, - "7fea48aa29c24b4b94784890589e01e4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "7ff9e3e9f09b40d398b6c898e5ee9653": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "808fb0e5d6b940388d588196c927564d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": 
"LayoutModel", - "state": {} - }, - "80d9bf94c37c49708820ccb5a2aa8f8b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "814eef7fa97a4fa2b4c5f1ed1b3728f3": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_2e8b3025623248e2a92daa5a7750997f", - "style": "IPY_MODEL_bb04f52581bb496e9a6931ce291714c9", - "value": "Dense(n_units=64, relu, in_channels='64', name='hidden_layer2')" - } - }, - "81621cd1e69f47a1868bf499caac5824": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "8173f889450249d58f18acfe83d63ddd": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "8178676fb5e441ec92464938695643a8": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "81a50427a5384feeaaee374a19ad5931": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_template_areas": "\"widget001 widget002\"\n\"widget003 widget004\"\n\"widget005 widget006\"\n\"widget007 widget008\"\n\"widget009 widget010\"\n\"widget011 widget012\"", - "grid_template_columns": "repeat(2, 1fr)", - "grid_template_rows": "repeat(6, 1fr)" - } - }, - "81d1f55272ef4977b06be173bdd59b8c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "81f34a95028440608c8a5a307cd7ee9b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "align_content": "center", - "align_items": "center", - "border": "dotted" - } - }, - "82c3b758724944d0b02d17ecfdd05698": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "831ed45407f74193acc07dacada162a9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "835ef9a1125846679a65d679afb62013": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "8387714984af4e9cbaf16cbff2a45cbb": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget001" - } - }, - "83c18b3b4c374f70947e47230ffe4f82": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_be4d4fbbc53d4705963f9b343aff399f", - "style": "IPY_MODEL_8efed772f09f4ea1a1dabf91598fd49a", - "value": "Optimizer information:" - } - }, - "84111028e0ea4937a6fea8f96b279bec": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "841b7f5d915e4f639784140b23610d75": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatSliderModel", - "state": { - "continuous_update": false, - "description": 
"Slider input:", - "layout": "IPY_MODEL_0b081708649d446ab37f522f5a019e19", - "readout_format": ".0f", - "style": "IPY_MODEL_12a0f20f2ecd423889594f36b15647f1", - "value": 50 - } - }, - "842ea79123034275adec1df392a4846d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget009" - } - }, - "84f7291061b34bfaaaec0711bd0cca56": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_ae877e1e2a554a19b78fb9a12f60e5d3", - "style": "IPY_MODEL_1f0e424278554da08fbb15138e571a62", - "value": "The action space is continuous." - } - }, - "85165a2de0d64a2bb9baf9b64b3ffa38": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_383cf0cb101341d4bdfb65604a24a4d5", - "style": "IPY_MODEL_23424247d797485dba0788eb6b7614aa", - "value": "model save path" - } - }, - "85514e8a938240e7b2df7c2a8ad6b6e8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "85d35dbed0594a3a837f536309af0b59": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatSliderModel", - "state": { - "continuous_update": false, - "description": "Slider input:", - "layout": "IPY_MODEL_1db128fafd984258b040b5295b477f0d", - "max": 74, - "min": -26, - "readout_format": ".0f", - "style": "IPY_MODEL_066c122ea5f64991b7347279a79e8061", - "value": 24 - } - }, - "86e357397076415ba3ac239b26a8bc8f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget006" - } - }, - "8784dbc322c7455aaef2b352bae2f205": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "87b22017505c4d14a335692f09abd816": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "BoxModel", - "state": { - "children": [ - "IPY_MODEL_4a2a0ec5e8f641f489d58e31f3f5fcef" - ], - "layout": "IPY_MODEL_1f37fdacb85646a1b5ff9a2b1d6ab38a" - } - }, - "885608d7df064c51ac0523ef9928e6b6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_689e8f05af2f4f159239a896e7e9843a", - "style": "IPY_MODEL_b85dbc19731e4b84bb6122ea52367809", - "value": "Action space:" - } - }, - "8865f419c3a04323907d8e9d11f06c24": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_8387714984af4e9cbaf16cbff2a45cbb", - "style": "IPY_MODEL_5daa3bcd6829495cb223328230f0f8e4", - "value": "gamma" - } - }, - "886c73a1052a4a2da9ec06c958855a51": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "88aafdf648784ac7954ce933431f9a3a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "children": [ - "IPY_MODEL_8d80128792d44bf1a0467b7e86df0b54", - "IPY_MODEL_d91d58d65e864faa90c9cc7bfd2959b0" - ], - "layout": "IPY_MODEL_8ff956034aa047d0a8809922cbefa856" - } - }, - "88b977df9d82476298ff3c70d714afe0": { 
- "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatSliderModel", - "state": { - "continuous_update": false, - "description": "learning curve smooth factor", - "layout": "IPY_MODEL_7f9233b831cc448a97a909e398122bb9", - "max": 1, - "step": 0.01, - "style": "IPY_MODEL_35525c0fbffa497eb43f7d5bd081bb0b", - "value": 0.8 - } - }, - "88fc41c33c024f4eb22b13e0ea98e605": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget003" - } - }, - "891909eab8204a4bb78c9a468bc20112": { - "model_module": "@jupyter-widgets/output", - "model_module_version": "1.0.0", - "model_name": "OutputModel", - "state": { - "layout": "IPY_MODEL_e1f175e02edf40f39585c485ec11cbff", - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAD8CAYAAACCRVh7AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xl8VOXZ//HPlX0hJBACgbBDAiJCkMiiwqOCilsRKluR2tY+tFar1tpq26e/9nm6qK11a91otaUtKopasKi4F0VLDcgShEzCmgAZEoQwScg61++PDBgwIcssJ8v1fr3mxcx9zpm5MmK+nHPf575FVTHGGGMaE+Z0AcYYY9ovCwljjDFNspAwxhjTJAsJY4wxTbKQMMYY0yQLCWOMMU2ykDDGGNMkCwljjDFNspAwxhjTpAinC/BXr169dPDgwU6XYYwxHcqGDRtKVDWluf06fEgMHjyY7Oxsp8swxpgORUT2tmQ/u9xkjDGmSRYSxhhjmmQhYYwxpkkWEsYYY5pkIWGMMaZJFhLGGGOaZCFhjDGmSRYSxhjTCq9tPciu4jKnywgZCwljjGmhQ55Kblq2kase+YDlH+9DVZ0uKegsJIwxpoVcRfVnEH26R3PXi1v57rOfcKyyxuGqgstCwhhjWijX7QHg+W9N5geXj+C1nCKufPh9Nu474nBlwWMhYYwxLeQq8tAzPoqUhGhuvng4z39rMqow54mPePTdfLzeznf5yULCGGNaKNftIaNPN0QEgPGDevDqbVOYMTqV367JZdHT63Efq3S4ysCykDDGmBZQVfLcHkb0STilPTE2kj8sGMd9Xz6HDXuPcMXD7/PODrdDVQaehYQxxrTA/qPHKa+uIyM14QvbRIR55w3kn9+9kN4J0XzjL9n83yufUlVb50ClgeVXSIjIHBHZJiJeEclq0L5QRDY1eHhFJNO37VciUiAiZae9V7SILBeRfBFZLyKD/anNGGMCyeXrtM7o88WQOGF47wT+cfMFfO38wTy9bjezHv2QnR38ngp/zyRygNnA2oaNqrpMVTNVNRNYBOxW1U2+za8AExp5rxuBI6o6HHgQuM/P2owxJmByfcNfM3o3HRIAMZHh/PxLZ/Onr2ZxsPQ41/z+A17ILuiw91T4FRKqul1Vc5vZbQHwXINj/q2qBxvZbyaw1Pd8BTBNTvQOGWOMw1xuD6ndY0iMi2zR/tNH9eG126Yypn8iP1ixhdue24SnA95TEYo+iXnAsy3YLw0oAFDVWqAUSG5sRxFZLCLZIpJdXFwcsEKNMaYpLren0f6IM0lNjGHZNyfx/UszWL31IFc+8j6bCo4GqcLgaDYkROQtEclp5DGzBcdOBCpUNScg1fqo6hJVzVLVrJSUZtfxNsYYv9R5lbxDZYzo063Vx4aHCd+dls7yxZPweuG6xz/kiX/t7DD3VEQ0t4OqTvfj/efTsrMIgP3AAKBQRCKAROCwH59tjDEBsfdwOdW1XtLP0GndnKzBPXn11inc/dIW7n1tBx/klfDAvLH0TogJYKWBF7TLTSISBsylQX9EM1YBN/ieXwe8ox21p8cY06mcGNl0+j0SrZUYF8ljC8/lntnnkL33M6546H3ezT0UiBKDxt8hsLNEpBCYDKwWkTUNNk8FClR112nH/MZ3TJyIFIrIz32bngKSRSQfuAO425/ajDEmUE6MbEpvw+Wm04kICyYM5JVbLqRXt2i+/ueP+eU/P6W61uv3eweDdPR/rGdlZWl2drbTZRhjOrGbn9nI1sJS1v7w4oC+b2VNHb9avZ2//Xsv56Ql8siCcQzpFR/Qz2iKiGxQ1azm9rM7ro0xphmuIs8Zb6Jrq5jIcH5x7WieXDSefZ9VcPUj7/PSxsKAf44/LCSMMeYMqmrr2F1STkYALjU15fKzU3nttimc3S+RO57fzPeWb6KsqjZon9caFhLGGHMGu0vKqfUqI1p5j0Rr9UuK5dnFk/je9AxWbtrPVY+8z5ZC5++psJAwxpgzcLl903EE4XLT6cLDhNump/Pc4snU1HqZ/diHLFnr7D0VFhLGGHMGriIP4WHC0JTQdCgDTBjSk1dvm8K0s3rz61d38LW/fEyxpypkn9+QhYQxxpxBrtvDkF7xREeEh/Rzk+KieOL68fzy2tGs33WYKx5ey1pX6KchspAwxpgzcPlWo3OCiHD9pEGsuuVCesZH8dWn/8M9r24P6T0VFhLGGNOE49V17PusIiT9EWcyIjWBlTdfyFcmDuTJtbuY88SH7D1cHpLPtpAwxpgm5B8qQ9X/6TgCITYqnF/POofHF57L7pJyrnrkA9bllwT9c5ud4M8YY7qq3BOr0QV5+GtrXHFOX8YMSOL/XtkWkjMcCwljjGmCy+0hKiKMQT3jnC7lFGlJsTy5qNkZNQLCLjcZY0wTcos8DEvpRkR41/1V2XV/cmOMaYbL7WnTQkOdiYWEMcY04lhlDQdLK9tVf4QTLCSMMaYReQFaaKijs5AwxphGnFhoyOl7JJxmIWGMMY1wuT3ERYWTlhTrdCmO8nf50jkisk1EvCKS1aB9oYhsavDwikimiMSJyGoR2eE77t4Gx0SLyHIRyReR9SIy2J/ajDHGH7lFHtL7JBAWJk6X4ih/zyRygNnA2oaNqrpMVTNVNRNYBOxW1U2+zfer6khgHHCBiFzha78ROKKqw4EHgfv8rM0YY9o
s75CNbAI/Q0JVt6tqbjO7LQCe8+1foarv+p5XAxuB/r79ZgJLfc9XANNEpGtHuDHGESVlVZSUVXf5/ggITZ/EPODZ0xtFJAm4Bnjb15QGFACoai1QCiSHoD5jjDmF68TIpi4+/BVaMC2HiLwFpDay6SequrKZYycCFaqac1p7BPXB8Yiq7mpFvSeOXwwsBhg4cGBrDzfGmDNyFfnmbLIzieZDQlWn+/H+82nkLAJYAuSp6kMN2vYDA4BCX4gkAoebqGmJ7z3Iyspybl0/Y0ynlOsuIzE2kt4J0U6X4rigXW4SkTBgLr7+iAbtv6Q+AG4/7ZBVwA2+59cB76iqBYAxJuTy3B5G9EnAukX9HwI7S0QKgcnAahFZ02DzVKCg4eUkEekP/AQYBWz0DY/9pm/zU0CyiOQDdwB3+1ObMca0haqS6/aQkWojm8DPqcJV9WXg5Sa2vQdMOq2tEGg0mlW1EpjjTz3GGOOvomOVeCpru/x0HCfYHdfGGNNArq/TOt1CArCQMMaYU5wY/mojm+pZSBhjTAMudxkpCdH0jI9yupR2wULCGGMacPlGNpl6FhLGGOPj9Sout8cuNTVgIWGMMT4FRyqorPGSYRP7nWQhYYwxPidGNnX1JUsbspAwxhifvEP1q9Gl97YziRMsJIwxxie3yENaUiwJMZFOl9JuWEgYY4yPy+2x6cFPYyFhjDFATZ2XncVlNrLpNBYSxhgD7Ckpp6ZObWTTaSwkjDGG+jutwabjOJ2FhDHGALluD2ECw21k0yksJIwxhvolSwcnxxMTGe50Ke2KhYQxxoBNx9EECwljTJdXWVPHnsPl1mndCAsJY0yXt7O4DK/adByN8XeN6zkisk1EvCKS1aB9oW/96hMPr4hk+ra9LiKbfcc9ISLhvvaeIvKmiOT5/uzh349mjDEtc2KhIZsi/Iv8PZPIAWYDaxs2quoyVc1U1UxgEbBbVTf5Ns9V1bHAaCCFz9e1vht4W1XTgbd9r40xJuhyi8qIDBcG94p3upR2x6+QUNXtqprbzG4LgOcaHHPM9zQCiALU93omsNT3fClwrT+1GWNMS7ncHoaldCMy3K7Any4U38g84NmGDSKyBjgEeIAVvuY+qnrQ97wI6BOC2owxhtwiD+l2qalREc3tICJvAamNbPqJqq5s5tiJQIWq5jRsV9XLRSQGWAZcArx52nYVEaUJIrIYWAwwcODA5n6ERv1zywG2FJaSlhRL/x6xpPWItdkfjemCyqpq2X/0OAsmDHC6lHap2ZBQ1el+vP98TjuLaPC+lSKykvrLTG8CbhHpq6oHRaQv9WcaTdW0BFgCkJWV1WSYnMnWwlL+8uEeqmu9p7R3j4mgf4+4k6HRv4cvRJLq23rERSIibflIY0w7lOfrtLZ7JBrXbEi0lYiEAXOBKQ3augEJviCIAK4C3vdtXgXcANzr+/OMZyn++tGVZ3HXjJGUlFex/8hx9h89TuGR4yef7z1czof5JZRX151yXFxUOGlJ9WceDcMjLSmWAT1i6dUtmrAwCxFjOoqTI5ts+Guj/AoJEZkF/J76UUqrRWSTql7u2zwVKFDVXQ0OiQdWiUg09f0h7wJP+LbdCzwvIjcCe6kPmKAKCxN6J8TQOyGGcQO/OOJWVSk9XkPhEV+AHK0PkcIjFew/epxNBUc5WlFzyjFR4WH0S4qpPxtJij3ljCStRyyp3WOIsM4xY9qN3KIyYiLDGNAjzulS2iW/QkJVXwZebmLbe8Ck09rcwHlN7H8YmOZPPYEmIiTFRZEUF8XotMRG9ymrqvWdfVT4AuQ4hb4weXvHIUrKqk7ZPzxMSO0eU38m4guRoSnxjOqbyLCUeAsQY0LM5faQ3jvBrgA0IWiXm7qKbtERjEhNaPJUtbKmjgNHTz0Tqb+0VcG/dx2m6FglXl+vSlREGCNTExjVtztn9+vOqH7dGZnanfho+89kTLC43B6mpKc4XUa7Zb99giwmMpyhKd0YmtL4nDA1dV52l5Sz7UApnx44xqcHj/H6tiKe+7gAABEYkhzPWf18wdG3Pjx6J8SE8scwplM6Ul7NIU8VI1JtzqamWEg4LDI8jIw+CWT0SWDWuPo2VeVgaSXbDhzzBUcpmwuOsnrLwZPHpSREn3LGMapvdwYnx9spszGt4LKRTc2ykGiHRIR+SbH0S4rl0lGf31NYerzm5NnGpweOse1AKevyS6j1Xa+KiwrnrL6nnnFk9Emw+fGNaYKNbGqehUQHkhgbyeRhyUwelnyyraq2jjx32cnw2HaglJc27uevVXuB+o7y4SndTjnjGNWvO0lxUU79GMa0Gy53GQnREaR2t8u3TbGQ6OCiI8IZnZZ4yugrr1cpOFLR4HLVMdbtLOGlT/af3CctKfaU0Di7X3f62xBA08Xkuj1kpCbYDbJnYCHRCYWFCYOS4xmUHM+V5/Q92V5SVvWFy1VvbXejvtFVt09P5/bpGQ5VbUxoqSout4crRvdtfucuzEKiC+nVLZqpGSlMzfh8uF9FdS07ijw89m4+j723k3nnDaBvYqyDVRoTGsWeKo5W1DDCVqM7I7tzq4uLi4rg3IE9+Nk1Z6OqPPJ2ntMlGRMSuSdGNlmn9RlZSBgABvSMY+HEQTyfXciu4jKnyzEm6Fzu+r/nNvz1zCwkzEk3Xzyc6IgwHnjT5XQpxgSdq8hDcnwUvbpFO11Ku2YhYU5KSYjmGxcM4Z9bDpKzv9TpcowJqly3x84iWsBCwpziv6cOJTE2kvvfaG5VWmM6Lq9XyXN77Ca6FrCQMKdIjI3kpouG8V5uMet3HXa6HGOCYv/R45RX19mZRAtYSJgvuGHyYHonRPObNbmotmnhP2PatbxDJ+ZssuGvzbGQMF8QGxXOrdPS2bD3CO/saHIVWWM6rNyi+pFN6XYm0SwLCdOoeecNYFByHL9dk4vXa2cTpnNxuT30TYwhMTbS6VLaPQsJ06jI8DDuuDSDHUUeXtlywOlyjAmo3CIb2dRSfoWEiMwRkW0i4hWRrAbtC0VkU4OHV0QyTzt2lYjkNHjdU0TeFJE8359fXHTahNQ1Y/oxMjWBB950UVPndbocYwKizqvkF5fZyKYW8vdMIgeYDaxt2Kiqy1Q1U1UzgUXAblXddGK7iMwGTr+t927gbVVNB972vTYOCgsTfnD5CPYermC5b6U8Yzq6vYfLqa71kt7bOq1bwq+QUNXtqtrcgPoFwHMnXohIN+AO4Jen7TcTWOp7vhS41p/aTGBcMrI34wf14JG38zheXed0Ocb4zRYaap1Q9EnMA55t8PoXwO+AitP266OqJ9bnLAL60AQRWSwi2SKSXVxcHNBizalEhLtmjOSQp4qlH+1xuhxj/JZbVIYIDLcziRZpNiRE5C0RyWnkMbMFx04EKlQ1x/c6Eximqi+f6TitH5zf5JAaVV2iqlmqmpWSktLUbiZAJgzpyUUjUnj8vZ2UHq9xuhxj/OJyexjYM464KFspoSWaDQlVna6qoxt5rGzB+8/n1LOIyUCWiOwBPgAyROQ93za3iPQF8P1pA/TbkTsvG0Hp8Rr+uHaX06UY4x
ebs6l1gna5SUTCgLk06I9Q1cdVtZ+qDgYuBFyqepFv8yrgBt/zG4CWhJAJkdFpiVw9pi9Pr9tNsafK6XKMaZOq2jr2lJTbndat4O8Q2FkiUkj9GcJqEVnTYPNUoEBVW/pPz3uBS0UkD5jue23ake9fNoKqWi+PvpvvdCnGtMnuknJqvWpnEq3g10U5X99Co/0LqvoeMOkMx+4BRjd4fRiY5k89JriG9IpnblZ/lq3fy40XDmFAzzinSzKmVXKLbGRTa9kd16ZVbp2Wjojw0Fudd5nTvYfL+dnKHI6UVztdigkwl9tDRJgwtJddbmopCwnTKn0TY7lh8iBe+qTw5HjzzqS8qpZvLs1m6Ud7+fHLW20W3E4mt6iMIb3iiYqwX30tZd+UabWbLhpOfFQE96/pXAsTqSo/XLGFncVlXDWmL6/lFPHyJ/udLssEUN4hG9nUWhYSptV6xkfx31OG8sanbj7Zd8TpcgJmydpdrN56kLtmjOSR+ePIGtSDn63cRuGR0+/7NB1RRXUt+z6rsJBoJQsJ0yY3ThlCcnwUv+0kZxPr8ku47/UdXHVOXxZPHUp4mPDA3Ey8qtz5wmabLr0TyD9UhiqMSLX+iNawkDBt0i06gu9cPJwPdx7mg7wSp8vxS+GRCm55ZiPDUrrxm+vGICIADEyO42fXnM2/d33G0+t2O1yl8deJkU12JtE6FhKmzRZOHEhaUiy/XbOjw3bwVtbU8e2/b6C2Tnly0Xjio08dFT4nqz+XjurDb17PPflLxnRMLreHqIgwBiXHO11Kh2IhYdosJjKc26ans7mwlDXbipwup9VUlf/5Rw45+4/x4LxMhqZ88TKEiHDP7HPoHhvB7cs3UVVrM+F2VC53GcNTuhEeJk6X0qFYSBi/zB6XxrCUeO5/w0VdB7tu//f1+1ixoZBbp6UzfVSTkw7Tq1s0984ew/aDx3jwzc57f0hn53J77Ca6NrCQMH6JCA/jzstGkH+ojJc2FjpdTott2PsZ//fKNi4ekcLt09Kb3X/6qD4smDCAJ9fu5D+7PwtBhSaQSo/XcLC00voj2sBCwvhtxuhUxvRP5KG38jrE5ZhDxyr59t830i8plofmjSOshZcf/ueqUQzoEccdz2/CU2lTpnckeScXGrKRTa1lIWH8JlK/zOn+o8d5Zv0+p8s5o+paL99ZtpGyylqeXDSexLjIFh8bHx3Bg/PGcuDocf7vlU+DWKUJtFy3jWxqKwsJExAXDu/F5KHJ/OGdfMqrap0up0m/XP0p2XuPcN91YxiZ2r3Vx48f1JObLhrGCxsKO2RnfVeV5y4jPiqctKRYp0vpcCwkTECICD+YMYLD5dU8/UH7vKdgxYZC/vrRXv57yhC+NLZfm9/ntmkZnN2vOz96aautrdFB5BZ5SO+TcPIeGNNyFhImYM4d2INLR/Vhydpd7W4G1Zz9pfzk5a1MHprMXTNG+vVeURFhPDQvk7KqWu5+cUuHvUekK3G5PYywS01tYiFhAurOy0ZQVl3L4//a6XQpJ31WXs23/raB5Pgo/vCVcUSE+//XPr1PAnfPGMnbOw7x3McFAajSBEtJWRWHy6vJsOGvbWIhYQJqRGoCszLTWPrhHopKK50uh9o6L7c++wnFZVU8fv14krtFB+y9v3b+YC4Ynswv/vkpe0rKA/a+JrBcJxYasjOJNvF3+dI5IrJNRLwiktWgfaGIbGrw8IpIpm/beyKS22Bbb197tIgsF5F8EVkvIoP9qc0453uXZuBV5eG3nb/x7P43XHyQX8IvZ45m7ICkgL53WJhw/5yxRIQJdzy/ido6b0Df3wSG6+TIJhv+2hb+nknkALOBtQ0bVXWZqmaqaiawCNitqpsa7LLwxHZVPeRruxE4oqrDgQeB+/yszThkQM84FkwYyPPZBex28F/Yr249yBP/2slXJg5k7nkDgvIZfRNj+cW1o9m47yhPtKNLbOZzue4ykuIiSUkI3FlkV+JXSKjqdlVtbq7oBcBzLXi7mcBS3/MVwDSxoQgd1i2XDCcqPIwH3nQ58vl5bg93vrCZcQOT+Nk1o4L6WTMz07hmbD8eeiuPrYWlQf0s03oud/1CQ/brpG1C0ScxD3j2tLY/+y41/bRBEKQBBQCqWguUAskhqM8EQe+EGL5x4WBe2XyAbQdC+4vzWGUNi/+2gbioCB5fOJ7oiPCgf+YvZp5Ncrcobl/+CZU17f+u865CVXEV2cgmfzQbEiLylojkNPKY2YJjJwIVqprToHmhqp4DTPE9FrW2aBFZLCLZIpJdXFzc2sNNiCyeOozE2MiQLnPq9Sp3LN9MwWcVPLbwXFITY0LyuUlxUdw/Zyw7i8u57/UdIflM07yDpZV4qmptZJMfmg0JVZ2uqqMbeaxswfvP57SzCFXd7/vTAzwDTPBt2g8MABCRCCARONxETUtUNUtVs1JSUlpQhnFCYmwk3/6vYbybW8zHe0IzKd4f3s3nre1ufnLVWUwY0jMkn3nClPQUvnb+YP68bk+HX4ipszjZad3bOq3bKmiXm0QkDJhLg/4IEYkQkV6+55HA1dR3fgOsAm7wPb8OeEftLqUO72vnD6Z3QjS/eT34CxO9u+MQD77lYta4NL52/uCgflZT7poxkmEp8dz5wmZKK2wSQKe5bM4mv/k7BHaWiBQCk4HVIrKmweapQIGq7mrQFg2sEZEtwCbqzx7+6Nv2FJAsIvnAHcDd/tRm2ofYqHC+Oy2dj/cc4b3c4F0a3FNSzm3PfcJZqd359axzHOukjI0K58F5mZSUVfHTlTnNH2CCKreojN4J0fSIj3K6lA7L39FNL6tqf1WNVtU+qnp5g23vqeqk0/YvV9XxqjpGVc9W1dtUtc63rVJV56jqcFWdcFq4mA5sXtYABvaM4zdrcvEGYWGiiupavv33DYgITy4aT2xU8Duqz2RM/yRum5bOqs0HWLlpv6O1dHW20JD/7I5rE3RREWHccWkG2w8e459bDwb0vVWVu17cSq7bwyMLxjGgZ1xA37+tbrpoGOMGJvHTf+RwsPS40+V0SV6vknfIY5ea/GQhYULiS2P7MTI1gQfeyKUmgHcmP/XBbl7ZfIA7LxvBf2W0n0EMEeFhPDg3k5o65QcvbAnKGZQ5s4IjFVTWeO1Oaz9ZSJiQCAsT7rxsBHsOV/BCdmCWOf1wZwn3vLaDy8/uw3cuGhaQ9wykwb3i+enVo/ggv4S/frTH6XK6nNwi67QOBAsJEzLTzurNuQOTePhtl983nB04epzvPvMJg5PjuH/O2HZ7N+2CCQO4ZGRv7nltB/mHPE6X06WcGNmUbiHhFwsJEzIiwg9njMR9rIqlH+5p8/tU1tRx0983UFXr5clFWSTEtHwJ0lATEe798jnERYVz+/JNVNfaJIChkusuo3+PWLpFRzhdSodmIWFCatLQZKZmpPD4v3ZyrLJt9xH8fNU2NheW8ru5YxneAW6S6p0Qwz2zzyFn/zF+/47zM+N2FXm20FBAWEiYkPvh5SM4WlHDH9e2fpTzM+v38dzHBdxy8XAuPzs1CNUFx4zRfblufH8efTefDXuPO
F1Op1dT52VncZldagoACwkTcqPTErlqTF+e+mB3q9aI3rjvCD9blcPUjBS+d2lGECsMjp9dM4q+ibHc8fwmyqtqnS6nU9tTUk5NnTIitf2fabZ3FhLGEd+/NIOqWi+Pvpvfov2LPVV85+8bSU2M4ZH5mYSHtc+O6jNJiInkgblj2fdZBb9cvd3pcjq1XJuOI2AsJIwjhqZ0Y874/jyzfh+FRyrOuG9NnZebn9nI0ePVPHl9FklxHXeKhYlDk1k8dSjP/mcfb293O11Op+Uq8hAmMCzFziT8ZSFhHHPb9HQQeOitM3fm/vrV7fxn92fcO3sMo/p1D1F1wXPHpRmMTE3grhe3cris5ZfbTMu53GUM7hVPTKSzU7R0BhYSxjF9E2P56qRBvLSxkDx34/cQ/OOT/fx53R6+fsFgrh2XFuIKgyM6IpyH5mdy7HgNP3ppa9Bnx+2KXG4PGb3tUlMgWEgYR33n4uHERUXwuze+uMzptgOl3P3SFiYM6cmPrzzLgeqCZ2Rqd+68PIM3PnWzYkNg7kA39Spr6thzuNwWGgoQCwnjqJ7xUXxzyhBe31bE5oKjJ9uPVlTz7b9vICk2ike/ci6R4Z3vr+qNFw5l4pCe/O8rn1Lw2Zn7ZUzL5R8qw6vYPRIB0vn+zzMdzjenDKVnfBS/9S1zWudVbn1uE+7SKh6//lxSEqIdrjA4wsOE380dC8D3n99MnU0CGBAnpuOw4a+BYSFhHNctOoLvXDSMD/JL+DC/hAffdLHWVczPv3Q24wb2cLq8oOrfI47//dLZ/GfPZ/zxfVtCJRBc7jKiwsMYlBzvdCmdgoWEaReunzSIfokx3PnCZv7wbj7zzxvAVyYOdLqskJh9bhpXjE7ld2/k8umBY06X0+G53B6GpsR3ykuUTrBv0bQLMZHh3DY9nQOllYztn8jPv3S20yWFjIjwq1nnkBQXxfeWb/J7htyuLrfIFhoKJH/XuJ4jIttExCsiWQ3aF4rIpgYPr4hk+rZFicgSEXGJyA4R+bKvPVpElotIvoisF5HB/tRmOp4vn9ufX80azR9vyOpy49t7xkfxm+vGkOv28MCbXxzpZVrGU1nD/qPHbcnSAPL3TCIHmA2sbdioqstUNVNVM4FFwG5V3eTb/BPgkKpmAKOAf/nabwSOqOpw4EHgPj9rMx1MRHgYCycOondCjNOlOOLiEb25ftJA/vj+Lj7aedjpcjqkvENlgE3HEUh+hYSqblfV3GZ2WwA81+D1N4B7fMd7VbXE1z4TWOp7vgKYJu11JRljguTHV57F4OR47nydNTO5AAAPXklEQVRhc5unUu/KTtyUacNfAycUfRLzgGcBRCTJ1/YLEdkoIi+ISB9fWxpQAKCqtUApkByC+oxpN+KiInhg7liKjlXy81XbnC6nw8ktKiM2Mpz+PWKdLqXTaDYkROQtEclp5DGzBcdOBCpUNcfXFAH0Bz5U1XOBj4D7W1u0iCwWkWwRyS4uLm7t4ca0a+MG9uDmi4fz0sb9vLr1oNPldCgut4f0Pt0I64CzBLdXza7rp6rT/Xj/+fjOInwOAxXAS77XL1DfFwGwHxgAFIpIBJDo27+xmpYASwCysrLsDiTT6Xz3kuG8l3uIu1ZsQYArzunrdEkdQq7bw39lpDhdRqcStMtNIhIGzKVBf4TWz2T2CnCRr2ka8Knv+SrgBt/z64B31GY+M11UZHgYjy08lyEp8dy0bCM/XLHZFipqxpHyaoo9VdYfEWD+DoGdJSKFwGRgtYisabB5KlCgqqffRnoX8HMR2UL9yKfv+9qfApJFJB+4A7jbn9qM6ej694jjxZvO5+aLh/HChkKufOR9PtlnS5825cR0HDaxX2A1e7npTFT1ZeDlJra9B0xqpH0v9QFyenslMMefeozpbCLDw/jB5SP5r4zefG/5Jq574iNuvSSdmy8eRoTdUXyKkyHRx+ZsCiT7W2ZMBzBhSE9evW0KV4/py4NvuZi35N/sO2wzxzaU6/aQEBNBaveueZ9NsFhIGNNBJMZG8vD8cTw8PxNXkYcrH3mfFzcU2qJFPq6iMkb0ScBurwosCwljOpiZmWm8dvsURvXtzvdf2Mwtz35CaUXXvvFOVcl1e6w/IggsJIzpgPr3iOPZxZP4weUjWJNTxIyH1/LhzpLmD+ykij1VlB6vsZFNQWAhYUwHFR4m9Tfdfed8YiLDWfin9dzz2naqa71OlxZyub5O63TrtA44CwljOrgx/ZNYfeuFzD9vIE/+axezHltH/iGP02WFVG6RzdkULBYSxnQCcVER3DP7HJYsGs+Bo8e5+vcf8LeP9nSZTm2X20OvblEkd+ucS906yULCmE7ksrNTWXP7VCYMSeanK7dx49JsSsqqnC4r6HLdZTY9eJBYSBjTyfTuHsNfvnYeP7tmFB/klzDjobW8u+OQ02UFjder5LttNbpgsZAwphMKCxO+fsEQXrnlQnp1i+brf/mY/7cyp1Mujbr/6HHKq+ssJILEQsKYTmxEagL/uPkCbrxwCH/9aC9X//4Dth0odbqsgDoxHceIVBvZFAwWEsZ0cjGR4fz06lH87cYJHDtew7WPruPJf+3E6+0cndqfD3+1M4lgsJAwpouYkp7CmtuncsnI3tzz2g6uf2o9B0uPO12W31xFHvolxtA9JtLpUjolCwljupAe8VE8cf147vvyOWwqOMqMh95n9ZaOvfqdy11m03EEkYWEMV2MiDDvvIGsvnUKg3vFc/MzG/n+85sp64CLGtXWeckvtuGvwWQhYUwXNaRXPCu+PZlbLxnOy58UcuXD77Nhb8da1GjvZxVU13otJILIQsKYLiwyPIw7LhvB8m9NxqvK3Cc/4sE3XdTWdYz5n1w2HUfQWUgYYzhvcP2iRjPH9uPht/OY8+RH7D1c7nRZzcp1exCB4b1t+Guw+LvG9RwR2SYiXhHJatC+UEQ2NXh4RSRTRBJOay8RkYd8x0SLyHIRyReR9SIy2L8fzRjTGt1jInlgXiaPLBhH/qEyrnz4fV7ILmjX8z/lucsY1DOO2Khwp0vptPw9k8gBZgNrGzaq6jJVzVTVTGARsFtVN6mq50S7b9te4CXfYTcCR1R1OPAgcJ+ftRlj2uBLY/vx+u1TGZ2WyA9WbOHmZzZytKLa6bIalev22P0RQeZXSKjqdlXNbWa3BcBzpzeKSAbQG3jf1zQTWOp7vgKYJrYOoTGOSEuK5Zn/nsRdM0byxjY3Vz3yAdsPHnO6rFNU1daxu6Tc+iOCLBR9EvOAZxtpnw8s18/PZdOAAgBVrQVKgeTG3lBEFotItohkFxcXB6FkY0x4mHDTRcN48abzqfV6+fLjH/Lmp26nyzppV3E5dV61eySCrNmQEJG3RCSnkcfMFhw7EahQ1ZxGNs+n8fBolqouUdUsVc1KSUlpy1sYY1po7IAkVt1yIcN7d2Px37J58l8720U/xck5m+xMIqgimttBVaf78f6NBoGIjAUiVHVDg+b9wACgUEQigETgsB+fbYwJkD7dY1i+eDJ3rtjMPa/tIO9QGb+aNZroCOc6jF1uDxFhwpBe8Y7V
0BU0GxJtJSJhwFxgSiObF/DF8FgF3AB8BFwHvKPt4Z8rxhgAYqPC+cOCcQxP6cbDb+ex93A5T1w/3rHV4HKLyhjSK56oCBvJH0z+DoGdJSKFwGRgtYisabB5KlCgqrsaOXQuXwyJp4BkEckH7gDu9qc2Y0zgiQjfuzSD3y8Yx5bCUmY+uu7k+tKh5nJ7rD8iBPwd3fSyqvZX1WhV7aOqlzfY9p6qTmriuKGquuO0tkpVnaOqw1V1QhPhYoxpB64Z24/nvzWZ6lovsx9bxzs7QtuhXVFdy77PKqw/IgTsPM0Y0yZjBySx8pYLGNwrnhuXZvOn93eFrEM7z10GYHM2hYCFhDGmzfomxvLCtycz4+xUfrl6O3e/uJXq2uDP+/T5anQWEsFmIWGM8UtcVASPfuVcvnvJcJZnF3D9U+v5rDy4d2i73B6iI8IY2DMuqJ9jLCSMMQEQFiZ8/7IRPDw/k00FR7n20XXkuYPXoZ3rLmN4726Eh9mkDMFmIWGMCZiZmWksXzyJiuo6Zj/2Ie/lHgrK57iKPNZpHSIWEsaYgBo3sAcrb7mA/j3j+MZfPubP63YHtEO79HgNRccqbfhriFhIGGMCLi0plhXfnsz0s/rwv698yo9fzqEmQAsZ5dl0HCFlIWGMCYr46AieuH4837loGM/+Zx9ffeo/AZlyPNcXEul9bKGhULCQMMYETViY8MMZI3lg7lg27D3CtY+uI/9QmV/v6SryEB8VTlpSbICqNGdiIWGMCbrZ5/bn2cUT8VTWMuuxdbyf1/Yp/nN903HYcjOhYSFhjAmJ8YN6svKWC0hLiuVrf/6Yv360p03vk+cus/6IELKQMMaETP8ecay46XwuHpHC/1u5jZ/+o3Ud2iVlVRwur7bpOELIQsIYE1LdoiN4clEW35o6lL/9ey9f//PHlFbUtOhYl2/GWQuJ0LGQMMaEXHiY8KMrz+I3141h/e7DzHpsHbuKm+/QPjGyKSPVRjaFioWEMcYxc7MGsOybkzh6vIZrH13HuvySM+7vcnvoERdJikMLHXVFFhLGGEdNGNKTlTdfQGpiDF99+j8sW7+3yX1d7jIy+tjIplCykDDGOG5AzzhevOl8pqb34icv5/DzVduoPa1DW1Xr52yy6ThCyt/lS+eIyDYR8YpIVoP2hSKyqcHDKyKZvm0LRGSriGwRkddFpJevvaeIvCkieb4/e/j3oxljOpKEmEj+dMN5fPPCIfzlwz18/S8fU3r88w7tg6WVeKpqSbdO65Dy90wiB5gNrG3YqKrLVDVTVTOBRcBuVd0kIhHAw8DFqjoG2ALc4jvsbuBtVU0H3sbWuDamywkPE/7n6lHcO/scPtp5mNmPrWNPSTnweae13SMRWv6ucb1dVXOb2W0B8Jzvufge8VJ/UbE7cMC3bSaw1Pd8KXCtP7UZYzqu+RMG8rcbJ3K4vJprH1vHRzsPNxj+aiObQikUfRLzgGcBVLUGuAnYSn04jAKe8u3XR1UP+p4XAX1CUJsxpp2aPCyZlTdfQHJ8FIueWs9zHxfQp3s0SXFRTpfWpTQbEiLylojkNPKY2YJjJwIVqprjex1JfUiMA/pRf7npR6cfp/WTzzc5Ab2ILBaRbBHJLi5u+xwwxpj2bVByPC/ffAHnD+/F7pJyu4nOARHN7aCq0/14//n4ziJ8Mn3vuRNARJ7n874Ht4j0VdWDItIXaHJJK1VdAiwByMrKCtxqJsaYdqd7TCRP35DF0+t2Mzot0elyupygXW4SkTBgLp/3RwDsB0aJSIrv9aXAdt/zVcANvuc3ACuDVZsxpmOJCA9j8dRhnD+sl9OldDn+DoGdJSKFwGRgtYisabB5KlCgqrtONKjqAeB/gbUisoX6M4tf+zbfC1wqInnAdN9rY4wxDpJArj3rhKysLM3Ozna6DGOM6VBEZIOqZjW3n91xbYwxpkkWEsYYY5pkIWGMMaZJFhLGGGOaZCFhjDGmSRYSxhhjmtThh8CKSDHQ9ColZ9YLOPNSWF2LfR+fs+/iVPZ9nKozfB+DVDWluZ06fEj4Q0SyWzJOuKuw7+Nz9l2cyr6PU3Wl78MuNxljjGmShYQxxpgmdfWQWOJ0Ae2MfR+fs+/iVPZ9nKrLfB9duk/CGGPMmXX1MwljjDFn0GVDQkRmiEiuiOSLyN3NH9E5icgAEXlXRD4VkW0icpvTNbUHIhIuIp+IyD+drsVpIpIkIitEZIeIbBeRyU7X5BQR+Z7v/5McEXlWRGKcrinYumRIiEg48ChwBfXrbC8QkVHOVuWYWuD7qjoKmATc3IW/i4Zu4/MFsbq6h4HXVXUkMJYu+r2ISBpwK5ClqqOBcOpX3+zUumRIABOAfFXdparV1K+e1+ya3Z2Rqh5U1Y2+5x7qfwGkOVuVs0SkP3AV8Cena3GaiCRSv4DYUwCqWq2qR52tylERQKyIRABxwAGH6wm6rhoSaUBBg9eFdPFfjAAiMhgYB6x3thLHPQT8EPA6XUg7MAQoBv7su/z2JxGJd7ooJ6jqfuB+YB9wEChV1TecrSr4umpImNOISDfgReB2VT3mdD1OEZGrgUOqusHpWtqJCOBc4HFVHQeUA12yD09EelB/xWEI0A+IF5Hrna0q+LpqSOwHBjR43d/X1iWJSCT1AbFMVV9yuh6HXQB8SUT2UH8Z8hIR+buzJTmqEChU1RNnlyuoD42uaDqwW1WLVbUGeAk43+Gagq6rhsTHQLqIDBGRKOo7n1Y5XJMjRESov968XVUfcLoep6nqj1S1v6oOpv7vxTuq2un/tdgUVS0CCkRkhK9pGvCpgyU5aR8wSUTifP/fTKMLdOJHOF2AE1S1VkRuAdZQP0LhaVXd5nBZTrkAWARsFZFNvrYfq+qrDtZk2pfvAst8/6DaBXzd4XocoarrRWQFsJH6UYGf0AXuvLY7ro0xxjSpq15uMsYY0wIWEsYYY5pkIWGMMaZJFhLGGGOaZCFhjDGmSRYSxhhjmmQhYYwxpkkWEsYYY5r0/wGKtE2Kfdh95gAAAABJRU5ErkJggg==\n", - "text/plain": "
" - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ] - } - }, - "891e2bdcc12d4314affa4fd372ed7ade": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "89880b2c3e03469da53b8a7e9e2e930b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "8991ca296f464086aab8e12cc644430c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget011" - } - }, - "89ae5379ee8b4e2d92f116a018b9420e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "children": [ - "IPY_MODEL_c2eca071d21942c98a47aaf881130883", - "IPY_MODEL_a6a4d48baea44d659e3b2dd7e54fcd17" - ], - "layout": "IPY_MODEL_3044da8a1f89485398f1ea9d4965bc55" - } - }, - "8ae2c037e98f420486a61a8570daf106": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "8b14eeb5b78e4e4cb98441ffaeccf4fb": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget002" - } - }, - "8c168f5c8ecc4d0ba203b60193856d1c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_b9ad33908a4f4a6ba687c820c123c37a", - "style": "IPY_MODEL_094d34956035446984a6cb8a6efc22a7", - "value": "1e-07" - } - }, - "8c27b4b759354d64b25bcb3462c444ef": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DropdownModel", - "state": { - "_options_labels": [ - "AC", - "DDPG", - "PG", - "PPO", - "SAC", - "TD3", - "TRPO" - ], - "description": "Algorithms:", - "index": 0, - "layout": "IPY_MODEL_b5ac8df291f9438bacc64a6cb2805620", - "style": "IPY_MODEL_45850b0512424834a6d4c70e60892ae8" - } - }, - "8c59866961674911b2157bded443e366": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_fc69d16aa7e547b09859e2ca7dbfbde8", - "IPY_MODEL_6caef128e4df40ebb76ef90ad9a40d41" - ], - "layout": "IPY_MODEL_00663174be1342fbbd29bc99cdd6d3aa" - } - }, - "8ca1f8992583484a8a0ff2f7f46afee2": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_76c7ceb7a42e44048e694b71f27f56eb", - "style": "IPY_MODEL_97b119b9f8fc4a5f80b7f35b2fbc20dd", - "value": "Input(shape=(None, 3), name='input_layer')" - } - }, - "8d025735275c4dfdbbbf2d491e727c08": { - "model_module": "@jupyter-widgets/output", - "model_module_version": "1.0.0", - "model_name": "OutputModel", - "state": { - "layout": "IPY_MODEL_ce5b912531614dfe90ee3e20fa7ba467", - "outputs": [ - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYoAAAD8CAYAAABpcuN4AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xd81fW9+PHXO5uEkJCQhAz23hAiiANBUXEiWHGPum613ttbtdba3229HVZba/WqtUWte1WFgsUFCOICDSHskYCMnJMJWWQn5/P743yjB8g44ZycQd7Px+M88j3f+eabw3nn+5lijEEppZRqT4i/A1BKKRXYNFEopZTqkCYKpZRSHdJEoZRSqkOaKJRSSnVIE4VSSqkOaaJQSinVIU0USimlOqSJQimlVIfCPDlYRK4AHgTGANOMMdku2yYCfwf6AA7gFGNMvYhcDTwAGMAOXGeMKRORBOAtYDCwD1hojCnvLIZ+/fqZwYMHe/LPUEqpHmfDhg1lxpgkd/YVT4bwEJExOJPA34F7WxOFiIQBOcD1xphNIpIIVACCMzmMtZLDH4FaY8yD1vJhY8zDInI/0NcY8/POYsjKyjLZ2dmd7aaUUsqFiGwwxmS5s69HRU/GmB3GmF1tbDoP2GyM2WTtd8gY04IzUQgQIyKC82nDbh0zD3jJWn4JuMyT2JRSSnlHd9VRjASMiHwkIjkich+AMaYJuAPYgvVkATxvHZNijCm0louAlG6KTSmlVBd0mihEZKWIbG3jNa+Dw8KAM4BrrZ/zReQcEQnHmSimAGnAZuAXxx5snOVh7ZaJicjtIpItItmlpaWd/ROUUkp5oNPKbGPMnBM4bwGw1hhTBiAi7wOZQJV1zj3W+n8C91vHFItIqjGmUERSgZIOYloELAJnHcUJxKeUUspN3VX09BEwQUSirYrts4DtgA0YKyKtNe3nAjus5WXAjdbyjcDSbopNKaVUF3jaPHY+8CSQBCwXkVxjzPnGmHIReQz4BmcR0vvGmOXWMf8LrBWRJmA/cJN1uoeBf4rILdb6hZ7EppRSyjs8ah4bCLR5rFJKdZ3PmscqpZTyvZqGZn777+3sK6vxyfU0USilVJBZuaOY5z//ltIjDT65niYKpZQKMktz7aTH92LqwL4+uZ4mCqWUCiKHaxpZu7uUiyelEhIiPrmmJgqllAoi728ppNlhmDcp3WfX1EShlFJBZFmunRHJvRmTGuuza2qiUEqpIGGrqOPrfYeZNzkN57iqvqGJQimlgsR7m5yDbV/qw2In0EShlFJBY2munSkD4xmYGO3T62qiUEqpIJBXXM2OwirmTUrz+bU1USilVBBYtslOiMBFEzVRKKWUOoYxhqW5dk4f3o+k2EifX18ThVJKBbjcgxUcOFzLpX4odgJNFEopFfCW5tqJCAvh/PH9/XJ9TRRKKRXAmlsc/HtzIeeMTqZPVLhfYtBEoZRSAeyrvYcoO9LAvMn+KXYCTRRKKRXQlubaiY0MY9aoZL/FoIlCqSBXXd/EVlulv8NQ3aC+qYUPtxYxd3x/osJD/RaHJgqlgtyvl27j0qc+Z93eQ/4ORXnZ6p0lHGloZt5k3w7ZcSxNFEoFsSMNzby/tRCHgf96YyNlPprxTPnGsk12+vWOZMawRL/GoYlCqSD2/pZC6pscPDR/AhV1Tfz0rVwcDuPvsJQXVNU3sWpnCZdMSiXURxMUtUcThVJBbHFOAUP7xXD1tAH8+pKxfJZXxjOf7vF3WMoLPtpaRGOzw2+d7FxpolAqSB08XMu6vYdZkJmOiHDNtIFcPDGVx1bs5utvD/s7POWhZZvsDEyIZvKAeH+HoolCqWD1r402AC6b4qzoFBH+sGACA/r24r/e2MjhmkZ/hqc8UFJdzxf5ZT6foKg9miiUCkLGGBZvtDFjaCIZfb+fmyA2KpynrsnkcE0jd/9T6yuC1fLNzgYK/uxk50oThVJBKOdABd+W1bAg8/hmk+PT4/ifi8ewZlcpiz7b64folKeW5toZm9qH4cm+mxe7I5oolApCi3MK6BUeygUTUtvcft2pg7hwQn/+9NEuNuzX+opgsv9QDbkHKwLmaQI0USgVdOqbWnhvk5254/vTOzKszX1EhIcvn0h6fC/+8/WNlGt9RdBYluucF/uSAGjt1EoThVJB5pOdJVTVN3N5ZkaH+/WJCufpazIpO9LIvW9vwhitrwh0xhiWbrIzbUgCafG9/B3OdzRRKBVk3t1QQP8+UW711p2QEccDF45m1c4SnvvsWx9Epzyxo7Ca/JIjAdF3wpUmCqWCSNmRBtbsLmV+ZrrbvXVvPG0wc8f155EPd5JzoLybI1SeWLrJRliIcGE7dU/+oolCqSCyNNdOi8OwYIr7g8SJCI/8YCL946L4z9c3Ulnb1I0RqhPlcBjey7Uzc2QSCTER/g7nKB4lChG5QkS2iYhDRLJc1l8rIrkuL4eITLa2TRWRLSKSLyL/J1ZvEhFJEJEVIpJn/ezr2T9NqZPP4pwCJmbEMSKla80m43o5+1eUVNdz7ztaXxGIsveXY6+sD6jWTq08faLYCiwA1rquNMa8ZoyZbIyZDFwPfGuMybU2PwPcBoywXnOt9fcDq4wxI4BV1nullGVnURXb7FWdVmK3Z/KAeH4+dzQrthfzwhf7vBuc8tjSXBu9wkOZMybF36Ecx6NEYYzZYYzZ1cluVwNvAohIKtDHGLPOOP+keRm4zNpvHvCStfySy3qlFLA4x0Z4qHjUbPKWM4YwZ0wKf/hgB5sOVngxOuWJxmYHy7cUcu7YFGLaafLsT76oo7gSeMNaTgcKXLYVWOsAUowxhdZyERB4aVUpP2lucbBko43Zo5I9Kr8WER69YiLJsVH8+PUcKutO3voKYwxrd5fyo1c2sGyT3d/hdOjz/FIqapsCstgJoNPUJSIrgf5tbPqlMWZpJ8dOB2qNMVu7EpQxxohIu4WoInI7cDvAwIEDu3JqpYLS5/lllFY3sOAEi51cxUdH8OQ1U1j4t6/4+Tubeea6zIAYeM5bGpsdLNtk57nP9rKzqJoQgU93lzIxPY7B/WL8HV6blubaiY8O58wRSf4OpU2dPlEYY+YYY8a38eowSViu4vunCQAb4PpJz7DWARRbRVOtRVQlHcS0yBiTZYzJSkoKzBurlDctzrERHx3O2aOTvXK+zIF9uW/uKD7cVsTLX+33yjn9raK2kadX53PGI59YHQzhTz+YyJp7ZxMeKtzz9iZaAnCQxNrGZlZsL+bCCalEhAVmQ9RuKwwTkRBgIXBm6zpjTKGIVInIqcB64AbgSWvzMuBG4GHrpzuJSKmTXlV9Ex9tK+LKUwZ49Yvk1jOGsm7vYX6/fAeZA/syISPOa+f2pf2HavjH59/yz+wC6ppaOHNEP/50xSRmjuj33ZPSby8bz0/ezGXR2r3cMWuYnyM+2sodJdQ2tgRcJztXnjaPnS8iBcAMYLmIfOSyeSZw0Bhz7PCVdwLPAfnAHuADa/3DwLkikgfMsd4r1eN9sKWQhmaHV4qdXIWECH++YhKJvS
P48es5VNUHV33Fhv3l3PHqBmY/uobXvz7AhRNS+eAnZ/LKLdM5a2TSUcVpl05K48IJ/XlsxS52FFb5MerjLcu10b9PFNMGJ/g7lHZJsLenzsrKMtnZ2f4OQ6lus/BvX3GopoGVd5/VLXUJG/YfZuHf1zF3XH+eumZKQNdXtDgMH28r4tnP9pJzoIK4XuFcO30gN542mJQ+UR0ee7imkfP+spak2EiW/vj0gCjmKa9p5JTfr+TmM4bwwIVjfHptEdlgjMnqfE/tma1UQDtwqJav9x1mQWZGt32BTx2UwL3njWL5lkJeXX+gW67hqdrGZl76ch+zH13DHa/lUHakkf+9dBxf3n82980d3WmSAEiIieDhBRPYUVjFE6t2+yDqzn2wtYhmhwnoYifoxjoKpZTnFm8sQATmd2HIjhPxHzOH8tXeQzy0fAfnjU1x64vXF0qq6nnxy328tv4AlXVNZA6M5xcXjOa8cf3dHuvK1ZyxKSzMyuCZNXs4e3QKUwf5dwCIpbk2hiXFMC6tj1/j6Iw+USgVoIwxLM6xcdqwxG4fcjokRPjdvPG0OAx//LCzPrS+8ehHuzj9kU/426d7OG1YIu/ecRqL7zydCyaknlCSaPU/F48lNa4X9769idrGZi9G3DX2ijq+3neYeZPTA7q4DzRRKBWwNuwv58Dh2hMesqOrBiZG88MzBvNuTgFbCip9cs32fLmnjKdW53P+uP6svncWz1w31Wt//cdGhfPoFZP4tqyGhz/Y6ZVznoh/b7ZjDAFf7ASaKJQKWO/mFBAdEcr549rq79o97po9nMSYCH7z721+GziwqcXBr5ZuY0BCLx69YhKDEr3fSW7GsERuOWMIL3+1n8/ySr1+fncszbUzaUB8wHYCdKWJQqkAVN/Uwr83FzJ3fH+fjv0TGxXOPeeN4pt95Xywtchn13X1whffkl9yhAcvGUdUeGi3Xedn549ieHJvfvb2Zp8PZZJfcoRt9irmBcHTBGiiUKrLSqrq+eOHO6nuxn4HK7YXU13fzA98VOzk6spTBjC6fywPvb+D+qYWn167qLKeJ1bmcc7oZM7p5lFUo8JDeWzhJEqPNPC/y7Z167WOtWyTnRCBiycG1gRF7dFEoVQXfby9mL+u2cMtL2ZT19g9X6SLcwpIi4vi1KGdT3fqbaEhwv9cPJaC8jqfD0f++/d30OQw/PqScT653sSMeO6aPZzFG218uLWw8wO8wBjDslwbM4Ylkhwgrcs6o4lCqS6yV9QhAtn7D3P7K9k0NHs3WZRU17M2r4z5memEeNC6xxOnD+/HnDHJPL06n9LqBp9c88s9Zby3yc4dZw1jYGK0T64JcNfZw5mQHscDS7b65N+6uaCSfYdqmTepe5s8e5MmCqW6yFZRR0bfXjy8YCKf5ZVx1+sbaWpxeO38y1qnO/VDsZOrBy4cQ31TC4+t6P7msk0tDn5tVWD7eiym8NAQHls4iSMNzTywZEu3V+IvzbUTERrC+eN910jBU5oolOoie0UdaXG9WHjKAB68ZCwrthdzrxdHJn1nQwGTB8QzLKm3V853ooYm9eaGGYN585uDbLd37/hIL36xjzwfVGC3Z0RKLPedP4oV24t5Z0NB5wecoIraRt7bbGf26CTieoV323W8TROFUl1kr6gnva+zA9xNpw/hvrmjWJpr55de+Gt0u72KnUXVXJ4ZGMUSPzlnBHG9wvntv7d321/aRZX1PL5yt08qsDty8+lDmD4kgd+8t52C8lqvnru+qYVFa/cw84+rKTvSwLXTB3n1/N1NE4VSXdDc4qCoqp50l57Sd84azl2zh/PmNwf5jYdfqO/mFHg83ak3xUWH89M5I/lq7yFWbC/ulmv4ugK7PSEhwqNXTMJhDD97ezMOLzwhOhyGpbk2zvnzpzz0/k4yB/Xlg5+cycyRwTWPjiYKpbqguLqBFoc5bkiNe84byQ9PH8wLX+zjsRUnNuBcc4vD+aUyOoX46BOf7tTbrpk+kOHJvXno/R00NnuvLgb8V4HdngEJ0fzqkrF8tfcQL321z6NzfbXnEPOe/oKfvJlLXK9wXrt1Oi/+cBqj+wf2uE5t0UShVBfYyusAjnqiAOdc1L+6eCxXnTKAJz/J569r8rt87s/yyig70siCACl2ahUeGsIvLxrDvkO1vPzVPq+d158V2B1ZmDWAs0cn8/AHO8kvOdLl4/OKq7nlxW+4+tl1HDrSwGMLJ/Hv/zyD04f364ZofUMThVJdYK9wJoq2BukTEX4/fwKXTkrjjx/u4qUv93Xp3O/kFJAQE8GsUd6Z7tSbZo9KZubIJJ5YlcfhmkavnLO1AvvXF/unArs9IsLDl08gOiKUe97eRLObLdpKqur5xeLNnP/4Wr7+9jA/nzuaT+6dxYLMDL81c/YWTRRKdYGtou0nilahIcKfF07i3LEp/HrZNv6ZfdCt81bWNbFiezGXTkoLiAl12vL/LhpDbWMLj6/0fC6H4qrvK7DnjPVfBXZ7kmOj+N1lE9h0sIJn1uzpcN+ahmb+smI3sx5dwzsbCrjxtMF8et9s7pg1LKASoCcC8xOpVICyVdSREBNBr4j2vwDCQ0N46popnDmiH/e/u5n3Ntk7Pe/yzYU0Njt8NlLsiRiZEss10wby2voD5BVXe3Su3y8PjArsjlw0MZVLJ6XxxKo8ttqOH023ucXB6+sPMOvRNTyxKo/Zo5JZefdZ/PqScSTEBE4dkzdoolCqC+wVdaTFdz7sQmRYKIuuzyJrUAI/fSuXlZ20GFqcU8CI5N6MTw/sis6fnjuS6IhQfrd8xwmf48s9ZSwLoArsjvxm3jgSe0dw9z9zvxv3yhjDqh3FzH3iMx5YsoVBCdEsvvM0nr42s1tGug0EmiiU6gJbeV27xU7H6hURyvM3ZTE2rQ93vp7D53llbe63r6yG7P3lXD61+6Y79ZaEmAh+cs4IPt1dyupdJV0+PlArsNsTHx3BI5dPZHfxER5bsZvNBRVc/ew6bnkpmxaH4W/XTeXtH80gc6B/Z8rrbpoolHKTMcZ6onB/trnYqHBevnkaQ/vFcNvL2WTvO3zcPos32hCByyYHVmun9twwYzCDE6OdxUddHLokUCuwOzJrVDLXTh/Is5/t5dKnviCv+Ai/mTeOj386k7nj+wd8cvcGTRRKuamqrpmaxha3nyhaxUdH8Mot00mNi+KHL3xz1OxxDodhcU4BZwzvR/+44BhJNCIshAcuHEN+yRFeX3/A7eMCvQK7Iw9cOIazRyXz49nDWPOzWdwwYzDhoT3n67Pn/EuV8lBBhXNYh64mCoCk2EhevXU6fXqFc/0/1rOryFkZ/M2+wxSU1wV0JXZbzh2bwmnDEvnLyt1U1ro3L0cwVGC3JyYyjOdvOoWfnT+a2KjgGaPJWzRRKOUme0U90HYfCnekxffi9dumExEawnXPr+fbshoW59iIiQjlvHHB9Re2iPD/LhpLZV0TT6zK63T/YKrAVsfTRKGUm1o727UOCHgiBiXG8Nqt02lxGK59dh3LtxRy4YRUoiN8N92pt4xN68OVWQN4+at97C1tvwdzs
FVgq+NpolDKTbaKOiLCQkj0sI38iJRYXr55GtUNzRxpaPb7vBOeuOe8UUSFh/LQ++03lw3GCmx1NE0USrnJVuFsGuuNVi7j0+N4/dZTufe8kUwfkuCF6PwjKTaSO2cPY+WOkjab/7ZWYJ8dhBXY6nuaKJRyU1f6ULhjQkYcd509IujHAbr59CFk9O3F75ZvP27ypu8rsMf6KTrlDZoolHKTu72ye5qo8FB+ccEYdhZV89Y3349t5VqBfbL2WO4pNFEo5YaG5hZKqhtIj9cWO225cEJ/Thnclz9/vIuq+iatwD7JaKJQyg1Fla1NY/WJoi0iwv9cPJZDNY08vTpfK7BPMsHXJk8pP+hseHEFEzPiuTwzgxc+30d4qGgF9knEoycKEblCRLaJiENEslzWXysiuS4vh4hMFpFoEVkuIjut4x52OSZSRN4SkXwRWS8igz2JTSlv+m5mOw/6UPQE980dRWiIaAX2ScbTJ4qtwALg764rjTGvAa8BiMgE4F/GmFwRiQYeNcasFpEIYJWIXGCM+QC4BSg3xgwXkauAR4ArPYxPKa9o7ZUdLOMx+UtKnyj+el0mLS1GK7BPIh4lCmPMDqCzduVXA29a+9cCq63lRhHJAVp7G80DHrSW3wGeEhExxhzd3k4pP7BX1JEcG0lkmJa3d2Z2AE7lqjzji8rsK4E3jl0pIvHAJcAqa1U6cBDAGNMMVAKJbZ1QRG4XkWwRyS4tLe2WoJVyZevi8OJKnUw6TRQislJEtrbxmufGsdOBWmPM1mPWh+FMHv9njNnb1aCNMYuMMVnGmKykpKSuHq5Ul9krvNvZTqlg0mnRkzFmjgfnv4o2niaARUCeMeZxl3U2YABQYCWSOOCQB9dWyiuMMdgq6rQFj+qxuq3oSURCgIVY9RMu63+HMwn89zGHLANutJZ/AHyi9RMqEByqaaSh2UGaVmSrHsrT5rHzRaQAmAEsF5GPXDbPBA66Fi2JSAbwS2AskGM1nb3V2vw8kCgi+cDdwP2exKaUt3w/vLj2ylY9k6etnpYAS9rZtgY49Zh1BUCbTaSMMfXAFZ7Eo1R3aO1Dob2yVU+lQ3go1Qntla16Ok0USnXCXlFPTEQocb163lzJSoEmCqU6ZauoJc1LExYpFYw0USjVCXtFvY7xpHo0TRRKdUJ7ZaueThOFUh2oa2zhcE2jVmSrHk0ThVIdsFdqiyelNFEo1YHv+1BoolA9lyYKpTrwfa9sTRSq59JEoVQHbBV1hAikxEb6OxSl/EYThVIdsFXU0b9PFGGh+l9F9Vz66VeqA/aKOi12Uj2eJgqlOqB9KJTSRKFUu1ochqLKek0UqsfrsYmi7EgD1fVN/g5DBbDS6gaaWoz2oVA9nkfzUQSzpz7J58Uv95Ee34vR/WMZZb1G9+/D0KQYwrXyssfT4cWVcuqxieKSSWkkxUayq6iaXUXVfLq7lGaHc+bV8FBhWFLv7xJHayJJjYvSEUR7EO1DoZRTj00UUwf1Zeqgvt+9b2x2sKf0CLuKqtlZVM2uoiq++fYwS3Pt3+3TJyrsuyePUS4JpE+UzlNwMmp9okjVubJVD9djE8WxIsJCGJPahzGpfY5aX1nbxK5iZ+LYaT19LN1op7rhwHf7DOkXwxnD+3HmiH7MGJZIrCaOk4K9oo4+UWH6+1Q9niaKTsRFhzNtSALThiR8t84Yg72ynp2FzuSxYX8572wo4JV1+wkNEaYMiOfMEUmcObIfE9PjtLNWkLKV15HeN9rfYSjld5ooToCIkB7fi/T4XpwzJgWAhuYWcvZX8Hl+KZ/llfH4qt38ZeVu+kSFcdqwfpw5sh8zRyQxIEG/eIKFraKODK2fUEoThbdEhoUyY1giM4Yl8rPzobymkS/2lPHZ7jI+yyvlw21FAAxKjObMEf04c0QSM4Ylav1GALNX1DHd5UlSqZ5KE0U36RsTwcUT07h4YhrGGPaW1fDZ7lI+zy9jSY6NV9cdIDREmDwg/rvEMXlAPKEh2qoqEFTXN1FV36yd7ZRCE4VPiDib2w5L6s1Npw+hsdnBxgPlfJ5fxtq8Mp5YlcfjK/O4bHIaj181xd/hKpzzZIPOQ6EUaKLwi4iwEKYPTWT60ETuOW8UFbWN/OH9nbyTU8AvLxpLkg5p7Xfah0Kp72lznAAQHx3BbTOH0OIwLNtk7/wA1e0KtFe2Ut/RRBEghifHMjEjjsU5Bf4OReF8oggPFZJ669OdUpooAsiCKelss1exq6ja36H0eLbyOlLjehGijQuU0kQRSC6ZlEZYiLB4oz5V+Ju9oo60eB26QynQRBFQEntHMmtUEv/aaKPFGqBQ+Ye9oo70eO0cqRRoogg4CzIzKK5q4Ms9Zf4OpcdqanFQVFVPuj5RKAV4mChE5AoR2SYiDhHJcll/rYjkurwcIjL5mGOXichWl/cJIrJCRPKsn33pgc4enUyfqDCW5Nj8HUqPVVxVj8No01ilWnn6RLEVWACsdV1pjHnNGDPZGDMZuB741hiT27pdRBYAR4451/3AKmPMCGCV9b7HiQoP5aKJaXywtYiahmZ/h9Mj2cqdTWO1s51STh4lCmPMDmPMrk52uxp4s/WNiPQG7gZ+d8x+84CXrOWXgMs8iS2YXZ6ZTl1TCx9uLfJ3KD2SvVIThVKufFFHcSXwhsv73wJ/BmqP2S/FGFNoLRcBKT6ILSBNHdSXgQnR2vrJT1qH79DOdko5dZooRGSliGxt4zXPjWOnA7XGmK3W+8nAMGPMko6OM8YYoN1mPyJyu4hki0h2aWlpZ2EEHRFhQWY6X+45RKH1163ynYLyOhJjIogKD/V3KEoFhE4ThTFmjjFmfBuvpW6c/yqOfpqYAWSJyD7gc2CkiKyxthWLSCqA9bOkg5gWGWOyjDFZSUlJboQRfOZPSccY+NdGHdLD1+wVdVqRrZSLbit6EpEQYCEu9RPGmGeMMWnGmMHAGcBuY8wsa/My4EZr+UbAnUR00hqUGEPWoL4szinA+YClfMVWUUdanCYKpVp52jx2vogU4HxSWC4iH7lsngkcNMbsdfN0DwPnikgeMMd636MtyMwgr+QIW21V/g6lxzDGWL2yNVEo1crTVk9LjDEZxphIY0yKMeZ8l21rjDGndnDsPmPMeJf3h4wx5xhjRljFXYc9ie1kcNGEVCLCQnxeqe1wGH7+zmY+2Vns0+sGgsq6JmobW7ToSSkX2jM7gMVFhzNnTDLLcu00tTh8dt0lG228lX2Q37y3nWYfXjcQFJS3Di+uvbKVaqWJIsAtmJLBoZpG1u72Teuu+qYW/vzxLvpGh7PvUC3LtxR2ftBJpHXCIi16Uup7migC3FmjkkiIiWCxj4b0eOGLfdgr63n6mkxGpvTm6dX5OHrQAIU2nbBIqeNooghw4aEhXDopjRU7iqmsa+rWax2uaeSvq/M5Z3Qypw3vx49nD2d38RE+3t5z6irsFXVEhYeQEBPh71CUChiaKILAgsx0GpsdvN/NxUBPfpJHTWMz918wGnBWpg9KjObp1fk9pomuvaKetPheiOiERUq10kQRBCakxzE8uXe3TpO6
/1ANr67bz5WnDGBESiwAYaEh3DlrGFtslXzqozoSfyuoqNNiJ6WOoYkiCLQO6fHNvnIOHDp2iCzv+ONHuwgLCeGnc0YetX7+lAzS4qJ46pOe8VRh1852Sh1HE0WQuGxyOiLOpqvetvFAOcs3F3LbzKEk9zm6WWhEWAj/cdYwsveXs/7bk7trS0NzC6XVDdqHQqljaKIIEmnxvZgxNJHFG707pIcxhj+8v5N+vSO5febQNve58pQB9OsdydOr87123UBUaI0aq01jlTqaJoogsiAzg/2Hask5UO61c67YXszX+w7z33NG0DsyrM19osJDufXMIXyWV0buwQqvXTvQ2LVprFJt0kQRROaO70+v8FCv9alobnHw8Ic7GZoUw1WnDOhw3+tOHURcr3Ce+uTkfaoo0EShVJs0UQSR3pFhnD8uhfc22WlobvH4fG9+c5C9pTXcP3eTXfe3AAAS6ElEQVQ0YaEdfxR6R4bxw9MHs3JHMTsKT85BCu0VdYhA/zgdvkMpV5oogsyCzAyq6pv5ZEe703W45UhDM4+v3M20wQmcO9a9yQRvOm0wvSPDTtq6CntFHcmxkUSE6X8LpVzp/4ggc/rwfiTHRvKuh8VPi9bupexIIw9cNMbtzmXx0RFcd+oglm8pZE/pEY+uH4hsOry4Um3SRBFkQkOEy6aks2ZXCYdrGk/oHMVV9Ty7di8XTUxl8oD4Lh1765lDiAwL4Zk1e07o2oHMXlGv9RNKtUETRRBakJlOs8Pw3qYTmyb18ZW7aXY4uO/8UV0+tl/vSK46ZSD/2mjj4OHu6fznDw6Hwaa9spVqkyaKIDS6fx/GpvY5oSE9dhdX89Y3B7nu1EEMSow5oev/x1lDEYG/rz15nioO1TTS2OzQoiel2qCJIkgtyExnU0El+SVdqyt45IOdxESG8V9njzjha6fG9eIHUzP4Z3YBJVX1J3yeQKJ9KJRqnyaKIHXp5DRCBJZ0YZrUr/YcYtXOEu6cNZy+Hg6jfcdZw2lxGJ79zN0p0QObTScsUqpdmiiCVHJsFDNHJrEkx+bWxEIOh+EPH+wgLS6KH54+2OPrD0yM5tJJaby67sAJV6oHku+eKHScJ6WOo4kiiC3IzMBeWc+6bw91uu97m+1sLqjknvNGERUe6pXr3zlrGPXNLbzwxbdeOZ8/FZTX0TsyjD5RbQ9jolRPpokiiJ03NoXYyDCWdNKnoqG5hT99tIuxqX2YPyXda9cfkRLL3HH9efHLfVTVd+/se93NXlFHWnyUTlikVBs0UQSxqPBQLpjQn/e3FFLX2P6QHq98tZ+C8joeuHAMISHe/SL88ezhVNc388pX+716Xl+zV2rTWKXao4kiyC3IzKCmsYWPtxe1ub2ytoknP8ln5sgkzhjRz+vXH58ex+xRSTz32V5qG5u9fn5fsZVrr2yl2qOJIshNG5xAenyvdof0eHpNPlX1TfzCmge7O9x19nDKa5t4ff2BbrtGd6ptbKa8tkkrspVqhyaKIBcSIsyfks7neaXH9Wk4eLiWF7/Yx+WZGYxJ7dNtMUwdlMCMoYksWruX+ibPR7X1Ne1DoVTHNFGcBOZnpuMwsDT36CE9/vzxLkTgnvNGtnOk99x19nBKqht4Z0PXe4v7m01ntlOqQ5ooTgLDknozeUA877oM6bGloJJ/5dq55YwhpMZ1/xfgacMSmTIwnr99uoemFke3X8+b9IlCqY5pojhJXJ6Zzs6iarbbqzDG8ND7O0iIieBHs4b55Poiwl2zh1NQXnfck02gs5XXERoiJMdG+jsUpQKSJoqTxMUT0wgPFZZsLGDNrlK+2nuI/zp7OH2iwn0Ww9mjkxmT2oe/rsmnxY3e4oHCXlFH/z5Rnc7yp1RPpf8zThJ9YyKYPSqZf+Xa+cMHOxicGM010wf5NIbWp4q9pTV8sLXQp9f2hA4vrlTHNFGcRBZkZlBa3cDu4iP8fO5ov0zpOXd8f4YlxfDUJ/kYExxPFTarV7ZSqm0efZOIyBUisk1EHCKS5bL+WhHJdXk5RGSytS1CRBaJyG4R2Skil1vrI0XkLRHJF5H1IjLYk9h6otmjk+gbHU7mwHjmju/vlxhCQ4Q7Zw1nZ1E1qzyc19sXWhyGosp67UOhVAc8/ZNzK7AAWOu60hjzmjFmsjFmMnA98K0xJtfa/EugxBgzEhgLfGqtvwUoN8YMB/4CPOJhbD1OZFgo79xxGotuyPLrmEWXTk5jQEIvnlod+E8VJdX1NDuMNo1VqgMeJQpjzA5jzK5OdrsaeNPl/c3AH6zjHcaYMmv9POAla/kd4BzREdq6bFhSb/r19m/rnfDQEH501jByD1bwRX7nI9v6k13noVCqU74oxL4SeANAROKtdb8VkRwReVtEUqx16cBBAGNMM1AJJLZ1QhG5XUSyRSS7tLS0e6NXJ+QHUzNI6RPJU6vz/B1Kh1o722VoolCqXZ0mChFZKSJb23jNc+PY6UCtMWartSoMyAC+NMZkAl8Bj3Y1aGPMImNMljEmKykpqauHKx+IDAvl9pnDWLf3MF/ml3V+gJ/YyvWJQqnOdJoojDFzjDHj23gtdeP8V2E9TVgOAbXAYuv920CmtWwDBgCISBgQZ+2vgtQ10wYyKDGaH726ga22Sn+H0yZ7RR3x0eHEROqERUq1p9uKnkQkBFiIS/2EcdZsvgfMsladA2y3lpcBN1rLPwA+MYFeE6o61CsilNdunU5sVDjXPree7fYqf4d0HFtFHWk+GOJEqWDmafPY+SJSAMwAlovIRy6bZwIHjTF7jzns58CDIrIZZ4uoe6z1zwOJIpIP3A3c70lsKjBk9I3mzdtPJSYilOueX8+uomp/h3QU58x2miiU6oinrZ6WGGMyjDGRxpgUY8z5LtvWGGNObeOY/caYmcaYicaYc4wxB6z19caYK4wxw40x09pIMCpIDUiI5vXbTiU8VLj2uXXklwROsrBV1JGhfSiU6pD2zFY+MbhfDG/cdioiwtXPrmdP6RF/h0RVfRPV9c3aK1upTmiiUD4zNKk3b9w2HWMM1zy7jn1lNX6N5/vhxaP9GodSgU4ThfKp4cmxvHbrqTS1OJPFwcO1fovl+6ax+kShVEc0USifG9U/lldvmU5tUwtXLVpHQbl/koVOWKSUezRRKL8Ym9aHV2+ZTnV9E9c8u57Cyjqfx2CrqCciNMTvQ54oFeg0USi/GZ8exyu3TKe8ppGrF62juKrep9e3VdSRGh9FSIgOKaZURzRRKL+aNCCeF2+eRml1A1c/u46Sat8lC7tOWKSUWzRRKL+bOqgvL948jaLKeq59dj1lRxp8cl3tbKeUezRRqIBwyuAE/nHTKRwsr+W659ZzuKaxW6/X1OKguKpeE4VSbtBEoQLGqUMTef7GU/i2rIbrnltPRW33JYuiynocRocXV8odmihUQDl9eD8W3ZBFfskRrn/+ayrrmrrlOjadsEgpt2miUAHnrJFJ/P36qewsquLGf3xNdb33k8V3fSh0nCelOqWJQgWk2aOT+eu1U9lqq+SmF77hSEOzV8/fmihS47RXtlKd0UShAta5Y1N
48uop5B6s4OYXvqG20XvJwlZRR7/eEUSFh3rtnEqdrDRRqIB2wYRUnrhqMtn7D3P3W5twOLwzl5Wtol77UCjlJk0UKuBdPDGNX140lg+3FfGXlbu9ck5bea1WZCvlJk0UKijcfPpgrswawJOf5LM01+bRuYwx2PWJQim3aaJQQUFE+O1l45k2OIH73tlM7sGKEz5XRW0TdU0t+kShlJs0UaigEREWwjPXZZIUG8ntL2dTVHli40JpHwqlukYThQoqib0jef7GU6hpaOa2l7Opa2zp8jlaE4XOla2UezRRqKAzqn8s/3f1FLbaK7n3nU0Y07WWUN/PbKeJQil3aKJQQemcMSncP3c0yzcX8sSqvC4da6+oIyo8hL7R4d0UnVInlzB/B6DUibp95lB2Fx/h8ZV5jEiO5aKJqW4dZ690zkMhohMWKeUOfaJQQUtEeGjBeKYO6ss9b+eypaDSreNs5ToPhVJdoYlCBbXIsFD+fv1UEmMiue3lbErcmE7VVlGvFdlKdYEmChX0+vWO5Nkbsqiqb+K2l7Opb2q/JVR9UwtlRxpIi9NEoZS7NFGok8LYtD785crJbCqo5L53NrfbEqrQ6nuhRU9KuU8ThTppnD+uPz87fxTLNtl5enV+m/voPBRKdZ22elInlTtnDSOvuJpHP97N8ORY5o7vf9T21j4UOs6TUu7TJwp1UhERHr58IpMHxPPTt3LZZj+6JZStog4R6K8TFinlNk0U6qQTFR7KohumEh8dzm0vZVNS/X1LKHtFHSmxUYSH6kdfKXd59L9FRK4QkW0i4hCRLJf114pIrsvLISKTrW1Xi8gWEdksIh+KSD9rfYKIrBCRPOtnX8/+aaonS46N4tkbsiivbeI/XtnwXUsoW0UdafH6NKFUV3j6Z9VWYAGw1nWlMeY1Y8xkY8xk4HrgW2NMroiEAU8As40xE4HNwF3WYfcDq4wxI4BV1nulTtj49DgeWziJjQcqeGDxFmseijrS+0b7OzSlgopHicIYs8MYs6uT3a4G3rSWxXrFiHP8hD6A3do2D3jJWn4JuMyT2JQC51Sqd587ksUbbTzz6R7sFfX6RKFUF/mi1dOVOJMAxpgmEbkD2ALUAHnAj639UowxhdZyEZDig9hUD/CfZw9nd3E1f/zQ+TdNhrZ4UqpLOn2iEJGVIrK1jdc8N46dDtQaY7Za78OBO4ApQBrOoqdfHHuccfaWanfsaBG5XUSyRSS7tLS0szBUDyciPHrFJCZmxAHa2U6prur0icIYM8eD818FvOHyfrJ1zj0AIvJPvq+LKBaRVGNMoYikAiUdxLQIWASQlZXVtckIVI8UFR7Kszdk8cyaPUwbkuDvcJQKKt3WRlBEQoCFfF8/AWADxopIkvX+XGCHtbwMuNFavhFY2l2xqZ4ppU8UD146jtgonYdCqa7wtHnsfBEpAGYAy0XkI5fNM4GDxpi9rSuMMXbgf4G1IrIZ5xPGQ9bmh4FzRSQPmGO9V0op5WfS1WkkA01WVpbJzs72dxhKKRVURGSDMSar8z21Z7ZSSqlOaKJQSinVIU0USimlOqSJQimlVIc0USillOqQJgqllFIdCvrmsSJSCuw/wcP7AWVeDMcXNObuF2zxgsbsK8EWc0fxDjLGJLWz7ShBnyg8ISLZ7rYjDhQac/cLtnhBY/aVYIvZW/Fq0ZNSSqkOaaJQSinVoZ6eKBb5O4AToDF3v2CLFzRmXwm2mL0Sb4+uo1BKKdW5nv5EoZRSqhM9IlGIyFwR2SUi+SJyfxvbI0XkLWv7ehEZ7Psoj4pngIisFpHtIrJNRH7Sxj6zRKRSRHKt16/8EesxMe0TkS1WPMcN6StO/2fd580ikumPOK1YRrncu1wRqRKR/z5mH7/fYxH5h4iUiMhWl3UJIrJCRPKsn33bOfZGa588EbmxrX18GPOfRGSn9XtfIiLx7Rzb4WfIxzE/KCI2l9//he0c2+H3iw/jfcsl1n0iktvOsV2/x8aYk/oFhAJ7gKFABLAJGHvMPncCf7OWrwLe8nPMqUCmtRwL7G4j5lnAv/19f4+JaR/Qr4PtFwIfAAKcCqz3d8wun5EinO3KA+oe45zXJRPY6rLuj8D91vL9wCNtHJcA7LV+9rWW+/ox5vOAMGv5kbZiducz5OOYHwTudeOz0+H3i6/iPWb7n4Ffeese94QnimlAvjFmrzGmEeeMe8fO9z0PeMlafgc4R0TEhzEexRhTaIzJsZarcc4CmO6veLxoHvCycVoHxFvT3vrbOcAeY8yJdtzsNsaYtcDhY1a7fl5fAi5r49DzgRXGmMPGmHJgBTC32wJ10VbMxpiPjTHN1tt1QIYvYnFXO/fZHe58v3hdR/Fa310LOXoaao/0hESRDhx0eV/A8V+63+1jfZgrgUSfRNcJqxhsCrC+jc0zRGSTiHwgIuN8GljbDPCxiGwQkdvb2O7O78Ifjp3b3VWg3WOAFGNMobVcBKS0sU+g3muAm3E+Wbals8+Qr91lFZf9o50ivkC8z2cCxcaYvHa2d/ke94REEbREpDfwLvDfxpiqYzbn4CwqmQQ8CfzL1/G14QxjTCZwAfBjEZnp74A6IyIRwKXA221sDsR7fBTjLEsImqaLIvJLoBl4rZ1dAukz9AwwDOeUzYU4i3OCwdV0/DTR5XvcExKFDRjg8j7DWtfmPiISBsQBh3wSXTtEJBxnknjNGLP42O3GmCpjzBFr+X0gXET6+TjMY2OyWT9LgCU4H8tdufO78LULgBxjTPGxGwLxHluKW4vsrJ8lbewTcPdaRG4CLgautRLccdz4DPmMMabYGNNijHEAz7YTS0DdZ+v7awHwVnv7nMg97gmJ4htghIgMsf56vApYdsw+y4DWViE/AD5p74PsC1YZ4/PADmPMY+3s07+1HkVEpuH8XfotuYlIjIjEti7jrLzcesxuy4AbrNZPpwKVLkUo/tLuX1+Bdo9duH5ebwSWtrHPR8B5ItLXKjI5z1rnFyIyF7gPuNQYU9vOPu58hnzmmPqz+e3E4s73iy/NAXYaYwra2njC97i7a+cD4YWztc1unK0Tfmmt+w3ODy1AFM6ih3zga2Con+M9A2dxwmYg13pdCPwI+JG1z13ANpytLNYBp/k55qFWLJusuFrvs2vMAjxt/R62AFl+jjkG5xd/nMu6gLrHOJNYIdCEs/z7Fpz1Z6uAPGAlkGDtmwU853LszdZnOh/4oZ9jzsdZlt/6eW5tZZgGvN/RZ8iPMb9ifU434/zyTz02Zuv9cd8v/ojXWv9i6+fXZV+P77H2zFZKKdWhnlD0pJRSygOaKJRSSnVIE4VSSqkOaaJQSinVIU0USimlOqSJQimlVIc0USillOqQJgqllFId+v/Mln+PZVMKegAAAABJRU5ErkJggg==\n", - "text/plain": "
" - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ] - } - }, - "8d18e0fa10b94372a3edf64edb4814bc": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_8b14eeb5b78e4e4cb98441ffaeccf4fb", - "style": "IPY_MODEL_a89219097e994deb9caa9b27d8bd2866", - "value": "Adam" - } - }, - "8d80128792d44bf1a0467b7e86df0b54": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatSliderModel", - "state": { - "continuous_update": false, - "description": "Slider input:", - "layout": "IPY_MODEL_09c74a8b5d1a43828034e148d2edfbfc", - "max": 510, - "min": -490, - "readout_format": ".0f", - "style": "IPY_MODEL_e318e3ad8e11430d840261e7eb1b540e", - "value": 10 - } - }, - "8efed772f09f4ea1a1dabf91598fd49a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "8f01f6cb90754bcb8b2e64809505291d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "8f5e2c19238240c38947f1a5d8e72792": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_413fd706b68148a099ed9af1a952ec6d", - "style": "IPY_MODEL_ff0e9f4940eb4b57bd99d96059b5e194", - "value": "Action space:" - } - }, - "8f90c0a8d78442cfa05aff9b006a94d6": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget013" - } - }, - "8f9477722bb54e6185f07c7069ed73bc": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "SliderStyleModel", - "state": { - "description_width": "" - } - }, - "8fd0788ed947457d8556dc976e0eda38": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "8ff956034aa047d0a8809922cbefa856": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "align_items": "stretch", - "display": "flex", - "grid_area": "widget012", - "justify_content": "center" - } - }, - "90d52d8b63c342f087384246a76680d7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_f6f23b9ba55946d0aa626d62ba4bbdf5", - "style": "IPY_MODEL_3488ba4c7374447794395c4c315a1193", - "value": "Box(3,)" - } - }, - "91d86c9ddbfa4acdaf18e13d8adf3862": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_298f572cd2ec4a9ca5a6feafaf334040", - "style": "IPY_MODEL_de8a6e2e9cb447439055e987582fc63e", - "value": "Adam" - } - }, - "9384c24875c24e5b8be37d4c55e04820": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget008" - } - }, - "93fcd071ff834486b199ab26105f6901": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "946c2a2e7e8f4e36b0311e922520272f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": 
"DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "965b9a99694b4227a43121ae2e974290": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_bcb79998188240e99279f9cda7e004d9", - "style": "IPY_MODEL_02904d8bc2d442deb3da0b5e6e0363a9", - "value": "StochasticPolicyNetwork" - } - }, - "9689f9977c7f455282a9831bcd81905c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_59da397a7faa43c79c633dd523b6f07b", - "style": "IPY_MODEL_ec6b04eac2cd4e5a821244a954846a39", - "value": "Dense(n_units=1, tanh, in_channels='64', name='dense_2')" - } - }, - "9694a75a41e543a3b2642aee3572857d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "GridBoxModel", - "state": { - "children": [ - "IPY_MODEL_1202663af1bf4653bc967824c8574e1a", - "IPY_MODEL_e1d753092ae3420ead7a3086b9405f2a", - "IPY_MODEL_182107ee16aa4bfba497dd033e347d65", - "IPY_MODEL_6cb628f08ae2469db2ee42e38ca4de74", - "IPY_MODEL_885608d7df064c51ac0523ef9928e6b6", - "IPY_MODEL_22ff0e7129b04334b71044d77e3c9298", - "IPY_MODEL_43ca75c41e054155b5ad51e493b3b990", - "IPY_MODEL_84f7291061b34bfaaaec0711bd0cca56" - ], - "layout": "IPY_MODEL_3e9c9dcc814b47f8b2b392074c83d853" - } - }, - "96fc368f69794e5baa9433c3a31b1ec1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "9705108e9dd540fa8e02c1933e03eadd": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "97b119b9f8fc4a5f80b7f35b2fbc20dd": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "97f58376ed524fab85dde1ea5f67ee17": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_8fd0788ed947457d8556dc976e0eda38", - "style": "IPY_MODEL_c480ff00167c4205a51065548cbea855", - "value": "StochasticPolicyNetwork" - } - }, - "98824ad5eda8475394e9fb13819502a9": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "98eeb6cc7ac643ac882d54fab647de04": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget014" - } - }, - "98f2c9b34e884cada9e2eedac93e1912": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "99ac959475eb4f75b586ed6599b99113": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_7a7ebee6dcf34f36b1d55d2cb443e387", - "style": "IPY_MODEL_55abe6fb296b491ba2e2a09a492b5ae8", - "value": "Dense(n_units=64, relu, in_channels='3', name='hidden_layer1')" - } - }, - "9a247aedcd64492d9b4ddf9d76c13062": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget005" - } - }, - "9ac98c15de5a4548a99d80e8ea3004c9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": 
"HTMLModel", - "state": { - "layout": "IPY_MODEL_d9b467355fa940af8f164b0b53137582", - "style": "IPY_MODEL_351ae05c16d040dab9a578c06a78858c", - "value": "Environment Selector" - } - }, - "9b276e72efa44a7e911ee209d08859b6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "9b5f3fd4ebd341ac91227f9ded9fab19": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "9c226167c8fb4cfab3a7161a87588ae1": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "9ce0362f9fac4e45a87ebe7a085a24af": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DropdownModel", - "state": { - "_options_labels": [ - "default" - ], - "description": "state type:", - "index": 0, - "layout": "IPY_MODEL_a6379873f0434d53a6ad52553c164bdb", - "style": "IPY_MODEL_dceb338b27c742cd8733350448a2e798" - } - }, - "9dd1d4acaad44f16b1bbf0693ee9fad5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_7e128d275e3c4e88829167514cec3bc6", - "style": "IPY_MODEL_10b2a84971164564ac50d9f53bd98579", - "value": "Input(shape=(None, 3), name='input_layer')" - } - }, - "9dfcd5e4ec744ed4a0a9091bed5ed2d8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_88fc41c33c024f4eb22b13e0ea98e605", - "style": "IPY_MODEL_5caab83d7d4d4658ac739d02b56e9fd6", - "value": "render" - } - }, - "9e37b046f2d841dd9572b2284a729bf5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "description": "Progress", - "layout": "IPY_MODEL_33ecf71f75a649a285ea6a8211b5acbd", - "style": "IPY_MODEL_68fcf5652dd14e5fad220fcbe777ddbb", - "value": 18 - } - }, - "9ee876553e424052a509a2daed8da1c6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_cefe9c21582d46dc9471bee195b466b7", - "style": "IPY_MODEL_b04de6976e7d476fa4981293ded26bd6", - "value": "Dense(n_units=64, relu, in_channels='64', name='hidden_layer2')" - } - }, - "9fc5c513843a4c0fa7ae9c8b37c3b4ff": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "9fd6a74ce4e54ae38816e55d19327281": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_68d4eab6f1cf4e2fa0e229ecdce8d392", - "style": "IPY_MODEL_bb0110f57f39444db2d635a30437c85d", - "value": "amsgrad" - } - }, - "a01f34500cfc486289f3334e3cd222df": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "layout": "IPY_MODEL_7a4be7c4229640b18c29d60d30cc0e70", - "style": "IPY_MODEL_7d64c7c8f2dc4d4eb6218e55ae44bfbe", - "value": "Algorithm Selector" - } - }, - "a02320673c484c46848d7aeb6fda6e18": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" 
- } - }, - "a0371ec3949944198211395dc7848ba6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DropdownModel", - "state": { - "_options_labels": [ - "Acrobot-v1", - "CartPole-v1", - "CartPole-v0", - "MountainCar-v0", - "MountainCarContinuous-v0", - "Pendulum-v0" - ], - "description": "env name:", - "index": 5, - "layout": "IPY_MODEL_45e906bdfe7a464d848f9c972f536d31", - "style": "IPY_MODEL_ad07aedb699c4a3da0110a187e381619" - } - }, - "a038c2e1def5473484b4d9bbc5393145": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_6923c73eeac747fdbe41b2062e257a58", - "style": "IPY_MODEL_93fcd071ff834486b199ab26105f6901", - "value": "save_interval" - } - }, - "a0b2c18704554c60bfb62c5c7ea46e34": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_40747ee3248e4cbca2b22e3201e7ae52", - "style": "IPY_MODEL_7f3f44cbaac94755810c0e589d048490", - "value": "ValueNetwork" - } - }, - "a18265de326b4d399e760f9d2e5bb238": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "GridBoxModel", - "state": { - "children": [ - "IPY_MODEL_76d1b335a0134c19852090005ae135c4", - "IPY_MODEL_dd631605869640d9b8564da50fd7f14e", - "IPY_MODEL_dd3cb8ec44e2454a9fd787b26a794aa2", - "IPY_MODEL_e2d50772ac80494ea293f047efb33527", - "IPY_MODEL_3c695e15ebbd4ecfb555b0fe5221ad10", - "IPY_MODEL_f401d82a291f4cdb9d44cf62f1c48978", - "IPY_MODEL_f8eb99b0291b45dda1b391805141e984", - "IPY_MODEL_1d03aaf95d45497ca74e337a82632cee", - "IPY_MODEL_ec1d469669a2411f9a5a7a1774480576", - "IPY_MODEL_2c48650276864e79a7b82413ddd8c6fa", - "IPY_MODEL_e923a0f829b14a6b83f8ef159b7e1e67", - "IPY_MODEL_ad74a56ab452440e86d1ff508a37e2fc", - "IPY_MODEL_a8c7fbd1b9e64ebebfc11f7da9dfbfd5", - "IPY_MODEL_1eec2203d3bf49c2876604c21291cc18" - ], - "layout": "IPY_MODEL_31fe17808d8e4f7ead5964af2e4f5894" - } - }, - "a23a881ee9034a33a8d23c63c65490c7": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "a2bb633318304f79a811eb07e18da7f5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_fca1d8802f264b48aa3f7bef2b5f5b81", - "IPY_MODEL_27fbf57b093b4444b8990601eaddca26", - "IPY_MODEL_4b9184b437ac441e8c485894889e7fd4" - ], - "layout": "IPY_MODEL_1c09f9523eb2469ab864ddcd5f15f417" - } - }, - "a2bf112fa96c4e8aba14a96af2788dbc": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "a32e41356969452abe56558608109dc8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "a496bd2aabab465fbcf0022dc1acd19f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_f72ef10c1acd44608d2db2b932f2b167", - "style": "IPY_MODEL_077609b632e64492acbc9a009222e086", - "value": "ValueNetwork" - } - }, - "a517b57a04ed49bf82a0820df4bcf3b2": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "align_items": "center" - } - }, - 
"a592a51f7f3d40cf81de06ff0c9e1546": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget004" - } - }, - "a5d8986e9aad47b1ba7821ddf2850c7a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "a6379873f0434d53a6ad52553c164bdb": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "a6a4d48baea44d659e3b2dd7e54fcd17": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatTextModel", - "state": { - "description": "Manual input:", - "layout": "IPY_MODEL_f9cd83ba01bb440b9510e0ada3cfd4aa", - "step": null, - "style": "IPY_MODEL_1a3aa6da2cad4cfd9696b32125ab645b", - "value": 200 - } - }, - "a7d002d3e5454965af1d9cdb2e54e7ca": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_1b48b0f90cef4077aa20b9ee8be52e9b", - "style": "IPY_MODEL_3d9166fc4fcf43f3b930ebc7f996a5bf", - "value": "Adam" - } - }, - "a7d8b17ff9fd43298bc30e0471ade94f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "a860d9c958c646aa89ae598dc67eaa08": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "a89219097e994deb9caa9b27d8bd2866": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "a899edcecbcf49d1a1f57b48bed97865": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "a8c7fbd1b9e64ebebfc11f7da9dfbfd5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_ae3b1f8332bd40ab9ef5ec6dfd688123", - "style": "IPY_MODEL_6efa143c4b9d43aa94ed8cfe56824583", - "value": "epsilon" - } - }, - "a8e550f371f94677a29e238776be2cdb": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "aafbebe0ec5b4425acf54f0ad9f6c80f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatTextModel", - "state": { - "description": "Manual input:", - "layout": "IPY_MODEL_49c009585e524d98af99d984cf65a85b", - "step": null, - "style": "IPY_MODEL_76dec90334724f3ba9e51ba05856ff79", - "value": 100 - } - }, - "ab2e3b3dc5024debb0c00c3d27d48a8b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "ac4da45cf7d84d5fa0ea8963afbe5c12": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget001" - } - }, - "ad07aedb699c4a3da0110a187e381619": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "ad34362a6d0b43edb782d9f50d666a41": { - "model_module": "@jupyter-widgets/base", - 
"model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "ad74a56ab452440e86d1ff508a37e2fc": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_379d32750a8c4e88b3b6a8d76c3ee91b", - "style": "IPY_MODEL_b1240a01113b4044b84ce15397d29251", - "value": "0.0" - } - }, - "ae1716b3153545b394ccc02357c0cecc": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "ae3b1f8332bd40ab9ef5ec6dfd688123": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget013" - } - }, - "ae877e1e2a554a19b78fb9a12f60e5d3": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "aeecfc3325ec482ebd31ced3fc2e6839": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "af4e53453b1a434e9426fd63d61888c5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_fca98009fe56433b97f1fd16969f9a35", - "IPY_MODEL_0c0d922d9ed14199ab9b8f48b9e8ba1d", - "IPY_MODEL_5bced3d11d4a41a4b3e1c712f83b98e4" - ], - "layout": "IPY_MODEL_7a6c0819e1344119aae9ef136830ad44" - } - }, - "afeba836a14d4fb6a7c5407794848b80": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget004" - } - }, - "b04b868ce504489c82bd8818501b3ac3": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_8991ca296f464086aab8e12cc644430c", - "style": "IPY_MODEL_683e3afa65604f1b85604a79ec228a2b", - "value": "decay" - } - }, - "b04de6976e7d476fa4981293ded26bd6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "b04fb98f9bb24f24bfa2c883cb8bd2fc": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_434eec441fb94a30bcb70bec50c60d78", - "IPY_MODEL_0b1a53d081f547f8ab913cd15fe70058", - "IPY_MODEL_0af6103ca9e44bb4a44c62b84b39415f", - "IPY_MODEL_0b1a53d081f547f8ab913cd15fe70058", - "IPY_MODEL_6f0bd8ffadf44461a70b1031b3f65064" - ], - "layout": "IPY_MODEL_452324b6d7cc4cf28d456787efc23b8f" - } - }, - "b106f6f6a7f047a4a11ec9f9a23804e2": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DropdownModel", - "state": { - "_options_labels": [ - "AC", - "DDPG", - "PG", - "PPO", - "SAC", - "TD3", - "TRPO" - ], - "description": "Algorithms:", - "index": 0, - "layout": "IPY_MODEL_eb5620a9d421450a9c0b629c52d3d8ba", - "style": "IPY_MODEL_1dbbcf0744194117b3463d5ae8af00ef" - } - }, - "b1240a01113b4044b84ce15397d29251": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "b18ac7a05b7c4d58813a3e735173a3ca": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_a23a881ee9034a33a8d23c63c65490c7", - 
"style": "IPY_MODEL_014bf4270fea44b6aad4c80c7a5979b7", - "value": "Choose your environment" - } - }, - "b20aaab10e6a49138d9cf0a414321c49": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_c06d332516bf42b2b764cc7b6117aade", - "IPY_MODEL_891909eab8204a4bb78c9a468bc20112" - ], - "layout": "IPY_MODEL_ce069bda2c504adabddf4308b196d410" - } - }, - "b2ed3221465c4c7097b79683b8e5c5f0": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "align_items": "stretch", - "display": "flex", - "grid_area": "widget002", - "justify_content": "center" - } - }, - "b316a517fda34deba03047080e565a59": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget008" - } - }, - "b3a43d5f73df48299fdf24a855c623a7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "b4047180a5aa44479c358d8c12f0c5d5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_3025ff51115247eebfcfe7e2a18e414e", - "style": "IPY_MODEL_5f1fda7eb4ac4ce694f721e312e205ab", - "value": "0.0001" - } - }, - "b42c755dec514e6fa26ca97f3f0ef923": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "children": [ - "IPY_MODEL_85d35dbed0594a3a837f536309af0b59", - "IPY_MODEL_0201bde3e922471d9bb86857be61df95" - ], - "layout": "IPY_MODEL_5efb085669c2400a909ac37b5cb4e45e" - } - }, - "b4d945e45eae41ceb40de345939615ad": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_42f8297b00d240308e7403a004a1c6b4", - "style": "IPY_MODEL_f48e72d8d0b5470798d5faeed3dc8e40", - "value": "learning_rate" - } - }, - "b50b99192c944a348df722c9f5cdaa90": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "b5214d589d704727964cdb67261b2d47": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatTextModel", - "state": { - "description": "Manual input:", - "layout": "IPY_MODEL_ad34362a6d0b43edb782d9f50d666a41", - "step": null, - "style": "IPY_MODEL_dca0afd22296462f8a0e11b82566f289", - "value": 0.9 - } - }, - "b58381d8050044ee9df6c0857e3a06e4": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_template_areas": "\n \"net_label net_info\"\n \"opt_label opt_info\"\n " - } - }, - "b5ac8df291f9438bacc64a6cb2805620": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "b5bcfb7873f44eba8f8f90e018f09b6a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget012" - } - }, - "b5dd447dec9c48bc8b1bb664c9553912": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "SliderStyleModel", - "state": { - "description_width": "" - } - }, - "b64d5e345cb5482595aa92662c8f162c": { - "model_module": "@jupyter-widgets/controls", - 
"model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "b672fea2d3ac4732a92e992eaaef260e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget009" - } - }, - "b85dbc19731e4b84bb6122ea52367809": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "b92bc4065ee4473aa6e1b4051e044dee": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_629ece3b43ac4c8a8c2f83733a180978", - "style": "IPY_MODEL_38f46c0b84c84233a228758c9b306a79", - "value": "amsgrad" - } - }, - "b9743661bbd24d94969c463e1f77d6e8": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "b979276c5b584ebab1400eea707b2c39": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "b9ad33908a4f4a6ba687c820c123c37a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget014" - } - }, - "bb0110f57f39444db2d635a30437c85d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "bb04f52581bb496e9a6931ce291714c9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "bb5d38052b40427585a8ec928bdef7b5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "children": [ - "IPY_MODEL_ca41ace6e197496b8d0e375f53b92729", - "IPY_MODEL_081136f1075542a3999ce83eba68fdb5" - ], - "layout": "IPY_MODEL_4a88a99c974d47da993c8bde3faab362" - } - }, - "bcb79998188240e99279f9cda7e004d9": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "bd7afa2132154beebd89e4320ebcad26": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_b316a517fda34deba03047080e565a59", - "style": "IPY_MODEL_d21ecfeb69a54154ad0c0cadf69db4fa", - "value": "0.9" - } - }, - "bdb404863da84bdf870e550898f54848": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "children": [ - "IPY_MODEL_7f94bb571172453a920e7bd6d7a9050f", - "IPY_MODEL_e2ecea0189544c41a0ca172743cf16a1" - ], - "layout": "IPY_MODEL_62a5e4f04f554e6580d63bb32f36b3be" - } - }, - "be4d4fbbc53d4705963f9b343aff399f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "bebb739676c74aacb396889de39592e6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "bf3a856d0c5f4d47abf596f528a2d947": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - 
"bf620c54949846b49135585c61101b19": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "bf7a578fb6204ce694235598a0f00ea2": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget001" - } - }, - "bfa16a837ebd4ec795d5aa0a893d5298": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "bfdfc9d77a654743a9ebdfc08ab167da": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "bffd75c7e90346ebb8214c6fe0ce2ab4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_d9864398064d4a4ea93f2f985bf70bb5", - "style": "IPY_MODEL_835ef9a1125846679a65d679afb62013", - "value": "./model/AC-Pendulum-v0" - } - }, - "c06d332516bf42b2b764cc7b6117aade": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_d466ecd3ea76446fa72d90acf2d7c5ba", - "style": "IPY_MODEL_c726054bb59f40aab21ea2d4485ce77e", - "value": "Learning curve" - } - }, - "c083a4b8f36848ed9f277f423ae18084": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_8f90c0a8d78442cfa05aff9b006a94d6", - "style": "IPY_MODEL_d220d182817c44408e2df2a364760e43", - "value": "epsilon" - } - }, - "c096b60cb96b4aa68be8728e6feb2366": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "c12ffb6b4533460bbdfc7404ff89d807": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_842ea79123034275adec1df392a4846d", - "style": "IPY_MODEL_0cabfd585d5d4421a05805698bc1c8ad", - "value": "beta_2" - } - }, - "c2160078393b421d9f3a4343f37307e2": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_38484ea61c3449a1b809d8526ead582d", - "style": "IPY_MODEL_7ff9e3e9f09b40d398b6c898e5ee9653", - "value": "False" - } - }, - "c234ed19a3204e1d9452d6686e014efb": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "c2aa94c81efc4f3f826adcb847fbdb89": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "description": "Progress", - "layout": "IPY_MODEL_5b87473fb6cc473a89998a285388f4da", - "max": 10, - "style": "IPY_MODEL_6f525160109d45299758550c08196bd9", - "value": 10 - } - }, - "c2eca071d21942c98a47aaf881130883": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatSliderModel", - "state": { - "continuous_update": false, - "description": "Slider input:", - "layout": "IPY_MODEL_ae1716b3153545b394ccc02357c0cecc", - "max": 400, - "readout_format": ".0f", - "style": "IPY_MODEL_8f9477722bb54e6185f07c7069ed73bc", - "value": 200 - } - }, - "c3233dc4967548279ff54f73e91e27a0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": 
"DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "c34d5f3024f24951b4f478bca62dd7c7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_40c1e5560977460b86028ca09ee94662", - "style": "IPY_MODEL_e00c049b23f34848a62ee225b63ec0b7", - "value": "amsgrad" - } - }, - "c35cf89d5b4c42c886c9c83fdc93c8e6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "c3c09aa3ecea45eda2b142c857c5d7c5": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget011" - } - }, - "c3d17e5a575344968f8b84a174b26ba9": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget004" - } - }, - "c3ef353dd171416da3dc55582107fa67": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_266e10703ed340a78b259c7d3ddc8836", - "IPY_MODEL_64750206fa3a48119aa85e75f5ff2de8" - ], - "layout": "IPY_MODEL_a517b57a04ed49bf82a0820df4bcf3b2" - } - }, - "c4662ffdadef4c7d82aba5ddca1fbfda": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "c480ff00167c4205a51065548cbea855": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "c60dc42b295c47138b76205df9071217": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "children": [ - "IPY_MODEL_432a3a690b36409192aa3ee4dd5fedf8", - "IPY_MODEL_08f5684d8e194916ac04ed379e2bf022" - ], - "layout": "IPY_MODEL_48392da1f6c64d3fad859465d0d0095b" - } - }, - "c726054bb59f40aab21ea2d4485ce77e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "c75a9640bb26465785ca214520007519": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "c7a9f23b553e43a78d5c0ced37526327": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "c90e24c07a754360836c2acc6f3a7e22": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_9ac98c15de5a4548a99d80e8ea3004c9", - "IPY_MODEL_f10d3787733a4ece9120c3641017114b" - ], - "layout": "IPY_MODEL_6187b72c80f64272a6c33c90cb582c4c" - } - }, - "ca41ace6e197496b8d0e375f53b92729": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_516cc7132ca94faab3023ffcd1ed4cd4", - "IPY_MODEL_329f804132904f47a73d10b3ccba4b4d", - "IPY_MODEL_a0371ec3949944198211395dc7848ba6" - ], - "layout": "IPY_MODEL_9c226167c8fb4cfab3a7161a87588ae1" - } - }, - "ce069bda2c504adabddf4308b196d410": { - "model_module": 
"@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "ce5b0166c393435a840819472b761b8c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "ce5b912531614dfe90ee3e20fa7ba467": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "height": "250px", - "width": "350px" - } - }, - "ce777268358f48608666122680449e3c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "ce96b4fa2ae14c6f8f4af830f9442000": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "cefe9c21582d46dc9471bee195b466b7": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "cf3de6c59d124068af4aef37293c26e2": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget003" - } - }, - "cfb6b6bcedad4f61893206fb1eb28385": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "GridBoxModel", - "state": { - "children": [ - "IPY_MODEL_283080f17fcf4286b2e6e059bcda3370", - "IPY_MODEL_04461564de8c45d6af4c6055f7b4c17f", - "IPY_MODEL_9dfcd5e4ec744ed4a0a9091bed5ed2d8", - "IPY_MODEL_334d1a726d2347db82e42df5760618b3", - "IPY_MODEL_6c751fa2c2aa415ea57d3c9b0e11b22d", - "IPY_MODEL_43730220bf8e489cae588fcf375d08cf", - "IPY_MODEL_a038c2e1def5473484b4d9bbc5393145", - "IPY_MODEL_7af9623e94c64555b01efa581f338e60", - "IPY_MODEL_389174ab87e24a48a23ad5f81a32da61", - "IPY_MODEL_4ee9cbafcaad44de9f9e7453ee765047", - "IPY_MODEL_3a3916bde1e849aeae0e2701258ddc34", - "IPY_MODEL_88aafdf648784ac7954ce933431f9a3a" - ], - "layout": "IPY_MODEL_19b0d8173d9141e0a0db8d0b2110c98c" - } - }, - "cfc4c351d9da4a2bbe36bb1288f74e82": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "d02f0cd6f8f94156ac86605286a6ee78": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "align_items": "center" - } - }, - "d16d026731104f40ad77f1c7b8f77bf6": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget009" - } - }, - "d1b7a611e0ea474991c6034e7e7a9e98": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "d1ba6fbf21674589b3f585f6e0f9638b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_79611f87c64c431794f17eccbbd60f38", - "style": "IPY_MODEL_a2bf112fa96c4e8aba14a96af2788dbc", - "value": "0.0" - } - }, - "d20f2266d6fc44df988c78b63b202a81": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget014" - } - }, - "d21ecfeb69a54154ad0c0cadf69db4fa": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - 
"state": { - "description_width": "" - } - }, - "d220d182817c44408e2df2a364760e43": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "d2ba7f491ec94768be174bba323aff6d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget009" - } - }, - "d34c7789bb974de1a36ef3cc45737b52": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget010" - } - }, - "d439f3de7aeb4f059483dedb8aca131a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "d466ecd3ea76446fa72d90acf2d7c5ba": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "d48e8464b37c4f0099d42e59369dbab6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_b672fea2d3ac4732a92e992eaaef260e", - "style": "IPY_MODEL_f834d6547a954a478d9e755653e4f5a1", - "value": "beta_2" - } - }, - "d4c91e304ca34f88a4c959ecc4683678": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "d5a3129aed5d47718c478523d35359ad": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "d6a04d9b77b54ae89af21fa5551e205e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_214c87e57eb641bb89644c9f465889ca", - "style": "IPY_MODEL_7a807eea55d14bae96d792b1e475adcb", - "value": "save_interval" - } - }, - "d6ddb43e654a421ead72beacfae7145e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_2dab24721ba34bd789afa55d1479464b", - "style": "IPY_MODEL_0a21d0f35913467a9b266a75d2af8db0", - "value": "Supported algorithms are shown below" - } - }, - "d915d378018e4bd085cf4a0a935e2aaa": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_d16d026731104f40ad77f1c7b8f77bf6", - "style": "IPY_MODEL_7aba7921241e41af9a32cbe042699485", - "value": "test_episodes" - } - }, - "d91d58d65e864faa90c9cc7bfd2959b0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatTextModel", - "state": { - "description": "Manual input:", - "layout": "IPY_MODEL_182c5797541f4476bb02c95a710f1bca", - "step": null, - "style": "IPY_MODEL_6dc0399123f94dd1831a2b2cfb6c3078", - "value": 10 - } - }, - "d932e823fc31419d9d00cb89736f8a5f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_a0b2c18704554c60bfb62c5c7ea46e34", - "IPY_MODEL_f80bd1f80d99494595e88c9fc5f055d2" - ], - "layout": "IPY_MODEL_f3645a595f8c4e1f82d71ed6f97e7dd6" - } - }, - "d9864398064d4a4ea93f2f985bf70bb5": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", 
- "model_name": "LayoutModel", - "state": { - "grid_area": "widget004" - } - }, - "d99dceda8ae6483f8df298525d45be82": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "border": "solid" - } - }, - "d9b467355fa940af8f164b0b53137582": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "da04b8e9a4464f7ea141e41904fa3b0f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "da5536ed85464ee5a97c44660b985348": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "GridBoxModel", - "state": { - "children": [ - "IPY_MODEL_469da089cf804101a4cbc570975a1aed", - "IPY_MODEL_dc4226a0086147b29ba43f099ccad551", - "IPY_MODEL_7df23ef826fb4c568071b0667bafcd3b", - "IPY_MODEL_f5879b9ebaab4df9b53830cef8c25e62" - ], - "layout": "IPY_MODEL_de78a9211dba417182808fc83d0ebbf8" - } - }, - "da5694fd870b41e79f41ebc7d7b8db5e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget002" - } - }, - "dc12042cc1bb40c98a69bef90468797a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "dc4226a0086147b29ba43f099ccad551": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "children": [ - "IPY_MODEL_f5c5c8e022aa4f239006a40e2ac8b990", - "IPY_MODEL_b5214d589d704727964cdb67261b2d47" - ], - "layout": "IPY_MODEL_b2ed3221465c4c7097b79683b8e5c5f0" - } - }, - "dca0afd22296462f8a0e11b82566f289": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "dceb338b27c742cd8733350448a2e798": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "dd3cb8ec44e2454a9fd787b26a794aa2": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_39c394badc7246fdb12032649f71a1b6", - "style": "IPY_MODEL_ce96b4fa2ae14c6f8f4af830f9442000", - "value": "learning_rate" - } - }, - "dd51349042bc4341b061da02df9f8be2": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget003" - } - }, - "dd631605869640d9b8564da50fd7f14e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_2dece16eb4994e5082a1cbeeea4163d0", - "style": "IPY_MODEL_d439f3de7aeb4f059483dedb8aca131a", - "value": "Adam" - } - }, - "ddaf2150308c4af2876f9f423d0b803d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "ddba268ea0db428898643ae0f9a259a3": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "align_items": "stretch", - "display": "flex", - "grid_area": "widget006", - 
"justify_content": "center" - } - }, - "de78a9211dba417182808fc83d0ebbf8": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_template_areas": "\"widget001 widget002\"\n\"widget003 widget004\"", - "grid_template_columns": "repeat(2, 1fr)", - "grid_template_rows": "repeat(2, 1fr)" - } - }, - "de8a6e2e9cb447439055e987582fc63e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "def02ee29d9a44b19a1fd20f8a4be1a0": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "df228d4f3b644bb081011555c9f36485": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget010" - } - }, - "df84370f89e949518569f900854e2510": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "e00c049b23f34848a62ee225b63ec0b7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "e09e0ff65ebf454b80a965aaa0f61d32": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_5526ed8ea7b4499eadc0bbb165d7bbc4", - "IPY_MODEL_d932e823fc31419d9d00cb89736f8a5f" - ], - "layout": "IPY_MODEL_54927f9f2cde4416bf0e3b782fbd5118" - } - }, - "e0a1f12f4f0e4e31adc281b1fe6dee11": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "e14f5611fa9242af879512207669394f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "e1d753092ae3420ead7a3086b9405f2a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_e9add15a402448ee8f55d0a65f2d460c", - "style": "IPY_MODEL_ddaf2150308c4af2876f9f423d0b803d", - "value": "Pendulum-v0" - } - }, - "e1f03c622ff64b3bb4e59fc54e7898a6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_b5bcfb7873f44eba8f8f90e018f09b6a", - "style": "IPY_MODEL_c3233dc4967548279ff54f73e91e27a0", - "value": "0.0" - } - }, - "e1f175e02edf40f39585c485ec11cbff": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "height": "250px", - "width": "350px" - } - }, - "e210fdbc53d246a2ae55da6a3689745b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "e224793bc1524f0c91ce3d7ef0e98f8e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_4d8d22e583c64179817ad9c514bd4490", - "style": "IPY_MODEL_f91418c725364297a60aa4983253ae07", - "value": "0.0002" - } - }, - "e255dc6e7af7487e8a2729f670bffd8a": { - "model_module": 
"@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget008" - } - }, - "e27f2db74f874171acd272cf848ddc80": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget011" - } - }, - "e2d50772ac80494ea293f047efb33527": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_a592a51f7f3d40cf81de06ff0c9e1546", - "style": "IPY_MODEL_d5a3129aed5d47718c478523d35359ad", - "value": "0.0002" - } - }, - "e2ecea0189544c41a0ca172743cf16a1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatTextModel", - "state": { - "description": "Manual input:", - "layout": "IPY_MODEL_1adbcde168d04bcdaed1c410feae74ac", - "step": null, - "style": "IPY_MODEL_4e6414fcd34b454e94c982f7233402a7", - "value": 100 - } - }, - "e318e3ad8e11430d840261e7eb1b540e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "SliderStyleModel", - "state": { - "description_width": "" - } - }, - "e35bce23c28f4af3b0d4dce2266ed2e8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "e3adb676dd9b48a6bd4e895ac644b653": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "e41fe8ee1bf04764abe02428057a540a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "e4665eee9731436a839eaebea246f048": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_0d95601931d94f8cac55349f5886038a", - "style": "IPY_MODEL_ee84c4f73d284618aa3241fcb758da9f", - "value": "Box(1,)" - } - }, - "e467ed3285684035a013df63ebb6b422": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "e527873f8829445dbdb49e0710132c63": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "e53d3b32848c4872a5e1254a2ed080f1": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "e57f860aafca4775a03574208f4944b7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_fd1693effce0420c8f4bbbebde0ef7c3", - "IPY_MODEL_4bbe95c5e6b34795a2058cc7bf7416f9", - "IPY_MODEL_9ee876553e424052a509a2daed8da1c6", - "IPY_MODEL_07b040199f664673b2cb1b45c5a5af34" - ], - "layout": "IPY_MODEL_41425cf814dc44c49ac901aeec4c668f" - } - }, - "e62a214128d34799be2e1cc2cdb98b8c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "e6958eae462d43d8bdb9c6227deddcc7": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - 
"e6c798aa900740009741c67dfccb0d92": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_df228d4f3b644bb081011555c9f36485", - "style": "IPY_MODEL_63d55c74d6ed493abe58361958b23046", - "value": "0.999" - } - }, - "e8260cb1f55049a49bdaf024528d43c4": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget001" - } - }, - "e835260b70924edd959ac38cbdaa50d3": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget013" - } - }, - "e8b87d816ccb409083b0c522ef0bd9dd": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget004" - } - }, - "e904337542fd4e5d8187b9b9190b7522": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatTextModel", - "state": { - "description": "Manual input:", - "layout": "IPY_MODEL_b50b99192c944a348df722c9f5cdaa90", - "step": null, - "style": "IPY_MODEL_831ed45407f74193acc07dacada162a9", - "value": 50 - } - }, - "e923a0f829b14a6b83f8ef159b7e1e67": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_e27f2db74f874171acd272cf848ddc80", - "style": "IPY_MODEL_b3a43d5f73df48299fdf24a855c623a7", - "value": "decay" - } - }, - "e944a76d793541058cf5f32563847fb3": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "e9794b57be6c4c0e981a017d3fa82a36": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "e9add15a402448ee8f55d0a65f2d460c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "e9d6d91ceda64a63b9fe358e90337820": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "layout": "IPY_MODEL_fe785154b75c4badbab0d946f05802cf", - "style": "IPY_MODEL_78f5897896d144fe839fafd65e76816e", - "value": "Environment Information" - } - }, - "eb54eb7b3c674e67b10610ce2aaf309a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_547d2113aae04e20ba41d30deb33ec5f", - "style": "IPY_MODEL_7b48f1fae96e40519787018ed628b99b", - "value": "1e-07" - } - }, - "eb5620a9d421450a9c0b629c52d3d8ba": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "eb5fdb48aa1d483fa9acf05a229ef307": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "ebff747fea3f4cf2abb9efcd9f998ddb": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "ec1d469669a2411f9a5a7a1774480576": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_4749f46df2c4438e874ed6912a4d7ef1", - "style": "IPY_MODEL_7cc3bf6293494425b70569d1eca3af03", - "value": "beta_2" - } - 
}, - "ec6b04eac2cd4e5a821244a954846a39": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "ecc6da99cf7944f5a5a6cfd1f0516aa6": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "ed746bfae28741e9ae1d450dd1394423": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "align_items": "center" - } - }, - "ee84c4f73d284618aa3241fcb758da9f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "eef437964b4e4fa29ea42afc6b9a69ce": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_8f01f6cb90754bcb8b2e64809505291d", - "style": "IPY_MODEL_ce777268358f48608666122680449e3c", - "value": "Box(1,)" - } - }, - "ef95b43fb5cd436cb6f737f2defc8e38": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_747e88ebfefc4efb95f60f63e725dcc1", - "style": "IPY_MODEL_078c44ca72d24661bbeb9921196ddace", - "value": "The action space is continuous." - } - }, - "f10d3787733a4ece9120c3641017114b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "children": [ - "IPY_MODEL_26036b1a064245a6a1cef60ec7d39376", - "IPY_MODEL_af4e53453b1a434e9426fd63d61888c5" - ], - "layout": "IPY_MODEL_70c300868924433094e74b74d260a4a2" - } - }, - "f1888922c93c435f8bac11033ae325e9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_455c6fed537d48b188edef0200ab0fb1", - "IPY_MODEL_2e65a763e5db40ca8969c36950c0d9bd", - "IPY_MODEL_8c27b4b759354d64b25bcb3462c444ef" - ], - "layout": "IPY_MODEL_74d03d1491d4451d879384ab357f33a9" - } - }, - "f1985e262a7d401ea97c903091713789": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "f2612900bd944258af3be77cacc7a46b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "f29a7f4ff2a74bbf8d6485cbfb086152": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "layout": "IPY_MODEL_bf3a856d0c5f4d47abf596f528a2d947", - "style": "IPY_MODEL_10685777c5384041b62b4ce3aa26bf6e", - "value": "Environment Selector" - } - }, - "f29ba87ee02f4fc38760b98a32e20581": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "f2db93e6094b47d0bfce3821b33d707a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "f2ffc80dd5074916b1a69e9de91149f9": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget001" - } - }, - "f3645a595f8c4e1f82d71ed6f97e7dd6": { - "model_module": "@jupyter-widgets/base", - 
"model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "align_content": "center", - "align_items": "center", - "border": "dotted" - } - }, - "f401d82a291f4cdb9d44cf62f1c48978": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_86e357397076415ba3ac239b26a8bc8f", - "style": "IPY_MODEL_faea715cb8894b8ca444f80d17c07e12", - "value": "False" - } - }, - "f48e72d8d0b5470798d5faeed3dc8e40": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "f4d0297192f5464bac7ab02b3dabed2c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "f5879b9ebaab4df9b53830cef8c25e62": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_afeba836a14d4fb6a7c5407794848b80", - "style": "IPY_MODEL_9fc5c513843a4c0fa7ae9c8b37c3b4ff", - "value": "./model/AC-Pendulum-v0" - } - }, - "f5c5c8e022aa4f239006a40e2ac8b990": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatSliderModel", - "state": { - "continuous_update": false, - "description": "Slider input:", - "layout": "IPY_MODEL_e527873f8829445dbdb49e0710132c63", - "max": 1.8, - "readout_format": ".1f", - "step": 0.1, - "style": "IPY_MODEL_2b0d8567d4aa4e53a5837284b315cc58", - "value": 0.9 - } - }, - "f63f7fca433e4d32ad6252416895155b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "f6f23b9ba55946d0aa626d62ba4bbdf5": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "f72ef10c1acd44608d2db2b932f2b167": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "f74c2a3b52114bbc80056d7097731209": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatSliderModel", - "state": { - "continuous_update": false, - "description": "Slider input:", - "layout": "IPY_MODEL_80d9bf94c37c49708820ccb5a2aa8f8b", - "max": 200, - "readout_format": ".0f", - "style": "IPY_MODEL_731d299fb9dd45c1a41a5d4df4f41f94", - "value": 100 - } - }, - "f77e6fff86704faea6c01e0262104c70": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_0a575cd57803474a9574922e07d3d316", - "IPY_MODEL_8d025735275c4dfdbbbf2d491e727c08" - ], - "layout": "IPY_MODEL_5b759ba6fc8f451c97ee15467069a6ed" - } - }, - "f80bd1f80d99494595e88c9fc5f055d2": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_9dd1d4acaad44f16b1bbf0693ee9fad5", - "IPY_MODEL_1cb88e139a0642afb2f3c958dff539aa", - "IPY_MODEL_2e6e71650a6a48878fce055c8e563538", - "IPY_MODEL_fe6a7094bdd649e6b5270a701e12253a" - ], - "layout": "IPY_MODEL_bfa16a837ebd4ec795d5aa0a893d5298" - } - }, - "f834d6547a954a478d9e755653e4f5a1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": 
"" - } - }, - "f8a20f2f4b8b4c03857bcd85bf96b136": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "f8eb99b0291b45dda1b391805141e984": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_5afcc13ec3d94e6299bd06fb87ed7885", - "style": "IPY_MODEL_d4c91e304ca34f88a4c959ecc4683678", - "value": "beta_1" - } - }, - "f91418c725364297a60aa4983253ae07": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "f9a9a8529629435f926e28c9e2ff6d21": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "f9b983bef3a14087b6d1f966b8b041ed": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "f9cd83ba01bb440b9510e0ada3cfd4aa": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "fa3877a284354fd08f33d320314b6765": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_57f97e2ebec542f8b297365916bf571e", - "style": "IPY_MODEL_454021a337164bae8a96f5a5a7749b78", - "value": "decay" - } - }, - "faea715cb8894b8ca444f80d17c07e12": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "fb06877af7ae451baefc12dfd27d9348": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "border": "dotted", - "grid_template_areas": "\"widget001 widget002\"\n\"widget003 widget004\"\n\"widget005 widget006\"\n\"widget007 widget008\"\n\"widget009 widget010\"\n\"widget011 widget012\"\n\"widget013 widget014\"", - "grid_template_columns": "repeat(2, 1fr)", - "grid_template_rows": "repeat(7, 1fr)" - } - }, - "fb19638e8a38465f844aaf06c6378b29": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "fbd450c8b01f4ab9ae7ea1caa129bd66": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "align_content": "center", - "align_items": "center", - "border": "dotted" - } - }, - "fc20a5f1e967425c840960c1948f00c8": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget011" - } - }, - "fc69d16aa7e547b09859e2ca7dbfbde8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_97f58376ed524fab85dde1ea5f67ee17", - "IPY_MODEL_0dc03ae5db46484a85272ce1899e53c0" - ], - "layout": "IPY_MODEL_81f34a95028440608c8a5a307cd7ee9b" - } - }, - "fc6a2f4827034d64b99a15547f3d9f43": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_cf3de6c59d124068af4aef37293c26e2", - "style": "IPY_MODEL_1222c8a942134f83aa262d9b321ee413", - "value": "render" - } 
- }, - "fc83fd9df36b4c0fa6ee544fe520cde7": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_area": "widget007" - } - }, - "fca1d8802f264b48aa3f7bef2b5f5b81": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "layout": "IPY_MODEL_c096b60cb96b4aa68be8728e6feb2366", - "style": "IPY_MODEL_7532b84aea3a4f4290efa4b0369e846a", - "value": "Algorithm Parameters" - } - }, - "fca98009fe56433b97f1fd16969f9a35": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_43f9446733e242f1977bbe394ddc479b", - "style": "IPY_MODEL_660e8c250f974ff685128c61b3d57fe3", - "value": "Environment settings" - } - }, - "fd1693effce0420c8f4bbbebde0ef7c3": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_4fa0861e758940d9b9c2775304ebb140", - "style": "IPY_MODEL_661fd55473c0431aa9dffd6876d1d559", - "value": "Input(shape=(None, 3), name='input_layer')" - } - }, - "fe547223f16e423fa8493d4c6ae577ba": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "VBoxModel", - "state": { - "children": [ - "IPY_MODEL_f77e6fff86704faea6c01e0262104c70", - "IPY_MODEL_9e37b046f2d841dd9572b2284a729bf5" - ], - "layout": "IPY_MODEL_48a97cf1c4a44a858c3376f962060321" - } - }, - "fe6a7094bdd649e6b5270a701e12253a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "LabelModel", - "state": { - "layout": "IPY_MODEL_a860d9c958c646aa89ae598dc67eaa08", - "style": "IPY_MODEL_85514e8a938240e7b2df7c2a8ad6b6e8", - "value": "Dense(n_units=1, No Activation, in_channels='64', name='dense_1')" - } - }, - "fe785154b75c4badbab0d946f05802cf": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": {} - }, - "ff06931e66b544389c8f409734b472e3": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "grid_template_areas": "\n \"net_label net_info\"\n \"opt_label opt_info\"\n " - } - }, - "ff0e9f4940eb4b57bd99d96059b5e194": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "description_width": "" - } - }, - "ffce2434eb114cd1a7f6961dd71ff755": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "align_items": "center" - } - } - }, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Interactive Hyper-parameter Configuration\n", + "This is a use case provided by RLzoo to support an interactive hyper-parameter configuration process. It is built with *ipywidgets* package, so make sure you have the package installed:\n", + "\n", + "```! pip3 install ipywidgets==7.5.1```\n", + "\n", + "You just need to **run** each cell (Shift+Enter) and **select** the sliders or dropdown lists to configure the hyper-parameters for the learning process, for whichever algorithm and environment supported in RLzoo. \n", + "\n", + "It follows four steps:\n", + "1. Environment Configuration\n", + "2. 
Environment Information Display and Algorithm Configuration\n", + "3. Algorithm Parameters Display and Learning Parameters Configuration\n", + "4. Launch Learning with Visualization \n", + "\n", + "Tips:\n", + "To stop the learning process and start a new one, you need to restart the kernel (this always works) or interrupt the kernel (this does not always work). \n", + "\n", + "Have fun!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "1. Environment Configuration\n", + "-----------------------------\n", + "Run an environment selector and select an environment you like.\n", + "\n", + "Tips: no need to rerun after selection; go directly to the next cell.\n", + "\"\"\"\n", + "\n", + "from rlzoo.interactive.common import *\n", + "from rlzoo.interactive.components import *\n", + "from rlzoo.algorithms import *\n", + "from rlzoo.common.env_wrappers import build_env, close_env\n", + "env_sel = EnvironmentSelector()\n", + "display(env_sel)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "2. Environment Information Display and Algorithm Configuration\n", + "--------------------------------------------------------------\n", + "Run this code to create the environment instance.\n", + "\n", + "Tips: rerun this cell every time you select a new environment with the cell above, \\\n", + "because this cell builds the environment.\n", + "\"\"\"\n", + "\n", + "try:\n", + " close_env(env) # close the previous environment\n", + "except:\n", + " pass\n", + "env = build_env(**env_sel.value)\n", + "print('Environment created!')\n", + "display(EnvInfoViewer(env))\n", + "\n", + "# run an algorithm selector and select an RL algorithm\n", + "alog_sel = AlgorithmSelector(env)\n", + "display(alog_sel)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "3. Algorithm Parameters Display and Learning Parameters Configuration\n", + "----------------------------------------------------------------------\n", + "Load the default parameters of the selected algorithm for this environment and display them, \\\n", + "then select the learning parameters.\n", + "\n", + "Tips: rerun after you create a different algorithm or environment.\n", + "\"\"\"\n", + "\n", + "EnvType, AlgName = env_sel.value['env_type'], alog_sel.value\n", + "alg_params, learn_params = call_default_params(env, EnvType, AlgName)\n", + "print('Default parameters loaded!')\n", + "\n", + "# see the networks and optimizers, and adjust other parameters\n", + "algiv = AlgoInfoViewer(alog_sel, alg_params, learn_params)\n", + "display(algiv)\n", + "\n", + "# run this to generate the algorithm instance with the algorithm parameter settings above\n", + "alg_params = algiv.alg_params\n", + "alg = eval(AlgName+'(**alg_params)')\n", + "print('Algorithm instance created!')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "4. 
Launch Learning with Visualization \n", + "---------------------------------------\n", + "Run the cell to train the algorithm with the configurations above.\n", + "\"\"\"\n", + "\n", + "learn_params = algiv.learn_params\n", + "om = OutputMonitor(learn_params, smooth_factor=algiv.smooth_factor)\n", + "display(om)\n", + "with om.print_out:\n", + " alg.learn(env=env, plot_func=om.plot_func, **learn_params)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# whenever you leave the page, remember to close the environment\n", + "close_env(env)\n", + "print('Environment closed')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.10" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": { + "00663174be1342fbbd29bc99cdd6d3aa": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "00ead8f3c1ea4020930b11c3bde3dd48": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_543b543dd8bb4fcb9dc9f4a16ac4bd6e", + "style": "IPY_MODEL_f63f7fca433e4d32ad6252416895155b", + "value": "max_steps" + } + }, + "0106cced0fe54fbb9a3a261b11941cce": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_593926166a704759992244f9732d0f8d", + "style": "IPY_MODEL_4a1bc5d7007848cb89e08eff1479ddf8", + "value": "Learn Parameters" + } + }, + "012eeb7c3bab46d9baa05356cd4ff0f6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "0143906a10054b1594675c3674642d83": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "014bf4270fea44b6aad4c80c7a5979b7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "019cd764de374cb382236f88a5d204af": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "01cece59d650454b9cf09d03e85a6a10": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_81d1f55272ef4977b06be173bdd59b8c", + "style": "IPY_MODEL_e62a214128d34799be2e1cc2cdb98b8c", + "value": "Network information:" + } + }, + "0201bde3e922471d9bb86857be61df95": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatTextModel", + "state": { + "description": "Manual input:", + "layout": "IPY_MODEL_8178676fb5e441ec92464938695643a8", + "step": null, + "style": "IPY_MODEL_0143906a10054b1594675c3674642d83", + "value": 24 + } + }, + "02904d8bc2d442deb3da0b5e6e0363a9": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "04461564de8c45d6af4c6055f7b4c17f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "RadioButtonsModel", + "state": { + "_options_labels": [ + "train", + "test" + ], + "index": 0, + "layout": "IPY_MODEL_520b2e1af36547edbae1352d82099fda", + "style": "IPY_MODEL_2c9a721e0f084f8f8f437a5d4d875e3f" + } + }, + "04abdee05e514880bb74dfe64bca36ff": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_9384c24875c24e5b8be37d4c55e04820", + "style": "IPY_MODEL_bebb739676c74aacb396889de39592e6", + "value": "0.9" + } + }, + "0580852520e142a89d7b42c50bfef6a1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "066c122ea5f64991b7347279a79e8061": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "SliderStyleModel", + "state": { + "description_width": "" + } + }, + "06d5c4249f3d404793fe2defc8eb0051": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_5af1a3e17ac64264905701b109c013e2", + "IPY_MODEL_691c17934ca3435eb36a2d84d15ecdf7" + ], + "layout": "IPY_MODEL_7d163d682d5744d6ac7be041fb66c158" + } + }, + "070bc781a91449c6a7fb227586d347e6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_494deb5503e842b78948ed2c14e28e3e", + "style": "IPY_MODEL_2d1f0d1b81ee4e1f85ae2f777dcd0db9", + "value": "beta_2" + } + }, + "07377f1ec0e74dd4897d484914a44f99": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "077609b632e64492acbc9a009222e086": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "078c44ca72d24661bbeb9921196ddace": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "07b040199f664673b2cb1b45c5a5af34": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_47513573787c4ab1bfafee8a38450355", + "style": "IPY_MODEL_0abdf6aca8e44b2f96d9e278ce60a016", + "value": "Dense(n_units=1, tanh, in_channels='64', name='dense_2')" + } + }, + "07b0e1377c414989a1d7ce1bf1da1c4e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_0bd6e0b89391415fa6fc2c7f7fbf3bd3", + "style": "IPY_MODEL_da04b8e9a4464f7ea141e41904fa3b0f", + "value": "0.999" + } + }, + "080346c4f0ae457182549d3c68aaaaea": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "layout": "IPY_MODEL_23d66d78336541bf8b3f863dc3e554d4", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": "Training... 
| Algorithm: AC | Environment: Pendulum-v0\nEpisode: 0/10 | Episode Reward: -1730.5698 | Running Time: 1.6412\n[TL] [*] Saving TL weights into ./model/AC-Pendulum-v0/model_actor\n[TL] [*] Saved\n[TL] [*] Saving TL weights into ./model/AC-Pendulum-v0/model_critic\n[TL] [*] Saved\nEpisode: 1/10 | Episode Reward: -1738.3357 | Running Time: 3.3340\nEpisode: 2/10 | Episode Reward: -1744.1233 | Running Time: 4.9608\nEpisode: 3/10 | Episode Reward: -1854.8743 | Running Time: 6.5518\nEpisode: 4/10 | Episode Reward: -1678.3274 | Running Time: 8.1632\nEpisode: 5/10 | Episode Reward: -1833.9245 | Running Time: 9.7298\nEpisode: 6/10 | Episode Reward: -1805.7677 | Running Time: 11.3628\nEpisode: 7/10 | Episode Reward: -1822.8594 | Running Time: 12.9569\nEpisode: 8/10 | Episode Reward: -1409.2653 | Running Time: 14.5867\nEpisode: 9/10 | Episode Reward: -1752.4231 | Running Time: 16.2574\n[TL] [*] Saving TL weights into ./model/AC-Pendulum-v0/model_actor\n[TL] [*] Saved\n[TL] [*] Saving TL weights into ./model/AC-Pendulum-v0/model_critic\n[TL] [*] Saved\n" + } + ] + } + }, + "081136f1075542a3999ce83eba68fdb5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_1ef9aa26484548e99e94bb3d8aae3cce", + "IPY_MODEL_45847f561d154d999d93f170524e2bdf", + "IPY_MODEL_9ce0362f9fac4e45a87ebe7a085a24af" + ], + "layout": "IPY_MODEL_ab2e3b3dc5024debb0c00c3d27d48a8b" + } + }, + "08f5684d8e194916ac04ed379e2bf022": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatTextModel", + "state": { + "description": "Manual input:", + "layout": "IPY_MODEL_019cd764de374cb382236f88a5d204af", + "step": null, + "style": "IPY_MODEL_c4662ffdadef4c7d82aba5ddca1fbfda", + "value": 0.9 + } + }, + "093fd11986764d78ad5dcf1429a496c9": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "layout": "IPY_MODEL_0b19536128d34993b9a3354b2a05e2dc", + "msg_id": "8f19b370e7f641249abb608a3c84b213", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": "Training... 
| Algorithm: AC | Environment: Pendulum-v0\nEpisode: 0/100 | Episode Reward: -1730.5698 | Running Time: 1.6647\n[TL] [*] Saving TL weights into ./model/AC-Pendulum-v0/model_actor\n[TL] [*] Saved\n[TL] [*] Saving TL weights into ./model/AC-Pendulum-v0/model_critic\n[TL] [*] Saved\nEpisode: 1/100 | Episode Reward: -1738.3357 | Running Time: 3.3156\nEpisode: 2/100 | Episode Reward: -1744.1233 | Running Time: 4.9611\nEpisode: 3/100 | Episode Reward: -1854.8743 | Running Time: 6.5757\nEpisode: 4/100 | Episode Reward: -1678.3274 | Running Time: 8.2029\nEpisode: 5/100 | Episode Reward: -1833.9245 | Running Time: 9.7915\nEpisode: 6/100 | Episode Reward: -1805.7677 | Running Time: 11.3793\nEpisode: 7/100 | Episode Reward: -1822.8594 | Running Time: 12.9897\nEpisode: 8/100 | Episode Reward: -1409.2653 | Running Time: 14.5941\nEpisode: 9/100 | Episode Reward: -1752.4231 | Running Time: 16.2545\nEpisode: 10/100 | Episode Reward: -1595.9812 | Running Time: 17.8784\nEpisode: 11/100 | Episode Reward: -1750.5559 | Running Time: 19.4594\nEpisode: 12/100 | Episode Reward: -1780.9001 | Running Time: 21.0874\nEpisode: 13/100 | Episode Reward: -1645.4007 | Running Time: 22.7261\nEpisode: 14/100 | Episode Reward: -1684.3441 | Running Time: 24.3810\nEpisode: 15/100 | Episode Reward: -1764.5074 | Running Time: 25.9965\nEpisode: 16/100 | Episode Reward: -1688.8096 | Running Time: 27.6359\nEpisode: 17/100 | Episode Reward: -1582.7040 | Running Time: 29.2999\n" + } + ] + } + }, + "094d34956035446984a6cb8a6efc22a7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "09c74a8b5d1a43828034e148d2edfbfc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "09eb8f946d00416dace2ee661ad55fbd": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget001" + } + }, + "0a179f0e33df4522b9286a546e181b60": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_f2ffc80dd5074916b1a69e9de91149f9", + "style": "IPY_MODEL_8784dbc322c7455aaef2b352bae2f205", + "value": "name" + } + }, + "0a21d0f35913467a9b266a75d2af8db0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "0a575cd57803474a9574922e07d3d316": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_5532430429754176a10d6ab53ba4b6d9", + "style": "IPY_MODEL_e35bce23c28f4af3b0d4dce2266ed2e8", + "value": "Learning curve" + } + }, + "0abdf6aca8e44b2f96d9e278ce60a016": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "0af6103ca9e44bb4a44c62b84b39415f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_4112e1653afc41a795418fc54377af6c", + "IPY_MODEL_10d4f1af65b0492594efc926d9976e59" + ], + "layout": "IPY_MODEL_1e197bc7d05a4518969ee7d3f97f211c" + } + }, + "0b081708649d446ab37f522f5a019e19": { + "model_module": "@jupyter-widgets/base", + "model_module_version": 
"1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "0b19536128d34993b9a3354b2a05e2dc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "border": "1px solid black", + "height": "300px", + "overflow": "scroll", + "width": "60%" + } + }, + "0b1a53d081f547f8ab913cd15fe70058": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "layout": "IPY_MODEL_d99dceda8ae6483f8df298525d45be82" + } + }, + "0bd6e0b89391415fa6fc2c7f7fbf3bd3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget010" + } + }, + "0c0d922d9ed14199ab9b8f48b9e8ba1d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "IntTextModel", + "state": { + "description": "multi envs:", + "layout": "IPY_MODEL_f2db93e6094b47d0bfce3821b33d707a", + "step": 1, + "style": "IPY_MODEL_454f999c2ca44e7b86263594806f6191", + "value": 1 + } + }, + "0c64eb2046714b6c885261124bcb09f8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_167de1c7956c4ede9fa6a584404bc568", + "style": "IPY_MODEL_5469680f21e44e77b1092b8354d9aee0", + "value": "Dense(n_units=1, No Activation, in_channels='64', name='dense_1')" + } + }, + "0cabfd585d5d4421a05805698bc1c8ad": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "0d95601931d94f8cac55349f5886038a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "0dc03ae5db46484a85272ce1899e53c0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_8ca1f8992583484a8a0ff2f7f46afee2", + "IPY_MODEL_99ac959475eb4f75b586ed6599b99113", + "IPY_MODEL_2ab7b4c8b49a4163b5521127d8329674", + "IPY_MODEL_9689f9977c7f455282a9831bcd81905c" + ], + "layout": "IPY_MODEL_eb5fdb48aa1d483fa9acf05a229ef307" + } + }, + "0e74af77352a4b40b0f9e5163d92a836": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget007" + } + }, + "0eb34e6e2b07401dae9a2bfa4f1d49df": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_47ed36f4da904759bb9adcf9f1f1685b", + "style": "IPY_MODEL_7dc1333733194435934e6ca098ede1ad", + "value": "False" + } + }, + "0ec6f6b7c7c84bb4b54e92db8342ce85": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DropdownModel", + "state": { + "_options_labels": [ + "atari", + "classic_control", + "box2d", + "mujoco", + "robotics", + "dm_control", + "rlbench" + ], + "description": "env type:", + "index": 1, + "layout": "IPY_MODEL_bfdfc9d77a654743a9ebdfc08ab167da", + "style": "IPY_MODEL_ce5b0166c393435a840819472b761b8c" + } + }, + "0fb529fd883648edb15d72a94813126e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_a01f34500cfc486289f3334e3cd222df", + "IPY_MODEL_d6ddb43e654a421ead72beacfae7145e", + 
"IPY_MODEL_b106f6f6a7f047a4a11ec9f9a23804e2" + ], + "layout": "IPY_MODEL_ffce2434eb114cd1a7f6961dd71ff755" + } + }, + "1022056a831a477e91366a9deda960de": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_ecc6da99cf7944f5a5a6cfd1f0516aa6", + "style": "IPY_MODEL_ebff747fea3f4cf2abb9efcd9f998ddb", + "value": "Dense(n_units=64, relu, in_channels='3', name='hidden_layer1')" + } + }, + "10685777c5384041b62b4ce3aa26bf6e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "10b2a84971164564ac50d9f53bd98579": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "10d4f1af65b0492594efc926d9976e59": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "GridBoxModel", + "state": { + "children": [ + "IPY_MODEL_50ce374ed2fc4f2ebc2c156c16ba4f38", + "IPY_MODEL_11337137fc3b4e19b06d48508495d2ce", + "IPY_MODEL_fc6a2f4827034d64b99a15547f3d9f43", + "IPY_MODEL_1846a28797b64a7a8266f33f497550d4", + "IPY_MODEL_00ead8f3c1ea4020930b11c3bde3dd48", + "IPY_MODEL_89ae5379ee8b4e2d92f116a018b9420e", + "IPY_MODEL_d6a04d9b77b54ae89af21fa5551e205e", + "IPY_MODEL_b42c755dec514e6fa26ca97f3f0ef923", + "IPY_MODEL_d915d378018e4bd085cf4a0a935e2aaa", + "IPY_MODEL_162bfef08113403d82be4e50b362acb9", + "IPY_MODEL_30d87705b48648089aaa078817a89da2", + "IPY_MODEL_bdb404863da84bdf870e550898f54848" + ], + "layout": "IPY_MODEL_81a50427a5384feeaaee374a19ad5931" + } + }, + "11337137fc3b4e19b06d48508495d2ce": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "RadioButtonsModel", + "state": { + "_options_labels": [ + "train", + "test" + ], + "index": 0, + "layout": "IPY_MODEL_da5694fd870b41e79f41ebc7d7b8db5e", + "style": "IPY_MODEL_3a389cd3e9254722a3bef185d92c9ac4" + } + }, + "1202663af1bf4653bc967824c8574e1a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_98f2c9b34e884cada9e2eedac93e1912", + "style": "IPY_MODEL_67a79ba4cbf84418967857e237a5a1be", + "value": "Environment name:" + } + }, + "1222c8a942134f83aa262d9b321ee413": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "125f5c3fd35e49339e558a30a39a9f8a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_fc83fd9df36b4c0fa6ee544fe520cde7", + "style": "IPY_MODEL_3f7607f9884f482498bb28a91df5ab02", + "value": "beta_1" + } + }, + "12a0f20f2ecd423889594f36b15647f1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "SliderStyleModel", + "state": { + "description_width": "" + } + }, + "12e50eba7f3e4e9f888416f46172b60f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "145001c5826a41cd989997ea61244ca1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "14a01344ad1b48b3becfe74fa709a0c6": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_40848c8562dc485fa88be8cf89c7a5e2", + "style": "IPY_MODEL_a7d8b17ff9fd43298bc30e0471ade94f", + "value": "Input(shape=(None, 3), name='input_layer')" + } + }, + "1537ab75a9dd4f429ffb3812c485116f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "GridBoxModel", + "state": { + "children": [ + "IPY_MODEL_0a179f0e33df4522b9286a546e181b60", + "IPY_MODEL_91d86c9ddbfa4acdaf18e13d8adf3862", + "IPY_MODEL_b4d945e45eae41ceb40de345939615ad", + "IPY_MODEL_715b10d741354c8db506fb8ba945a074", + "IPY_MODEL_b92bc4065ee4473aa6e1b4051e044dee", + "IPY_MODEL_c2160078393b421d9f3a4343f37307e2", + "IPY_MODEL_125f5c3fd35e49339e558a30a39a9f8a", + "IPY_MODEL_04abdee05e514880bb74dfe64bca36ff", + "IPY_MODEL_070bc781a91449c6a7fb227586d347e6", + "IPY_MODEL_2bb83c7012914171b4b76d559b92034c", + "IPY_MODEL_fa3877a284354fd08f33d320314b6765", + "IPY_MODEL_5446746816dd4edf8dffb29995d15715", + "IPY_MODEL_3755df840c214a33941879b316489adf", + "IPY_MODEL_776cdbcecc004924a856eb45ec0a5699" + ], + "layout": "IPY_MODEL_4b5dc49fbc1743c8abe6cded3f9ed703" + } + }, + "159f94f25de5436aafa6fec3c88e3356": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_0106cced0fe54fbb9a3a261b11941cce", + "IPY_MODEL_cfb6b6bcedad4f61893206fb1eb28385" + ], + "layout": "IPY_MODEL_89880b2c3e03469da53b8a7e9e2e930b" + } + }, + "15ae64b32d794189a34bba91e2f7a15b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "162bfef08113403d82be4e50b362acb9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_474e0de897334eb69236cc05ae69f164", + "IPY_MODEL_aafbebe0ec5b4425acf54f0ad9f6c80f" + ], + "layout": "IPY_MODEL_66bc7fd58a2743a0960e9dd5df378998" + } + }, + "167816e5912f4ea18d96b6e468d82ae7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "167de1c7956c4ede9fa6a584404bc568": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "1764805129704afcb7c170e877b81788": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_fe547223f16e423fa8493d4c6ae577ba", + "IPY_MODEL_093fd11986764d78ad5dcf1429a496c9" + ], + "layout": "IPY_MODEL_2bea049f9ec74da0bcf2a7eeffce8720" + } + }, + "182107ee16aa4bfba497dd033e347d65": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_e6958eae462d43d8bdb9c6227deddcc7", + "style": "IPY_MODEL_f9a9a8529629435f926e28c9e2ff6d21", + "value": "Observation space:" + } + }, + "1826b147229c4a96b6603cc13978a090": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "182c5797541f4476bb02c95a710f1bca": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + 
"1846a28797b64a7a8266f33f497550d4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "CheckboxModel", + "state": { + "disabled": false, + "indent": false, + "layout": "IPY_MODEL_e8b87d816ccb409083b0c522ef0bd9dd", + "style": "IPY_MODEL_167816e5912f4ea18d96b6e468d82ae7", + "value": false + } + }, + "18470dca56a94ced8388c8eec402515f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "GridBoxModel", + "state": { + "children": [ + "IPY_MODEL_294896e2ec5f413e9e23d9ec81e6bbbf", + "IPY_MODEL_8c59866961674911b2157bded443e366", + "IPY_MODEL_261d86e673814c6b9c6ed7b921861867", + "IPY_MODEL_6d5b0a5b26874cfd874c4a0bdf307eff" + ], + "layout": "IPY_MODEL_b58381d8050044ee9df6c0857e3a06e4" + } + }, + "18a7121ba72e42af9a496a39fb8c6f6a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "18ea002dd43344a5864f8a8651ceeaeb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget007" + } + }, + "19b0d8173d9141e0a0db8d0b2110c98c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_template_areas": "\"widget001 widget002\"\n\"widget003 widget004\"\n\"widget005 widget006\"\n\"widget007 widget008\"\n\"widget009 widget010\"\n\"widget011 widget012\"", + "grid_template_columns": "repeat(2, 1fr)", + "grid_template_rows": "repeat(6, 1fr)" + } + }, + "1a3aa6da2cad4cfd9696b32125ab645b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "1adbcde168d04bcdaed1c410feae74ac": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "1b48b0f90cef4077aa20b9ee8be52e9b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget002" + } + }, + "1c09f9523eb2469ab864ddcd5f15f417": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "1c75d4a07143476588ce4826116ea8ee": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "1cb1d8e98bef410e85502ad2edb46c45": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_template_areas": "\"widget001 widget002\"\n\"widget003 widget004\"", + "grid_template_columns": "repeat(2, 1fr)", + "grid_template_rows": "repeat(2, 1fr)" + } + }, + "1cb88e139a0642afb2f3c958dff539aa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_9705108e9dd540fa8e02c1933e03eadd", + "style": "IPY_MODEL_2126fce329534e2b98f039a35e99344a", + "value": "Dense(n_units=64, relu, in_channels='3', name='hidden_layer1')" + } + }, + "1d03aaf95d45497ca74e337a82632cee": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_2ee89b46bdc146f9b9f4f48f5874a349", + "style": "IPY_MODEL_e0a1f12f4f0e4e31adc281b1fe6dee11", + "value": "0.9" + } + }, + 
"1db128fafd984258b040b5295b477f0d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "1dbbcf0744194117b3463d5ae8af00ef": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "1e197bc7d05a4518969ee7d3f97f211c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "1e327c8e4b844c2fbb017a5544fa678e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget004" + } + }, + "1e6d0c80ceaa4e58846e9f554371b363": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "1eec2203d3bf49c2876604c21291cc18": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_d20f2266d6fc44df988c78b63b202a81", + "style": "IPY_MODEL_5228a7a8160f421f846e2d7d06c9d159", + "value": "1e-07" + } + }, + "1ef9aa26484548e99e94bb3d8aae3cce": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_5ac9e6a121a3488ea93f85f5589429a0", + "style": "IPY_MODEL_698f9329e3754e7482dc32690ba58f4a", + "value": "Environment settings" + } + }, + "1f0e424278554da08fbb15138e571a62": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "1f37fdacb85646a1b5ff9a2b1d6ab38a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "2126fce329534e2b98f039a35e99344a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "214c87e57eb641bb89644c9f465889ca": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget007" + } + }, + "2205db5769754bf0948d81dde160eab4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "22126658c9d54cfab48b63029798c705": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "SliderStyleModel", + "state": { + "description_width": "" + } + }, + "223fd915d3a5472aabdde3b5dd47a5f1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "22ff0e7129b04334b71044d77e3c9298": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "BoxModel", + "state": { + "children": [ + "IPY_MODEL_eef437964b4e4fa29ea42afc6b9a69ce" + ], + "layout": "IPY_MODEL_759c11789beb46f798f3b48c4cf88577" + } + }, + "23424247d797485dba0788eb6b7614aa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "235af533ab1c41a6b82350c6f3a88426": { + "model_module": "@jupyter-widgets/base", + 
"model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget010" + } + }, + "23d66d78336541bf8b3f863dc3e554d4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "border": "1px solid black", + "height": "300px", + "overflow": "scroll", + "width": "60%" + } + }, + "24f450d31f2d47a68aa2c58be28170fb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_09eb8f946d00416dace2ee661ad55fbd", + "style": "IPY_MODEL_f8a20f2f4b8b4c03857bcd85bf96b136", + "value": "name" + } + }, + "254576dd293543d384c9e5620c3db225": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget005" + } + }, + "26036b1a064245a6a1cef60ec7d39376": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_b18ac7a05b7c4d58813a3e735173a3ca", + "IPY_MODEL_0ec6f6b7c7c84bb4b54e92db8342ce85", + "IPY_MODEL_467644544d33439284f04fe2a9883182" + ], + "layout": "IPY_MODEL_f9b983bef3a14087b6d1f966b8b041ed" + } + }, + "261d86e673814c6b9c6ed7b921861867": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_e9794b57be6c4c0e981a017d3fa82a36", + "style": "IPY_MODEL_946c2a2e7e8f4e36b0311e922520272f", + "value": "Optimizer information:" + } + }, + "266e10703ed340a78b259c7d3ddc8836": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_6d6739242111448eaf1e80a8962f1aac", + "style": "IPY_MODEL_bf620c54949846b49135585c61101b19", + "value": "Environment Information" + } + }, + "26c0e699dae643b58817819a3d134e6f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_f29a7f4ff2a74bbf8d6485cbfb086152", + "IPY_MODEL_bb5d38052b40427585a8ec928bdef7b5" + ], + "layout": "IPY_MODEL_d02f0cd6f8f94156ac86605286a6ee78" + } + }, + "27fbf57b093b4444b8990601eaddca26": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "GridBoxModel", + "state": { + "children": [ + "IPY_MODEL_01cece59d650454b9cf09d03e85a6a10", + "IPY_MODEL_e09e0ff65ebf454b80a965aaa0f61d32", + "IPY_MODEL_83c18b3b4c374f70947e47230ffe4f82", + "IPY_MODEL_06d5c4249f3d404793fe2defc8eb0051" + ], + "layout": "IPY_MODEL_ff06931e66b544389c8f409734b472e3" + } + }, + "283080f17fcf4286b2e6e059bcda3370": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_4b23820dcff647a6ad204c7c4a596248", + "style": "IPY_MODEL_1826b147229c4a96b6603cc13978a090", + "value": "mode" + } + }, + "28ad6172b7f34ba9923847d24dd555b3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "294896e2ec5f413e9e23d9ec81e6bbbf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_98824ad5eda8475394e9fb13819502a9", + "style": "IPY_MODEL_79953b3e59c048548c96bb197d46a7ea", + "value": "Network information:" + } + }, + "2982ccca674f4bfc839557e06cde9993": { 
+ "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_14a01344ad1b48b3becfe74fa709a0c6", + "IPY_MODEL_1022056a831a477e91366a9deda960de", + "IPY_MODEL_814eef7fa97a4fa2b4c5f1ed1b3728f3", + "IPY_MODEL_0c64eb2046714b6c885261124bcb09f8" + ], + "layout": "IPY_MODEL_223fd915d3a5472aabdde3b5dd47a5f1" + } + }, + "298f572cd2ec4a9ca5a6feafaf334040": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget002" + } + }, + "29a207365d934cc4a402ed72a19194ca": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "border": "solid" + } + }, + "2a9fb576ef6145abaf95398bf620cd8d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "align_items": "stretch", + "display": "flex", + "grid_area": "widget010", + "justify_content": "center" + } + }, + "2ab7b4c8b49a4163b5521127d8329674": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_7d70e416e925499f93e5837aabc6afc2", + "style": "IPY_MODEL_69268529fca5425e9f11506c968490e7", + "value": "Dense(n_units=64, relu, in_channels='64', name='hidden_layer2')" + } + }, + "2b0d8567d4aa4e53a5837284b315cc58": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "SliderStyleModel", + "state": { + "description_width": "" + } + }, + "2bb83c7012914171b4b76d559b92034c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_235af533ab1c41a6b82350c6f3a88426", + "style": "IPY_MODEL_75c167ca66774581880b2500d5176a36", + "value": "0.999" + } + }, + "2bea049f9ec74da0bcf2a7eeffce8720": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "2c0353597c114ba184977dac607510c3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget006" + } + }, + "2c48650276864e79a7b82413ddd8c6fa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_d34c7789bb974de1a36ef3cc45737b52", + "style": "IPY_MODEL_626ae439ee1f4ce4895764fb66f9c6d3", + "value": "0.999" + } + }, + "2c9a721e0f084f8f8f437a5d4d875e3f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "2d1f0d1b81ee4e1f85ae2f777dcd0db9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "2da2537f2e444e16ad634693e684af58": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "BoxModel", + "state": { + "children": [ + "IPY_MODEL_e4665eee9731436a839eaebea246f048" + ], + "layout": "IPY_MODEL_e944a76d793541058cf5f32563847fb3" + } + }, + "2dab24721ba34bd789afa55d1479464b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "2dece16eb4994e5082a1cbeeea4163d0": { + 
"model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget002" + } + }, + "2e65a763e5db40ca8969c36950c0d9bd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_1c75d4a07143476588ce4826116ea8ee", + "style": "IPY_MODEL_15ae64b32d794189a34bba91e2f7a15b", + "value": "Supported algorithms are shown below" + } + }, + "2e6e71650a6a48878fce055c8e563538": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_f1985e262a7d401ea97c903091713789", + "style": "IPY_MODEL_2205db5769754bf0948d81dde160eab4", + "value": "Dense(n_units=64, relu, in_channels='64', name='hidden_layer2')" + } + }, + "2e8b3025623248e2a92daa5a7750997f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "2ece943ff83c48e8b69e0b2396b6064c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_aeecfc3325ec482ebd31ced3fc2e6839", + "style": "IPY_MODEL_b979276c5b584ebab1400eea707b2c39", + "value": "Pendulum-v0" + } + }, + "2ee89b46bdc146f9b9f4f48f5874a349": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget008" + } + }, + "2f93a27048a44beda22771c8249fba0d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "border": "dotted", + "grid_template_areas": "\"widget001 widget002\"\n\"widget003 widget004\"\n\"widget005 widget006\"\n\"widget007 widget008\"\n\"widget009 widget010\"\n\"widget011 widget012\"\n\"widget013 widget014\"", + "grid_template_columns": "repeat(2, 1fr)", + "grid_template_rows": "repeat(7, 1fr)" + } + }, + "3025ff51115247eebfcfe7e2a18e414e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget004" + } + }, + "3044da8a1f89485398f1ea9d4965bc55": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "align_items": "stretch", + "display": "flex", + "grid_area": "widget006", + "justify_content": "center" + } + }, + "304f4dcdb42b4bca91451ccfe7eba639": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "30d87705b48648089aaa078817a89da2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_c3c09aa3ecea45eda2b142c857c5d7c5", + "style": "IPY_MODEL_e3adb676dd9b48a6bd4e895ac644b653", + "value": "train_episodes" + } + }, + "31276a604cf14bcd82297907c46c17f8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatSliderModel", + "state": { + "continuous_update": false, + "description": "Slider input:", + "layout": "IPY_MODEL_a899edcecbcf49d1a1f57b48bed97865", + "max": 400, + "readout_format": ".0f", + "style": "IPY_MODEL_4711e3b757ae4ba08ece2d994aa46c2a", + "value": 200 + } + }, + "31f3ea5f445a4342b1a4db664f61eb93": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": 
"1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "31fe17808d8e4f7ead5964af2e4f5894": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "border": "dotted", + "grid_template_areas": "\"widget001 widget002\"\n\"widget003 widget004\"\n\"widget005 widget006\"\n\"widget007 widget008\"\n\"widget009 widget010\"\n\"widget011 widget012\"\n\"widget013 widget014\"", + "grid_template_columns": "repeat(2, 1fr)", + "grid_template_rows": "repeat(7, 1fr)" + } + }, + "329f804132904f47a73d10b3ccba4b4d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DropdownModel", + "state": { + "_options_labels": [ + "atari", + "classic_control", + "box2d", + "mujoco", + "robotics", + "dm_control", + "rlbench" + ], + "description": "env type:", + "index": 1, + "layout": "IPY_MODEL_8ae2c037e98f420486a61a8570daf106", + "style": "IPY_MODEL_df84370f89e949518569f900854e2510" + } + }, + "334d1a726d2347db82e42df5760618b3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "CheckboxModel", + "state": { + "disabled": false, + "indent": false, + "layout": "IPY_MODEL_c3d17e5a575344968f8b84a174b26ba9", + "style": "IPY_MODEL_31f3ea5f445a4342b1a4db664f61eb93", + "value": false + } + }, + "33ecf71f75a649a285ea6a8211b5acbd": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "3488ba4c7374447794395c4c315a1193": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "351ae05c16d040dab9a578c06a78858c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "35525c0fbffa497eb43f7d5bd081bb0b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "SliderStyleModel", + "state": { + "description_width": "initial" + } + }, + "3556d6d1fe0c4e558b21b70b8c7b9395": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget003" + } + }, + "3755df840c214a33941879b316489adf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_e835260b70924edd959ac38cbdaa50d3", + "style": "IPY_MODEL_7aa2babe24dc4fab84bfbd511f0b5e98", + "value": "epsilon" + } + }, + "379d32750a8c4e88b3b6a8d76c3ee91b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget012" + } + }, + "383cf0cb101341d4bdfb65604a24a4d5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget003" + } + }, + "38484ea61c3449a1b809d8526ead582d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget006" + } + }, + "389174ab87e24a48a23ad5f81a32da61": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_d2ba7f491ec94768be174bba323aff6d", + "style": 
"IPY_MODEL_a32e41356969452abe56558608109dc8", + "value": "test_episodes" + } + }, + "38f46c0b84c84233a228758c9b306a79": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "3909591203bd4321b62ed4e0aa575a3e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_a2bb633318304f79a811eb07e18da7f5", + "IPY_MODEL_4ab1ce52edf54c879f2ee002e94c98f1", + "IPY_MODEL_159f94f25de5436aafa6fec3c88e3356", + "IPY_MODEL_4ab1ce52edf54c879f2ee002e94c98f1", + "IPY_MODEL_88b977df9d82476298ff3c70d714afe0" + ], + "layout": "IPY_MODEL_886c73a1052a4a2da9ec06c958855a51" + } + }, + "39219af0b9a34c03a11682fdbaf85b04": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_e9d6d91ceda64a63b9fe358e90337820", + "IPY_MODEL_9694a75a41e543a3b2642aee3572857d" + ], + "layout": "IPY_MODEL_ed746bfae28741e9ae1d450dd1394423" + } + }, + "39c394badc7246fdb12032649f71a1b6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget003" + } + }, + "3a389cd3e9254722a3bef185d92c9ac4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "3a3916bde1e849aeae0e2701258ddc34": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_fc20a5f1e967425c840960c1948f00c8", + "style": "IPY_MODEL_c75a9640bb26465785ca214520007519", + "value": "train_episodes" + } + }, + "3a96e3ae233940e18c75f004da9e0459": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_bf7a578fb6204ce694235598a0f00ea2", + "style": "IPY_MODEL_f2612900bd944258af3be77cacc7a46b", + "value": "name" + } + }, + "3b0358464a32494ea410b866646b79b1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_dd51349042bc4341b061da02df9f8be2", + "style": "IPY_MODEL_63c30e87411c45dd8d58dfa485850fc2", + "value": "learning_rate" + } + }, + "3c695e15ebbd4ecfb555b0fe5221ad10": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_9a247aedcd64492d9b4ddf9d76c13062", + "style": "IPY_MODEL_96fc368f69794e5baa9433c3a31b1ec1", + "value": "amsgrad" + } + }, + "3c77984eb49f4b3fbf5b78b313af8071": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget013" + } + }, + "3cfd11894b514078901081bddd35c83d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "3d9166fc4fcf43f3b930ebc7f996a5bf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "3e9c9dcc814b47f8b2b392074c83d853": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": 
"LayoutModel", + "state": { + "grid_template_areas": "\n \"a00 a01\"\n \"a10 a11\"\n \"a20 a21\"\n \"t0 t1\"\n " + } + }, + "3f7607f9884f482498bb28a91df5ab02": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "40747ee3248e4cbca2b22e3201e7ae52": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "4080aa3475b94001b5324fd14d18816c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "SliderStyleModel", + "state": { + "description_width": "initial" + } + }, + "40848c8562dc485fa88be8cf89c7a5e2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "40c1e5560977460b86028ca09ee94662": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget005" + } + }, + "4112e1653afc41a795418fc54377af6c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_808fb0e5d6b940388d588196c927564d", + "style": "IPY_MODEL_9b276e72efa44a7e911ee209d08859b6", + "value": "Learn Parameters" + } + }, + "413fd706b68148a099ed9af1a952ec6d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "41425cf814dc44c49ac901aeec4c668f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "420cda5d7fd34a05b48fa845558987c4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatTextModel", + "state": { + "description": "Manual input:", + "layout": "IPY_MODEL_cfc4c351d9da4a2bbe36bb1288f74e82", + "step": null, + "style": "IPY_MODEL_9b5f3fd4ebd341ac91227f9ded9fab19", + "value": 200 + } + }, + "42f8297b00d240308e7403a004a1c6b4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget003" + } + }, + "432a3a690b36409192aa3ee4dd5fedf8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatSliderModel", + "state": { + "continuous_update": false, + "description": "Slider input:", + "layout": "IPY_MODEL_45b014170b1e4c6b8efc9d245b587b48", + "max": 1.8, + "readout_format": ".1f", + "step": 0.1, + "style": "IPY_MODEL_4c528854314c4df18a84eafa4f1a7404", + "value": 0.9 + } + }, + "434eec441fb94a30bcb70bec50c60d78": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_681fa50d92ed4da0afda87805d2383ca", + "IPY_MODEL_18470dca56a94ced8388c8eec402515f", + "IPY_MODEL_da5536ed85464ee5a97c44660b985348" + ], + "layout": "IPY_MODEL_74dc8e60490943c8b9601232bf24f608" + } + }, + "43730220bf8e489cae588fcf375d08cf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_31276a604cf14bcd82297907c46c17f8", + "IPY_MODEL_420cda5d7fd34a05b48fa845558987c4" + ], + "layout": "IPY_MODEL_ddba268ea0db428898643ae0f9a259a3" + } + }, + "43ca75c41e054155b5ad51e493b3b990": { + "model_module": "@jupyter-widgets/controls", + 
"model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_e53d3b32848c4872a5e1254a2ed080f1", + "style": "IPY_MODEL_e467ed3285684035a013df63ebb6b422", + "value": "Tips:" + } + }, + "43f9446733e242f1977bbe394ddc479b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "452324b6d7cc4cf28d456787efc23b8f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "454021a337164bae8a96f5a5a7749b78": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "454f999c2ca44e7b86263594806f6191": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "455c6fed537d48b188edef0200ab0fb1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_07377f1ec0e74dd4897d484914a44f99", + "style": "IPY_MODEL_a5d8986e9aad47b1ba7821ddf2850c7a", + "value": "Algorithm Selector" + } + }, + "45847f561d154d999d93f170524e2bdf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "IntTextModel", + "state": { + "description": "multi envs:", + "layout": "IPY_MODEL_4cff6dcb31874722a4fcd9052bb1f9b6", + "step": 1, + "style": "IPY_MODEL_e41fe8ee1bf04764abe02428057a540a", + "value": 1 + } + }, + "45850b0512424834a6d4c70e60892ae8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "45b014170b1e4c6b8efc9d245b587b48": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "45e906bdfe7a464d848f9c972f536d31": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "467644544d33439284f04fe2a9883182": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DropdownModel", + "state": { + "_options_labels": [ + "Acrobot-v1", + "CartPole-v1", + "CartPole-v0", + "MountainCar-v0", + "MountainCarContinuous-v0", + "Pendulum-v0" + ], + "description": "env name:", + "index": 5, + "layout": "IPY_MODEL_e210fdbc53d246a2ae55da6a3689745b", + "style": "IPY_MODEL_f29ba87ee02f4fc38760b98a32e20581" + } + }, + "469da089cf804101a4cbc570975a1aed": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_ac4da45cf7d84d5fa0ea8963afbe5c12", + "style": "IPY_MODEL_dc12042cc1bb40c98a69bef90468797a", + "value": "gamma" + } + }, + "4711e3b757ae4ba08ece2d994aa46c2a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "SliderStyleModel", + "state": { + "description_width": "" + } + }, + "4749f46df2c4438e874ed6912a4d7ef1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget009" + } + }, + "474e0de897334eb69236cc05ae69f164": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatSliderModel", + "state": { 
+ "continuous_update": false, + "description": "Slider input:", + "layout": "IPY_MODEL_c234ed19a3204e1d9452d6686e014efb", + "max": 200, + "readout_format": ".0f", + "style": "IPY_MODEL_22126658c9d54cfab48b63029798c705", + "value": 100 + } + }, + "47513573787c4ab1bfafee8a38450355": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "47d275b36e704a74a22098c38f14f301": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "47ed36f4da904759bb9adcf9f1f1685b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget006" + } + }, + "48392da1f6c64d3fad859465d0d0095b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "align_items": "stretch", + "display": "flex", + "grid_area": "widget002", + "justify_content": "center" + } + }, + "48a97cf1c4a44a858c3376f962060321": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "48d65f9009904854b076047201074a2c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_template_areas": "\n \"a00 a01\"\n \"a10 a11\"\n \"a20 a21\"\n \"t0 t1\"\n " + } + }, + "494deb5503e842b78948ed2c14e28e3e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget009" + } + }, + "49c009585e524d98af99d984cf65a85b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "4a1bc5d7007848cb89e08eff1479ddf8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "4a2a0ec5e8f641f489d58e31f3f5fcef": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_d1b7a611e0ea474991c6034e7e7a9e98", + "style": "IPY_MODEL_60104c359482485eaa44f621628fb667", + "value": "Box(3,)" + } + }, + "4a88a99c974d47da993c8bde3faab362": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "justify_content": "center" + } + }, + "4ab1ce52edf54c879f2ee002e94c98f1": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "layout": "IPY_MODEL_29a207365d934cc4a402ed72a19194ca" + } + }, + "4b23820dcff647a6ad204c7c4a596248": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget001" + } + }, + "4b5dc49fbc1743c8abe6cded3f9ed703": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "border": "dotted", + "grid_template_areas": "\"widget001 widget002\"\n\"widget003 widget004\"\n\"widget005 widget006\"\n\"widget007 widget008\"\n\"widget009 widget010\"\n\"widget011 widget012\"\n\"widget013 widget014\"", + "grid_template_columns": "repeat(2, 1fr)", + "grid_template_rows": "repeat(7, 1fr)" + } + }, + 
"4b9184b437ac441e8c485894889e7fd4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "GridBoxModel", + "state": { + "children": [ + "IPY_MODEL_8865f419c3a04323907d8e9d11f06c24", + "IPY_MODEL_c60dc42b295c47138b76205df9071217", + "IPY_MODEL_85165a2de0d64a2bb9baf9b64b3ffa38", + "IPY_MODEL_bffd75c7e90346ebb8214c6fe0ce2ab4" + ], + "layout": "IPY_MODEL_1cb1d8e98bef410e85502ad2edb46c45" + } + }, + "4bbe95c5e6b34795a2058cc7bf7416f9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_615934e58366458ea65a907cae98c64e", + "style": "IPY_MODEL_570c4f6867da492cafc6318dd145f87d", + "value": "Dense(n_units=64, relu, in_channels='3', name='hidden_layer1')" + } + }, + "4c528854314c4df18a84eafa4f1a7404": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "SliderStyleModel", + "state": { + "description_width": "" + } + }, + "4cff6dcb31874722a4fcd9052bb1f9b6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "4d8d22e583c64179817ad9c514bd4490": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget004" + } + }, + "4e6414fcd34b454e94c982f7233402a7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "4ee9cbafcaad44de9f9e7453ee765047": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_f74c2a3b52114bbc80056d7097731209", + "IPY_MODEL_7fbbe1851a944d69a568c06875de2b0f" + ], + "layout": "IPY_MODEL_2a9fb576ef6145abaf95398bf620cd8d" + } + }, + "4fa0861e758940d9b9c2775304ebb140": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "50ce374ed2fc4f2ebc2c156c16ba4f38": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_5ee808e0128f4e85921b2855f4ff3831", + "style": "IPY_MODEL_6a001a1bb11844d0b85857486c544879", + "value": "mode" + } + }, + "510e33d521264ac387af97dbbb46dd39": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "align_content": "center", + "align_items": "center", + "border": "dotted" + } + }, + "516cc7132ca94faab3023ffcd1ed4cd4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_5af150388cac4ebc96775a3696923399", + "style": "IPY_MODEL_81621cd1e69f47a1868bf499caac5824", + "value": "Choose your environment" + } + }, + "520b2e1af36547edbae1352d82099fda": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget002" + } + }, + "5228a7a8160f421f846e2d7d06c9d159": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "532ea00fd94045298f69a3917ced39c7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": 
{ + "align_items": "stretch", + "display": "flex", + "grid_area": "widget008", + "justify_content": "center" + } + }, + "53c0481b6b294cf888f2b3abdc33a95c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "543b543dd8bb4fcb9dc9f4a16ac4bd6e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget005" + } + }, + "5446746816dd4edf8dffb29995d15715": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_6406ec864c1848d88b92c9b5248a9c9e", + "style": "IPY_MODEL_891e2bdcc12d4314affa4fd372ed7ade", + "value": "0.0" + } + }, + "5469680f21e44e77b1092b8354d9aee0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "547d2113aae04e20ba41d30deb33ec5f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget014" + } + }, + "54927f9f2cde4416bf0e3b782fbd5118": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "5526ed8ea7b4499eadc0bbb165d7bbc4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_965b9a99694b4227a43121ae2e974290", + "IPY_MODEL_e57f860aafca4775a03574208f4944b7" + ], + "layout": "IPY_MODEL_510e33d521264ac387af97dbbb46dd39" + } + }, + "5532430429754176a10d6ab53ba4b6d9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "55790721852a4ac38f0bf04e1016c16a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_28ad6172b7f34ba9923847d24dd555b3", + "style": "IPY_MODEL_c35cf89d5b4c42c886c9c83fdc93c8e6", + "value": "Environment name:" + } + }, + "55abe6fb296b491ba2e2a09a492b5ae8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "570c4f6867da492cafc6318dd145f87d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "575f6d3a87c041e4a3005385d7ec75b4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_b20aaab10e6a49138d9cf0a414321c49", + "IPY_MODEL_c2aa94c81efc4f3f826adcb847fbdb89" + ], + "layout": "IPY_MODEL_8173f889450249d58f18acfe83d63ddd" + } + }, + "57f97e2ebec542f8b297365916bf571e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget011" + } + }, + "58201f662dc74741bcdeb0e7753843c4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "593926166a704759992244f9732d0f8d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "595aeae2634948268510587998ec9587": { + 
"model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_575f6d3a87c041e4a3005385d7ec75b4", + "IPY_MODEL_080346c4f0ae457182549d3c68aaaaea" + ], + "layout": "IPY_MODEL_b9743661bbd24d94969c463e1f77d6e8" + } + }, + "59da397a7faa43c79c633dd523b6f07b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "5ac9e6a121a3488ea93f85f5589429a0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "5adceaf568da4a1d88d6bf7b379965c2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_0e74af77352a4b40b0f9e5163d92a836", + "style": "IPY_MODEL_c7a9f23b553e43a78d5c0ced37526327", + "value": "beta_1" + } + }, + "5af150388cac4ebc96775a3696923399": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "5af1a3e17ac64264905701b109c013e2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "GridBoxModel", + "state": { + "children": [ + "IPY_MODEL_24f450d31f2d47a68aa2c58be28170fb", + "IPY_MODEL_a7d002d3e5454965af1d9cdb2e54e7ca", + "IPY_MODEL_3b0358464a32494ea410b866646b79b1", + "IPY_MODEL_b4047180a5aa44479c358d8c12f0c5d5", + "IPY_MODEL_9fd6a74ce4e54ae38816e55d19327281", + "IPY_MODEL_0eb34e6e2b07401dae9a2bfa4f1d49df", + "IPY_MODEL_5fc0273b28ca4f42b441948986c98e99", + "IPY_MODEL_bd7afa2132154beebd89e4320ebcad26", + "IPY_MODEL_d48e8464b37c4f0099d42e59369dbab6", + "IPY_MODEL_07b0e1377c414989a1d7ce1bf1da1c4e", + "IPY_MODEL_b04b868ce504489c82bd8818501b3ac3", + "IPY_MODEL_d1ba6fbf21674589b3f585f6e0f9638b", + "IPY_MODEL_c083a4b8f36848ed9f277f423ae18084", + "IPY_MODEL_8c168f5c8ecc4d0ba203b60193856d1c" + ], + "layout": "IPY_MODEL_2f93a27048a44beda22771c8249fba0d" + } + }, + "5afcc13ec3d94e6299bd06fb87ed7885": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget007" + } + }, + "5b759ba6fc8f451c97ee15467069a6ed": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "5b87473fb6cc473a89998a285388f4da": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "5bced3d11d4a41a4b3e1c712f83b98e4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DropdownModel", + "state": { + "_options_labels": [ + "default" + ], + "description": "state type:", + "index": 0, + "layout": "IPY_MODEL_f4d0297192f5464bac7ab02b3dabed2c", + "style": "IPY_MODEL_7fea48aa29c24b4b94784890589e01e4" + } + }, + "5caab83d7d4d4658ac739d02b56e9fd6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "5daa3bcd6829495cb223328230f0f8e4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "5ee808e0128f4e85921b2855f4ff3831": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": 
"widget001" + } + }, + "5efb085669c2400a909ac37b5cb4e45e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "align_items": "stretch", + "display": "flex", + "grid_area": "widget008", + "justify_content": "center" + } + }, + "5f1fda7eb4ac4ce694f721e312e205ab": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "5fc0273b28ca4f42b441948986c98e99": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_18ea002dd43344a5864f8a8651ceeaeb", + "style": "IPY_MODEL_e14f5611fa9242af879512207669394f", + "value": "beta_1" + } + }, + "60104c359482485eaa44f621628fb667": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "611b3bc2e8e749a38fe77bbdab064670": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_3c77984eb49f4b3fbf5b78b313af8071", + "style": "IPY_MODEL_b64d5e345cb5482595aa92662c8f162c", + "value": "epsilon" + } + }, + "615934e58366458ea65a907cae98c64e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "6187b72c80f64272a6c33c90cb582c4c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "align_items": "center" + } + }, + "626ae439ee1f4ce4895764fb66f9c6d3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "629ece3b43ac4c8a8c2f83733a180978": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget005" + } + }, + "62a5e4f04f554e6580d63bb32f36b3be": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "align_items": "stretch", + "display": "flex", + "grid_area": "widget012", + "justify_content": "center" + } + }, + "63c30e87411c45dd8d58dfa485850fc2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "63d55c74d6ed493abe58361958b23046": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "6406ec864c1848d88b92c9b5248a9c9e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget012" + } + }, + "64750206fa3a48119aa85e75f5ff2de8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "GridBoxModel", + "state": { + "children": [ + "IPY_MODEL_55790721852a4ac38f0bf04e1016c16a", + "IPY_MODEL_2ece943ff83c48e8b69e0b2396b6064c", + "IPY_MODEL_7a5d99612efa45acb82149814a4a7e82", + "IPY_MODEL_87b22017505c4d14a335692f09abd816", + "IPY_MODEL_8f5e2c19238240c38947f1a5d8e72792", + "IPY_MODEL_2da2537f2e444e16ad634693e684af58", + "IPY_MODEL_6e144126a66b48f9a22641284932ad73", + 
"IPY_MODEL_ef95b43fb5cd436cb6f737f2defc8e38" + ], + "layout": "IPY_MODEL_48d65f9009904854b076047201074a2c" + } + }, + "660e8c250f974ff685128c61b3d57fe3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "661fd55473c0431aa9dffd6876d1d559": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "66bc7fd58a2743a0960e9dd5df378998": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "align_items": "stretch", + "display": "flex", + "grid_area": "widget010", + "justify_content": "center" + } + }, + "677e2010d7ce45eb9adc6f26a8977636": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_3556d6d1fe0c4e558b21b70b8c7b9395", + "style": "IPY_MODEL_0580852520e142a89d7b42c50bfef6a1", + "value": "learning_rate" + } + }, + "67a79ba4cbf84418967857e237a5a1be": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "681fa50d92ed4da0afda87805d2383ca": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_fb19638e8a38465f844aaf06c6378b29", + "style": "IPY_MODEL_47d275b36e704a74a22098c38f14f301", + "value": "Algorithm Parameters" + } + }, + "683e3afa65604f1b85604a79ec228a2b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "689e8f05af2f4f159239a896e7e9843a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "68d4eab6f1cf4e2fa0e229ecdce8d392": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget005" + } + }, + "68fcf5652dd14e5fad220fcbe777ddbb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "description_width": "" + } + }, + "691c17934ca3435eb36a2d84d15ecdf7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "GridBoxModel", + "state": { + "children": [ + "IPY_MODEL_3a96e3ae233940e18c75f004da9e0459", + "IPY_MODEL_8d18e0fa10b94372a3edf64edb4814bc", + "IPY_MODEL_677e2010d7ce45eb9adc6f26a8977636", + "IPY_MODEL_e224793bc1524f0c91ce3d7ef0e98f8e", + "IPY_MODEL_c34d5f3024f24951b4f478bca62dd7c7", + "IPY_MODEL_6bb0b7ee0cdf49ca97bb0c3b528131e8", + "IPY_MODEL_5adceaf568da4a1d88d6bf7b379965c2", + "IPY_MODEL_6c1a4850cad844f4bd144b78177e6d31", + "IPY_MODEL_c12ffb6b4533460bbdfc7404ff89d807", + "IPY_MODEL_e6c798aa900740009741c67dfccb0d92", + "IPY_MODEL_75b1aa83fa184214aecc8ea858858cd3", + "IPY_MODEL_e1f03c622ff64b3bb4e59fc54e7898a6", + "IPY_MODEL_611b3bc2e8e749a38fe77bbdab064670", + "IPY_MODEL_eb54eb7b3c674e67b10610ce2aaf309a" + ], + "layout": "IPY_MODEL_fb06877af7ae451baefc12dfd27d9348" + } + }, + "6923c73eeac747fdbe41b2062e257a58": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": 
"widget007" + } + }, + "69268529fca5425e9f11506c968490e7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "698f9329e3754e7482dc32690ba58f4a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "6a001a1bb11844d0b85857486c544879": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "6ab9513a615a4551a596a3d2e637d181": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "6bb0b7ee0cdf49ca97bb0c3b528131e8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_2c0353597c114ba184977dac607510c3", + "style": "IPY_MODEL_82c3b758724944d0b02d17ecfdd05698", + "value": "False" + } + }, + "6c1a4850cad844f4bd144b78177e6d31": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_e255dc6e7af7487e8a2729f670bffd8a", + "style": "IPY_MODEL_012eeb7c3bab46d9baa05356cd4ff0f6", + "value": "0.9" + } + }, + "6c751fa2c2aa415ea57d3c9b0e11b22d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_254576dd293543d384c9e5620c3db225", + "style": "IPY_MODEL_304f4dcdb42b4bca91451ccfe7eba639", + "value": "max_steps" + } + }, + "6caef128e4df40ebb76ef90ad9a40d41": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_a496bd2aabab465fbcf0022dc1acd19f", + "IPY_MODEL_2982ccca674f4bfc839557e06cde9993" + ], + "layout": "IPY_MODEL_fbd450c8b01f4ab9ae7ea1caa129bd66" + } + }, + "6cb628f08ae2469db2ee42e38ca4de74": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "BoxModel", + "state": { + "children": [ + "IPY_MODEL_90d52d8b63c342f087384246a76680d7" + ], + "layout": "IPY_MODEL_759fddd650134c46bbbbd4b4c6f8c744" + } + }, + "6d5b0a5b26874cfd874c4a0bdf307eff": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_1537ab75a9dd4f429ffb3812c485116f", + "IPY_MODEL_a18265de326b4d399e760f9d2e5bb238" + ], + "layout": "IPY_MODEL_7208b8f21c77462dad67124eb0fd8164" + } + }, + "6d6739242111448eaf1e80a8962f1aac": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "6db9105409df4485909f169fc6e6d696": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget003" + } + }, + "6dc0399123f94dd1831a2b2cfb6c3078": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "6e144126a66b48f9a22641284932ad73": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": 
"IPY_MODEL_53c0481b6b294cf888f2b3abdc33a95c", + "style": "IPY_MODEL_a8e550f371f94677a29e238776be2cdb", + "value": "Tips:" + } + }, + "6efa143c4b9d43aa94ed8cfe56824583": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "6f0bd8ffadf44461a70b1031b3f65064": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatSliderModel", + "state": { + "continuous_update": false, + "description": "learning curve smooth factor", + "layout": "IPY_MODEL_145001c5826a41cd989997ea61244ca1", + "max": 1, + "step": 0.01, + "style": "IPY_MODEL_4080aa3475b94001b5324fd14d18816c", + "value": 0.8 + } + }, + "6f525160109d45299758550c08196bd9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "description_width": "" + } + }, + "70c300868924433094e74b74d260a4a2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "justify_content": "center" + } + }, + "715b10d741354c8db506fb8ba945a074": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_1e327c8e4b844c2fbb017a5544fa678e", + "style": "IPY_MODEL_6ab9513a615a4551a596a3d2e637d181", + "value": "0.0001" + } + }, + "7208b8f21c77462dad67124eb0fd8164": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "731d299fb9dd45c1a41a5d4df4f41f94": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "SliderStyleModel", + "state": { + "description_width": "" + } + }, + "747e88ebfefc4efb95f60f63e725dcc1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "74d03d1491d4451d879384ab357f33a9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "align_items": "center" + } + }, + "74dc8e60490943c8b9601232bf24f608": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "7532b84aea3a4f4290efa4b0369e846a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "759c11789beb46f798f3b48c4cf88577": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "759fddd650134c46bbbbd4b4c6f8c744": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "75b1aa83fa184214aecc8ea858858cd3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_7cdb0eb01b9b434ca4c08fd25f243f09", + "style": "IPY_MODEL_3cfd11894b514078901081bddd35c83d", + "value": "decay" + } + }, + "75c167ca66774581880b2500d5176a36": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "76c7ceb7a42e44048e694b71f27f56eb": { + "model_module": "@jupyter-widgets/base", + 
"model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "76d1b335a0134c19852090005ae135c4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_e8260cb1f55049a49bdaf024528d43c4", + "style": "IPY_MODEL_def02ee29d9a44b19a1fd20f8a4be1a0", + "value": "name" + } + }, + "76dec90334724f3ba9e51ba05856ff79": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "776cdbcecc004924a856eb45ec0a5699": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_98eeb6cc7ac643ac882d54fab647de04", + "style": "IPY_MODEL_a02320673c484c46848d7aeb6fda6e18", + "value": "1e-07" + } + }, + "78f5897896d144fe839fafd65e76816e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "79611f87c64c431794f17eccbbd60f38": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget012" + } + }, + "79953b3e59c048548c96bb197d46a7ea": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "7a4be7c4229640b18c29d60d30cc0e70": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "7a5d99612efa45acb82149814a4a7e82": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_7e40917d81264ee9986d07bae8291022", + "style": "IPY_MODEL_1e6d0c80ceaa4e58846e9f554371b363", + "value": "Observation space:" + } + }, + "7a6c0819e1344119aae9ef136830ad44": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "7a7ebee6dcf34f36b1d55d2cb443e387": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "7a807eea55d14bae96d792b1e475adcb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "7aa2babe24dc4fab84bfbd511f0b5e98": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "7aba7921241e41af9a32cbe042699485": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "7af9623e94c64555b01efa581f338e60": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_841b7f5d915e4f639784140b23610d75", + "IPY_MODEL_e904337542fd4e5d8187b9b9190b7522" + ], + "layout": "IPY_MODEL_532ea00fd94045298f69a3917ced39c7" + } + }, + "7b48f1fae96e40519787018ed628b99b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + 
"7cc3bf6293494425b70569d1eca3af03": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "7cdb0eb01b9b434ca4c08fd25f243f09": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget011" + } + }, + "7d163d682d5744d6ac7be041fb66c158": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "7d64c7c8f2dc4d4eb6218e55ae44bfbe": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "7d70e416e925499f93e5837aabc6afc2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "7dc1333733194435934e6ca098ede1ad": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "7df23ef826fb4c568071b0667bafcd3b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_6db9105409df4485909f169fc6e6d696", + "style": "IPY_MODEL_84111028e0ea4937a6fea8f96b279bec", + "value": "model save path" + } + }, + "7e128d275e3c4e88829167514cec3bc6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "7e40917d81264ee9986d07bae8291022": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "7f3f44cbaac94755810c0e589d048490": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "7f9233b831cc448a97a909e398122bb9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "7f94bb571172453a920e7bd6d7a9050f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatSliderModel", + "state": { + "continuous_update": false, + "description": "Slider input:", + "layout": "IPY_MODEL_58201f662dc74741bcdeb0e7753843c4", + "max": 600, + "min": -400, + "readout_format": ".0f", + "style": "IPY_MODEL_b5dd447dec9c48bc8b1bb664c9553912", + "value": 100 + } + }, + "7fbbe1851a944d69a568c06875de2b0f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatTextModel", + "state": { + "description": "Manual input:", + "layout": "IPY_MODEL_12e50eba7f3e4e9f888416f46172b60f", + "step": null, + "style": "IPY_MODEL_18a7121ba72e42af9a496a39fb8c6f6a", + "value": 100 + } + }, + "7fea48aa29c24b4b94784890589e01e4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "7ff9e3e9f09b40d398b6c898e5ee9653": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "808fb0e5d6b940388d588196c927564d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": 
"LayoutModel", + "state": {} + }, + "80d9bf94c37c49708820ccb5a2aa8f8b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "814eef7fa97a4fa2b4c5f1ed1b3728f3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_2e8b3025623248e2a92daa5a7750997f", + "style": "IPY_MODEL_bb04f52581bb496e9a6931ce291714c9", + "value": "Dense(n_units=64, relu, in_channels='64', name='hidden_layer2')" + } + }, + "81621cd1e69f47a1868bf499caac5824": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "8173f889450249d58f18acfe83d63ddd": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "8178676fb5e441ec92464938695643a8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "81a50427a5384feeaaee374a19ad5931": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_template_areas": "\"widget001 widget002\"\n\"widget003 widget004\"\n\"widget005 widget006\"\n\"widget007 widget008\"\n\"widget009 widget010\"\n\"widget011 widget012\"", + "grid_template_columns": "repeat(2, 1fr)", + "grid_template_rows": "repeat(6, 1fr)" + } + }, + "81d1f55272ef4977b06be173bdd59b8c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "81f34a95028440608c8a5a307cd7ee9b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "align_content": "center", + "align_items": "center", + "border": "dotted" + } + }, + "82c3b758724944d0b02d17ecfdd05698": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "831ed45407f74193acc07dacada162a9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "835ef9a1125846679a65d679afb62013": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "8387714984af4e9cbaf16cbff2a45cbb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget001" + } + }, + "83c18b3b4c374f70947e47230ffe4f82": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_be4d4fbbc53d4705963f9b343aff399f", + "style": "IPY_MODEL_8efed772f09f4ea1a1dabf91598fd49a", + "value": "Optimizer information:" + } + }, + "84111028e0ea4937a6fea8f96b279bec": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "841b7f5d915e4f639784140b23610d75": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatSliderModel", + "state": { + "continuous_update": false, + "description": 
"Slider input:", + "layout": "IPY_MODEL_0b081708649d446ab37f522f5a019e19", + "readout_format": ".0f", + "style": "IPY_MODEL_12a0f20f2ecd423889594f36b15647f1", + "value": 50 + } + }, + "842ea79123034275adec1df392a4846d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget009" + } + }, + "84f7291061b34bfaaaec0711bd0cca56": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_ae877e1e2a554a19b78fb9a12f60e5d3", + "style": "IPY_MODEL_1f0e424278554da08fbb15138e571a62", + "value": "The action space is continuous." + } + }, + "85165a2de0d64a2bb9baf9b64b3ffa38": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_383cf0cb101341d4bdfb65604a24a4d5", + "style": "IPY_MODEL_23424247d797485dba0788eb6b7614aa", + "value": "model save path" + } + }, + "85514e8a938240e7b2df7c2a8ad6b6e8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "85d35dbed0594a3a837f536309af0b59": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatSliderModel", + "state": { + "continuous_update": false, + "description": "Slider input:", + "layout": "IPY_MODEL_1db128fafd984258b040b5295b477f0d", + "max": 74, + "min": -26, + "readout_format": ".0f", + "style": "IPY_MODEL_066c122ea5f64991b7347279a79e8061", + "value": 24 + } + }, + "86e357397076415ba3ac239b26a8bc8f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget006" + } + }, + "8784dbc322c7455aaef2b352bae2f205": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "87b22017505c4d14a335692f09abd816": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "BoxModel", + "state": { + "children": [ + "IPY_MODEL_4a2a0ec5e8f641f489d58e31f3f5fcef" + ], + "layout": "IPY_MODEL_1f37fdacb85646a1b5ff9a2b1d6ab38a" + } + }, + "885608d7df064c51ac0523ef9928e6b6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_689e8f05af2f4f159239a896e7e9843a", + "style": "IPY_MODEL_b85dbc19731e4b84bb6122ea52367809", + "value": "Action space:" + } + }, + "8865f419c3a04323907d8e9d11f06c24": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_8387714984af4e9cbaf16cbff2a45cbb", + "style": "IPY_MODEL_5daa3bcd6829495cb223328230f0f8e4", + "value": "gamma" + } + }, + "886c73a1052a4a2da9ec06c958855a51": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "88aafdf648784ac7954ce933431f9a3a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_8d80128792d44bf1a0467b7e86df0b54", + "IPY_MODEL_d91d58d65e864faa90c9cc7bfd2959b0" + ], + "layout": "IPY_MODEL_8ff956034aa047d0a8809922cbefa856" + } + }, + "88b977df9d82476298ff3c70d714afe0": { 
+ "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatSliderModel", + "state": { + "continuous_update": false, + "description": "learning curve smooth factor", + "layout": "IPY_MODEL_7f9233b831cc448a97a909e398122bb9", + "max": 1, + "step": 0.01, + "style": "IPY_MODEL_35525c0fbffa497eb43f7d5bd081bb0b", + "value": 0.8 + } + }, + "88fc41c33c024f4eb22b13e0ea98e605": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget003" + } + }, + "891909eab8204a4bb78c9a468bc20112": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "layout": "IPY_MODEL_e1f175e02edf40f39585c485ec11cbff", + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAD8CAYAAACCRVh7AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xl8VOXZ//HPlX0hJBACgbBDAiJCkMiiwqOCilsRKluR2tY+tFar1tpq26e/9nm6qK11a91otaUtKopasKi4F0VLDcgShEzCmgAZEoQwScg61++PDBgwIcssJ8v1fr3mxcx9zpm5MmK+nHPf575FVTHGGGMaE+Z0AcYYY9ovCwljjDFNspAwxhjTJAsJY4wxTbKQMMYY0yQLCWOMMU2ykDDGGNMkCwljjDFNspAwxhjTpAinC/BXr169dPDgwU6XYYwxHcqGDRtKVDWluf06fEgMHjyY7Oxsp8swxpgORUT2tmQ/u9xkjDGmSRYSxhhjmmQhYYwxpkkWEsYYY5pkIWGMMaZJFhLGGGOaZCFhjDGmSRYSxhjTCq9tPciu4jKnywgZCwljjGmhQ55Kblq2kase+YDlH+9DVZ0uKegsJIwxpoVcRfVnEH26R3PXi1v57rOfcKyyxuGqgstCwhhjWijX7QHg+W9N5geXj+C1nCKufPh9Nu474nBlwWMhYYwxLeQq8tAzPoqUhGhuvng4z39rMqow54mPePTdfLzeznf5yULCGGNaKNftIaNPN0QEgPGDevDqbVOYMTqV367JZdHT63Efq3S4ysCykDDGmBZQVfLcHkb0STilPTE2kj8sGMd9Xz6HDXuPcMXD7/PODrdDVQaehYQxxrTA/qPHKa+uIyM14QvbRIR55w3kn9+9kN4J0XzjL9n83yufUlVb50ClgeVXSIjIHBHZJiJeEclq0L5QRDY1eHhFJNO37VciUiAiZae9V7SILBeRfBFZLyKD/anNGGMCyeXrtM7o88WQOGF47wT+cfMFfO38wTy9bjezHv2QnR38ngp/zyRygNnA2oaNqrpMVTNVNRNYBOxW1U2+za8AExp5rxuBI6o6HHgQuM/P2owxJmByfcNfM3o3HRIAMZHh/PxLZ/Onr2ZxsPQ41/z+A17ILuiw91T4FRKqul1Vc5vZbQHwXINj/q2qBxvZbyaw1Pd8BTBNTvQOGWOMw1xuD6ndY0iMi2zR/tNH9eG126Yypn8iP1ixhdue24SnA95TEYo+iXnAsy3YLw0oAFDVWqAUSG5sRxFZLCLZIpJdXFwcsEKNMaYpLren0f6IM0lNjGHZNyfx/UszWL31IFc+8j6bCo4GqcLgaDYkROQtEclp5DGzBcdOBCpUNScg1fqo6hJVzVLVrJSUZtfxNsYYv9R5lbxDZYzo063Vx4aHCd+dls7yxZPweuG6xz/kiX/t7DD3VEQ0t4OqTvfj/efTsrMIgP3AAKBQRCKAROCwH59tjDEBsfdwOdW1XtLP0GndnKzBPXn11inc/dIW7n1tBx/klfDAvLH0TogJYKWBF7TLTSISBsylQX9EM1YBN/ieXwe8ox21p8cY06mcGNl0+j0SrZUYF8ljC8/lntnnkL33M6546H3ezT0UiBKDxt8hsLNEpBCYDKwWkTUNNk8FClR112nH/MZ3TJyIFIrIz32bngKSRSQfuAO425/ajDEmUE6MbEpvw+Wm04kICyYM5JVbLqRXt2i+/ueP+eU/P6W61uv3eweDdPR/rGdlZWl2drbTZRhjOrGbn9nI1sJS1v7w4oC+b2VNHb9avZ2//Xsv56Ql8siCcQzpFR/Qz2iKiGxQ1azm9rM7ro0xphmuIs8Zb6Jrq5jIcH5x7WieXDSefZ9VcPUj7/PSxsKAf44/LCSMMeYMqmrr2F1STkYALjU15fKzU3nttimc3S+RO57fzPeWb6KsqjZon9caFhLGGHMGu0vKqfUqI1p5j0Rr9UuK5dnFk/je9AxWbtrPVY+8z5ZC5++psJAwxpgzcLl903EE4XLT6cLDhNump/Pc4snU1HqZ/diHLFnr7D0VFhLGGHMGriIP4WHC0JTQdCgDTBjSk1dvm8K0s3rz61d38LW/fEyxpypkn9+QhYQxxpxBrtvDkF7xREeEh/Rzk+KieOL68fzy2tGs33WYKx5ey1pX6KchspAwxpgzcPlWo3OCiHD9pEGsuuVCesZH8dWn/8M9r24P6T0VFhLGGNOE49V17PusIiT9EWcyIjWBlTdfyFcmDuTJtbuY88SH7D1cHpLPtpAwxpgm5B8qQ9X/6TgCITYqnF/POofHF57L7pJyrnrkA9bllwT9c5ud4M8YY7qq3BOr0QV5+GtrXHFOX8YMSOL/XtkWkjMcCwljjGmCy+0hKiKMQT3jnC7lFGlJsTy5qNkZNQLCLjcZY0wTcos8DEvpRkR41/1V2XV/cmOMaYbL7WnTQkOdiYWEMcY04lhlDQdLK9tVf4QTLCSMMaYReQFaaKijs5AwxphGnFhoyOl7JJxmIWGMMY1wuT3ERYWTlhTrdCmO8nf50jkisk1EvCKS1aB9oYhsavDwikimiMSJyGoR2eE77t4Gx0SLyHIRyReR9SIy2J/ajDHGH7lFHtL7JBAWJk6X4ih/zyRygNnA2oaNqrpMVTNVNRNYBOxW1U2+zfer6khgHHCBiFzha78ROKKqw4EHgfv8rM0YY9o
s75CNbAI/Q0JVt6tqbjO7LQCe8+1foarv+p5XAxuB/r79ZgJLfc9XANNEpGtHuDHGESVlVZSUVXf5/ggITZ/EPODZ0xtFJAm4Bnjb15QGFACoai1QCiSHoD5jjDmF68TIpi4+/BVaMC2HiLwFpDay6SequrKZYycCFaqac1p7BPXB8Yiq7mpFvSeOXwwsBhg4cGBrDzfGmDNyFfnmbLIzieZDQlWn+/H+82nkLAJYAuSp6kMN2vYDA4BCX4gkAoebqGmJ7z3Iyspybl0/Y0ynlOsuIzE2kt4J0U6X4rigXW4SkTBgLr7+iAbtv6Q+AG4/7ZBVwA2+59cB76iqBYAxJuTy3B5G9EnAukX9HwI7S0QKgcnAahFZ02DzVKCg4eUkEekP/AQYBWz0DY/9pm/zU0CyiOQDdwB3+1ObMca0haqS6/aQkWojm8DPqcJV9WXg5Sa2vQdMOq2tEGg0mlW1EpjjTz3GGOOvomOVeCpru/x0HCfYHdfGGNNArq/TOt1CArCQMMaYU5wY/mojm+pZSBhjTAMudxkpCdH0jI9yupR2wULCGGMacPlGNpl6FhLGGOPj9Sout8cuNTVgIWGMMT4FRyqorPGSYRP7nWQhYYwxPidGNnX1JUsbspAwxhifvEP1q9Gl97YziRMsJIwxxie3yENaUiwJMZFOl9JuWEgYY4yPy+2x6cFPYyFhjDFATZ2XncVlNrLpNBYSxhgD7Ckpp6ZObWTTaSwkjDGG+jutwabjOJ2FhDHGALluD2ECw21k0yksJIwxhvolSwcnxxMTGe50Ke2KhYQxxoBNx9EECwljTJdXWVPHnsPl1mndCAsJY0yXt7O4DK/adByN8XeN6zkisk1EvCKS1aB9oW/96hMPr4hk+ra9LiKbfcc9ISLhvvaeIvKmiOT5/uzh349mjDEtc2KhIZsi/Iv8PZPIAWYDaxs2quoyVc1U1UxgEbBbVTf5Ns9V1bHAaCCFz9e1vht4W1XTgbd9r40xJuhyi8qIDBcG94p3upR2x6+QUNXtqprbzG4LgOcaHHPM9zQCiALU93omsNT3fClwrT+1GWNMS7ncHoaldCMy3K7Any4U38g84NmGDSKyBjgEeIAVvuY+qnrQ97wI6BOC2owxhtwiD+l2qalREc3tICJvAamNbPqJqq5s5tiJQIWq5jRsV9XLRSQGWAZcArx52nYVEaUJIrIYWAwwcODA5n6ERv1zywG2FJaSlhRL/x6xpPWItdkfjemCyqpq2X/0OAsmDHC6lHap2ZBQ1el+vP98TjuLaPC+lSKykvrLTG8CbhHpq6oHRaQv9WcaTdW0BFgCkJWV1WSYnMnWwlL+8uEeqmu9p7R3j4mgf4+4k6HRv4cvRJLq23rERSIibflIY0w7lOfrtLZ7JBrXbEi0lYiEAXOBKQ3augEJviCIAK4C3vdtXgXcANzr+/OMZyn++tGVZ3HXjJGUlFex/8hx9h89TuGR4yef7z1czof5JZRX151yXFxUOGlJ9WceDcMjLSmWAT1i6dUtmrAwCxFjOoqTI5ts+Guj/AoJEZkF/J76UUqrRWSTql7u2zwVKFDVXQ0OiQdWiUg09f0h7wJP+LbdCzwvIjcCe6kPmKAKCxN6J8TQOyGGcQO/OOJWVSk9XkPhEV+AHK0PkcIjFew/epxNBUc5WlFzyjFR4WH0S4qpPxtJij3ljCStRyyp3WOIsM4xY9qN3KIyYiLDGNAjzulS2iW/QkJVXwZebmLbe8Ck09rcwHlN7H8YmOZPPYEmIiTFRZEUF8XotMRG9ymrqvWdfVT4AuQ4hb4weXvHIUrKqk7ZPzxMSO0eU38m4guRoSnxjOqbyLCUeAsQY0LM5faQ3jvBrgA0IWiXm7qKbtERjEhNaPJUtbKmjgNHTz0Tqb+0VcG/dx2m6FglXl+vSlREGCNTExjVtztn9+vOqH7dGZnanfho+89kTLC43B6mpKc4XUa7Zb99giwmMpyhKd0YmtL4nDA1dV52l5Sz7UApnx44xqcHj/H6tiKe+7gAABEYkhzPWf18wdG3Pjx6J8SE8scwplM6Ul7NIU8VI1JtzqamWEg4LDI8jIw+CWT0SWDWuPo2VeVgaSXbDhzzBUcpmwuOsnrLwZPHpSREn3LGMapvdwYnx9spszGt4LKRTc2ykGiHRIR+SbH0S4rl0lGf31NYerzm5NnGpweOse1AKevyS6j1Xa+KiwrnrL6nnnFk9Emw+fGNaYKNbGqehUQHkhgbyeRhyUwelnyyraq2jjx32cnw2HaglJc27uevVXuB+o7y4SndTjnjGNWvO0lxUU79GMa0Gy53GQnREaR2t8u3TbGQ6OCiI8IZnZZ4yugrr1cpOFLR4HLVMdbtLOGlT/af3CctKfaU0Di7X3f62xBA08Xkuj1kpCbYDbJnYCHRCYWFCYOS4xmUHM+V5/Q92V5SVvWFy1VvbXejvtFVt09P5/bpGQ5VbUxoqSout4crRvdtfucuzEKiC+nVLZqpGSlMzfh8uF9FdS07ijw89m4+j723k3nnDaBvYqyDVRoTGsWeKo5W1DDCVqM7I7tzq4uLi4rg3IE9+Nk1Z6OqPPJ2ntMlGRMSuSdGNlmn9RlZSBgABvSMY+HEQTyfXciu4jKnyzEm6Fzu+r/nNvz1zCwkzEk3Xzyc6IgwHnjT5XQpxgSdq8hDcnwUvbpFO11Ku2YhYU5KSYjmGxcM4Z9bDpKzv9TpcowJqly3x84iWsBCwpziv6cOJTE2kvvfaG5VWmM6Lq9XyXN77Ca6FrCQMKdIjI3kpouG8V5uMet3HXa6HGOCYv/R45RX19mZRAtYSJgvuGHyYHonRPObNbmotmnhP2PatbxDJ+ZssuGvzbGQMF8QGxXOrdPS2bD3CO/saHIVWWM6rNyi+pFN6XYm0SwLCdOoeecNYFByHL9dk4vXa2cTpnNxuT30TYwhMTbS6VLaPQsJ06jI8DDuuDSDHUUeXtlywOlyjAmo3CIb2dRSfoWEiMwRkW0i4hWRrAbtC0VkU4OHV0QyTzt2lYjkNHjdU0TeFJE8359fXHTahNQ1Y/oxMjWBB950UVPndbocYwKizqvkF5fZyKYW8vdMIgeYDaxt2Kiqy1Q1U1UzgUXAblXddGK7iMwGTr+t927gbVVNB972vTYOCgsTfnD5CPYermC5b6U8Yzq6vYfLqa71kt7bOq1bwq+QUNXtqtrcgPoFwHMnXohIN+AO4Jen7TcTWOp7vhS41p/aTGBcMrI34wf14JG38zheXed0Ocb4zRYaap1Q9EnMA55t8PoXwO+AitP266OqJ9bnLAL60AQRWSwi2SKSXVxcHNBizalEhLtmjOSQp4qlH+1xuhxj/JZbVIYIDLcziRZpNiRE5C0RyWnkMbMFx04EKlQ1x/c6Eximqi+f6TitH5zf5JAaVV2iqlmqmpWSktLUbiZAJgzpyUUjUnj8vZ2UHq9xuhxj/OJyexjYM464KFspoSWaDQlVna6qoxt5rGzB+8/n1LOIyUCWiOwBPgAyROQ93za3iPQF8P1pA/TbkTsvG0Hp8Rr+uHaX06UY4x
ebs6l1gna5SUTCgLk06I9Q1cdVtZ+qDgYuBFyqepFv8yrgBt/zG4CWhJAJkdFpiVw9pi9Pr9tNsafK6XKMaZOq2jr2lJTbndat4O8Q2FkiUkj9GcJqEVnTYPNUoEBVW/pPz3uBS0UkD5jue23ake9fNoKqWi+PvpvvdCnGtMnuknJqvWpnEq3g10U5X99Co/0LqvoeMOkMx+4BRjd4fRiY5k89JriG9IpnblZ/lq3fy40XDmFAzzinSzKmVXKLbGRTa9kd16ZVbp2Wjojw0Fudd5nTvYfL+dnKHI6UVztdigkwl9tDRJgwtJddbmopCwnTKn0TY7lh8iBe+qTw5HjzzqS8qpZvLs1m6Ud7+fHLW20W3E4mt6iMIb3iiYqwX30tZd+UabWbLhpOfFQE96/pXAsTqSo/XLGFncVlXDWmL6/lFPHyJ/udLssEUN4hG9nUWhYSptV6xkfx31OG8sanbj7Zd8TpcgJmydpdrN56kLtmjOSR+ePIGtSDn63cRuGR0+/7NB1RRXUt+z6rsJBoJQsJ0yY3ThlCcnwUv+0kZxPr8ku47/UdXHVOXxZPHUp4mPDA3Ey8qtz5wmabLr0TyD9UhiqMSLX+iNawkDBt0i06gu9cPJwPdx7mg7wSp8vxS+GRCm55ZiPDUrrxm+vGICIADEyO42fXnM2/d33G0+t2O1yl8deJkU12JtE6FhKmzRZOHEhaUiy/XbOjw3bwVtbU8e2/b6C2Tnly0Xjio08dFT4nqz+XjurDb17PPflLxnRMLreHqIgwBiXHO11Kh2IhYdosJjKc26ans7mwlDXbipwup9VUlf/5Rw45+4/x4LxMhqZ88TKEiHDP7HPoHhvB7cs3UVVrM+F2VC53GcNTuhEeJk6X0qFYSBi/zB6XxrCUeO5/w0VdB7tu//f1+1ixoZBbp6UzfVSTkw7Tq1s0984ew/aDx3jwzc57f0hn53J77Ca6NrCQMH6JCA/jzstGkH+ojJc2FjpdTott2PsZ//fKNi4ekcLt09Kb3X/6qD4smDCAJ9fu5D+7PwtBhSaQSo/XcLC00voj2sBCwvhtxuhUxvRP5KG38jrE5ZhDxyr59t830i8plofmjSOshZcf/ueqUQzoEccdz2/CU2lTpnckeScXGrKRTa1lIWH8JlK/zOn+o8d5Zv0+p8s5o+paL99ZtpGyylqeXDSexLjIFh8bHx3Bg/PGcuDocf7vlU+DWKUJtFy3jWxqKwsJExAXDu/F5KHJ/OGdfMqrap0up0m/XP0p2XuPcN91YxiZ2r3Vx48f1JObLhrGCxsKO2RnfVeV5y4jPiqctKRYp0vpcCwkTECICD+YMYLD5dU8/UH7vKdgxYZC/vrRXv57yhC+NLZfm9/ntmkZnN2vOz96aautrdFB5BZ5SO+TcPIeGNNyFhImYM4d2INLR/Vhydpd7W4G1Zz9pfzk5a1MHprMXTNG+vVeURFhPDQvk7KqWu5+cUuHvUekK3G5PYywS01tYiFhAurOy0ZQVl3L4//a6XQpJ31WXs23/raB5Pgo/vCVcUSE+//XPr1PAnfPGMnbOw7x3McFAajSBEtJWRWHy6vJsOGvbWIhYQJqRGoCszLTWPrhHopKK50uh9o6L7c++wnFZVU8fv14krtFB+y9v3b+YC4Ynswv/vkpe0rKA/a+JrBcJxYasjOJNvF3+dI5IrJNRLwiktWgfaGIbGrw8IpIpm/beyKS22Bbb197tIgsF5F8EVkvIoP9qc0453uXZuBV5eG3nb/x7P43XHyQX8IvZ45m7ICkgL53WJhw/5yxRIQJdzy/ido6b0Df3wSG6+TIJhv+2hb+nknkALOBtQ0bVXWZqmaqaiawCNitqpsa7LLwxHZVPeRruxE4oqrDgQeB+/yszThkQM84FkwYyPPZBex28F/Yr249yBP/2slXJg5k7nkDgvIZfRNj+cW1o9m47yhPtKNLbOZzue4ykuIiSUkI3FlkV+JXSKjqdlVtbq7oBcBzLXi7mcBS3/MVwDSxoQgd1i2XDCcqPIwH3nQ58vl5bg93vrCZcQOT+Nk1o4L6WTMz07hmbD8eeiuPrYWlQf0s03oud/1CQ/brpG1C0ScxD3j2tLY/+y41/bRBEKQBBQCqWguUAskhqM8EQe+EGL5x4WBe2XyAbQdC+4vzWGUNi/+2gbioCB5fOJ7oiPCgf+YvZp5Ncrcobl/+CZU17f+u865CVXEV2cgmfzQbEiLylojkNPKY2YJjJwIVqprToHmhqp4DTPE9FrW2aBFZLCLZIpJdXFzc2sNNiCyeOozE2MiQLnPq9Sp3LN9MwWcVPLbwXFITY0LyuUlxUdw/Zyw7i8u57/UdIflM07yDpZV4qmptZJMfmg0JVZ2uqqMbeaxswfvP57SzCFXd7/vTAzwDTPBt2g8MABCRCCARONxETUtUNUtVs1JSUlpQhnFCYmwk3/6vYbybW8zHe0IzKd4f3s3nre1ufnLVWUwY0jMkn3nClPQUvnb+YP68bk+HX4ipszjZad3bOq3bKmiXm0QkDJhLg/4IEYkQkV6+55HA1dR3fgOsAm7wPb8OeEftLqUO72vnD6Z3QjS/eT34CxO9u+MQD77lYta4NL52/uCgflZT7poxkmEp8dz5wmZKK2wSQKe5bM4mv/k7BHaWiBQCk4HVIrKmweapQIGq7mrQFg2sEZEtwCbqzx7+6Nv2FJAsIvnAHcDd/tRm2ofYqHC+Oy2dj/cc4b3c4F0a3FNSzm3PfcJZqd359axzHOukjI0K58F5mZSUVfHTlTnNH2CCKreojN4J0fSIj3K6lA7L39FNL6tqf1WNVtU+qnp5g23vqeqk0/YvV9XxqjpGVc9W1dtUtc63rVJV56jqcFWdcFq4mA5sXtYABvaM4zdrcvEGYWGiiupavv33DYgITy4aT2xU8Duqz2RM/yRum5bOqs0HWLlpv6O1dHW20JD/7I5rE3RREWHccWkG2w8e459bDwb0vVWVu17cSq7bwyMLxjGgZ1xA37+tbrpoGOMGJvHTf+RwsPS40+V0SV6vknfIY5ea/GQhYULiS2P7MTI1gQfeyKUmgHcmP/XBbl7ZfIA7LxvBf2W0n0EMEeFhPDg3k5o65QcvbAnKGZQ5s4IjFVTWeO1Oaz9ZSJiQCAsT7rxsBHsOV/BCdmCWOf1wZwn3vLaDy8/uw3cuGhaQ9wykwb3i+enVo/ggv4S/frTH6XK6nNwi67QOBAsJEzLTzurNuQOTePhtl983nB04epzvPvMJg5PjuH/O2HZ7N+2CCQO4ZGRv7nltB/mHPE6X06WcGNmUbiHhFwsJEzIiwg9njMR9rIqlH+5p8/tU1tRx0983UFXr5clFWSTEtHwJ0lATEe798jnERYVz+/JNVNfaJIChkusuo3+PWLpFRzhdSodmIWFCatLQZKZmpPD4v3ZyrLJt9xH8fNU2NheW8ru5YxneAW6S6p0Qwz2zzyFn/zF+/47zM+N2FXm20FBAWEiYkPvh5SM4WlHDH9e2fpTzM+v38dzHBdxy8XAuPzs1CNUFx4zRfblufH8efTefDXuPO
F1Op1dT52VncZldagoACwkTcqPTErlqTF+e+mB3q9aI3rjvCD9blcPUjBS+d2lGECsMjp9dM4q+ibHc8fwmyqtqnS6nU9tTUk5NnTIitf2fabZ3FhLGEd+/NIOqWi+Pvpvfov2LPVV85+8bSU2M4ZH5mYSHtc+O6jNJiInkgblj2fdZBb9cvd3pcjq1XJuOI2AsJIwjhqZ0Y874/jyzfh+FRyrOuG9NnZebn9nI0ePVPHl9FklxHXeKhYlDk1k8dSjP/mcfb293O11Op+Uq8hAmMCzFziT8ZSFhHHPb9HQQeOitM3fm/vrV7fxn92fcO3sMo/p1D1F1wXPHpRmMTE3grhe3cris5ZfbTMu53GUM7hVPTKSzU7R0BhYSxjF9E2P56qRBvLSxkDx34/cQ/OOT/fx53R6+fsFgrh2XFuIKgyM6IpyH5mdy7HgNP3ppa9Bnx+2KXG4PGb3tUlMgWEgYR33n4uHERUXwuze+uMzptgOl3P3SFiYM6cmPrzzLgeqCZ2Rqd+68PIM3PnWzYkNg7kA39Spr6thzuNwWGgoQCwnjqJ7xUXxzyhBe31bE5oKjJ9uPVlTz7b9vICk2ike/ci6R4Z3vr+qNFw5l4pCe/O8rn1Lw2Zn7ZUzL5R8qw6vYPRIB0vn+zzMdzjenDKVnfBS/9S1zWudVbn1uE+7SKh6//lxSEqIdrjA4wsOE380dC8D3n99MnU0CGBAnpuOw4a+BYSFhHNctOoLvXDSMD/JL+DC/hAffdLHWVczPv3Q24wb2cLq8oOrfI47//dLZ/GfPZ/zxfVtCJRBc7jKiwsMYlBzvdCmdgoWEaReunzSIfokx3PnCZv7wbj7zzxvAVyYOdLqskJh9bhpXjE7ld2/k8umBY06X0+G53B6GpsR3ykuUTrBv0bQLMZHh3DY9nQOllYztn8jPv3S20yWFjIjwq1nnkBQXxfeWb/J7htyuLrfIFhoKJH/XuJ4jIttExCsiWQ3aF4rIpgYPr4hk+rZFicgSEXGJyA4R+bKvPVpElotIvoisF5HB/tRmOp4vn9ufX80azR9vyOpy49t7xkfxm+vGkOv28MCbXxzpZVrGU1nD/qPHbcnSAPL3TCIHmA2sbdioqstUNVNVM4FFwG5V3eTb/BPgkKpmAKOAf/nabwSOqOpw4EHgPj9rMx1MRHgYCycOondCjNOlOOLiEb25ftJA/vj+Lj7aedjpcjqkvENlgE3HEUh+hYSqblfV3GZ2WwA81+D1N4B7fMd7VbXE1z4TWOp7vgKYJu11JRljguTHV57F4OR47nydNTO5AAAPXklEQVRhc5unUu/KTtyUacNfAycUfRLzgGcBRCTJ1/YLEdkoIi+ISB9fWxpQAKCqtUApkByC+oxpN+KiInhg7liKjlXy81XbnC6nw8ktKiM2Mpz+PWKdLqXTaDYkROQtEclp5DGzBcdOBCpUNcfXFAH0Bz5U1XOBj4D7W1u0iCwWkWwRyS4uLm7t4ca0a+MG9uDmi4fz0sb9vLr1oNPldCgut4f0Pt0I64CzBLdXza7rp6rT/Xj/+fjOInwOAxXAS77XL1DfFwGwHxgAFIpIBJDo27+xmpYASwCysrLsDiTT6Xz3kuG8l3uIu1ZsQYArzunrdEkdQq7bw39lpDhdRqcStMtNIhIGzKVBf4TWz2T2CnCRr2ka8Knv+SrgBt/z64B31GY+M11UZHgYjy08lyEp8dy0bCM/XLHZFipqxpHyaoo9VdYfEWD+DoGdJSKFwGRgtYisabB5KlCgqqffRnoX8HMR2UL9yKfv+9qfApJFJB+4A7jbn9qM6ej694jjxZvO5+aLh/HChkKufOR9PtlnS5825cR0HDaxX2A1e7npTFT1ZeDlJra9B0xqpH0v9QFyenslMMefeozpbCLDw/jB5SP5r4zefG/5Jq574iNuvSSdmy8eRoTdUXyKkyHRx+ZsCiT7W2ZMBzBhSE9evW0KV4/py4NvuZi35N/sO2wzxzaU6/aQEBNBaveueZ9NsFhIGNNBJMZG8vD8cTw8PxNXkYcrH3mfFzcU2qJFPq6iMkb0ScBurwosCwljOpiZmWm8dvsURvXtzvdf2Mwtz35CaUXXvvFOVcl1e6w/IggsJIzpgPr3iOPZxZP4weUjWJNTxIyH1/LhzpLmD+ykij1VlB6vsZFNQWAhYUwHFR4m9Tfdfed8YiLDWfin9dzz2naqa71OlxZyub5O63TrtA44CwljOrgx/ZNYfeuFzD9vIE/+axezHltH/iGP02WFVG6RzdkULBYSxnQCcVER3DP7HJYsGs+Bo8e5+vcf8LeP9nSZTm2X20OvblEkd+ucS906yULCmE7ksrNTWXP7VCYMSeanK7dx49JsSsqqnC4r6HLdZTY9eJBYSBjTyfTuHsNfvnYeP7tmFB/klzDjobW8u+OQ02UFjder5LttNbpgsZAwphMKCxO+fsEQXrnlQnp1i+brf/mY/7cyp1Mujbr/6HHKq+ssJILEQsKYTmxEagL/uPkCbrxwCH/9aC9X//4Dth0odbqsgDoxHceIVBvZFAwWEsZ0cjGR4fz06lH87cYJHDtew7WPruPJf+3E6+0cndqfD3+1M4lgsJAwpouYkp7CmtuncsnI3tzz2g6uf2o9B0uPO12W31xFHvolxtA9JtLpUjolCwljupAe8VE8cf147vvyOWwqOMqMh95n9ZaOvfqdy11m03EEkYWEMV2MiDDvvIGsvnUKg3vFc/MzG/n+85sp64CLGtXWeckvtuGvwWQhYUwXNaRXPCu+PZlbLxnOy58UcuXD77Nhb8da1GjvZxVU13otJILIQsKYLiwyPIw7LhvB8m9NxqvK3Cc/4sE3XdTWdYz5n1w2HUfQWUgYYzhvcP2iRjPH9uPht/OY8+RH7D1c7nRZzcp1exCB4b1t+Guw+LvG9RwR2SYiXhHJatC+UEQ2NXh4RSRTRBJOay8RkYd8x0SLyHIRyReR9SIy2L8fzRjTGt1jInlgXiaPLBhH/qEyrnz4fV7ILmjX8z/lucsY1DOO2Khwp0vptPw9k8gBZgNrGzaq6jJVzVTVTGARsFtVN6mq50S7b9te4CXfYTcCR1R1OPAgcJ+ftRlj2uBLY/vx+u1TGZ2WyA9WbOHmZzZytKLa6bIalev22P0RQeZXSKjqdlXNbWa3BcBzpzeKSAbQG3jf1zQTWOp7vgKYJrYOoTGOSEuK5Zn/nsRdM0byxjY3Vz3yAdsPHnO6rFNU1daxu6Tc+iOCLBR9EvOAZxtpnw8s18/PZdOAAgBVrQVKgeTG3lBEFotItohkFxcXB6FkY0x4mHDTRcN48abzqfV6+fLjH/Lmp26nyzppV3E5dV61eySCrNmQEJG3RCSnkcfMFhw7EahQ1ZxGNs+n8fBolqouUdUsVc1KSUlpy1sYY1po7IAkVt1yIcN7d2Px37J58l8720U/xck5m+xMIqgimttBVaf78f6NBoGIjAUiVHVDg+b9wACgUEQigETgsB+fbYwJkD7dY1i+eDJ3rtjMPa/tIO9QGb+aNZroCOc6jF1uDxFhwpBe8Y7V
0BU0GxJtJSJhwFxgSiObF/DF8FgF3AB8BFwHvKPt4Z8rxhgAYqPC+cOCcQxP6cbDb+ex93A5T1w/3rHV4HKLyhjSK56oCBvJH0z+DoGdJSKFwGRgtYisabB5KlCgqrsaOXQuXwyJp4BkEckH7gDu9qc2Y0zgiQjfuzSD3y8Yx5bCUmY+uu7k+tKh5nJ7rD8iBPwd3fSyqvZX1WhV7aOqlzfY9p6qTmriuKGquuO0tkpVnaOqw1V1QhPhYoxpB64Z24/nvzWZ6lovsx9bxzs7QtuhXVFdy77PKqw/IgTsPM0Y0yZjBySx8pYLGNwrnhuXZvOn93eFrEM7z10GYHM2hYCFhDGmzfomxvLCtycz4+xUfrl6O3e/uJXq2uDP+/T5anQWEsFmIWGM8UtcVASPfuVcvnvJcJZnF3D9U+v5rDy4d2i73B6iI8IY2DMuqJ9jLCSMMQEQFiZ8/7IRPDw/k00FR7n20XXkuYPXoZ3rLmN4726Eh9mkDMFmIWGMCZiZmWksXzyJiuo6Zj/2Ie/lHgrK57iKPNZpHSIWEsaYgBo3sAcrb7mA/j3j+MZfPubP63YHtEO79HgNRccqbfhriFhIGGMCLi0plhXfnsz0s/rwv698yo9fzqEmQAsZ5dl0HCFlIWGMCYr46AieuH4837loGM/+Zx9ffeo/AZlyPNcXEul9bKGhULCQMMYETViY8MMZI3lg7lg27D3CtY+uI/9QmV/v6SryEB8VTlpSbICqNGdiIWGMCbrZ5/bn2cUT8VTWMuuxdbyf1/Yp/nN903HYcjOhYSFhjAmJ8YN6svKWC0hLiuVrf/6Yv360p03vk+cus/6IELKQMMaETP8ecay46XwuHpHC/1u5jZ/+o3Ud2iVlVRwur7bpOELIQsIYE1LdoiN4clEW35o6lL/9ey9f//PHlFbUtOhYl2/GWQuJ0LGQMMaEXHiY8KMrz+I3141h/e7DzHpsHbuKm+/QPjGyKSPVRjaFioWEMcYxc7MGsOybkzh6vIZrH13HuvySM+7vcnvoERdJikMLHXVFFhLGGEdNGNKTlTdfQGpiDF99+j8sW7+3yX1d7jIy+tjIplCykDDGOG5AzzhevOl8pqb34icv5/DzVduoPa1DW1Xr52yy6ThCyt/lS+eIyDYR8YpIVoP2hSKyqcHDKyKZvm0LRGSriGwRkddFpJevvaeIvCkieb4/e/j3oxljOpKEmEj+dMN5fPPCIfzlwz18/S8fU3r88w7tg6WVeKpqSbdO65Dy90wiB5gNrG3YqKrLVDVTVTOBRcBuVd0kIhHAw8DFqjoG2ALc4jvsbuBtVU0H3sbWuDamywkPE/7n6lHcO/scPtp5mNmPrWNPSTnweae13SMRWv6ucb1dVXOb2W0B8Jzvufge8VJ/UbE7cMC3bSaw1Pd8KXCtP7UZYzqu+RMG8rcbJ3K4vJprH1vHRzsPNxj+aiObQikUfRLzgGcBVLUGuAnYSn04jAKe8u3XR1UP+p4XAX1CUJsxpp2aPCyZlTdfQHJ8FIueWs9zHxfQp3s0SXFRTpfWpTQbEiLylojkNPKY2YJjJwIVqprjex1JfUiMA/pRf7npR6cfp/WTzzc5Ab2ILBaRbBHJLi5u+xwwxpj2bVByPC/ffAHnD+/F7pJyu4nOARHN7aCq0/14//n4ziJ8Mn3vuRNARJ7n874Ht4j0VdWDItIXaHJJK1VdAiwByMrKCtxqJsaYdqd7TCRP35DF0+t2Mzot0elyupygXW4SkTBgLp/3RwDsB0aJSIrv9aXAdt/zVcANvuc3ACuDVZsxpmOJCA9j8dRhnD+sl9OldDn+DoGdJSKFwGRgtYisabB5KlCgqrtONKjqAeB/gbUisoX6M4tf+zbfC1wqInnAdN9rY4wxDpJArj3rhKysLM3Ozna6DGOM6VBEZIOqZjW3n91xbYwxpkkWEsYYY5pkIWGMMaZJFhLGGGOaZCFhjDGmSRYSxhhjmtThh8CKSDHQ9ColZ9YLOPNSWF2LfR+fs+/iVPZ9nKozfB+DVDWluZ06fEj4Q0SyWzJOuKuw7+Nz9l2cyr6PU3Wl78MuNxljjGmShYQxxpgmdfWQWOJ0Ae2MfR+fs+/iVPZ9nKrLfB9duk/CGGPMmXX1MwljjDFn0GVDQkRmiEiuiOSLyN3NH9E5icgAEXlXRD4VkW0icpvTNbUHIhIuIp+IyD+drsVpIpIkIitEZIeIbBeRyU7X5BQR+Z7v/5McEXlWRGKcrinYumRIiEg48ChwBfXrbC8QkVHOVuWYWuD7qjoKmATc3IW/i4Zu4/MFsbq6h4HXVXUkMJYu+r2ISBpwK5ClqqOBcOpX3+zUumRIABOAfFXdparV1K+e1+ya3Z2Rqh5U1Y2+5x7qfwGkOVuVs0SkP3AV8Cena3GaiCRSv4DYUwCqWq2qR52tylERQKyIRABxwAGH6wm6rhoSaUBBg9eFdPFfjAAiMhgYB6x3thLHPQT8EPA6XUg7MAQoBv7su/z2JxGJd7ooJ6jqfuB+YB9wEChV1TecrSr4umpImNOISDfgReB2VT3mdD1OEZGrgUOqusHpWtqJCOBc4HFVHQeUA12yD09EelB/xWEI0A+IF5Hrna0q+LpqSOwHBjR43d/X1iWJSCT1AbFMVV9yuh6HXQB8SUT2UH8Z8hIR+buzJTmqEChU1RNnlyuoD42uaDqwW1WLVbUGeAk43+Gagq6rhsTHQLqIDBGRKOo7n1Y5XJMjRESov968XVUfcLoep6nqj1S1v6oOpv7vxTuq2un/tdgUVS0CCkRkhK9pGvCpgyU5aR8wSUTifP/fTKMLdOJHOF2AE1S1VkRuAdZQP0LhaVXd5nBZTrkAWARsFZFNvrYfq+qrDtZk2pfvAst8/6DaBXzd4XocoarrRWQFsJH6UYGf0AXuvLY7ro0xxjSpq15uMsYY0wIWEsYYY5pkIWGMMaZJFhLGGGOaZCFhjDGmSRYSxhhjmmQhYYwxpkkWEsYYY5r0/wGKtE2Kfdh95gAAAABJRU5ErkJggg==\n", + "text/plain": "
" + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ] + } + }, + "891e2bdcc12d4314affa4fd372ed7ade": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "89880b2c3e03469da53b8a7e9e2e930b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "8991ca296f464086aab8e12cc644430c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget011" + } + }, + "89ae5379ee8b4e2d92f116a018b9420e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_c2eca071d21942c98a47aaf881130883", + "IPY_MODEL_a6a4d48baea44d659e3b2dd7e54fcd17" + ], + "layout": "IPY_MODEL_3044da8a1f89485398f1ea9d4965bc55" + } + }, + "8ae2c037e98f420486a61a8570daf106": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "8b14eeb5b78e4e4cb98441ffaeccf4fb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget002" + } + }, + "8c168f5c8ecc4d0ba203b60193856d1c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_b9ad33908a4f4a6ba687c820c123c37a", + "style": "IPY_MODEL_094d34956035446984a6cb8a6efc22a7", + "value": "1e-07" + } + }, + "8c27b4b759354d64b25bcb3462c444ef": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DropdownModel", + "state": { + "_options_labels": [ + "AC", + "DDPG", + "PG", + "PPO", + "SAC", + "TD3", + "TRPO" + ], + "description": "Algorithms:", + "index": 0, + "layout": "IPY_MODEL_b5ac8df291f9438bacc64a6cb2805620", + "style": "IPY_MODEL_45850b0512424834a6d4c70e60892ae8" + } + }, + "8c59866961674911b2157bded443e366": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_fc69d16aa7e547b09859e2ca7dbfbde8", + "IPY_MODEL_6caef128e4df40ebb76ef90ad9a40d41" + ], + "layout": "IPY_MODEL_00663174be1342fbbd29bc99cdd6d3aa" + } + }, + "8ca1f8992583484a8a0ff2f7f46afee2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_76c7ceb7a42e44048e694b71f27f56eb", + "style": "IPY_MODEL_97b119b9f8fc4a5f80b7f35b2fbc20dd", + "value": "Input(shape=(None, 3), name='input_layer')" + } + }, + "8d025735275c4dfdbbbf2d491e727c08": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "layout": "IPY_MODEL_ce5b912531614dfe90ee3e20fa7ba467", + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYoAAAD8CAYAAABpcuN4AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xd81fW9+PHXO5uEkJCQhAz23hAiiANBUXEiWHGPum613ttbtdba3229HVZba/WqtUWte1WFgsUFCOICDSHskYCMnJMJWWQn5/P743yjB8g44ZycQd7Px+M88j3f+eabw3nn+5lijEEppZRqT4i/A1BKKRXYNFEopZTqkCYKpZRSHdJEoZRSqkOaKJRSSnVIE4VSSqkOaaJQSinVIU0USimlOqSJQimlVIfCPDlYRK4AHgTGANOMMdku2yYCfwf6AA7gFGNMvYhcDTwAGMAOXGeMKRORBOAtYDCwD1hojCnvLIZ+/fqZwYMHe/LPUEqpHmfDhg1lxpgkd/YVT4bwEJExOJPA34F7WxOFiIQBOcD1xphNIpIIVACCMzmMtZLDH4FaY8yD1vJhY8zDInI/0NcY8/POYsjKyjLZ2dmd7aaUUsqFiGwwxmS5s69HRU/GmB3GmF1tbDoP2GyM2WTtd8gY04IzUQgQIyKC82nDbh0zD3jJWn4JuMyT2JRSSnlHd9VRjASMiHwkIjkich+AMaYJuAPYgvVkATxvHZNijCm0louAlG6KTSmlVBd0mihEZKWIbG3jNa+Dw8KAM4BrrZ/zReQcEQnHmSimAGnAZuAXxx5snOVh7ZaJicjtIpItItmlpaWd/ROUUkp5oNPKbGPMnBM4bwGw1hhTBiAi7wOZQJV1zj3W+n8C91vHFItIqjGmUERSgZIOYloELAJnHcUJxKeUUspN3VX09BEwQUSirYrts4DtgA0YKyKtNe3nAjus5WXAjdbyjcDSbopNKaVUF3jaPHY+8CSQBCwXkVxjzPnGmHIReQz4BmcR0vvGmOXWMf8LrBWRJmA/cJN1uoeBf4rILdb6hZ7EppRSyjs8ah4bCLR5rFJKdZ3PmscqpZTyvZqGZn777+3sK6vxyfU0USilVJBZuaOY5z//ltIjDT65niYKpZQKMktz7aTH92LqwL4+uZ4mCqWUCiKHaxpZu7uUiyelEhIiPrmmJgqllAoi728ppNlhmDcp3WfX1EShlFJBZFmunRHJvRmTGuuza2qiUEqpIGGrqOPrfYeZNzkN57iqvqGJQimlgsR7m5yDbV/qw2In0EShlFJBY2munSkD4xmYGO3T62qiUEqpIJBXXM2OwirmTUrz+bU1USilVBBYtslOiMBFEzVRKKWUOoYxhqW5dk4f3o+k2EifX18ThVJKBbjcgxUcOFzLpX4odgJNFEopFfCW5tqJCAvh/PH9/XJ9TRRKKRXAmlsc/HtzIeeMTqZPVLhfYtBEoZRSAeyrvYcoO9LAvMn+KXYCTRRKKRXQlubaiY0MY9aoZL/FoIlCqSBXXd/EVlulv8NQ3aC+qYUPtxYxd3x/osJD/RaHJgqlgtyvl27j0qc+Z93eQ/4ORXnZ6p0lHGloZt5k3w7ZcSxNFEoFsSMNzby/tRCHgf96YyNlPprxTPnGsk12+vWOZMawRL/GoYlCqSD2/pZC6pscPDR/AhV1Tfz0rVwcDuPvsJQXVNU3sWpnCZdMSiXURxMUtUcThVJBbHFOAUP7xXD1tAH8+pKxfJZXxjOf7vF3WMoLPtpaRGOzw2+d7FxpolAqSB08XMu6vYdZkJmOiHDNtIFcPDGVx1bs5utvD/s7POWhZZvsDEyIZvKAeH+HoolCqWD1r402AC6b4qzoFBH+sGACA/r24r/e2MjhmkZ/hqc8UFJdzxf5ZT6foKg9miiUCkLGGBZvtDFjaCIZfb+fmyA2KpynrsnkcE0jd/9T6yuC1fLNzgYK/uxk50oThVJBKOdABd+W1bAg8/hmk+PT4/ifi8ewZlcpiz7b64folKeW5toZm9qH4cm+mxe7I5oolApCi3MK6BUeygUTUtvcft2pg7hwQn/+9NEuNuzX+opgsv9QDbkHKwLmaQI0USgVdOqbWnhvk5254/vTOzKszX1EhIcvn0h6fC/+8/WNlGt9RdBYluucF/uSAGjt1EoThVJB5pOdJVTVN3N5ZkaH+/WJCufpazIpO9LIvW9vwhitrwh0xhiWbrIzbUgCafG9/B3OdzRRKBVk3t1QQP8+UW711p2QEccDF45m1c4SnvvsWx9Epzyxo7Ca/JIjAdF3wpUmCqWCSNmRBtbsLmV+ZrrbvXVvPG0wc8f155EPd5JzoLybI1SeWLrJRliIcGE7dU/+oolCqSCyNNdOi8OwYIr7g8SJCI/8YCL946L4z9c3Ulnb1I0RqhPlcBjey7Uzc2QSCTER/g7nKB4lChG5QkS2iYhDRLJc1l8rIrkuL4eITLa2TRWRLSKSLyL/J1ZvEhFJEJEVIpJn/ezr2T9NqZPP4pwCJmbEMSKla80m43o5+1eUVNdz7ztaXxGIsveXY6+sD6jWTq08faLYCiwA1rquNMa8ZoyZbIyZDFwPfGuMybU2PwPcBoywXnOt9fcDq4wxI4BV1nullGVnURXb7FWdVmK3Z/KAeH4+dzQrthfzwhf7vBuc8tjSXBu9wkOZMybF36Ecx6NEYYzZYYzZ1cluVwNvAohIKtDHGLPOOP+keRm4zNpvHvCStfySy3qlFLA4x0Z4qHjUbPKWM4YwZ0wKf/hgB5sOVngxOuWJxmYHy7cUcu7YFGLaafLsT76oo7gSeMNaTgcKXLYVWOsAUowxhdZyERB4aVUpP2lucbBko43Zo5I9Kr8WER69YiLJsVH8+PUcKutO3voKYwxrd5fyo1c2sGyT3d/hdOjz/FIqapsCstgJoNPUJSIrgf5tbPqlMWZpJ8dOB2qNMVu7EpQxxohIu4WoInI7cDvAwIEDu3JqpYLS5/lllFY3sOAEi51cxUdH8OQ1U1j4t6/4+Tubeea6zIAYeM5bGpsdLNtk57nP9rKzqJoQgU93lzIxPY7B/WL8HV6blubaiY8O58wRSf4OpU2dPlEYY+YYY8a38eowSViu4vunCQAb4PpJz7DWARRbRVOtRVQlHcS0yBiTZYzJSkoKzBurlDctzrERHx3O2aOTvXK+zIF9uW/uKD7cVsTLX+33yjn9raK2kadX53PGI59YHQzhTz+YyJp7ZxMeKtzz9iZaAnCQxNrGZlZsL+bCCalEhAVmQ9RuKwwTkRBgIXBm6zpjTKGIVInIqcB64AbgSWvzMuBG4GHrpzuJSKmTXlV9Ex9tK+LKUwZ49Yvk1jOGsm7vYX6/fAeZA/syISPOa+f2pf2HavjH59/yz+wC6ppaOHNEP/50xSRmjuj33ZPSby8bz0/ezGXR2r3cMWuYnyM+2sodJdQ2tgRcJztXnjaPnS8iBcAMYLmIfOSyeSZw0Bhz7PCVdwLPAfnAHuADa/3DwLkikgfMsd4r1eN9sKWQhmaHV4qdXIWECH++YhKJvS
P48es5VNUHV33Fhv3l3PHqBmY/uobXvz7AhRNS+eAnZ/LKLdM5a2TSUcVpl05K48IJ/XlsxS52FFb5MerjLcu10b9PFNMGJ/g7lHZJsLenzsrKMtnZ2f4OQ6lus/BvX3GopoGVd5/VLXUJG/YfZuHf1zF3XH+eumZKQNdXtDgMH28r4tnP9pJzoIK4XuFcO30gN542mJQ+UR0ee7imkfP+spak2EiW/vj0gCjmKa9p5JTfr+TmM4bwwIVjfHptEdlgjMnqfE/tma1UQDtwqJav9x1mQWZGt32BTx2UwL3njWL5lkJeXX+gW67hqdrGZl76ch+zH13DHa/lUHakkf+9dBxf3n82980d3WmSAEiIieDhBRPYUVjFE6t2+yDqzn2wtYhmhwnoYifoxjoKpZTnFm8sQATmd2HIjhPxHzOH8tXeQzy0fAfnjU1x64vXF0qq6nnxy328tv4AlXVNZA6M5xcXjOa8cf3dHuvK1ZyxKSzMyuCZNXs4e3QKUwf5dwCIpbk2hiXFMC6tj1/j6Iw+USgVoIwxLM6xcdqwxG4fcjokRPjdvPG0OAx//LCzPrS+8ehHuzj9kU/426d7OG1YIu/ecRqL7zydCyaknlCSaPU/F48lNa4X9769idrGZi9G3DX2ijq+3neYeZPTA7q4DzRRKBWwNuwv58Dh2hMesqOrBiZG88MzBvNuTgFbCip9cs32fLmnjKdW53P+uP6svncWz1w31Wt//cdGhfPoFZP4tqyGhz/Y6ZVznoh/b7ZjDAFf7ASaKJQKWO/mFBAdEcr549rq79o97po9nMSYCH7z721+GziwqcXBr5ZuY0BCLx69YhKDEr3fSW7GsERuOWMIL3+1n8/ySr1+fncszbUzaUB8wHYCdKWJQqkAVN/Uwr83FzJ3fH+fjv0TGxXOPeeN4pt95Xywtchn13X1whffkl9yhAcvGUdUeGi3Xedn549ieHJvfvb2Zp8PZZJfcoRt9irmBcHTBGiiUKrLSqrq+eOHO6nuxn4HK7YXU13fzA98VOzk6spTBjC6fywPvb+D+qYWn167qLKeJ1bmcc7oZM7p5lFUo8JDeWzhJEqPNPC/y7Z167WOtWyTnRCBiycG1gRF7dFEoVQXfby9mL+u2cMtL2ZT19g9X6SLcwpIi4vi1KGdT3fqbaEhwv9cPJaC8jqfD0f++/d30OQw/PqScT653sSMeO6aPZzFG218uLWw8wO8wBjDslwbM4Ylkhwgrcs6o4lCqS6yV9QhAtn7D3P7K9k0NHs3WZRU17M2r4z5memEeNC6xxOnD+/HnDHJPL06n9LqBp9c88s9Zby3yc4dZw1jYGK0T64JcNfZw5mQHscDS7b65N+6uaCSfYdqmTepe5s8e5MmCqW6yFZRR0bfXjy8YCKf5ZVx1+sbaWpxeO38y1qnO/VDsZOrBy4cQ31TC4+t6P7msk0tDn5tVWD7eiym8NAQHls4iSMNzTywZEu3V+IvzbUTERrC+eN910jBU5oolOoie0UdaXG9WHjKAB68ZCwrthdzrxdHJn1nQwGTB8QzLKm3V853ooYm9eaGGYN585uDbLd37/hIL36xjzwfVGC3Z0RKLPedP4oV24t5Z0NB5wecoIraRt7bbGf26CTieoV323W8TROFUl1kr6gnva+zA9xNpw/hvrmjWJpr55de+Gt0u72KnUXVXJ4ZGMUSPzlnBHG9wvntv7d321/aRZX1PL5yt08qsDty8+lDmD4kgd+8t52C8lqvnru+qYVFa/cw84+rKTvSwLXTB3n1/N1NE4VSXdDc4qCoqp50l57Sd84azl2zh/PmNwf5jYdfqO/mFHg83ak3xUWH89M5I/lq7yFWbC/ulmv4ugK7PSEhwqNXTMJhDD97ezMOLzwhOhyGpbk2zvnzpzz0/k4yB/Xlg5+cycyRwTWPjiYKpbqguLqBFoc5bkiNe84byQ9PH8wLX+zjsRUnNuBcc4vD+aUyOoX46BOf7tTbrpk+kOHJvXno/R00NnuvLgb8V4HdngEJ0fzqkrF8tfcQL321z6NzfbXnEPOe/oKfvJlLXK9wXrt1Oi/+cBqj+wf2uE5t0UShVBfYyusAjnqiAOdc1L+6eCxXnTKAJz/J569r8rt87s/yyig70siCACl2ahUeGsIvLxrDvkO1vPzVPq+d158V2B1ZmDWAs0cn8/AHO8kvOdLl4/OKq7nlxW+4+tl1HDrSwGMLJ/Hv/zyD04f364ZofUMThVJdYK9wJoq2BukTEX4/fwKXTkrjjx/u4qUv93Xp3O/kFJAQE8GsUd6Z7tSbZo9KZubIJJ5YlcfhmkavnLO1AvvXF/unArs9IsLDl08gOiKUe97eRLObLdpKqur5xeLNnP/4Wr7+9jA/nzuaT+6dxYLMDL81c/YWTRRKdYGtou0nilahIcKfF07i3LEp/HrZNv6ZfdCt81bWNbFiezGXTkoLiAl12vL/LhpDbWMLj6/0fC6H4qrvK7DnjPVfBXZ7kmOj+N1lE9h0sIJn1uzpcN+ahmb+smI3sx5dwzsbCrjxtMF8et9s7pg1LKASoCcC8xOpVICyVdSREBNBr4j2vwDCQ0N46popnDmiH/e/u5n3Ntk7Pe/yzYU0Njt8NlLsiRiZEss10wby2voD5BVXe3Su3y8PjArsjlw0MZVLJ6XxxKo8ttqOH023ucXB6+sPMOvRNTyxKo/Zo5JZefdZ/PqScSTEBE4dkzdoolCqC+wVdaTFdz7sQmRYKIuuzyJrUAI/fSuXlZ20GFqcU8CI5N6MTw/sis6fnjuS6IhQfrd8xwmf48s9ZSwLoArsjvxm3jgSe0dw9z9zvxv3yhjDqh3FzH3iMx5YsoVBCdEsvvM0nr42s1tGug0EmiiU6gJbeV27xU7H6hURyvM3ZTE2rQ93vp7D53llbe63r6yG7P3lXD61+6Y79ZaEmAh+cs4IPt1dyupdJV0+PlArsNsTHx3BI5dPZHfxER5bsZvNBRVc/ew6bnkpmxaH4W/XTeXtH80gc6B/Z8rrbpoolHKTMcZ6onB/trnYqHBevnkaQ/vFcNvL2WTvO3zcPos32hCByyYHVmun9twwYzCDE6OdxUddHLokUCuwOzJrVDLXTh/Is5/t5dKnviCv+Ai/mTeOj386k7nj+wd8cvcGTRRKuamqrpmaxha3nyhaxUdH8Mot00mNi+KHL3xz1OxxDodhcU4BZwzvR/+44BhJNCIshAcuHEN+yRFeX3/A7eMCvQK7Iw9cOIazRyXz49nDWPOzWdwwYzDhoT3n67Pn/EuV8lBBhXNYh64mCoCk2EhevXU6fXqFc/0/1rOryFkZ/M2+wxSU1wV0JXZbzh2bwmnDEvnLyt1U1ro3L0cwVGC3JyYyjOdvOoWfnT+a2KjgGaPJWzRRKOUme0U90HYfCnekxffi9dumExEawnXPr+fbshoW59iIiQjlvHHB9Re2iPD/LhpLZV0TT6zK63T/YKrAVsfTRKGUm1o727UOCHgiBiXG8Nqt02lxGK59dh3LtxRy4YRUoiN8N92pt4xN68OVWQN4+at97C1tvwdzs
FVgq+NpolDKTbaKOiLCQkj0sI38iJRYXr55GtUNzRxpaPb7vBOeuOe8UUSFh/LQ++03lw3GCmx1NE0USrnJVuFsGuuNVi7j0+N4/dZTufe8kUwfkuCF6PwjKTaSO2cPY+WOkjab/7ZWYJ8dhBXY6nuaKJRyU1f6ULhjQkYcd509IujHAbr59CFk9O3F75ZvP27ypu8rsMf6KTrlDZoolHKTu72ye5qo8FB+ccEYdhZV89Y3349t5VqBfbL2WO4pNFEo5YaG5hZKqhtIj9cWO225cEJ/Thnclz9/vIuq+iatwD7JaKJQyg1Fla1NY/WJoi0iwv9cPJZDNY08vTpfK7BPMsHXJk8pP+hseHEFEzPiuTwzgxc+30d4qGgF9knEoycKEblCRLaJiENEslzWXysiuS4vh4hMFpFoEVkuIjut4x52OSZSRN4SkXwRWS8igz2JTSlv+m5mOw/6UPQE980dRWiIaAX2ScbTJ4qtwALg764rjTGvAa8BiMgE4F/GmFwRiQYeNcasFpEIYJWIXGCM+QC4BSg3xgwXkauAR4ArPYxPKa9o7ZUdLOMx+UtKnyj+el0mLS1GK7BPIh4lCmPMDqCzduVXA29a+9cCq63lRhHJAVp7G80DHrSW3wGeEhExxhzd3k4pP7BX1JEcG0lkmJa3d2Z2AE7lqjzji8rsK4E3jl0pIvHAJcAqa1U6cBDAGNMMVAKJbZ1QRG4XkWwRyS4tLe2WoJVyZevi8OJKnUw6TRQislJEtrbxmufGsdOBWmPM1mPWh+FMHv9njNnb1aCNMYuMMVnGmKykpKSuHq5Ul9krvNvZTqlg0mnRkzFmjgfnv4o2niaARUCeMeZxl3U2YABQYCWSOOCQB9dWyiuMMdgq6rQFj+qxuq3oSURCgIVY9RMu63+HMwn89zGHLANutJZ/AHyi9RMqEByqaaSh2UGaVmSrHsrT5rHzRaQAmAEsF5GPXDbPBA66Fi2JSAbwS2AskGM1nb3V2vw8kCgi+cDdwP2exKaUt3w/vLj2ylY9k6etnpYAS9rZtgY49Zh1BUCbTaSMMfXAFZ7Eo1R3aO1Dob2yVU+lQ3go1Qntla16Ok0USnXCXlFPTEQocb163lzJSoEmCqU6ZauoJc1LExYpFYw0USjVCXtFvY7xpHo0TRRKdUJ7ZaueThOFUh2oa2zhcE2jVmSrHk0ThVIdsFdqiyelNFEo1YHv+1BoolA9lyYKpTrwfa9sTRSq59JEoVQHbBV1hAikxEb6OxSl/EYThVIdsFXU0b9PFGGh+l9F9Vz66VeqA/aKOi12Uj2eJgqlOqB9KJTSRKFUu1ochqLKek0UqsfrsYmi7EgD1fVN/g5DBbDS6gaaWoz2oVA9nkfzUQSzpz7J58Uv95Ee34vR/WMZZb1G9+/D0KQYwrXyssfT4cWVcuqxieKSSWkkxUayq6iaXUXVfLq7lGaHc+bV8FBhWFLv7xJHayJJjYvSEUR7EO1DoZRTj00UUwf1Zeqgvt+9b2x2sKf0CLuKqtlZVM2uoiq++fYwS3Pt3+3TJyrsuyePUS4JpE+UzlNwMmp9okjVubJVD9djE8WxIsJCGJPahzGpfY5aX1nbxK5iZ+LYaT19LN1op7rhwHf7DOkXwxnD+3HmiH7MGJZIrCaOk4K9oo4+UWH6+1Q9niaKTsRFhzNtSALThiR8t84Yg72ynp2FzuSxYX8572wo4JV1+wkNEaYMiOfMEUmcObIfE9PjtLNWkLKV15HeN9rfYSjld5ooToCIkB7fi/T4XpwzJgWAhuYWcvZX8Hl+KZ/llfH4qt38ZeVu+kSFcdqwfpw5sh8zRyQxIEG/eIKFraKODK2fUEoThbdEhoUyY1giM4Yl8rPzobymkS/2lPHZ7jI+yyvlw21FAAxKjObMEf04c0QSM4Ylav1GALNX1DHd5UlSqZ5KE0U36RsTwcUT07h4YhrGGPaW1fDZ7lI+zy9jSY6NV9cdIDREmDwg/rvEMXlAPKEh2qoqEFTXN1FV36yd7ZRCE4VPiDib2w5L6s1Npw+hsdnBxgPlfJ5fxtq8Mp5YlcfjK/O4bHIaj181xd/hKpzzZIPOQ6EUaKLwi4iwEKYPTWT60ETuOW8UFbWN/OH9nbyTU8AvLxpLkg5p7Xfah0Kp72lznAAQHx3BbTOH0OIwLNtk7/wA1e0KtFe2Ut/RRBEghifHMjEjjsU5Bf4OReF8oggPFZJ669OdUpooAsiCKelss1exq6ja36H0eLbyOlLjehGijQuU0kQRSC6ZlEZYiLB4oz5V+Ju9oo60eB26QynQRBFQEntHMmtUEv/aaKPFGqBQ+Ye9oo70eO0cqRRoogg4CzIzKK5q4Ms9Zf4OpcdqanFQVFVPuj5RKAV4mChE5AoR2SYiDhHJcll/rYjkurwcIjL5mGOXichWl/cJIrJCRPKsn33pgc4enUyfqDCW5Nj8HUqPVVxVj8No01ilWnn6RLEVWACsdV1pjHnNGDPZGDMZuB741hiT27pdRBYAR4451/3AKmPMCGCV9b7HiQoP5aKJaXywtYiahmZ/h9Mj2cqdTWO1s51STh4lCmPMDmPMrk52uxp4s/WNiPQG7gZ+d8x+84CXrOWXgMs8iS2YXZ6ZTl1TCx9uLfJ3KD2SvVIThVKufFFHcSXwhsv73wJ/BmqP2S/FGFNoLRcBKT6ILSBNHdSXgQnR2vrJT1qH79DOdko5dZooRGSliGxt4zXPjWOnA7XGmK3W+8nAMGPMko6OM8YYoN1mPyJyu4hki0h2aWlpZ2EEHRFhQWY6X+45RKH1163ynYLyOhJjIogKD/V3KEoFhE4ThTFmjjFmfBuvpW6c/yqOfpqYAWSJyD7gc2CkiKyxthWLSCqA9bOkg5gWGWOyjDFZSUlJboQRfOZPSccY+NdGHdLD1+wVdVqRrZSLbit6EpEQYCEu9RPGmGeMMWnGmMHAGcBuY8wsa/My4EZr+UbAnUR00hqUGEPWoL4szinA+YClfMVWUUdanCYKpVp52jx2vogU4HxSWC4iH7lsngkcNMbsdfN0DwPnikgeMMd636MtyMwgr+QIW21V/g6lxzDGWL2yNVEo1crTVk9LjDEZxphIY0yKMeZ8l21rjDGndnDsPmPMeJf3h4wx5xhjRljFXYc9ie1kcNGEVCLCQnxeqe1wGH7+zmY+2Vns0+sGgsq6JmobW7ToSSkX2jM7gMVFhzNnTDLLcu00tTh8dt0lG228lX2Q37y3nWYfXjcQFJS3Di+uvbKVaqWJIsAtmJLBoZpG1u72Teuu+qYW/vzxLvpGh7PvUC3LtxR2ftBJpHXCIi16Uup7migC3FmjkkiIiWCxj4b0eOGLfdgr63n6mkxGpvTm6dX5OHrQAIU2nbBIqeNooghw4aEhXDopjRU7iqmsa+rWax2uaeSvq/M5Z3Qypw3vx49nD2d38RE+3t5z6irsFXVEhYeQEBPh71CUChiaKILAgsx0GpsdvN/NxUBPfpJHTWMz918wGnBWpg9KjObp1fk9pomuvaKetPheiOiERUq10kQRBCakxzE8uXe3TpO6
/1ANr67bz5WnDGBESiwAYaEh3DlrGFtslXzqozoSfyuoqNNiJ6WOoYkiCLQO6fHNvnIOHDp2iCzv+ONHuwgLCeGnc0YetX7+lAzS4qJ46pOe8VRh1852Sh1HE0WQuGxyOiLOpqvetvFAOcs3F3LbzKEk9zm6WWhEWAj/cdYwsveXs/7bk7trS0NzC6XVDdqHQqljaKIIEmnxvZgxNJHFG707pIcxhj+8v5N+vSO5febQNve58pQB9OsdydOr87123UBUaI0aq01jlTqaJoogsiAzg/2Hask5UO61c67YXszX+w7z33NG0DsyrM19osJDufXMIXyWV0buwQqvXTvQ2LVprFJt0kQRROaO70+v8FCv9alobnHw8Ic7GZoUw1WnDOhw3+tOHURcr3Ce+uTkfaoo0EShVJs0UQSR3pFhnD8uhfc22WlobvH4fG9+c5C9pTXcP3eTXfe3AAAS6ElEQVQ0YaEdfxR6R4bxw9MHs3JHMTsKT85BCu0VdYhA/zgdvkMpV5oogsyCzAyq6pv5ZEe703W45UhDM4+v3M20wQmcO9a9yQRvOm0wvSPDTtq6CntFHcmxkUSE6X8LpVzp/4ggc/rwfiTHRvKuh8VPi9bupexIIw9cNMbtzmXx0RFcd+oglm8pZE/pEY+uH4hsOry4Um3SRBFkQkOEy6aks2ZXCYdrGk/oHMVV9Ty7di8XTUxl8oD4Lh1765lDiAwL4Zk1e07o2oHMXlGv9RNKtUETRRBakJlOs8Pw3qYTmyb18ZW7aXY4uO/8UV0+tl/vSK46ZSD/2mjj4OHu6fznDw6Hwaa9spVqkyaKIDS6fx/GpvY5oSE9dhdX89Y3B7nu1EEMSow5oev/x1lDEYG/rz15nioO1TTS2OzQoiel2qCJIkgtyExnU0El+SVdqyt45IOdxESG8V9njzjha6fG9eIHUzP4Z3YBJVX1J3yeQKJ9KJRqnyaKIHXp5DRCBJZ0YZrUr/YcYtXOEu6cNZy+Hg6jfcdZw2lxGJ79zN0p0QObTScsUqpdmiiCVHJsFDNHJrEkx+bWxEIOh+EPH+wgLS6KH54+2OPrD0yM5tJJaby67sAJV6oHku+eKHScJ6WOo4kiiC3IzMBeWc+6bw91uu97m+1sLqjknvNGERUe6pXr3zlrGPXNLbzwxbdeOZ8/FZTX0TsyjD5RbQ9jolRPpokiiJ03NoXYyDCWdNKnoqG5hT99tIuxqX2YPyXda9cfkRLL3HH9efHLfVTVd+/se93NXlFHWnyUTlikVBs0UQSxqPBQLpjQn/e3FFLX2P6QHq98tZ+C8joeuHAMISHe/SL88ezhVNc388pX+716Xl+zV2rTWKXao4kiyC3IzKCmsYWPtxe1ub2ytoknP8ln5sgkzhjRz+vXH58ex+xRSTz32V5qG5u9fn5fsZVrr2yl2qOJIshNG5xAenyvdof0eHpNPlX1TfzCmge7O9x19nDKa5t4ff2BbrtGd6ptbKa8tkkrspVqhyaKIBcSIsyfks7neaXH9Wk4eLiWF7/Yx+WZGYxJ7dNtMUwdlMCMoYksWruX+ibPR7X1Ne1DoVTHNFGcBOZnpuMwsDT36CE9/vzxLkTgnvNGtnOk99x19nBKqht4Z0PXe4v7m01ntlOqQ5ooTgLDknozeUA877oM6bGloJJ/5dq55YwhpMZ1/xfgacMSmTIwnr99uoemFke3X8+b9IlCqY5pojhJXJ6Zzs6iarbbqzDG8ND7O0iIieBHs4b55Poiwl2zh1NQXnfck02gs5XXERoiJMdG+jsUpQKSJoqTxMUT0wgPFZZsLGDNrlK+2nuI/zp7OH2iwn0Ww9mjkxmT2oe/rsmnxY3e4oHCXlFH/z5Rnc7yp1RPpf8zThJ9YyKYPSqZf+Xa+cMHOxicGM010wf5NIbWp4q9pTV8sLXQp9f2hA4vrlTHNFGcRBZkZlBa3cDu4iP8fO5ov0zpOXd8f4YlxfDUJ/kYExxPFTarV7ZSqm0efZOIyBUisk1EHCKS5bL+WhHJdXk5RGSytS1CRBaJyG4R2Skil1vrI0XkLRHJF5H1IjLYk9h6otmjk+gbHU7mwHjmju/vlxhCQ4Q7Zw1nZ1E1qzyc19sXWhyGosp67UOhVAc8/ZNzK7AAWOu60hjzmjFmsjFmMnA98K0xJtfa/EugxBgzEhgLfGqtvwUoN8YMB/4CPOJhbD1OZFgo79xxGotuyPLrmEWXTk5jQEIvnlod+E8VJdX1NDuMNo1VqgMeJQpjzA5jzK5OdrsaeNPl/c3AH6zjHcaYMmv9POAla/kd4BzREdq6bFhSb/r19m/rnfDQEH501jByD1bwRX7nI9v6k13noVCqU74oxL4SeANAROKtdb8VkRwReVtEUqx16cBBAGNMM1AJJLZ1QhG5XUSyRSS7tLS0e6NXJ+QHUzNI6RPJU6vz/B1Kh1o722VoolCqXZ0mChFZKSJb23jNc+PY6UCtMWartSoMyAC+NMZkAl8Bj3Y1aGPMImNMljEmKykpqauHKx+IDAvl9pnDWLf3MF/ml3V+gJ/YyvWJQqnOdJoojDFzjDHj23gtdeP8V2E9TVgOAbXAYuv920CmtWwDBgCISBgQZ+2vgtQ10wYyKDGaH726ga22Sn+H0yZ7RR3x0eHEROqERUq1p9uKnkQkBFiIS/2EcdZsvgfMsladA2y3lpcBN1rLPwA+MYFeE6o61CsilNdunU5sVDjXPree7fYqf4d0HFtFHWk+GOJEqWDmafPY+SJSAMwAlovIRy6bZwIHjTF7jzns58CDIrIZZ4uoe6z1zwOJIpIP3A3c70lsKjBk9I3mzdtPJSYilOueX8+uomp/h3QU58x2miiU6oinrZ6WGGMyjDGRxpgUY8z5LtvWGGNObeOY/caYmcaYicaYc4wxB6z19caYK4wxw40x09pIMCpIDUiI5vXbTiU8VLj2uXXklwROsrBV1JGhfSiU6pD2zFY+MbhfDG/cdioiwtXPrmdP6RF/h0RVfRPV9c3aK1upTmiiUD4zNKk3b9w2HWMM1zy7jn1lNX6N5/vhxaP9GodSgU4ThfKp4cmxvHbrqTS1OJPFwcO1fovl+6ax+kShVEc0USifG9U/lldvmU5tUwtXLVpHQbl/koVOWKSUezRRKL8Ym9aHV2+ZTnV9E9c8u57Cyjqfx2CrqCciNMTvQ54oFeg0USi/GZ8exyu3TKe8ppGrF62juKrep9e3VdSRGh9FSIgOKaZURzRRKL+aNCCeF2+eRml1A1c/u46Sat8lC7tOWKSUWzRRKL+bOqgvL948jaLKeq59dj1lRxp8cl3tbKeUezRRqIBwyuAE/nHTKRwsr+W659ZzuKaxW6/X1OKguKpeE4VSbtBEoQLGqUMTef7GU/i2rIbrnltPRW33JYuiynocRocXV8odmihUQDl9eD8W3ZBFfskRrn/+ayrrmrrlOjadsEgpt2miUAHnrJFJ/P36qewsquLGf3xNdb33k8V3fSh0nCelOqWJQgWk2aOT+eu1U9lqq+SmF77hSEOzV8/fmihS47RXtlKd0UShAta5Y1N
48uop5B6s4OYXvqG20XvJwlZRR7/eEUSFh3rtnEqdrDRRqIB2wYRUnrhqMtn7D3P3W5twOLwzl5Wtol77UCjlJk0UKuBdPDGNX140lg+3FfGXlbu9ck5bea1WZCvlJk0UKijcfPpgrswawJOf5LM01+bRuYwx2PWJQim3aaJQQUFE+O1l45k2OIH73tlM7sGKEz5XRW0TdU0t+kShlJs0UaigEREWwjPXZZIUG8ntL2dTVHli40JpHwqlukYThQoqib0jef7GU6hpaOa2l7Opa2zp8jlaE4XOla2UezRRqKAzqn8s/3f1FLbaK7n3nU0Y07WWUN/PbKeJQil3aKJQQemcMSncP3c0yzcX8sSqvC4da6+oIyo8hL7R4d0UnVInlzB/B6DUibp95lB2Fx/h8ZV5jEiO5aKJqW4dZ690zkMhohMWKeUOfaJQQUtEeGjBeKYO6ss9b+eypaDSreNs5ToPhVJdoYlCBbXIsFD+fv1UEmMiue3lbErcmE7VVlGvFdlKdYEmChX0+vWO5Nkbsqiqb+K2l7Opb2q/JVR9UwtlRxpIi9NEoZS7NFGok8LYtD785crJbCqo5L53NrfbEqrQ6nuhRU9KuU8ThTppnD+uPz87fxTLNtl5enV+m/voPBRKdZ22elInlTtnDSOvuJpHP97N8ORY5o7vf9T21j4UOs6TUu7TJwp1UhERHr58IpMHxPPTt3LZZj+6JZStog4R6K8TFinlNk0U6qQTFR7KohumEh8dzm0vZVNS/X1LKHtFHSmxUYSH6kdfKXd59L9FRK4QkW0i4hCRLJf114pIrsvLISKTrW1Xi8gWEdksIh+KSD9rfYKIrBCRPOtnX8/+aaonS46N4tkbsiivbeI/XtnwXUsoW0UdafH6NKFUV3j6Z9VWYAGw1nWlMeY1Y8xkY8xk4HrgW2NMroiEAU8As40xE4HNwF3WYfcDq4wxI4BV1nulTtj49DgeWziJjQcqeGDxFmseijrS+0b7OzSlgopHicIYs8MYs6uT3a4G3rSWxXrFiHP8hD6A3do2D3jJWn4JuMyT2JQC51Sqd587ksUbbTzz6R7sFfX6RKFUF/mi1dOVOJMAxpgmEbkD2ALUAHnAj639UowxhdZyEZDig9hUD/CfZw9nd3E1f/zQ+TdNhrZ4UqpLOn2iEJGVIrK1jdc8N46dDtQaY7Za78OBO4ApQBrOoqdfHHuccfaWanfsaBG5XUSyRSS7tLS0szBUDyciPHrFJCZmxAHa2U6prur0icIYM8eD818FvOHyfrJ1zj0AIvJPvq+LKBaRVGNMoYikAiUdxLQIWASQlZXVtckIVI8UFR7Kszdk8cyaPUwbkuDvcJQKKt3WRlBEQoCFfF8/AWADxopIkvX+XGCHtbwMuNFavhFY2l2xqZ4ppU8UD146jtgonYdCqa7wtHnsfBEpAGYAy0XkI5fNM4GDxpi9rSuMMXbgf4G1IrIZ5xPGQ9bmh4FzRSQPmGO9V0op5WfS1WkkA01WVpbJzs72dxhKKRVURGSDMSar8z21Z7ZSSqlOaKJQSinVIU0USimlOqSJQimlVIc0USillOqQJgqllFIdCvrmsSJSCuw/wcP7AWVeDMcXNObuF2zxgsbsK8EWc0fxDjLGJLWz7ShBnyg8ISLZ7rYjDhQac/cLtnhBY/aVYIvZW/Fq0ZNSSqkOaaJQSinVoZ6eKBb5O4AToDF3v2CLFzRmXwm2mL0Sb4+uo1BKKdW5nv5EoZRSqhM9IlGIyFwR2SUi+SJyfxvbI0XkLWv7ehEZ7Psoj4pngIisFpHtIrJNRH7Sxj6zRKRSRHKt16/8EesxMe0TkS1WPMcN6StO/2fd580ikumPOK1YRrncu1wRqRKR/z5mH7/fYxH5h4iUiMhWl3UJIrJCRPKsn33bOfZGa588EbmxrX18GPOfRGSn9XtfIiLx7Rzb4WfIxzE/KCI2l9//he0c2+H3iw/jfcsl1n0iktvOsV2/x8aYk/oFhAJ7gKFABLAJGHvMPncCf7OWrwLe8nPMqUCmtRwL7G4j5lnAv/19f4+JaR/Qr4PtFwIfAAKcCqz3d8wun5EinO3KA+oe45zXJRPY6rLuj8D91vL9wCNtHJcA7LV+9rWW+/ox5vOAMGv5kbZiducz5OOYHwTudeOz0+H3i6/iPWb7n4Ffeese94QnimlAvjFmrzGmEeeMe8fO9z0PeMlafgc4R0TEhzEexRhTaIzJsZarcc4CmO6veLxoHvCycVoHxFvT3vrbOcAeY8yJdtzsNsaYtcDhY1a7fl5fAi5r49DzgRXGmMPGmHJgBTC32wJ10VbMxpiPjTHN1tt1QIYvYnFXO/fZHe58v3hdR/Fa310LOXoaao/0hESRDhx0eV/A8V+63+1jfZgrgUSfRNcJqxhsCrC+jc0zRGSTiHwgIuN8GljbDPCxiGwQkdvb2O7O78Ifjp3b3VWg3WOAFGNMobVcBKS0sU+g3muAm3E+Wbals8+Qr91lFZf9o50ivkC8z2cCxcaYvHa2d/ke94REEbREpDfwLvDfxpiqYzbn4CwqmQQ8CfzL1/G14QxjTCZwAfBjEZnp74A6IyIRwKXA221sDsR7fBTjLEsImqaLIvJLoBl4rZ1dAukz9AwwDOeUzYU4i3OCwdV0/DTR5XvcExKFDRjg8j7DWtfmPiISBsQBh3wSXTtEJBxnknjNGLP42O3GmCpjzBFr+X0gXET6+TjMY2OyWT9LgCU4H8tdufO78LULgBxjTPGxGwLxHluKW4vsrJ8lbewTcPdaRG4CLgautRLccdz4DPmMMabYGNNijHEAz7YTS0DdZ+v7awHwVnv7nMg97gmJ4htghIgMsf56vApYdsw+y4DWViE/AD5p74PsC1YZ4/PADmPMY+3s07+1HkVEpuH8XfotuYlIjIjEti7jrLzcesxuy4AbrNZPpwKVLkUo/tLuX1+Bdo9duH5ebwSWtrHPR8B5ItLXKjI5z1rnFyIyF7gPuNQYU9vOPu58hnzmmPqz+e3E4s73iy/NAXYaYwra2njC97i7a+cD4YWztc1unK0Tfmmt+w3ODy1AFM6ih3zga2Con+M9A2dxwmYg13pdCPwI+JG1z13ANpytLNYBp/k55qFWLJusuFrvs2vMAjxt/R62AFl+jjkG5xd/nMu6gLrHOJNYIdCEs/z7Fpz1Z6uAPGAlkGDtmwU853LszdZnOh/4oZ9jzsdZlt/6eW5tZZgGvN/RZ8iPMb9ifU434/zyTz02Zuv9cd8v/ojXWv9i6+fXZV+P77H2zFZKKdWhnlD0pJRSygOaKJRSSnVIE4VSSqkOaaJQSinVIU0USimlOqSJQimlVIc0USillOqQJgqllFId+v/Mln+PZVMKegAAAABJRU5ErkJggg==\n", + "text/plain": "
" + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ] + } + }, + "8d18e0fa10b94372a3edf64edb4814bc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_8b14eeb5b78e4e4cb98441ffaeccf4fb", + "style": "IPY_MODEL_a89219097e994deb9caa9b27d8bd2866", + "value": "Adam" + } + }, + "8d80128792d44bf1a0467b7e86df0b54": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatSliderModel", + "state": { + "continuous_update": false, + "description": "Slider input:", + "layout": "IPY_MODEL_09c74a8b5d1a43828034e148d2edfbfc", + "max": 510, + "min": -490, + "readout_format": ".0f", + "style": "IPY_MODEL_e318e3ad8e11430d840261e7eb1b540e", + "value": 10 + } + }, + "8efed772f09f4ea1a1dabf91598fd49a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "8f01f6cb90754bcb8b2e64809505291d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "8f5e2c19238240c38947f1a5d8e72792": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_413fd706b68148a099ed9af1a952ec6d", + "style": "IPY_MODEL_ff0e9f4940eb4b57bd99d96059b5e194", + "value": "Action space:" + } + }, + "8f90c0a8d78442cfa05aff9b006a94d6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget013" + } + }, + "8f9477722bb54e6185f07c7069ed73bc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "SliderStyleModel", + "state": { + "description_width": "" + } + }, + "8fd0788ed947457d8556dc976e0eda38": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "8ff956034aa047d0a8809922cbefa856": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "align_items": "stretch", + "display": "flex", + "grid_area": "widget012", + "justify_content": "center" + } + }, + "90d52d8b63c342f087384246a76680d7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_f6f23b9ba55946d0aa626d62ba4bbdf5", + "style": "IPY_MODEL_3488ba4c7374447794395c4c315a1193", + "value": "Box(3,)" + } + }, + "91d86c9ddbfa4acdaf18e13d8adf3862": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_298f572cd2ec4a9ca5a6feafaf334040", + "style": "IPY_MODEL_de8a6e2e9cb447439055e987582fc63e", + "value": "Adam" + } + }, + "9384c24875c24e5b8be37d4c55e04820": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget008" + } + }, + "93fcd071ff834486b199ab26105f6901": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "946c2a2e7e8f4e36b0311e922520272f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": 
"DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "965b9a99694b4227a43121ae2e974290": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_bcb79998188240e99279f9cda7e004d9", + "style": "IPY_MODEL_02904d8bc2d442deb3da0b5e6e0363a9", + "value": "StochasticPolicyNetwork" + } + }, + "9689f9977c7f455282a9831bcd81905c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_59da397a7faa43c79c633dd523b6f07b", + "style": "IPY_MODEL_ec6b04eac2cd4e5a821244a954846a39", + "value": "Dense(n_units=1, tanh, in_channels='64', name='dense_2')" + } + }, + "9694a75a41e543a3b2642aee3572857d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "GridBoxModel", + "state": { + "children": [ + "IPY_MODEL_1202663af1bf4653bc967824c8574e1a", + "IPY_MODEL_e1d753092ae3420ead7a3086b9405f2a", + "IPY_MODEL_182107ee16aa4bfba497dd033e347d65", + "IPY_MODEL_6cb628f08ae2469db2ee42e38ca4de74", + "IPY_MODEL_885608d7df064c51ac0523ef9928e6b6", + "IPY_MODEL_22ff0e7129b04334b71044d77e3c9298", + "IPY_MODEL_43ca75c41e054155b5ad51e493b3b990", + "IPY_MODEL_84f7291061b34bfaaaec0711bd0cca56" + ], + "layout": "IPY_MODEL_3e9c9dcc814b47f8b2b392074c83d853" + } + }, + "96fc368f69794e5baa9433c3a31b1ec1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "9705108e9dd540fa8e02c1933e03eadd": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "97b119b9f8fc4a5f80b7f35b2fbc20dd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "97f58376ed524fab85dde1ea5f67ee17": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_8fd0788ed947457d8556dc976e0eda38", + "style": "IPY_MODEL_c480ff00167c4205a51065548cbea855", + "value": "StochasticPolicyNetwork" + } + }, + "98824ad5eda8475394e9fb13819502a9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "98eeb6cc7ac643ac882d54fab647de04": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget014" + } + }, + "98f2c9b34e884cada9e2eedac93e1912": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "99ac959475eb4f75b586ed6599b99113": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_7a7ebee6dcf34f36b1d55d2cb443e387", + "style": "IPY_MODEL_55abe6fb296b491ba2e2a09a492b5ae8", + "value": "Dense(n_units=64, relu, in_channels='3', name='hidden_layer1')" + } + }, + "9a247aedcd64492d9b4ddf9d76c13062": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget005" + } + }, + "9ac98c15de5a4548a99d80e8ea3004c9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": 
"HTMLModel", + "state": { + "layout": "IPY_MODEL_d9b467355fa940af8f164b0b53137582", + "style": "IPY_MODEL_351ae05c16d040dab9a578c06a78858c", + "value": "Environment Selector" + } + }, + "9b276e72efa44a7e911ee209d08859b6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "9b5f3fd4ebd341ac91227f9ded9fab19": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "9c226167c8fb4cfab3a7161a87588ae1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "9ce0362f9fac4e45a87ebe7a085a24af": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DropdownModel", + "state": { + "_options_labels": [ + "default" + ], + "description": "state type:", + "index": 0, + "layout": "IPY_MODEL_a6379873f0434d53a6ad52553c164bdb", + "style": "IPY_MODEL_dceb338b27c742cd8733350448a2e798" + } + }, + "9dd1d4acaad44f16b1bbf0693ee9fad5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_7e128d275e3c4e88829167514cec3bc6", + "style": "IPY_MODEL_10b2a84971164564ac50d9f53bd98579", + "value": "Input(shape=(None, 3), name='input_layer')" + } + }, + "9dfcd5e4ec744ed4a0a9091bed5ed2d8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_88fc41c33c024f4eb22b13e0ea98e605", + "style": "IPY_MODEL_5caab83d7d4d4658ac739d02b56e9fd6", + "value": "render" + } + }, + "9e37b046f2d841dd9572b2284a729bf5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "description": "Progress", + "layout": "IPY_MODEL_33ecf71f75a649a285ea6a8211b5acbd", + "style": "IPY_MODEL_68fcf5652dd14e5fad220fcbe777ddbb", + "value": 18 + } + }, + "9ee876553e424052a509a2daed8da1c6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_cefe9c21582d46dc9471bee195b466b7", + "style": "IPY_MODEL_b04de6976e7d476fa4981293ded26bd6", + "value": "Dense(n_units=64, relu, in_channels='64', name='hidden_layer2')" + } + }, + "9fc5c513843a4c0fa7ae9c8b37c3b4ff": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "9fd6a74ce4e54ae38816e55d19327281": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_68d4eab6f1cf4e2fa0e229ecdce8d392", + "style": "IPY_MODEL_bb0110f57f39444db2d635a30437c85d", + "value": "amsgrad" + } + }, + "a01f34500cfc486289f3334e3cd222df": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_7a4be7c4229640b18c29d60d30cc0e70", + "style": "IPY_MODEL_7d64c7c8f2dc4d4eb6218e55ae44bfbe", + "value": "Algorithm Selector" + } + }, + "a02320673c484c46848d7aeb6fda6e18": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" 
+ } + }, + "a0371ec3949944198211395dc7848ba6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DropdownModel", + "state": { + "_options_labels": [ + "Acrobot-v1", + "CartPole-v1", + "CartPole-v0", + "MountainCar-v0", + "MountainCarContinuous-v0", + "Pendulum-v0" + ], + "description": "env name:", + "index": 5, + "layout": "IPY_MODEL_45e906bdfe7a464d848f9c972f536d31", + "style": "IPY_MODEL_ad07aedb699c4a3da0110a187e381619" + } + }, + "a038c2e1def5473484b4d9bbc5393145": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_6923c73eeac747fdbe41b2062e257a58", + "style": "IPY_MODEL_93fcd071ff834486b199ab26105f6901", + "value": "save_interval" + } + }, + "a0b2c18704554c60bfb62c5c7ea46e34": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_40747ee3248e4cbca2b22e3201e7ae52", + "style": "IPY_MODEL_7f3f44cbaac94755810c0e589d048490", + "value": "ValueNetwork" + } + }, + "a18265de326b4d399e760f9d2e5bb238": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "GridBoxModel", + "state": { + "children": [ + "IPY_MODEL_76d1b335a0134c19852090005ae135c4", + "IPY_MODEL_dd631605869640d9b8564da50fd7f14e", + "IPY_MODEL_dd3cb8ec44e2454a9fd787b26a794aa2", + "IPY_MODEL_e2d50772ac80494ea293f047efb33527", + "IPY_MODEL_3c695e15ebbd4ecfb555b0fe5221ad10", + "IPY_MODEL_f401d82a291f4cdb9d44cf62f1c48978", + "IPY_MODEL_f8eb99b0291b45dda1b391805141e984", + "IPY_MODEL_1d03aaf95d45497ca74e337a82632cee", + "IPY_MODEL_ec1d469669a2411f9a5a7a1774480576", + "IPY_MODEL_2c48650276864e79a7b82413ddd8c6fa", + "IPY_MODEL_e923a0f829b14a6b83f8ef159b7e1e67", + "IPY_MODEL_ad74a56ab452440e86d1ff508a37e2fc", + "IPY_MODEL_a8c7fbd1b9e64ebebfc11f7da9dfbfd5", + "IPY_MODEL_1eec2203d3bf49c2876604c21291cc18" + ], + "layout": "IPY_MODEL_31fe17808d8e4f7ead5964af2e4f5894" + } + }, + "a23a881ee9034a33a8d23c63c65490c7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "a2bb633318304f79a811eb07e18da7f5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_fca1d8802f264b48aa3f7bef2b5f5b81", + "IPY_MODEL_27fbf57b093b4444b8990601eaddca26", + "IPY_MODEL_4b9184b437ac441e8c485894889e7fd4" + ], + "layout": "IPY_MODEL_1c09f9523eb2469ab864ddcd5f15f417" + } + }, + "a2bf112fa96c4e8aba14a96af2788dbc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "a32e41356969452abe56558608109dc8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "a496bd2aabab465fbcf0022dc1acd19f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_f72ef10c1acd44608d2db2b932f2b167", + "style": "IPY_MODEL_077609b632e64492acbc9a009222e086", + "value": "ValueNetwork" + } + }, + "a517b57a04ed49bf82a0820df4bcf3b2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "align_items": "center" + } + }, + 
"a592a51f7f3d40cf81de06ff0c9e1546": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget004" + } + }, + "a5d8986e9aad47b1ba7821ddf2850c7a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "a6379873f0434d53a6ad52553c164bdb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "a6a4d48baea44d659e3b2dd7e54fcd17": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatTextModel", + "state": { + "description": "Manual input:", + "layout": "IPY_MODEL_f9cd83ba01bb440b9510e0ada3cfd4aa", + "step": null, + "style": "IPY_MODEL_1a3aa6da2cad4cfd9696b32125ab645b", + "value": 200 + } + }, + "a7d002d3e5454965af1d9cdb2e54e7ca": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_1b48b0f90cef4077aa20b9ee8be52e9b", + "style": "IPY_MODEL_3d9166fc4fcf43f3b930ebc7f996a5bf", + "value": "Adam" + } + }, + "a7d8b17ff9fd43298bc30e0471ade94f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "a860d9c958c646aa89ae598dc67eaa08": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "a89219097e994deb9caa9b27d8bd2866": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "a899edcecbcf49d1a1f57b48bed97865": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "a8c7fbd1b9e64ebebfc11f7da9dfbfd5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_ae3b1f8332bd40ab9ef5ec6dfd688123", + "style": "IPY_MODEL_6efa143c4b9d43aa94ed8cfe56824583", + "value": "epsilon" + } + }, + "a8e550f371f94677a29e238776be2cdb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "aafbebe0ec5b4425acf54f0ad9f6c80f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatTextModel", + "state": { + "description": "Manual input:", + "layout": "IPY_MODEL_49c009585e524d98af99d984cf65a85b", + "step": null, + "style": "IPY_MODEL_76dec90334724f3ba9e51ba05856ff79", + "value": 100 + } + }, + "ab2e3b3dc5024debb0c00c3d27d48a8b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "ac4da45cf7d84d5fa0ea8963afbe5c12": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget001" + } + }, + "ad07aedb699c4a3da0110a187e381619": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "ad34362a6d0b43edb782d9f50d666a41": { + "model_module": "@jupyter-widgets/base", + 
"model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "ad74a56ab452440e86d1ff508a37e2fc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_379d32750a8c4e88b3b6a8d76c3ee91b", + "style": "IPY_MODEL_b1240a01113b4044b84ce15397d29251", + "value": "0.0" + } + }, + "ae1716b3153545b394ccc02357c0cecc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "ae3b1f8332bd40ab9ef5ec6dfd688123": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget013" + } + }, + "ae877e1e2a554a19b78fb9a12f60e5d3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "aeecfc3325ec482ebd31ced3fc2e6839": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "af4e53453b1a434e9426fd63d61888c5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_fca98009fe56433b97f1fd16969f9a35", + "IPY_MODEL_0c0d922d9ed14199ab9b8f48b9e8ba1d", + "IPY_MODEL_5bced3d11d4a41a4b3e1c712f83b98e4" + ], + "layout": "IPY_MODEL_7a6c0819e1344119aae9ef136830ad44" + } + }, + "afeba836a14d4fb6a7c5407794848b80": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget004" + } + }, + "b04b868ce504489c82bd8818501b3ac3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_8991ca296f464086aab8e12cc644430c", + "style": "IPY_MODEL_683e3afa65604f1b85604a79ec228a2b", + "value": "decay" + } + }, + "b04de6976e7d476fa4981293ded26bd6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "b04fb98f9bb24f24bfa2c883cb8bd2fc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_434eec441fb94a30bcb70bec50c60d78", + "IPY_MODEL_0b1a53d081f547f8ab913cd15fe70058", + "IPY_MODEL_0af6103ca9e44bb4a44c62b84b39415f", + "IPY_MODEL_0b1a53d081f547f8ab913cd15fe70058", + "IPY_MODEL_6f0bd8ffadf44461a70b1031b3f65064" + ], + "layout": "IPY_MODEL_452324b6d7cc4cf28d456787efc23b8f" + } + }, + "b106f6f6a7f047a4a11ec9f9a23804e2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DropdownModel", + "state": { + "_options_labels": [ + "AC", + "DDPG", + "PG", + "PPO", + "SAC", + "TD3", + "TRPO" + ], + "description": "Algorithms:", + "index": 0, + "layout": "IPY_MODEL_eb5620a9d421450a9c0b629c52d3d8ba", + "style": "IPY_MODEL_1dbbcf0744194117b3463d5ae8af00ef" + } + }, + "b1240a01113b4044b84ce15397d29251": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "b18ac7a05b7c4d58813a3e735173a3ca": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_a23a881ee9034a33a8d23c63c65490c7", + 
"style": "IPY_MODEL_014bf4270fea44b6aad4c80c7a5979b7", + "value": "Choose your environment" + } + }, + "b20aaab10e6a49138d9cf0a414321c49": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_c06d332516bf42b2b764cc7b6117aade", + "IPY_MODEL_891909eab8204a4bb78c9a468bc20112" + ], + "layout": "IPY_MODEL_ce069bda2c504adabddf4308b196d410" + } + }, + "b2ed3221465c4c7097b79683b8e5c5f0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "align_items": "stretch", + "display": "flex", + "grid_area": "widget002", + "justify_content": "center" + } + }, + "b316a517fda34deba03047080e565a59": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget008" + } + }, + "b3a43d5f73df48299fdf24a855c623a7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "b4047180a5aa44479c358d8c12f0c5d5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_3025ff51115247eebfcfe7e2a18e414e", + "style": "IPY_MODEL_5f1fda7eb4ac4ce694f721e312e205ab", + "value": "0.0001" + } + }, + "b42c755dec514e6fa26ca97f3f0ef923": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_85d35dbed0594a3a837f536309af0b59", + "IPY_MODEL_0201bde3e922471d9bb86857be61df95" + ], + "layout": "IPY_MODEL_5efb085669c2400a909ac37b5cb4e45e" + } + }, + "b4d945e45eae41ceb40de345939615ad": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_42f8297b00d240308e7403a004a1c6b4", + "style": "IPY_MODEL_f48e72d8d0b5470798d5faeed3dc8e40", + "value": "learning_rate" + } + }, + "b50b99192c944a348df722c9f5cdaa90": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "b5214d589d704727964cdb67261b2d47": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatTextModel", + "state": { + "description": "Manual input:", + "layout": "IPY_MODEL_ad34362a6d0b43edb782d9f50d666a41", + "step": null, + "style": "IPY_MODEL_dca0afd22296462f8a0e11b82566f289", + "value": 0.9 + } + }, + "b58381d8050044ee9df6c0857e3a06e4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_template_areas": "\n \"net_label net_info\"\n \"opt_label opt_info\"\n " + } + }, + "b5ac8df291f9438bacc64a6cb2805620": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "b5bcfb7873f44eba8f8f90e018f09b6a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget012" + } + }, + "b5dd447dec9c48bc8b1bb664c9553912": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "SliderStyleModel", + "state": { + "description_width": "" + } + }, + "b64d5e345cb5482595aa92662c8f162c": { + "model_module": "@jupyter-widgets/controls", + 
"model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "b672fea2d3ac4732a92e992eaaef260e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget009" + } + }, + "b85dbc19731e4b84bb6122ea52367809": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "b92bc4065ee4473aa6e1b4051e044dee": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_629ece3b43ac4c8a8c2f83733a180978", + "style": "IPY_MODEL_38f46c0b84c84233a228758c9b306a79", + "value": "amsgrad" + } + }, + "b9743661bbd24d94969c463e1f77d6e8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "b979276c5b584ebab1400eea707b2c39": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "b9ad33908a4f4a6ba687c820c123c37a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget014" + } + }, + "bb0110f57f39444db2d635a30437c85d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "bb04f52581bb496e9a6931ce291714c9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "bb5d38052b40427585a8ec928bdef7b5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_ca41ace6e197496b8d0e375f53b92729", + "IPY_MODEL_081136f1075542a3999ce83eba68fdb5" + ], + "layout": "IPY_MODEL_4a88a99c974d47da993c8bde3faab362" + } + }, + "bcb79998188240e99279f9cda7e004d9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "bd7afa2132154beebd89e4320ebcad26": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_b316a517fda34deba03047080e565a59", + "style": "IPY_MODEL_d21ecfeb69a54154ad0c0cadf69db4fa", + "value": "0.9" + } + }, + "bdb404863da84bdf870e550898f54848": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_7f94bb571172453a920e7bd6d7a9050f", + "IPY_MODEL_e2ecea0189544c41a0ca172743cf16a1" + ], + "layout": "IPY_MODEL_62a5e4f04f554e6580d63bb32f36b3be" + } + }, + "be4d4fbbc53d4705963f9b343aff399f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "bebb739676c74aacb396889de39592e6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "bf3a856d0c5f4d47abf596f528a2d947": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + 
"bf620c54949846b49135585c61101b19": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "bf7a578fb6204ce694235598a0f00ea2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget001" + } + }, + "bfa16a837ebd4ec795d5aa0a893d5298": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "bfdfc9d77a654743a9ebdfc08ab167da": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "bffd75c7e90346ebb8214c6fe0ce2ab4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_d9864398064d4a4ea93f2f985bf70bb5", + "style": "IPY_MODEL_835ef9a1125846679a65d679afb62013", + "value": "./model/AC-Pendulum-v0" + } + }, + "c06d332516bf42b2b764cc7b6117aade": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_d466ecd3ea76446fa72d90acf2d7c5ba", + "style": "IPY_MODEL_c726054bb59f40aab21ea2d4485ce77e", + "value": "Learning curve" + } + }, + "c083a4b8f36848ed9f277f423ae18084": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_8f90c0a8d78442cfa05aff9b006a94d6", + "style": "IPY_MODEL_d220d182817c44408e2df2a364760e43", + "value": "epsilon" + } + }, + "c096b60cb96b4aa68be8728e6feb2366": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "c12ffb6b4533460bbdfc7404ff89d807": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_842ea79123034275adec1df392a4846d", + "style": "IPY_MODEL_0cabfd585d5d4421a05805698bc1c8ad", + "value": "beta_2" + } + }, + "c2160078393b421d9f3a4343f37307e2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_38484ea61c3449a1b809d8526ead582d", + "style": "IPY_MODEL_7ff9e3e9f09b40d398b6c898e5ee9653", + "value": "False" + } + }, + "c234ed19a3204e1d9452d6686e014efb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "c2aa94c81efc4f3f826adcb847fbdb89": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "description": "Progress", + "layout": "IPY_MODEL_5b87473fb6cc473a89998a285388f4da", + "max": 10, + "style": "IPY_MODEL_6f525160109d45299758550c08196bd9", + "value": 10 + } + }, + "c2eca071d21942c98a47aaf881130883": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatSliderModel", + "state": { + "continuous_update": false, + "description": "Slider input:", + "layout": "IPY_MODEL_ae1716b3153545b394ccc02357c0cecc", + "max": 400, + "readout_format": ".0f", + "style": "IPY_MODEL_8f9477722bb54e6185f07c7069ed73bc", + "value": 200 + } + }, + "c3233dc4967548279ff54f73e91e27a0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": 
"DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "c34d5f3024f24951b4f478bca62dd7c7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_40c1e5560977460b86028ca09ee94662", + "style": "IPY_MODEL_e00c049b23f34848a62ee225b63ec0b7", + "value": "amsgrad" + } + }, + "c35cf89d5b4c42c886c9c83fdc93c8e6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "c3c09aa3ecea45eda2b142c857c5d7c5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget011" + } + }, + "c3d17e5a575344968f8b84a174b26ba9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget004" + } + }, + "c3ef353dd171416da3dc55582107fa67": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_266e10703ed340a78b259c7d3ddc8836", + "IPY_MODEL_64750206fa3a48119aa85e75f5ff2de8" + ], + "layout": "IPY_MODEL_a517b57a04ed49bf82a0820df4bcf3b2" + } + }, + "c4662ffdadef4c7d82aba5ddca1fbfda": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "c480ff00167c4205a51065548cbea855": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "c60dc42b295c47138b76205df9071217": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_432a3a690b36409192aa3ee4dd5fedf8", + "IPY_MODEL_08f5684d8e194916ac04ed379e2bf022" + ], + "layout": "IPY_MODEL_48392da1f6c64d3fad859465d0d0095b" + } + }, + "c726054bb59f40aab21ea2d4485ce77e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "c75a9640bb26465785ca214520007519": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "c7a9f23b553e43a78d5c0ced37526327": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "c90e24c07a754360836c2acc6f3a7e22": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_9ac98c15de5a4548a99d80e8ea3004c9", + "IPY_MODEL_f10d3787733a4ece9120c3641017114b" + ], + "layout": "IPY_MODEL_6187b72c80f64272a6c33c90cb582c4c" + } + }, + "ca41ace6e197496b8d0e375f53b92729": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_516cc7132ca94faab3023ffcd1ed4cd4", + "IPY_MODEL_329f804132904f47a73d10b3ccba4b4d", + "IPY_MODEL_a0371ec3949944198211395dc7848ba6" + ], + "layout": "IPY_MODEL_9c226167c8fb4cfab3a7161a87588ae1" + } + }, + "ce069bda2c504adabddf4308b196d410": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "ce5b0166c393435a840819472b761b8c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "ce5b912531614dfe90ee3e20fa7ba467": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "height": "250px", + "width": "350px" + } + }, + "ce777268358f48608666122680449e3c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "ce96b4fa2ae14c6f8f4af830f9442000": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "cefe9c21582d46dc9471bee195b466b7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "cf3de6c59d124068af4aef37293c26e2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget003" + } + }, + "cfb6b6bcedad4f61893206fb1eb28385": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "GridBoxModel", + "state": { + "children": [ + "IPY_MODEL_283080f17fcf4286b2e6e059bcda3370", + "IPY_MODEL_04461564de8c45d6af4c6055f7b4c17f", + "IPY_MODEL_9dfcd5e4ec744ed4a0a9091bed5ed2d8", + "IPY_MODEL_334d1a726d2347db82e42df5760618b3", + "IPY_MODEL_6c751fa2c2aa415ea57d3c9b0e11b22d", + "IPY_MODEL_43730220bf8e489cae588fcf375d08cf", + "IPY_MODEL_a038c2e1def5473484b4d9bbc5393145", + "IPY_MODEL_7af9623e94c64555b01efa581f338e60", + "IPY_MODEL_389174ab87e24a48a23ad5f81a32da61", + "IPY_MODEL_4ee9cbafcaad44de9f9e7453ee765047", + "IPY_MODEL_3a3916bde1e849aeae0e2701258ddc34", + "IPY_MODEL_88aafdf648784ac7954ce933431f9a3a" + ], + "layout": "IPY_MODEL_19b0d8173d9141e0a0db8d0b2110c98c" + } + }, + "cfc4c351d9da4a2bbe36bb1288f74e82": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "d02f0cd6f8f94156ac86605286a6ee78": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "align_items": "center" + } + }, + "d16d026731104f40ad77f1c7b8f77bf6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget009" + } + }, + "d1b7a611e0ea474991c6034e7e7a9e98": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "d1ba6fbf21674589b3f585f6e0f9638b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_79611f87c64c431794f17eccbbd60f38", + "style": "IPY_MODEL_a2bf112fa96c4e8aba14a96af2788dbc", + "value": "0.0" + } + }, + "d20f2266d6fc44df988c78b63b202a81": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget014" + } + }, + "d21ecfeb69a54154ad0c0cadf69db4fa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + 
"state": { + "description_width": "" + } + }, + "d220d182817c44408e2df2a364760e43": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "d2ba7f491ec94768be174bba323aff6d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget009" + } + }, + "d34c7789bb974de1a36ef3cc45737b52": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget010" + } + }, + "d439f3de7aeb4f059483dedb8aca131a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "d466ecd3ea76446fa72d90acf2d7c5ba": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "d48e8464b37c4f0099d42e59369dbab6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_b672fea2d3ac4732a92e992eaaef260e", + "style": "IPY_MODEL_f834d6547a954a478d9e755653e4f5a1", + "value": "beta_2" + } + }, + "d4c91e304ca34f88a4c959ecc4683678": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "d5a3129aed5d47718c478523d35359ad": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "d6a04d9b77b54ae89af21fa5551e205e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_214c87e57eb641bb89644c9f465889ca", + "style": "IPY_MODEL_7a807eea55d14bae96d792b1e475adcb", + "value": "save_interval" + } + }, + "d6ddb43e654a421ead72beacfae7145e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_2dab24721ba34bd789afa55d1479464b", + "style": "IPY_MODEL_0a21d0f35913467a9b266a75d2af8db0", + "value": "Supported algorithms are shown below" + } + }, + "d915d378018e4bd085cf4a0a935e2aaa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_d16d026731104f40ad77f1c7b8f77bf6", + "style": "IPY_MODEL_7aba7921241e41af9a32cbe042699485", + "value": "test_episodes" + } + }, + "d91d58d65e864faa90c9cc7bfd2959b0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatTextModel", + "state": { + "description": "Manual input:", + "layout": "IPY_MODEL_182c5797541f4476bb02c95a710f1bca", + "step": null, + "style": "IPY_MODEL_6dc0399123f94dd1831a2b2cfb6c3078", + "value": 10 + } + }, + "d932e823fc31419d9d00cb89736f8a5f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_a0b2c18704554c60bfb62c5c7ea46e34", + "IPY_MODEL_f80bd1f80d99494595e88c9fc5f055d2" + ], + "layout": "IPY_MODEL_f3645a595f8c4e1f82d71ed6f97e7dd6" + } + }, + "d9864398064d4a4ea93f2f985bf70bb5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", 
+ "model_name": "LayoutModel", + "state": { + "grid_area": "widget004" + } + }, + "d99dceda8ae6483f8df298525d45be82": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "border": "solid" + } + }, + "d9b467355fa940af8f164b0b53137582": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "da04b8e9a4464f7ea141e41904fa3b0f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "da5536ed85464ee5a97c44660b985348": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "GridBoxModel", + "state": { + "children": [ + "IPY_MODEL_469da089cf804101a4cbc570975a1aed", + "IPY_MODEL_dc4226a0086147b29ba43f099ccad551", + "IPY_MODEL_7df23ef826fb4c568071b0667bafcd3b", + "IPY_MODEL_f5879b9ebaab4df9b53830cef8c25e62" + ], + "layout": "IPY_MODEL_de78a9211dba417182808fc83d0ebbf8" + } + }, + "da5694fd870b41e79f41ebc7d7b8db5e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget002" + } + }, + "dc12042cc1bb40c98a69bef90468797a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "dc4226a0086147b29ba43f099ccad551": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_f5c5c8e022aa4f239006a40e2ac8b990", + "IPY_MODEL_b5214d589d704727964cdb67261b2d47" + ], + "layout": "IPY_MODEL_b2ed3221465c4c7097b79683b8e5c5f0" + } + }, + "dca0afd22296462f8a0e11b82566f289": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "dceb338b27c742cd8733350448a2e798": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "dd3cb8ec44e2454a9fd787b26a794aa2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_39c394badc7246fdb12032649f71a1b6", + "style": "IPY_MODEL_ce96b4fa2ae14c6f8f4af830f9442000", + "value": "learning_rate" + } + }, + "dd51349042bc4341b061da02df9f8be2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget003" + } + }, + "dd631605869640d9b8564da50fd7f14e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_2dece16eb4994e5082a1cbeeea4163d0", + "style": "IPY_MODEL_d439f3de7aeb4f059483dedb8aca131a", + "value": "Adam" + } + }, + "ddaf2150308c4af2876f9f423d0b803d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "ddba268ea0db428898643ae0f9a259a3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "align_items": "stretch", + "display": "flex", + "grid_area": "widget006", + 
"justify_content": "center" + } + }, + "de78a9211dba417182808fc83d0ebbf8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_template_areas": "\"widget001 widget002\"\n\"widget003 widget004\"", + "grid_template_columns": "repeat(2, 1fr)", + "grid_template_rows": "repeat(2, 1fr)" + } + }, + "de8a6e2e9cb447439055e987582fc63e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "def02ee29d9a44b19a1fd20f8a4be1a0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "df228d4f3b644bb081011555c9f36485": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget010" + } + }, + "df84370f89e949518569f900854e2510": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "e00c049b23f34848a62ee225b63ec0b7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "e09e0ff65ebf454b80a965aaa0f61d32": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_5526ed8ea7b4499eadc0bbb165d7bbc4", + "IPY_MODEL_d932e823fc31419d9d00cb89736f8a5f" + ], + "layout": "IPY_MODEL_54927f9f2cde4416bf0e3b782fbd5118" + } + }, + "e0a1f12f4f0e4e31adc281b1fe6dee11": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "e14f5611fa9242af879512207669394f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "e1d753092ae3420ead7a3086b9405f2a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_e9add15a402448ee8f55d0a65f2d460c", + "style": "IPY_MODEL_ddaf2150308c4af2876f9f423d0b803d", + "value": "Pendulum-v0" + } + }, + "e1f03c622ff64b3bb4e59fc54e7898a6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_b5bcfb7873f44eba8f8f90e018f09b6a", + "style": "IPY_MODEL_c3233dc4967548279ff54f73e91e27a0", + "value": "0.0" + } + }, + "e1f175e02edf40f39585c485ec11cbff": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "height": "250px", + "width": "350px" + } + }, + "e210fdbc53d246a2ae55da6a3689745b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "e224793bc1524f0c91ce3d7ef0e98f8e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_4d8d22e583c64179817ad9c514bd4490", + "style": "IPY_MODEL_f91418c725364297a60aa4983253ae07", + "value": "0.0002" + } + }, + "e255dc6e7af7487e8a2729f670bffd8a": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget008" + } + }, + "e27f2db74f874171acd272cf848ddc80": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget011" + } + }, + "e2d50772ac80494ea293f047efb33527": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_a592a51f7f3d40cf81de06ff0c9e1546", + "style": "IPY_MODEL_d5a3129aed5d47718c478523d35359ad", + "value": "0.0002" + } + }, + "e2ecea0189544c41a0ca172743cf16a1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatTextModel", + "state": { + "description": "Manual input:", + "layout": "IPY_MODEL_1adbcde168d04bcdaed1c410feae74ac", + "step": null, + "style": "IPY_MODEL_4e6414fcd34b454e94c982f7233402a7", + "value": 100 + } + }, + "e318e3ad8e11430d840261e7eb1b540e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "SliderStyleModel", + "state": { + "description_width": "" + } + }, + "e35bce23c28f4af3b0d4dce2266ed2e8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "e3adb676dd9b48a6bd4e895ac644b653": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "e41fe8ee1bf04764abe02428057a540a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "e4665eee9731436a839eaebea246f048": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_0d95601931d94f8cac55349f5886038a", + "style": "IPY_MODEL_ee84c4f73d284618aa3241fcb758da9f", + "value": "Box(1,)" + } + }, + "e467ed3285684035a013df63ebb6b422": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "e527873f8829445dbdb49e0710132c63": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "e53d3b32848c4872a5e1254a2ed080f1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "e57f860aafca4775a03574208f4944b7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_fd1693effce0420c8f4bbbebde0ef7c3", + "IPY_MODEL_4bbe95c5e6b34795a2058cc7bf7416f9", + "IPY_MODEL_9ee876553e424052a509a2daed8da1c6", + "IPY_MODEL_07b040199f664673b2cb1b45c5a5af34" + ], + "layout": "IPY_MODEL_41425cf814dc44c49ac901aeec4c668f" + } + }, + "e62a214128d34799be2e1cc2cdb98b8c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "e6958eae462d43d8bdb9c6227deddcc7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + 
"e6c798aa900740009741c67dfccb0d92": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_df228d4f3b644bb081011555c9f36485", + "style": "IPY_MODEL_63d55c74d6ed493abe58361958b23046", + "value": "0.999" + } + }, + "e8260cb1f55049a49bdaf024528d43c4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget001" + } + }, + "e835260b70924edd959ac38cbdaa50d3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget013" + } + }, + "e8b87d816ccb409083b0c522ef0bd9dd": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget004" + } + }, + "e904337542fd4e5d8187b9b9190b7522": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatTextModel", + "state": { + "description": "Manual input:", + "layout": "IPY_MODEL_b50b99192c944a348df722c9f5cdaa90", + "step": null, + "style": "IPY_MODEL_831ed45407f74193acc07dacada162a9", + "value": 50 + } + }, + "e923a0f829b14a6b83f8ef159b7e1e67": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_e27f2db74f874171acd272cf848ddc80", + "style": "IPY_MODEL_b3a43d5f73df48299fdf24a855c623a7", + "value": "decay" + } + }, + "e944a76d793541058cf5f32563847fb3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "e9794b57be6c4c0e981a017d3fa82a36": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "e9add15a402448ee8f55d0a65f2d460c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "e9d6d91ceda64a63b9fe358e90337820": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_fe785154b75c4badbab0d946f05802cf", + "style": "IPY_MODEL_78f5897896d144fe839fafd65e76816e", + "value": "Environment Information" + } + }, + "eb54eb7b3c674e67b10610ce2aaf309a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_547d2113aae04e20ba41d30deb33ec5f", + "style": "IPY_MODEL_7b48f1fae96e40519787018ed628b99b", + "value": "1e-07" + } + }, + "eb5620a9d421450a9c0b629c52d3d8ba": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "eb5fdb48aa1d483fa9acf05a229ef307": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "ebff747fea3f4cf2abb9efcd9f998ddb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "ec1d469669a2411f9a5a7a1774480576": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_4749f46df2c4438e874ed6912a4d7ef1", + "style": "IPY_MODEL_7cc3bf6293494425b70569d1eca3af03", + "value": "beta_2" + } + 
}, + "ec6b04eac2cd4e5a821244a954846a39": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "ecc6da99cf7944f5a5a6cfd1f0516aa6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "ed746bfae28741e9ae1d450dd1394423": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "align_items": "center" + } + }, + "ee84c4f73d284618aa3241fcb758da9f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "eef437964b4e4fa29ea42afc6b9a69ce": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_8f01f6cb90754bcb8b2e64809505291d", + "style": "IPY_MODEL_ce777268358f48608666122680449e3c", + "value": "Box(1,)" + } + }, + "ef95b43fb5cd436cb6f737f2defc8e38": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_747e88ebfefc4efb95f60f63e725dcc1", + "style": "IPY_MODEL_078c44ca72d24661bbeb9921196ddace", + "value": "The action space is continuous." + } + }, + "f10d3787733a4ece9120c3641017114b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_26036b1a064245a6a1cef60ec7d39376", + "IPY_MODEL_af4e53453b1a434e9426fd63d61888c5" + ], + "layout": "IPY_MODEL_70c300868924433094e74b74d260a4a2" + } + }, + "f1888922c93c435f8bac11033ae325e9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_455c6fed537d48b188edef0200ab0fb1", + "IPY_MODEL_2e65a763e5db40ca8969c36950c0d9bd", + "IPY_MODEL_8c27b4b759354d64b25bcb3462c444ef" + ], + "layout": "IPY_MODEL_74d03d1491d4451d879384ab357f33a9" + } + }, + "f1985e262a7d401ea97c903091713789": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "f2612900bd944258af3be77cacc7a46b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "f29a7f4ff2a74bbf8d6485cbfb086152": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_bf3a856d0c5f4d47abf596f528a2d947", + "style": "IPY_MODEL_10685777c5384041b62b4ce3aa26bf6e", + "value": "Environment Selector" + } + }, + "f29ba87ee02f4fc38760b98a32e20581": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "f2db93e6094b47d0bfce3821b33d707a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "f2ffc80dd5074916b1a69e9de91149f9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget001" + } + }, + "f3645a595f8c4e1f82d71ed6f97e7dd6": { + "model_module": "@jupyter-widgets/base", + 
"model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "align_content": "center", + "align_items": "center", + "border": "dotted" + } + }, + "f401d82a291f4cdb9d44cf62f1c48978": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_86e357397076415ba3ac239b26a8bc8f", + "style": "IPY_MODEL_faea715cb8894b8ca444f80d17c07e12", + "value": "False" + } + }, + "f48e72d8d0b5470798d5faeed3dc8e40": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "f4d0297192f5464bac7ab02b3dabed2c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "f5879b9ebaab4df9b53830cef8c25e62": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_afeba836a14d4fb6a7c5407794848b80", + "style": "IPY_MODEL_9fc5c513843a4c0fa7ae9c8b37c3b4ff", + "value": "./model/AC-Pendulum-v0" + } + }, + "f5c5c8e022aa4f239006a40e2ac8b990": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatSliderModel", + "state": { + "continuous_update": false, + "description": "Slider input:", + "layout": "IPY_MODEL_e527873f8829445dbdb49e0710132c63", + "max": 1.8, + "readout_format": ".1f", + "step": 0.1, + "style": "IPY_MODEL_2b0d8567d4aa4e53a5837284b315cc58", + "value": 0.9 + } + }, + "f63f7fca433e4d32ad6252416895155b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "f6f23b9ba55946d0aa626d62ba4bbdf5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "f72ef10c1acd44608d2db2b932f2b167": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "f74c2a3b52114bbc80056d7097731209": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatSliderModel", + "state": { + "continuous_update": false, + "description": "Slider input:", + "layout": "IPY_MODEL_80d9bf94c37c49708820ccb5a2aa8f8b", + "max": 200, + "readout_format": ".0f", + "style": "IPY_MODEL_731d299fb9dd45c1a41a5d4df4f41f94", + "value": 100 + } + }, + "f77e6fff86704faea6c01e0262104c70": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_0a575cd57803474a9574922e07d3d316", + "IPY_MODEL_8d025735275c4dfdbbbf2d491e727c08" + ], + "layout": "IPY_MODEL_5b759ba6fc8f451c97ee15467069a6ed" + } + }, + "f80bd1f80d99494595e88c9fc5f055d2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_9dd1d4acaad44f16b1bbf0693ee9fad5", + "IPY_MODEL_1cb88e139a0642afb2f3c958dff539aa", + "IPY_MODEL_2e6e71650a6a48878fce055c8e563538", + "IPY_MODEL_fe6a7094bdd649e6b5270a701e12253a" + ], + "layout": "IPY_MODEL_bfa16a837ebd4ec795d5aa0a893d5298" + } + }, + "f834d6547a954a478d9e755653e4f5a1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": 
"" + } + }, + "f8a20f2f4b8b4c03857bcd85bf96b136": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "f8eb99b0291b45dda1b391805141e984": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_5afcc13ec3d94e6299bd06fb87ed7885", + "style": "IPY_MODEL_d4c91e304ca34f88a4c959ecc4683678", + "value": "beta_1" + } + }, + "f91418c725364297a60aa4983253ae07": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "f9a9a8529629435f926e28c9e2ff6d21": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "f9b983bef3a14087b6d1f966b8b041ed": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "f9cd83ba01bb440b9510e0ada3cfd4aa": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "fa3877a284354fd08f33d320314b6765": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_57f97e2ebec542f8b297365916bf571e", + "style": "IPY_MODEL_454021a337164bae8a96f5a5a7749b78", + "value": "decay" + } + }, + "faea715cb8894b8ca444f80d17c07e12": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "fb06877af7ae451baefc12dfd27d9348": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "border": "dotted", + "grid_template_areas": "\"widget001 widget002\"\n\"widget003 widget004\"\n\"widget005 widget006\"\n\"widget007 widget008\"\n\"widget009 widget010\"\n\"widget011 widget012\"\n\"widget013 widget014\"", + "grid_template_columns": "repeat(2, 1fr)", + "grid_template_rows": "repeat(7, 1fr)" + } + }, + "fb19638e8a38465f844aaf06c6378b29": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "fbd450c8b01f4ab9ae7ea1caa129bd66": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "align_content": "center", + "align_items": "center", + "border": "dotted" + } + }, + "fc20a5f1e967425c840960c1948f00c8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget011" + } + }, + "fc69d16aa7e547b09859e2ca7dbfbde8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_97f58376ed524fab85dde1ea5f67ee17", + "IPY_MODEL_0dc03ae5db46484a85272ce1899e53c0" + ], + "layout": "IPY_MODEL_81f34a95028440608c8a5a307cd7ee9b" + } + }, + "fc6a2f4827034d64b99a15547f3d9f43": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_cf3de6c59d124068af4aef37293c26e2", + "style": "IPY_MODEL_1222c8a942134f83aa262d9b321ee413", + "value": "render" + } 
+ }, + "fc83fd9df36b4c0fa6ee544fe520cde7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_area": "widget007" + } + }, + "fca1d8802f264b48aa3f7bef2b5f5b81": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_c096b60cb96b4aa68be8728e6feb2366", + "style": "IPY_MODEL_7532b84aea3a4f4290efa4b0369e846a", + "value": "Algorithm Parameters" + } + }, + "fca98009fe56433b97f1fd16969f9a35": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_43f9446733e242f1977bbe394ddc479b", + "style": "IPY_MODEL_660e8c250f974ff685128c61b3d57fe3", + "value": "Environment settings" + } + }, + "fd1693effce0420c8f4bbbebde0ef7c3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_4fa0861e758940d9b9c2775304ebb140", + "style": "IPY_MODEL_661fd55473c0431aa9dffd6876d1d559", + "value": "Input(shape=(None, 3), name='input_layer')" + } + }, + "fe547223f16e423fa8493d4c6ae577ba": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "children": [ + "IPY_MODEL_f77e6fff86704faea6c01e0262104c70", + "IPY_MODEL_9e37b046f2d841dd9572b2284a729bf5" + ], + "layout": "IPY_MODEL_48a97cf1c4a44a858c3376f962060321" + } + }, + "fe6a7094bdd649e6b5270a701e12253a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "LabelModel", + "state": { + "layout": "IPY_MODEL_a860d9c958c646aa89ae598dc67eaa08", + "style": "IPY_MODEL_85514e8a938240e7b2df7c2a8ad6b6e8", + "value": "Dense(n_units=1, No Activation, in_channels='64', name='dense_1')" + } + }, + "fe785154b75c4badbab0d946f05802cf": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": {} + }, + "ff06931e66b544389c8f409734b472e3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "grid_template_areas": "\n \"net_label net_info\"\n \"opt_label opt_info\"\n " + } + }, + "ff0e9f4940eb4b57bd99d96059b5e194": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "ffce2434eb114cd1a7f6961dd71ff755": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "align_items": "center" + } + } + }, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/rlzoo/run_rlzoo.py b/rlzoo/run_rlzoo.py old mode 100644 new mode 100755 index 4ad2581..3618434 --- a/rlzoo/run_rlzoo.py +++ b/rlzoo/run_rlzoo.py @@ -1,60 +1,60 @@ -from rlzoo.common.env_wrappers import * -from rlzoo.common.utils import * -from rlzoo.algorithms import * - -# EnvName = 'PongNoFrameskip-v4' -# EnvType = 'atari' - -# EnvName = 'CartPole-v0' -EnvName = 'Pendulum-v0' -EnvType = 'classic_control' - -# EnvName = 'BipedalWalker-v2' -# EnvType = 'box2d' - -# EnvName = 'Ant-v2' -# EnvType = 'mujoco' - -# EnvName = 'FetchPush-v1' -# EnvType = 'robotics' - -# EnvName = 'FishSwim-v0' -# EnvType = 'dm_control' - -# EnvName = 'ReachTarget' -# EnvType = 'rlbench' -# env = build_env(EnvName, 
EnvType, state_type='vision') - -AlgName = 'SAC' -env = build_env(EnvName, EnvType) -alg_params, learn_params = call_default_params(env, EnvType, AlgName) -alg = eval(AlgName+'(**alg_params)') -alg.learn(env=env, mode='train', render=False, **learn_params) -alg.learn(env=env, mode='test', render=True, **learn_params) - -# AlgName = 'DPPO' -# number_workers = 2 # need to specify number of parallel workers in parallel algorithms like A3C and DPPO -# env = build_env(EnvName, EnvType, nenv=number_workers) -# alg_params, learn_params = call_default_params(env, EnvType, AlgName) -# alg_params['method'] = 'clip' # specify 'clip' or 'penalty' method for different version of PPO and DPPO -# alg = eval(AlgName+'(**alg_params)') -# alg.learn(env=env, mode='train', render=False, **learn_params) -# alg.learn(env=env, mode='test', render=True, **learn_params) - -# AlgName = 'PPO' -# env = build_env(EnvName, EnvType) -# alg_params, learn_params = call_default_params(env, EnvType, AlgName) -# alg_params['method'] = 'clip' # specify 'clip' or 'penalty' method for different version of PPO and DPPO -# alg = eval(AlgName+'(**alg_params)') -# alg.learn(env=env, mode='train', render=False, **learn_params) -# alg.learn(env=env, mode='test', render=True, **learn_params) - -# AlgName = 'A3C' -# number_workers = 2 # need to specify number of parallel workers -# env = build_env(EnvName, EnvType, nenv=number_workers) -# alg_params, learn_params = call_default_params(env, EnvType, 'A3C') -# alg = eval(AlgName+'(**alg_params)') -# alg.learn(env=env, mode='train', render=False, **learn_params) -# alg.learn(env=env, mode='test', render=True, **learn_params) - -env.close() +from rlzoo.common.env_wrappers import * +from rlzoo.common.utils import * +from rlzoo.algorithms import * + +# EnvName = 'PongNoFrameskip-v4' +# EnvType = 'atari' + +# EnvName = 'CartPole-v0' +EnvName = 'Pendulum-v0' +EnvType = 'classic_control' + +# EnvName = 'BipedalWalker-v2' +# EnvType = 'box2d' + +# EnvName = 'Ant-v2' +# EnvType = 'mujoco' + +# EnvName = 'FetchPush-v1' +# EnvType = 'robotics' + +# EnvName = 'FishSwim-v0' +# EnvType = 'dm_control' + +# EnvName = 'ReachTarget' +# EnvType = 'rlbench' +# env = build_env(EnvName, EnvType, state_type='vision') + +AlgName = 'SAC' +env = build_env(EnvName, EnvType) +alg_params, learn_params = call_default_params(env, EnvType, AlgName) +alg = eval(AlgName+'(**alg_params)') +alg.learn(env=env, mode='train', render=False, **learn_params) +alg.learn(env=env, mode='test', render=True, **learn_params) + +# AlgName = 'DPPO' +# number_workers = 2 # need to specify number of parallel workers in parallel algorithms like A3C and DPPO +# env = build_env(EnvName, EnvType, nenv=number_workers) +# alg_params, learn_params = call_default_params(env, EnvType, AlgName) +# alg_params['method'] = 'clip' # specify 'clip' or 'penalty' method for different version of PPO and DPPO +# alg = eval(AlgName+'(**alg_params)') +# alg.learn(env=env, mode='train', render=False, **learn_params) +# alg.learn(env=env, mode='test', render=True, **learn_params) + +# AlgName = 'PPO' +# env = build_env(EnvName, EnvType) +# alg_params, learn_params = call_default_params(env, EnvType, AlgName) +# alg_params['method'] = 'clip' # specify 'clip' or 'penalty' method for different version of PPO and DPPO +# alg = eval(AlgName+'(**alg_params)') +# alg.learn(env=env, mode='train', render=False, **learn_params) +# alg.learn(env=env, mode='test', render=True, **learn_params) + +# AlgName = 'A3C' +# number_workers = 2 # need to specify number of parallel 
workers +# env = build_env(EnvName, EnvType, nenv=number_workers) +# alg_params, learn_params = call_default_params(env, EnvType, 'A3C') +# alg = eval(AlgName+'(**alg_params)') +# alg.learn(env=env, mode='train', render=False, **learn_params) +# alg.learn(env=env, mode='test', render=True, **learn_params) + +env.close() From 53ca0fdf6b64690d1bbb83e0aeadb87537eb4b70 Mon Sep 17 00:00:00 2001 From: quantumiracle <1402434478@qq.com> Date: Sat, 26 Jun 2021 12:55:34 +0800 Subject: [PATCH 06/11] add comment --- rlzoo/distributed/start_dis_role.py | 4 ++-- rlzoo/distributed/training_components.py | 17 +++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/rlzoo/distributed/start_dis_role.py b/rlzoo/distributed/start_dis_role.py index 22c03bb..e24cee2 100755 --- a/rlzoo/distributed/start_dis_role.py +++ b/rlzoo/distributed/start_dis_role.py @@ -183,7 +183,7 @@ def run_server(agent, args, training_conf, env_conf, agent_conf): # save the final model rl_agent.save_ckpt(env_name) - print('server finished') + print('Server Finished.') def main(training_conf, env_conf, agent_conf): @@ -201,6 +201,6 @@ def main(training_conf, env_conf, agent_conf): elif agent.role() == Role.Server: run_server(agent, args, training_conf, env_conf, agent_conf) else: - raise RuntimeError('invalid role') + raise RuntimeError('Invalid Role.') agent.barrier() diff --git a/rlzoo/distributed/training_components.py b/rlzoo/distributed/training_components.py index 3b5c447..a975245 100755 --- a/rlzoo/distributed/training_components.py +++ b/rlzoo/distributed/training_components.py @@ -4,15 +4,15 @@ from rlzoo.algorithms.dppo_clip_distributed.dppo_clip import DPPO_CLIP from functools import partial -# constants +# Specify the training configurations training_conf = { - 'total_step': int(1e7), - 'traj_len': 200, - 'train_n_traj': 2, - 'save_interval': 10, + 'total_step': int(1e7), # overall training timesteps + 'traj_len': 200, # length of the rollout trajectory + 'train_n_traj': 2, # update the models after every certain number of trajectories for each learner + 'save_interval': 10, # saving the models after every certain number of updates } -# launch env settings +# Specify the environment and launch it env_name, env_type = 'CartPole-v0', 'classic_control' env_maker = partial(build_env, env_name, env_type) temp_env = env_maker() @@ -41,6 +41,7 @@ def build_network(observation_space, action_space, name='DPPO_CLIP'): def build_opt(actor_lr=1e-4, critic_lr=2e-4): + """ choose the optimizer for learning """ import tensorflow as tf return [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] @@ -57,6 +58,6 @@ def build_opt(actor_lr=1e-4, critic_lr=2e-4): from rlzoo.distributed.start_dis_role import main -print('start training') +print('Start Training.') main(training_conf, env_conf, agent_conf) -print('finished') +print('Training Finished.') From 8f733807c4fb394d2b69555da96641fd882a9db8 Mon Sep 17 00:00:00 2001 From: Tianyang Yu <34995488+Tokarev-TT-33@users.noreply.github.com> Date: Sat, 26 Jun 2021 13:05:39 +0800 Subject: [PATCH 07/11] Update start_dis_role.py --- rlzoo/distributed/start_dis_role.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/rlzoo/distributed/start_dis_role.py b/rlzoo/distributed/start_dis_role.py index e24cee2..7716c96 100755 --- a/rlzoo/distributed/start_dis_role.py +++ b/rlzoo/distributed/start_dis_role.py @@ -111,7 +111,7 @@ def run_server(agent, args, training_conf, env_conf, agent_conf): curr_reward_list = [] tmp_eps_reward = 0 while 
curr_step < total_step: - tmp_eps_reward = 0 # todo env with no end + # tmp_eps_reward = 0 # todo env with no end for _ in range(traj_len): curr_step += agent.role_size(Role.Actor) @@ -145,15 +145,15 @@ def run_server(agent, args, training_conf, env_conf, agent_conf): train_cnt += 1 # todo env with end - # avg_eps_reward = None - # if curr_reward_list: - # avg_eps_reward = np.mean(curr_reward_list) - # curr_reward_list.clear() - # total_reward_list.append(avg_eps_reward) + avg_eps_reward = None + if curr_reward_list: + avg_eps_reward = np.mean(curr_reward_list) + curr_reward_list.clear() + total_reward_list.append(avg_eps_reward) # todo env with no end - avg_eps_reward = tmp_eps_reward - total_reward_list.append(np.array(avg_eps_reward)) + # avg_eps_reward = tmp_eps_reward + # total_reward_list.append(np.array(avg_eps_reward)) print('Training iters: {}, steps so far: {}, average eps reward: {}'.format( train_cnt, curr_step, np.array(avg_eps_reward))) From e3ec8eaf084ea095ed0e4526350f38a3358cfa5e Mon Sep 17 00:00:00 2001 From: Zihan Ding <1402434478@qq.com> Date: Sat, 26 Jun 2021 13:59:05 +0800 Subject: [PATCH 08/11] Update README.md --- README.md | 146 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) diff --git a/README.md b/README.md index 74930ed..042341e 100644 --- a/README.md +++ b/README.md @@ -279,6 +279,148 @@ python algorithms/ac/run_ac.py We also provide an interactive learning configuration with Jupyter Notebook and *ipywidgets*, where you can select the algorithm, environment, and general learning settings with simple clicking on dropdown lists and sliders! A video demonstrating the usage is as following. The interactive mode can be used with [`rlzoo/interactive/main.ipynb`](https://github.com/tensorlayer/RLzoo/blob/master/rlzoo/interactive/main.ipynb) by running `$ jupyter notebook` to open it. ![Interactive Video](https://github.com/tensorlayer/RLzoo/blob/master/gif/interactive.gif) + + +### Distributed Training +RLzoo supports distributed training frameworks across multiple computational nodes with multiple CPUs/GPUs, using the [Kungfu](https://github.com/lsds/KungFu) package. The installation of Kungfu requires to install *CMake* and *Golang* first, details see the [website of Kungfu](https://github.com/lsds/KungFu). +An example for distributed training is contained in folder `rlzoo/distributed`, by running the following command, you will launch the distributed training process: +```bash +rlzoo/distributed/run_dis_train.sh +``` +
+<details><summary>Code in Bash script <i>[click to expand]</i></summary>
+
+ +```bash +#!/bin/sh +set -e + +cd $(dirname $0) + +kungfu_flags() { + echo -q + echo -logdir logs + + local ip1=127.0.0.1 + local np1=$np + + local ip2=127.0.0.10 + local np2=$np + local H=$ip1:$np1,$ip2:$np2 + local m=cpu,gpu + + echo -H $ip1:$np1 +} + +prun() { + local np=$1 + shift + kungfu-run $(kungfu_flags) -np $np $@ +} + +n_learner=2 +n_actor=2 +n_server=1 + +flags() { + echo -l $n_learner + echo -a $n_actor + echo -s $n_server +} + +rl_run() { + local n=$((n_learner + n_actor + n_server)) + prun $n python3 training_components.py $(flags) +} + +main() { + rl_run +} + +main +``` +The script specifies the ip addresses for different computational nodes, as well as the number of policy learners (updating the models), actors (sampling through interaction with environments) and inference servers (policy forward inference during sampling process) as `n_learner`, `n_actor` and `n_server` respectively. + +
+</details>
+
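+With the defaults above (`n_learner=2`, `n_actor=2`, `n_server=1`), `kungfu-run` therefore starts five copies of `training_components.py`, and each process picks up its own job at start-up. The snippet below is a minimal, self-contained sketch of that role-dispatch pattern; the real logic lives in `rlzoo/distributed/start_dis_role.py`, and the `Role` enum and `DummyAgent` here are mock stand-ins for the KungFu agent, used for illustration only:
+
+```python
+from enum import Enum
+
+
+class Role(Enum):
+    """Mock of the three process roles described above."""
+    Learner = 0
+    Actor = 1
+    Server = 2
+
+
+class DummyAgent:
+    """Stand-in for the KungFu agent: it only reports a fixed role."""
+    def __init__(self, role):
+        self._role = role
+
+    def role(self):
+        return self._role
+
+    def barrier(self):
+        pass  # the real agent synchronises all processes here
+
+
+def dispatch(agent):
+    """Route one process to its job, mirroring the dispatch in start_dis_role.py."""
+    if agent.role() == Role.Learner:
+        print('learner: update the models')
+    elif agent.role() == Role.Actor:
+        print('actor: sample by interacting with the environment')
+    elif agent.role() == Role.Server:
+        print('server: run policy forward inference during sampling')
+    else:
+        raise RuntimeError('Invalid Role.')
+    agent.barrier()
+
+
+if __name__ == '__main__':
+    dispatch(DummyAgent(Role.Actor))
+```
+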
+
+Other training details are specified in a separate Python script, `training_components.py`, located **within the same directory** as `run_dis_train.sh`, as shown below.
+
+<details><summary>Code in Python script <i>[click to expand]</i></summary>
+
+ +```python +from rlzoo.common.env_wrappers import build_env +from rlzoo.common.policy_networks import * +from rlzoo.common.value_networks import * +from rlzoo.algorithms.dppo_clip_distributed.dppo_clip import DPPO_CLIP +from functools import partial + +# Specify the training configurations +training_conf = { + 'total_step': int(1e7), # overall training timesteps + 'traj_len': 200, # length of the rollout trajectory + 'train_n_traj': 2, # update the models after every certain number of trajectories for each learner + 'save_interval': 10, # saving the models after every certain number of updates +} + +# Specify the environment and launch it +env_name, env_type = 'CartPole-v0', 'classic_control' +env_maker = partial(build_env, env_name, env_type) +temp_env = env_maker() +obs_shape, act_shape = temp_env.observation_space.shape, temp_env.action_space.shape + +env_conf = { + 'env_name': env_name, + 'env_type': env_type, + 'env_maker': env_maker, + 'obs_shape': obs_shape, + 'act_shape': act_shape, +} + + +def build_network(observation_space, action_space, name='DPPO_CLIP'): + """ build networks for the algorithm """ + hidden_dim = 256 + num_hidden_layer = 2 + critic = ValueNetwork(observation_space, [hidden_dim] * num_hidden_layer, name=name + '_value') + + actor = StochasticPolicyNetwork(observation_space, action_space, + [hidden_dim] * num_hidden_layer, + trainable=True, + name=name + '_policy') + return critic, actor + + +def build_opt(actor_lr=1e-4, critic_lr=2e-4): + """ choose the optimizer for learning """ + import tensorflow as tf + return [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)] + + +net_builder = partial(build_network, temp_env.observation_space, temp_env.action_space) +opt_builder = partial(build_opt, ) + +agent_conf = { + 'net_builder': net_builder, + 'opt_builder': opt_builder, + 'agent_generator': partial(DPPO_CLIP, net_builder, opt_builder), +} +del temp_env + +from rlzoo.distributed.start_dis_role import main + +print('Start Training.') +main(training_conf, env_conf, agent_conf) +print('Training Finished.') + +``` +Users can specify the environment, network architectures, optimizers and other training detains in this script. + +
+</details>
+
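+For example, to switch the set-up from `CartPole-v0` to a continuous-control task, only the environment block (and, if desired, the optimizer settings) needs to change. The fragment below is a sketch of such an edit using `Pendulum-v0`; the learning rates are hypothetical values for illustration, and the rest of `training_components.py` stays unchanged:
+
+```python
+from functools import partial
+
+from rlzoo.common.env_wrappers import build_env
+
+# Swap the environment: Pendulum-v0 is a classic_control task with a continuous
+# action space; build_network can be reused as-is, since StochasticPolicyNetwork
+# takes the action space as an argument.
+env_name, env_type = 'Pendulum-v0', 'classic_control'
+env_maker = partial(build_env, env_name, env_type)
+temp_env = env_maker()
+obs_shape, act_shape = temp_env.observation_space.shape, temp_env.action_space.shape
+
+env_conf = {
+    'env_name': env_name,
+    'env_type': env_type,
+    'env_maker': env_maker,
+    'obs_shape': obs_shape,
+    'act_shape': act_shape,
+}
+
+
+def build_opt(actor_lr=3e-4, critic_lr=1e-3):
+    """ choose the optimizer for learning (example rates, not tuned) """
+    import tensorflow as tf
+    return [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)]
+```
+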
+ +Note: if RLzoo is installed, you can create the two scripts `run_dis_train.sh` and `training_components.py` in whatever directory to launch distributed training, as long as the two scripts are in the same directory. + ## Contents @@ -399,8 +541,12 @@ Our core contributors include: [Tianyang Yu](https://github.com/Tokarev-TT-33), [Yanhua Huang](https://github.com/Officium), [Hongming Zhang](https://github.com/initial-h), +[Guo Li](https://github.com/lgarithm), +Quancheng Guo, +[Luo Mai](https://github.com/luomai), [Hao Dong](https://github.com/zsdonghao) + ## Citing ``` From ee5712ab9b5eb1c7e2f0ef50de81095a7ebc3dbc Mon Sep 17 00:00:00 2001 From: Zihan Ding <1402434478@qq.com> Date: Sat, 26 Jun 2021 14:02:15 +0800 Subject: [PATCH 09/11] Update README.md --- README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 042341e..ea271e1 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ Please check our [**Online Documentation**](https://rlzoo.readthedocs.io) for de - [Contents](#contents) - [Algorithms](#algorithms) - [Environments](#environments) - - [Configurations](#configuration) + - [Configurations](#configurations) - [Properties](#properties) - [Troubleshooting](#troubleshooting) - [Credits](#credits) @@ -66,8 +66,13 @@ the coming months after initial release. We will keep improving the potential pr
 <details><summary>Version History <i>[click to expand]</i></summary>
+* 1.0.4 (Current version) -* 1.0.3 (Current version) + Changes: + + * Add distributed training for DPPO algorithm, using Kungfu + +* 1.0.3 Changes: From 893f88a55b0ed8d7b3152930907e199d1dcdd8fa Mon Sep 17 00:00:00 2001 From: Zihan Ding <1402434478@qq.com> Date: Sat, 26 Jun 2021 14:04:28 +0800 Subject: [PATCH 10/11] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index ea271e1..84f57bb 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,7 @@ the coming months after initial release. We will keep improving the potential pr
 <details><summary>Version History <i>[click to expand]</i></summary>
+ * 1.0.4 (Current version) Changes: From dae4afc2377e6777742aac1d990fe94436425bae Mon Sep 17 00:00:00 2001 From: Zihan Ding <1402434478@qq.com> Date: Sat, 26 Jun 2021 14:05:10 +0800 Subject: [PATCH 11/11] Update setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e69c424..36aa0d4 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ setup( name = "rlzoo", - version = "1.0.3", + version = "1.0.4", include_package_data=True, author='Zihan Ding, Tianyang Yu, Yanhua Huang, Hongming Zhang, Hao Dong', author_email='zhding@mail.ustc.edu.cn',