From 52df871dab046efb7a1083b3012b87c00a08b8dd Mon Sep 17 00:00:00 2001
From: AlejandroCN7
Date: Wed, 1 Mar 2023 10:00:28 +0000
Subject: [PATCH 01/15] Documentation web shields fixed

---
 docs/source/_templates/shields.html | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/source/_templates/shields.html b/docs/source/_templates/shields.html
index a7fca64d49..6ab8b5e2dd 100644
--- a/docs/source/_templates/shields.html
+++ b/docs/source/_templates/shields.html
@@ -1,4 +1,3 @@
-
 Github latest release

From 836bca01cf329935ebc87684514981a385a9b94e Mon Sep 17 00:00:00 2001
From: AlejandroCN7
Date: Thu, 2 Mar 2023 14:46:27 +0000
Subject: [PATCH 02/15] Added a JSON example for DRL_battery.py

---
 scripts/DRL_battery_example.json | 69 ++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 scripts/DRL_battery_example.json

diff --git a/scripts/DRL_battery_example.json b/scripts/DRL_battery_example.json
new file mode 100644
index 0000000000..2126501fe9
--- /dev/null
+++ b/scripts/DRL_battery_example.json
@@ -0,0 +1,69 @@
+{
+    "id": "PRUEBAALEX",
+    "environment": "Eplus-5Zone-hot-continuous-stochastic-v1",
+    "algorithm": {
+        "name": "SB3-PPO",
+        "log_interval": 100,
+        "parameters": {
+            "policy": "MlpPolicy",
+            "verbose": 1,
+            "learning_rate": 0.0003,
+            "n_steps": 2048,
+            "batch_size": 64,
+            "n_epochs": 10,
+            "gamma": 0.99,
+            "gae_lambda": 0.95,
+            "clip_range": 0.2,
+            "ent_coef": 0,
+            "vf_coef": 0.5,
+            "max_grad_norm": 0.5
+        }
+    },
+    "episodes": 5,
+    "seed": 3,
+    "model": null,
+    "reward": {
+        "class": "LinearReward",
+        "parameters": {
+            "temperature_variable": [
+                "Zone Air Temperature(SPACE1-1)",
+                "Zone Air Temperature(SPACE1-2)"
+            ],
+            "energy_variable": "Facility Total HVAC Electricity Demand Rate(Whole Building)",
+            "range_comfort_winter": [
+                20.0,
+                23.5
+            ],
+            "range_comfort_summer": [
+                23.0,
+                26.0
+            ]
+        }
+    },
+    "wrappers": [
+        {
+            "class": "NormalizeObservation",
+            "parameters": {
+                "ranges": "sinergym.utils.constants.RANGES_5ZONE"
+            }
+        },
+        {
+            "class": "LoggerWrapper",
+            "parameters": {
+                "logger_class": "sinergym.utils.logger.CSVLogger",
+                "flag": true
+            }
+        }
+    ],
+    "evaluation": {
+        "eval_freq": 2,
+        "eval_length": 1
+    },
+    "tensorboard": "./tensorboard_log/",
+    "cloud": {
+        "remote_store": "bucket_example",
+        "mlflow_store": true,
+        "group_name": "example_group",
+        "autodelete": true
+    }
+}
\ No newline at end of file

From d3de2e77a54287cdd7d83e42e9bf67b1927b7939 Mon Sep 17 00:00:00 2001
From: AlejandroCN7
Date: Thu, 2 Mar 2023 14:48:03 +0000
Subject: [PATCH 03/15] First approximation to adapt DRL_battery.py to a json
 configuration

---
 scripts/DRL_battery.py | 528 +++++++++++++----------------------------
 1 file changed, 163 insertions(+), 365 deletions(-)

diff --git a/scripts/DRL_battery.py b/scripts/DRL_battery.py
index da02908537..82c20bcb79 100644
--- a/scripts/DRL_battery.py
+++ b/scripts/DRL_battery.py
@@ -1,21 +1,23 @@
 import argparse
+import sys
 import os
+import json
 from datetime import datetime

 import gymnasium as gym
 import mlflow
 import numpy as np
 import tensorboard
-from stable_baselines3 import A2C, DDPG, DQN, PPO, SAC, TD3
-from stable_baselines3.common.callbacks import CallbackList
-from stable_baselines3.common.logger import configure
-from stable_baselines3.common.noise import NormalActionNoise
-from stable_baselines3.common.vec_env import DummyVecEnv
+# from stable_baselines3 import A2C, DDPG, DQN, PPO, SAC, TD3
+# from stable_baselines3.common.callbacks import CallbackList
+# from stable_baselines3.common.logger import configure
+# from stable_baselines3.common.noise import NormalActionNoise
+# from stable_baselines3.common.vec_env import DummyVecEnv

 import sinergym
 import sinergym.utils.gcloud as gcloud
-from sinergym.utils.callbacks import LoggerCallback, LoggerEvalCallback
-from sinergym.utils.constants import RANGES_5ZONE, RANGES_DATACENTER, RANGES_IW
+# from sinergym.utils.callbacks import LoggerCallback, LoggerEvalCallback
+from sinergym.utils.constants import *
 from sinergym.utils.rewards import *
from sinergym.utils.wrappers import (LoggerWrapper, MultiObsWrapper, NormalizeObservation) @@ -24,176 +26,34 @@ # Parameters definition # # ---------------------------------------------------------------------------- # parser = argparse.ArgumentParser() -# commons arguments for battery parser.add_argument( - '--environment', - '-env', + '--configuration', + '-conf', required=True, type=str, - dest='environment', - help='Environment name of simulation (see sinergym/__init__.py).') -parser.add_argument( - '--model', - '-mod', - type=str, - default=None, - dest='model', - help='Path where model is stored, only when you want a model to continue training it.') -parser.add_argument( - '--episodes', - '-ep', - type=int, - default=1, - dest='episodes', - help='Number of episodes for training.') -parser.add_argument( - '--algorithm', - '-alg', - type=str, - default='PPO', - dest='algorithm', - help='Algorithm used to train (possible values: PPO, A2C, DQN, DDPG, SAC, TD3).') -parser.add_argument( - '--reward', - '-rw', - type=str, - default='linear', - dest='reward', - help='Reward function used by model, by default is linear (possible values: linear, exponential).') -parser.add_argument( - '--energy_weight', - '-rew', - type=float, - dest='energy_weight', - help='Reward energy weight with compatible rewards types.') -parser.add_argument( - '--normalization', - '-norm', - action='store_true', - dest='normalization', - help='Apply normalization to observations if this flag is specified.') -parser.add_argument( - '--multiobs', - '-mobs', - action='store_true', - dest='multiobs', - help='Apply Multi observations if this flag is specified.') -parser.add_argument( - '--logger', - '-log', - action='store_true', - dest='logger', - help='Apply Sinergym CSVLogger class if this flag is specified.') -parser.add_argument( - '--tensorboard', - '-tens', - type=str, - default=None, - dest='tensorboard', - help='Tensorboard path for logging (if not specified, tensorboard log will not be stored).') -parser.add_argument( - '--evaluation', - '-eval', - action='store_true', - dest='evaluation', - help='Evaluation is processed during training with this flag (save best model online).') -parser.add_argument( - '--eval_freq', - '-evalf', - type=int, - default=2, - dest='eval_freq', - help='Episodes executed before applying evaluation (if evaluation flag is not specified, this value is useless).') -parser.add_argument( - '--eval_length', - '-evall', - type=int, - default=2, - dest='eval_length', - help='Episodes executed during evaluation (if evaluation flag is not specified, this value is useless).') -parser.add_argument( - '--log_interval', - '-inter', - type=int, - default=1, - dest='log_interval', - help='model training log_interval parameter. 
See documentation since this value is different in every algorithm.') -parser.add_argument( - '--seed', - '-sd', - type=int, - default=None, - dest='seed', - help='Seed used to algorithm training.') -parser.add_argument( - '--id', - '-id', - type=str, - default=None, - dest='id', - help='Custom experiment identifier.') -parser.add_argument( - '--remote_store', - '-sto', - action='store_true', - dest='remote_store', - help='Determine if sinergym output will be sent to a Google Cloud Storage Bucket.') -parser.add_argument( - '--bucket_name', - '-buc', - type=str, - default='experiments-storage', - dest='bucket_name', - help='Name of the bucket where experiments output will be stored if experiment is configured to that.') -parser.add_argument( - '--mlflow_store', - '-mlflow', - action='store_true', - dest='mlflow_store', - help='Determine if sinergym output will be sent to a mlflow artifact storage') -parser.add_argument( - '--group_name', - '-group', - type=str, - dest='group_name', - help='This field indicate instance group name') -parser.add_argument( - '--auto_delete', - '-del', - action='store_true', - dest='auto_delete', - help='If is a GCE instance and this flag is active, that instance will be removed from GCP.') - -parser.add_argument('--learning_rate', '-lr', type=float, default=.0003) -parser.add_argument('--n_steps', '-n', type=int, default=2048) -parser.add_argument('--batch_size', '-bs', type=int, default=64) -parser.add_argument('--n_epochs', '-ne', type=int, default=10) -parser.add_argument('--gamma', '-g', type=float, default=.99) -parser.add_argument('--gae_lambda', '-gl', type=float, default=.95) -parser.add_argument('--ent_coef', '-ec', type=float, default=0) -parser.add_argument('--vf_coef', '-v', type=float, default=.5) -parser.add_argument('--max_grad_norm', '-m', type=float, default=.5) -parser.add_argument('--buffer_size', '-bfs', type=int, default=1000000) -parser.add_argument('--learning_starts', '-ls', type=int, default=100) -parser.add_argument('--tau', '-tu', type=float, default=0.005) -parser.add_argument('--gradient_steps', '-gs', type=int, default=1) -parser.add_argument('--clip_range', '-cr', type=float, default=.2) -parser.add_argument('--sigma', '-sig', type=float, default=0.1) -parser.add_argument('--rms_prop_eps', '-rpe', type=float, default=1e-5) - + dest='configuration', + help='Path to experiment configuration (JSON file)' +) args = parser.parse_args() # ------------------------------------------------------------------------------# +# ---------------------------------------------------------------------------- # +# Read json parameters # +# ---------------------------------------------------------------------------- # + +with open(args.configuration) as json_conf: + conf = json.load(json_conf) + # ---------------------------------------------------------------------------- # # Register run name # # ---------------------------------------------------------------------------- # experiment_date = datetime.today().strftime('%Y-%m-%d_%H:%M') -name = args.algorithm + '-' + args.environment + \ - '-episodes-' + str(args.episodes) -if args.seed: - name += '-seed-' + str(args.seed) -if args.id: - name += '-id-' + str(args.id) +name = conf['algorithm']['name'] + '-' + conf['environment'] + \ + '-episodes-' + str(conf['episodes']) +if conf.get('seed'): + name += '-seed-' + str(conf['seed']) +if conf.get('id'): + name += '-id-' + str(conf['id']) name += '_' + experiment_date # ---------------------------------------------------------------------------- # @@ 
-210,92 +70,72 @@ # MLflow track with mlflow.start_run(run_name=name): # Log experiment params + # sinergym and python versions mlflow.log_param('sinergym-version', sinergym.__version__) + mlflow.log_param('python-version', sys.version) - mlflow.log_param('env', args.environment) - mlflow.log_param('episodes', args.episodes) - mlflow.log_param('algorithm', args.algorithm) - mlflow.log_param('reward', args.reward) - mlflow.log_param('normalization', bool(args.normalization)) - mlflow.log_param('multi-observations', bool(args.multiobs)) - mlflow.log_param('logger', bool(args.logger)) - mlflow.log_param('tensorboard', args.tensorboard) - mlflow.log_param('evaluation', bool(args.evaluation)) - mlflow.log_param('evaluation-frequency', args.eval_freq) - mlflow.log_param('evaluation-length', args.eval_length) - mlflow.log_param('log-interval', args.log_interval) - mlflow.log_param('seed', args.seed) - mlflow.log_param('remote-store', bool(args.remote_store)) + mlflow.log_param('environment', conf['environment']) + mlflow.log_param('episodes', conf['episodes']) + mlflow.log_param('algorithm', conf['algorithm']['name']) + mlflow.log_param('reward', conf['reward']['class']) + mlflow.log_param( + 'normalization', bool( + conf.get('wrappers').get('class') == 'NormalizeObservation')) + mlflow.log_param( + 'multi-observations', + bool( + conf.get('wrappers').get('class') == 'MultiObsWrapper')) + mlflow.log_param( + 'logger', bool( + conf.get('wrappers').get('class') == 'LoggerWrapper')) + mlflow.log_param('tensorboard', conf.get('tensorboard')) + mlflow.log_param('evaluation', bool(conf.get('evaluation'))) + mlflow.log_param('evaluation-frequency', + conf.get('evaluation').get('eval_freq')) + mlflow.log_param( + 'evaluation-length', + conf.get('evaluation').get('eval_length')) + mlflow.log_param('log-interval', conf['algorithm'].get('log_interval')) + mlflow.log_param('seed', conf.get('seed')) + mlflow.log_param( + 'remote-store', + bool( + conf.get('cloud').get('remote_store'))) - mlflow.log_param('learning-rate', args.learning_rate) - mlflow.log_param('n-steps', args.n_steps) - mlflow.log_param('batch-size', args.batch_size) - mlflow.log_param('n-epochs', args.n_epochs) - mlflow.log_param('gamma', args.gamma) - mlflow.log_param('gae-lambda', args.gae_lambda) - mlflow.log_param('ent-coef', args.ent_coef) - mlflow.log_param('vf-coef', args.vf_coef) - mlflow.log_param('max-grad-norm', args.max_grad_norm) - mlflow.log_param('buffer-size', args.buffer_size) - mlflow.log_param('learning-starts', args.learning_starts) - mlflow.log_param('tau', args.tau) - mlflow.log_param('gradient-steps', args.gradient_steps) - mlflow.log_param('clip-range', args.clip_range) - mlflow.log_param('sigma', args.sigma) - mlflow.log_param('rms_prop_eps', args.rms_prop_eps) - mlflow.log_param('id', args.id) + # algorithm params + mlflow.log_params(conf['algorithm'].get('parameters')) + # reward params + mlflow.log_params(conf['reward'].get('parameters')) # ---------------------------------------------------------------------------- # # Environment construction (with reward specified) # # ---------------------------------------------------------------------------- # - if args.reward == 'linear': - reward = LinearReward - elif args.reward == 'exponential': - reward = ExpReward - else: - raise RuntimeError( - 'Reward function [{}] specified is not registered.'.format( - args.reward)) + reward = eval(conf['reward']['class']) + reward_kwargs = conf['reward']['parameters'] - env = gym.make(args.environment, reward=reward) - if 
hasattr(env.reward_fn, 'W_energy') and args.energy_weight is not None: - env.reward_fn.W_energy = args.energy_weight + env = gym.make( + args.environment, + reward=reward, + reward_kwargs=reward_kwargs) # env for evaluation if is enabled eval_env = None - if args.evaluation: - eval_env = gym.make(args.environment, reward=reward) - if hasattr(eval_env.reward_fn, 'W_energy') and args.energy_weight: - eval_env.reward_fn.W_energy = args.energy_weight + if conf.get('evaluation'): + eval_env = gym.make( + args.environment, + reward=reward, + reward_kwargs=reward_kwargs) # ---------------------------------------------------------------------------- # # Wrappers # # ---------------------------------------------------------------------------- # - if args.normalization: - # dictionary ranges to use - norm_range = None - env_type = args.environment.split('-')[1] - if env_type == 'datacenter': - norm_range = RANGES_DATACENTER - elif env_type == '5Zone': - norm_range = RANGES_5ZONE - elif env_type == 'IWMullion': - norm_range = RANGES_IW - else: - raise NameError( - 'Normalization cant be use on environment :"{}", check environment name or disable normalization'.format( - args.environment)) - env = NormalizeObservation(env, ranges=norm_range) + if conf.get('wrappers'): + wrappers = conf['wrappers'] + for wrapper in wrappers: + wrapper_class = eval(wrapper['class']) + env = wrapper_class(env, **wrapper['parameters']) if eval_env is not None: - eval_env = NormalizeObservation(eval_env, ranges=norm_range) - if args.logger: - env = LoggerWrapper(env) - if eval_env is not None: - eval_env = LoggerWrapper(eval_env) - if args.multiobs: - env = MultiObsWrapper(env) - if eval_env is not None: - eval_env = MultiObsWrapper(eval_env) + eval_env = wrapper_class(eval_env, **wrapper['parameters']) # ---------------------------------------------------------------------------- # # Defining model (algorithm) # @@ -306,141 +146,96 @@ # --------------------------------------------------------# # DQN # # --------------------------------------------------------# - if args.algorithm == 'DQN': - model = DQN('MlpPolicy', env, verbose=1, - learning_rate=args.learning_rate, - buffer_size=args.buffer_size, - learning_starts=args.learning_starts, - batch_size=args.batch_size, - tau=args.tau, - gamma=args.gamma, - train_freq=4, - gradient_steps=args.gradient_steps, - target_update_interval=10000, - exploration_fraction=.1, - exploration_initial_eps=1.0, - exploration_final_eps=.05, - max_grad_norm=args.max_grad_norm, - seed=args.seed, - tensorboard_log=args.tensorboard) + if conf['algorithm']['name'] == 'SB3-DQN': + + model = DQN(env=env, + seed=conf.get('seed', None), + tensorboard_log=conf.get('tensorboard', None), + ** conf['algorithm']['parameters']) # --------------------------------------------------------# # DDPG # # --------------------------------------------------------# - elif args.algorithm == 'DDPG': - if args.sigma: - # noise objects for DDPG - n_actions = env.action_space.shape[-1] - action_noise = NormalActionNoise(mean=np.zeros( - n_actions), sigma=0.1 * np.ones(n_actions)) - - model = DDPG("MlpPolicy", - env, - action_noise=action_noise, - verbose=1, - seed=args.seed, - tensorboard_log=args.tensorboard) + elif conf['algorithm']['name'] == 'SB3-DDPG': + model = DDPG(env, + seed=conf.get('seed', None), + tensorboard_log=conf.get('tensorboard', None), + ** conf['algorithm']['parameters']) # --------------------------------------------------------# # A2C # # --------------------------------------------------------# - 
elif args.algorithm == 'A2C': - model = A2C('MlpPolicy', env, verbose=1, - learning_rate=args.learning_rate, - n_steps=args.n_steps, - gamma=args.gamma, - gae_lambda=args.gae_lambda, - ent_coef=args.ent_coef, - vf_coef=args.vf_coef, - max_grad_norm=args.max_grad_norm, - rms_prop_eps=args.rms_prop_eps, - seed=args.seed, - tensorboard_log=args.tensorboard) + elif conf['algorithm']['name'] == 'SB3-A2C': + model = A2C(env, + seed=conf.get('seed', None), + tensorboard_log=conf.get('tensorboard', None), + ** conf['algorithm']['parameters']) # --------------------------------------------------------# # PPO # # --------------------------------------------------------# - elif args.algorithm == 'PPO': - model = PPO('MlpPolicy', env, verbose=1, - learning_rate=args.learning_rate, - n_steps=args.n_steps, - batch_size=args.batch_size, - n_epochs=args.n_epochs, - gamma=args.gamma, - gae_lambda=args.gae_lambda, - clip_range=args.clip_range, - ent_coef=args.ent_coef, - vf_coef=args.vf_coef, - max_grad_norm=args.max_grad_norm, - seed=args.seed, - tensorboard_log=args.tensorboard) + elif conf['algorithm']['name'] == 'SB3-PPO': + model = PPO(env, + seed=conf.get('seed', None), + tensorboard_log=conf.get('tensorboard', None), + ** conf['algorithm']['parameters']) # --------------------------------------------------------# # SAC # # --------------------------------------------------------# - elif args.algorithm == 'SAC': - model = SAC(policy='MlpPolicy', - env=env, - seed=args.seed, - learning_rate=args.learning_rate, - buffer_size=args.buffer_size, - batch_size=args.batch_size, - tau=args.tau, - gamma=args.gamma, - tensorboard_log=args.tensorboard) + elif conf['algorithm']['name'] == 'SB3-SAC': + model = SAC(env, + seed=conf.get('seed', None), + tensorboard_log=conf.get('tensorboard', None), + ** conf['algorithm']['parameters']) # --------------------------------------------------------# # TD3 # # --------------------------------------------------------# - elif args.algorithm == 'TD3': - model = TD3(policy='MlpPolicy', - env=env, seed=args.seed, - tensorboard_log=args.tensorboard, - learning_rate=args.learning_rate, - buffer_size=args.buffer_size, - batch_size=args.batch_size, - tau=args.tau, - gamma=args.gamma, - train_freq=(1, 'episode'), - action_noise=None, - replay_buffer_class=None, - replay_buffer_kwargs=None, - optimize_memory_usage=False, - policy_delay=2, - target_policy_noise=0.2, - target_noise_clip=0.5, - create_eval_env=False, - policy_kwargs=None, - verbose=0, - device='auto', - _init_setup_model=True) + elif conf['algorithm']['name'] == 'SB3-TD3': + model = TD3(env, + seed=conf.get('seed', None), + tensorboard_log=conf.get('tensorboard', None), + ** conf['algorithm']['parameters']) # --------------------------------------------------------# # Error # # --------------------------------------------------------# else: raise RuntimeError( - F'Algorithm specified [{args.algorithm}] is not registered.') + F'Algorithm specified [{conf["algorithm"]["name"]} ] is not registered.') else: model_path = '' - if 'gs://' in args.model: + if 'gs://' in conf['model']: # Download from given bucket (gcloud configured with privileges) client = gcloud.init_storage_client() - bucket_name = args.model.split('/')[2] - model_path = args.model.split(bucket_name + '/')[-1] + bucket_name = conf['model'].split('/')[2] + model_path = conf['model'].split(bucket_name + '/')[-1] gcloud.read_from_bucket(client, bucket_name, model_path) model_path = './' + model_path else: - model_path = args.model + model_path = conf['model'] 
model = None - if args.algorithm == 'DQN': - model = DQN.load(model_path, tensorboard_log=args.tensorboard) - elif args.algorithm == 'DDPG': - model = DDPG.load(model_path, tensorboard_log=args.tensorboard) - elif args.algorithm == 'A2C': - model = A2C.load(model_path, tensorboard_log=args.tensorboard) - elif args.algorithm == 'PPO': - model = PPO.load(model_path, tensorboard_log=args.tensorboard) - elif args.algorithm == 'SAC': - model = SAC.load(model_path, tensorboard_log=args.tensorboard) - elif args.algorithm == 'TD3': - model = TD3.load(model_path, tensorboard_log=args.tensorboard) + if conf['algorithm']['name'] == 'SB3-DQN': + model = DQN.load( + model_path, tensorboard_log=conf.get( + 'tensorboard', None)) + elif conf['algorithm']['name'] == 'SB3-DDPG': + model = DDPG.load( + model_path, tensorboard_log=conf.get( + 'tensorboard', None)) + elif conf['algorithm']['name'] == 'SB3-A2C': + model = A2C.load( + model_path, tensorboard_log=conf.get( + 'tensorboard', None)) + elif conf['algorithm']['name'] == 'SB3-PPO': + model = PPO.load( + model_path, tensorboard_log=conf.get( + 'tensorboard', None)) + elif conf['algorithm']['name'] == 'SB3-SAC': + model = SAC.load( + model_path, tensorboard_log=conf.get( + 'tensorboard', None)) + elif conf['algorithm']['name'] == 'SB3-TD3': + model = TD3.load( + model_path, tensorboard_log=conf.get( + 'tensorboard', None)) else: raise RuntimeError('Algorithm specified is not registered.') @@ -451,7 +246,7 @@ # ---------------------------------------------------------------------------- # n_timesteps_episode = env.simulator._eplus_one_epi_len / \ env.simulator._eplus_run_stepsize - timesteps = args.episodes * n_timesteps_episode - 1 + timesteps = conf['episodes'] * n_timesteps_episode - 1 # ---------------------------------------------------------------------------- # # CALLBACKS # @@ -459,24 +254,24 @@ callbacks = [] # Set up Evaluation and saving best model - if args.evaluation: + if conf.get('evaluation'): eval_callback = LoggerEvalCallback( eval_env, best_model_save_path='best_model/' + name, log_path='best_model/' + name + '/', eval_freq=n_timesteps_episode * - args.eval_freq, + conf['evaluation']['eval_freq'], deterministic=True, render=False, - n_eval_episodes=args.eval_length) + n_eval_episodes=conf['evaluation']['eval_length']) callbacks.append(eval_callback) # Set up tensorboard logger - if args.tensorboard: - log_callback = LoggerCallback(sinergym_logger=bool(args.logger)) + if conf.get('tensorboard'): + log_callback = LoggerCallback() callbacks.append(log_callback) # lets change default dir for TensorboardFormatLogger only - tb_path = args.tensorboard + '/' + name + tb_path = conf['tensorboard'] + '/' + name new_logger = configure(tb_path, ["tensorboard"]) model.set_logger(new_logger) @@ -488,7 +283,7 @@ model.learn( total_timesteps=timesteps, callback=callback, - log_interval=args.log_interval) + log_interval=conf['algorithm']['log_interval']) model.save(env.simulator._env_working_dir_parent + '/' + name) # If the algorithm doesn't reset or close the environment, this script will do it in @@ -500,46 +295,48 @@ # ---------------------------------------------------------------------------- # # Mlflow artifacts storege # # ---------------------------------------------------------------------------- # - if args.mlflow_store: + if conf.get('cloud').get('mlflow_store'): # Code for send output and tensorboard to mlflow artifacts. 
mlflow.log_artifacts( local_dir=env.simulator._env_working_dir_parent, artifact_path=name) - if args.evaluation: + if conf.get('evaluation'): mlflow.log_artifacts( local_dir='best_model/' + name, artifact_path='best_model/' + name) # If tensorboard is active (in local) we should send to mlflow - if args.tensorboard and 'gs://' + args.bucket_name not in args.tensorboard: + if conf.get('tensorboard') and 'gs://' + \ + conf['cloud']['remote_store'] not in conf['tensorboard']: mlflow.log_artifacts( - local_dir=args.tensorboard + '/' + name, - artifact_path=os.path.abspath(args.tensorboard).split('/')[-1] + '/' + name) + local_dir=conf['tensorboard'] + '/' + name, + artifact_path=os.path.abspath(conf['tensorboard']).split('/')[-1] + '/' + name) # ---------------------------------------------------------------------------- # # Google Cloud Bucket Storage # # ---------------------------------------------------------------------------- # - if args.remote_store: + if conf.get('cloud').get('remote_store'): # Initiate Google Cloud client client = gcloud.init_storage_client() # Code for send output and tensorboard to common resource here. gcloud.upload_to_bucket( client, src_path=env.simulator._env_working_dir_parent, - dest_bucket_name=args.bucket_name, + dest_bucket_name=conf['cloud']['remote_store'], dest_path=name) - if args.evaluation: + if conf.get('evaluation'): gcloud.upload_to_bucket( client, src_path='best_model/' + name + '/', - dest_bucket_name=args.bucket_name, + dest_bucket_name=conf['cloud']['remote_store'], dest_path='best_model/' + name + '/') # If tensorboard is active (in local) we should send to bucket - if args.tensorboard and 'gs://' + args.bucket_name not in args.tensorboard: + if conf['tensorboard'] and 'gs://' + \ + conf['cloud']['remote_store'] not in conf['tensorboard']: gcloud.upload_to_bucket( client, - src_path=args.tensorboard + '/' + name + '/', - dest_bucket_name=args.bucket_name, - dest_path=os.path.abspath(args.tensorboard).split('/')[-1] + '/' + name + '/') + src_path=conf['tensorboard'] + '/' + name + '/', + dest_bucket_name=conf['cloud']['remote_store'], + dest_path=os.path.abspath(conf['tensorboard']).split('/')[-1] + '/' + name + '/') # gcloud.upload_to_bucket( # client, # src_path='mlruns/', @@ -552,6 +349,7 @@ # ---------------------------------------------------------------------------- # # Autodelete option if is a cloud resource # # ---------------------------------------------------------------------------- # - if args.group_name and args.auto_delete: + if args.group_name and conf['cloud']['auto_delete']: token = gcloud.get_service_account_token() - gcloud.delete_instance_MIG_from_container(args.group_name, token) + gcloud.delete_instance_MIG_from_container( + conf['cloud']['group_name'], token) From d46244278ce296730917d8ea13a275b8b316bb7e Mon Sep 17 00:00:00 2001 From: AlejandroCN7 Date: Fri, 3 Mar 2023 11:53:51 +0000 Subject: [PATCH 04/15] Update version from 2.2.4 to 2.2.5 --- sinergym/version.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sinergym/version.txt b/sinergym/version.txt index 530cdd91a2..21bb5e156f 100644 --- a/sinergym/version.txt +++ b/sinergym/version.txt @@ -1 +1 @@ -2.2.4 +2.2.5 From ce298feda194a048468951e61b5e30d88a2e52a0 Mon Sep 17 00:00:00 2001 From: AlejandroCN7 Date: Fri, 3 Mar 2023 11:54:37 +0000 Subject: [PATCH 05/15] Enhanced JSON structure in DRL_battery example --- scripts/DRL_battery_example.json | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git 
a/scripts/DRL_battery_example.json b/scripts/DRL_battery_example.json index 2126501fe9..1119752fd6 100644 --- a/scripts/DRL_battery_example.json +++ b/scripts/DRL_battery_example.json @@ -1,6 +1,7 @@ { "id": "PRUEBAALEX", "environment": "Eplus-5Zone-hot-continuous-stochastic-v1", + "episodes": 5, "algorithm": { "name": "SB3-PPO", "log_interval": 100, @@ -19,7 +20,6 @@ "max_grad_norm": 0.5 } }, - "episodes": 5, "seed": 3, "model": null, "reward": { @@ -40,21 +40,15 @@ ] } }, - "wrappers": [ - { - "class": "NormalizeObservation", - "parameters": { - "ranges": "sinergym.utils.constants.RANGES_5ZONE" - } + "wrappers": { + "NormalizeObservation": { + "ranges": "sinergym.utils.constants.RANGES_5ZONE" }, - { - "class": "LoggerWrapper", - "parameters": { - "logger_class": "sinergym.utils.logger.CSVLogger", - "flag": true - } + "LoggerWrapper": { + "logger_class": "sinergym.utils.logger.CSVLogger", + "flag": true } - ], + }, "evaluation": { "eval_freq": 2, "eval_length": 1 @@ -64,6 +58,6 @@ "remote_store": "bucket_example", "mlflow_store": true, "group_name": "example_group", - "autodelete": true + "auto_delete": true } } \ No newline at end of file From 60e591dede6b0b6dafbda7f36333300682d05894 Mon Sep 17 00:00:00 2001 From: AlejandroCN7 Date: Fri, 3 Mar 2023 11:55:06 +0000 Subject: [PATCH 06/15] Re-adapted DRL_battery to new JSON changes and fixed some bugs --- scripts/DRL_battery.py | 215 +++++++++++++++++++++-------------------- 1 file changed, 108 insertions(+), 107 deletions(-) diff --git a/scripts/DRL_battery.py b/scripts/DRL_battery.py index 82c20bcb79..d37d213db8 100644 --- a/scripts/DRL_battery.py +++ b/scripts/DRL_battery.py @@ -8,15 +8,15 @@ import mlflow import numpy as np import tensorboard -# from stable_baselines3 import A2C, DDPG, DQN, PPO, SAC, TD3 -# from stable_baselines3.common.callbacks import CallbackList -# from stable_baselines3.common.logger import configure -# from stable_baselines3.common.noise import NormalActionNoise -# from stable_baselines3.common.vec_env import DummyVecEnv +from stable_baselines3 import A2C, DDPG, DQN, PPO, SAC, TD3 +from stable_baselines3.common.callbacks import CallbackList +from stable_baselines3.common.logger import configure +from stable_baselines3.common.noise import NormalActionNoise +from stable_baselines3.common.vec_env import DummyVecEnv import sinergym import sinergym.utils.gcloud as gcloud -# from sinergym.utils.callbacks import LoggerCallback, LoggerEvalCallback +from sinergym.utils.callbacks import LoggerCallback, LoggerEvalCallback from sinergym.utils.constants import * from sinergym.utils.rewards import * from sinergym.utils.wrappers import (LoggerWrapper, MultiObsWrapper, @@ -73,34 +73,36 @@ # sinergym and python versions mlflow.log_param('sinergym-version', sinergym.__version__) mlflow.log_param('python-version', sys.version) - + # Main mlflow.log_param('environment', conf['environment']) mlflow.log_param('episodes', conf['episodes']) mlflow.log_param('algorithm', conf['algorithm']['name']) mlflow.log_param('reward', conf['reward']['class']) + # Optional + mlflow.log_param('tensorboard', conf.get('tensorboard', False)) mlflow.log_param( - 'normalization', bool( - conf.get('wrappers').get('class') == 'NormalizeObservation')) - mlflow.log_param( - 'multi-observations', - bool( - conf.get('wrappers').get('class') == 'MultiObsWrapper')) - mlflow.log_param( - 'logger', bool( - conf.get('wrappers').get('class') == 'LoggerWrapper')) - mlflow.log_param('tensorboard', conf.get('tensorboard')) - mlflow.log_param('evaluation', 
bool(conf.get('evaluation'))) - mlflow.log_param('evaluation-frequency', - conf.get('evaluation').get('eval_freq')) - mlflow.log_param( - 'evaluation-length', - conf.get('evaluation').get('eval_length')) - mlflow.log_param('log-interval', conf['algorithm'].get('log_interval')) - mlflow.log_param('seed', conf.get('seed')) - mlflow.log_param( - 'remote-store', - bool( - conf.get('cloud').get('remote_store'))) + 'log-interval', + conf['algorithm'].get( + 'log_interval', + False)) + mlflow.log_param('seed', conf.get('seed', False)) + if conf.get('cloud', False): + mlflow.log_param( + 'remote-store', + conf['cloud'].get( + 'remote_store', + False)) + if conf.get('wrappers'): + for key in conf['wrappers']: + mlflow.log_param(key, True) + mlflow.log_param('evaluation', bool(conf.get('evaluation', False))) + if conf.get('evaluation'): + mlflow.log_param( + 'evaluation-frequency', + conf['evaluation'].get('eval_freq')) + mlflow.log_param( + 'evaluation-length', + conf['evaluation'].get('eval_length')) # algorithm params mlflow.log_params(conf['algorithm'].get('parameters')) @@ -114,7 +116,7 @@ reward_kwargs = conf['reward']['parameters'] env = gym.make( - args.environment, + conf['environment'], reward=reward, reward_kwargs=reward_kwargs) @@ -122,7 +124,7 @@ eval_env = None if conf.get('evaluation'): eval_env = gym.make( - args.environment, + conf['environment'], reward=reward, reward_kwargs=reward_kwargs) @@ -130,74 +132,78 @@ # Wrappers # # ---------------------------------------------------------------------------- # if conf.get('wrappers'): - wrappers = conf['wrappers'] - for wrapper in wrappers: - wrapper_class = eval(wrapper['class']) - env = wrapper_class(env, **wrapper['parameters']) - if eval_env is not None: - eval_env = wrapper_class(eval_env, **wrapper['parameters']) + for key, parameters in conf['wrappers']: + wrapper_class = eval(key) + # parse str parameters to sinergym variables + for name, value in parameters: + if 'sinergym.' 
in name: + parameters[name] = eval(value) + env = wrapper_class(env=env, ** parameters) + if eval_env is not None: + eval_env = wrapper_class(env=eval_env, ** parameters) # ---------------------------------------------------------------------------- # # Defining model (algorithm) # # ---------------------------------------------------------------------------- # - - if args.model is None: + algorithm_name = conf['algorithm']['name'] + algorithm_parameters = conf['algorithm']['parameters'] + if conf.get('model') is None: # --------------------------------------------------------# # DQN # # --------------------------------------------------------# - if conf['algorithm']['name'] == 'SB3-DQN': + if algorithm_name == 'SB3-DQN': model = DQN(env=env, seed=conf.get('seed', None), tensorboard_log=conf.get('tensorboard', None), - ** conf['algorithm']['parameters']) + ** algorithm_parameters) # --------------------------------------------------------# # DDPG # # --------------------------------------------------------# - elif conf['algorithm']['name'] == 'SB3-DDPG': + elif algorithm_name == 'SB3-DDPG': model = DDPG(env, seed=conf.get('seed', None), tensorboard_log=conf.get('tensorboard', None), - ** conf['algorithm']['parameters']) + ** algorithm_parameters) # --------------------------------------------------------# # A2C # # --------------------------------------------------------# - elif conf['algorithm']['name'] == 'SB3-A2C': + elif algorithm_name == 'SB3-A2C': model = A2C(env, seed=conf.get('seed', None), tensorboard_log=conf.get('tensorboard', None), - ** conf['algorithm']['parameters']) + ** algorithm_parameters) # --------------------------------------------------------# # PPO # # --------------------------------------------------------# - elif conf['algorithm']['name'] == 'SB3-PPO': + elif algorithm_name == 'SB3-PPO': model = PPO(env, seed=conf.get('seed', None), tensorboard_log=conf.get('tensorboard', None), - ** conf['algorithm']['parameters']) + ** algorithm_parameters) # --------------------------------------------------------# # SAC # # --------------------------------------------------------# - elif conf['algorithm']['name'] == 'SB3-SAC': + elif algorithm_name == 'SB3-SAC': model = SAC(env, seed=conf.get('seed', None), tensorboard_log=conf.get('tensorboard', None), - ** conf['algorithm']['parameters']) + ** algorithm_parameters) # --------------------------------------------------------# # TD3 # # --------------------------------------------------------# - elif conf['algorithm']['name'] == 'SB3-TD3': + elif algorithm_name == 'SB3-TD3': model = TD3(env, seed=conf.get('seed', None), tensorboard_log=conf.get('tensorboard', None), - ** conf['algorithm']['parameters']) + ** algorithm_parameters) # --------------------------------------------------------# # Error # # --------------------------------------------------------# else: raise RuntimeError( - F'Algorithm specified [{conf["algorithm"]["name"]} ] is not registered.') + F'Algorithm specified [{algorithm_name} ] is not registered.') else: model_path = '' @@ -212,27 +218,27 @@ model_path = conf['model'] model = None - if conf['algorithm']['name'] == 'SB3-DQN': + if algorithm_name == 'SB3-DQN': model = DQN.load( model_path, tensorboard_log=conf.get( 'tensorboard', None)) - elif conf['algorithm']['name'] == 'SB3-DDPG': + elif algorithm_name == 'SB3-DDPG': model = DDPG.load( model_path, tensorboard_log=conf.get( 'tensorboard', None)) - elif conf['algorithm']['name'] == 'SB3-A2C': + elif algorithm_name == 'SB3-A2C': model = A2C.load( 
model_path, tensorboard_log=conf.get( 'tensorboard', None)) - elif conf['algorithm']['name'] == 'SB3-PPO': + elif algorithm_name == 'SB3-PPO': model = PPO.load( model_path, tensorboard_log=conf.get( 'tensorboard', None)) - elif conf['algorithm']['name'] == 'SB3-SAC': + elif algorithm_name == 'SB3-SAC': model = SAC.load( model_path, tensorboard_log=conf.get( 'tensorboard', None)) - elif conf['algorithm']['name'] == 'SB3-TD3': + elif algorithm_name == 'SB3-TD3': model = TD3.load( model_path, tensorboard_log=conf.get( 'tensorboard', None)) @@ -293,55 +299,48 @@ env.close() # ---------------------------------------------------------------------------- # - # Mlflow artifacts storege # + # Mlflow artifacts storege and Google Cloud Bucket Storage # # ---------------------------------------------------------------------------- # - if conf.get('cloud').get('mlflow_store'): - # Code for send output and tensorboard to mlflow artifacts. - mlflow.log_artifacts( - local_dir=env.simulator._env_working_dir_parent, - artifact_path=name) - if conf.get('evaluation'): - mlflow.log_artifacts( - local_dir='best_model/' + name, - artifact_path='best_model/' + name) - # If tensorboard is active (in local) we should send to mlflow - if conf.get('tensorboard') and 'gs://' + \ - conf['cloud']['remote_store'] not in conf['tensorboard']: - mlflow.log_artifacts( - local_dir=conf['tensorboard'] + '/' + name, - artifact_path=os.path.abspath(conf['tensorboard']).split('/')[-1] + '/' + name) - - # ---------------------------------------------------------------------------- # - # Google Cloud Bucket Storage # - # ---------------------------------------------------------------------------- # - if conf.get('cloud').get('remote_store'): - # Initiate Google Cloud client - client = gcloud.init_storage_client() - # Code for send output and tensorboard to common resource here. - gcloud.upload_to_bucket( - client, - src_path=env.simulator._env_working_dir_parent, - dest_bucket_name=conf['cloud']['remote_store'], - dest_path=name) - if conf.get('evaluation'): - gcloud.upload_to_bucket( - client, - src_path='best_model/' + name + '/', - dest_bucket_name=conf['cloud']['remote_store'], - dest_path='best_model/' + name + '/') - # If tensorboard is active (in local) we should send to bucket - if conf['tensorboard'] and 'gs://' + \ - conf['cloud']['remote_store'] not in conf['tensorboard']: + if conf.get('cloud'): + if conf['cloud'].get('remote_store'): + # Initiate Google Cloud client + client = gcloud.init_storage_client() + # Code for send output and tensorboard to common resource here. gcloud.upload_to_bucket( client, - src_path=conf['tensorboard'] + '/' + name + '/', + src_path=env.simulator._env_working_dir_parent, dest_bucket_name=conf['cloud']['remote_store'], - dest_path=os.path.abspath(conf['tensorboard']).split('/')[-1] + '/' + name + '/') - # gcloud.upload_to_bucket( - # client, - # src_path='mlruns/', - # dest_bucket_name=args.bucket_name, - # dest_path='mlruns/') + dest_path=name) + # Code for send output and tensorboard to mlflow artifacts. 
+ mlflow.log_artifacts( + local_dir=env.simulator._env_working_dir_parent, + artifact_path=name) + if conf.get('evaluation'): + gcloud.upload_to_bucket( + client, + src_path='best_model/' + name + '/', + dest_bucket_name=conf['cloud']['remote_store'], + dest_path='best_model/' + name + '/') + mlflow.log_artifacts( + local_dir='best_model/' + name, + artifact_path='best_model/' + name) + # If tensorboard is active (in local) we should send to mlflow + if conf.get('tensorboard') and 'gs://' + \ + conf['cloud']['remote_store'] not in conf.get('tensorboard'): + gcloud.upload_to_bucket( + client, + src_path=conf['tensorboard'] + '/' + name + '/', + dest_bucket_name=conf['cloud']['remote_store'], + dest_path=os.path.abspath(conf['tensorboard']).split('/')[-1] + '/' + name + '/') + mlflow.log_artifacts( + local_dir=conf['tensorboard'] + '/' + name, + artifact_path=os.path.abspath(conf['tensorboard']).split('/')[-1] + '/' + name) + + # gcloud.upload_to_bucket( + # client, + # src_path='mlruns/', + # dest_bucket_name=conf['cloud']['remote_store'], + # dest_path='mlruns/') # End mlflow run mlflow.end_run() @@ -349,7 +348,9 @@ # ---------------------------------------------------------------------------- # # Autodelete option if is a cloud resource # # ---------------------------------------------------------------------------- # - if args.group_name and conf['cloud']['auto_delete']: - token = gcloud.get_service_account_token() - gcloud.delete_instance_MIG_from_container( - conf['cloud']['group_name'], token) + if conf.get('cloud'): + if conf['cloud'].get( + 'remote_store') and conf['cloud'].get('auto_delete'): + token = gcloud.get_service_account_token() + gcloud.delete_instance_MIG_from_container( + conf['cloud']['group_name'], token) From cd59621cae603228559bed038680df17c2c81159 Mon Sep 17 00:00:00 2001 From: AlejandroCN7 Date: Fri, 3 Mar 2023 14:11:10 +0000 Subject: [PATCH 07/15] Added documentation about json file for DRL_battery.py --- .../pages/deep-reinforcement-learning.rst | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/docs/source/pages/deep-reinforcement-learning.rst b/docs/source/pages/deep-reinforcement-learning.rst index acd1978334..e285ff2dd4 100644 --- a/docs/source/pages/deep-reinforcement-learning.rst +++ b/docs/source/pages/deep-reinforcement-learning.rst @@ -160,7 +160,7 @@ How use You can try your own experiments and benefit from this functionality. `sinergym/scripts/DRL_battery.py `__ is a example code to use it. You can use ``DRL_battery.py`` directly from -your local computer specifying ``--tensorboard`` flag in execution. +your local computer or using Google Cloud Platform. The most **important information** you must keep in mind when you try your own experiments are: @@ -179,13 +179,23 @@ your own experiments are: specify train ``timesteps``, ``callbacks`` and ``log_interval`` as we commented in type algorithms (On and Off Policy). -* ``DRL_battery.py`` requires some **extra arguments** to being - executed like ``-env`` and ``-ep``. - * You can execute **Curriculum Learning**, you only have to - add ``--model`` field with a valid model path, this script + add model field with a valid model path, this script will load the model and execute to train. +``DRL_battery.py`` has a unique parameter to be able to execute it; ``-conf``. +This parameter is a str to indicate the JSON file in which there are allocated +all information about the experiment you want to execute. 
You can see the +JSON structure in `sinergym/scripts/DRL_battery_example.json `__: + +* The **obligatory** parameters are: environment, train episodes, + algorithm (and parameters) and reward function (and parameters). + +* The **optional** parameters are: seed, model to load (before training), + experiment ID, wrappers to use (respecting the order), training evaluation, + tensorboard functionality and cloud options. + + **************** Mlflow **************** From 1b015dc2ff27a5c0a826d226f61ec275632d3f61 Mon Sep 17 00:00:00 2001 From: AlejandroCN7 Date: Mon, 6 Mar 2023 10:40:08 +0000 Subject: [PATCH 08/15] Fix isort bug --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8ff728010d..04c124ff0f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ autopep8 eppy gymnasium -isort[requirements_deprecated_finder,pipfile_deprecated_finder] +isort numpy opyplus pandas From 576e5df1eb2612a144c2a8263ee74f886ca8aa1a Mon Sep 17 00:00:00 2001 From: AlejandroCN7 Date: Fri, 10 Mar 2023 11:05:27 +0000 Subject: [PATCH 09/15] Updated Sinergym version from 2.2.5 to 2.2.6 --- sinergym/version.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sinergym/version.txt b/sinergym/version.txt index 21bb5e156f..bda8fbec15 100644 --- a/sinergym/version.txt +++ b/sinergym/version.txt @@ -1 +1 @@ -2.2.5 +2.2.6 From 5c24af529b1c32c2982cab9bad9a3e0bb96eff0a Mon Sep 17 00:00:00 2001 From: AlejandroCN7 Date: Tue, 14 Mar 2023 11:14:14 +0000 Subject: [PATCH 10/15] Fixed multiple bugs in new DRL_battery.py structure --- scripts/DRL_battery.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/scripts/DRL_battery.py b/scripts/DRL_battery.py index d37d213db8..5dd3cea61b 100644 --- a/scripts/DRL_battery.py +++ b/scripts/DRL_battery.py @@ -8,19 +8,18 @@ import mlflow import numpy as np import tensorboard -from stable_baselines3 import A2C, DDPG, DQN, PPO, SAC, TD3 +from stable_baselines3 import * from stable_baselines3.common.callbacks import CallbackList from stable_baselines3.common.logger import configure from stable_baselines3.common.noise import NormalActionNoise from stable_baselines3.common.vec_env import DummyVecEnv - import sinergym import sinergym.utils.gcloud as gcloud from sinergym.utils.callbacks import LoggerCallback, LoggerEvalCallback from sinergym.utils.constants import * from sinergym.utils.rewards import * -from sinergym.utils.wrappers import (LoggerWrapper, MultiObsWrapper, - NormalizeObservation) +from sinergym.utils.wrappers import * +from sinergym.utils.logger import * # ---------------------------------------------------------------------------- # # Parameters definition # @@ -132,12 +131,14 @@ # Wrappers # # ---------------------------------------------------------------------------- # if conf.get('wrappers'): - for key, parameters in conf['wrappers']: + for key, parameters in conf['wrappers'].items(): wrapper_class = eval(key) - # parse str parameters to sinergym variables - for name, value in parameters: - if 'sinergym.' in name: - parameters[name] = eval(value) + for name, value in parameters.items(): + # parse str parameters to sinergym Callable or Objects if it is + # required + if isinstance(value, str): + if 'sinergym.' 
in value: + parameters[name] = eval(value) env = wrapper_class(env=env, ** parameters) if eval_env is not None: eval_env = wrapper_class(env=eval_env, ** parameters) @@ -162,7 +163,7 @@ # DDPG # # --------------------------------------------------------# elif algorithm_name == 'SB3-DDPG': - model = DDPG(env, + model = DDPG(env=env, seed=conf.get('seed', None), tensorboard_log=conf.get('tensorboard', None), ** algorithm_parameters) @@ -170,7 +171,7 @@ # A2C # # --------------------------------------------------------# elif algorithm_name == 'SB3-A2C': - model = A2C(env, + model = A2C(env=env, seed=conf.get('seed', None), tensorboard_log=conf.get('tensorboard', None), ** algorithm_parameters) @@ -178,7 +179,7 @@ # PPO # # --------------------------------------------------------# elif algorithm_name == 'SB3-PPO': - model = PPO(env, + model = PPO(env=env, seed=conf.get('seed', None), tensorboard_log=conf.get('tensorboard', None), ** algorithm_parameters) @@ -186,7 +187,7 @@ # SAC # # --------------------------------------------------------# elif algorithm_name == 'SB3-SAC': - model = SAC(env, + model = SAC(env=env, seed=conf.get('seed', None), tensorboard_log=conf.get('tensorboard', None), ** algorithm_parameters) @@ -194,7 +195,7 @@ # TD3 # # --------------------------------------------------------# elif algorithm_name == 'SB3-TD3': - model = TD3(env, + model = TD3(env=env, seed=conf.get('seed', None), tensorboard_log=conf.get('tensorboard', None), ** algorithm_parameters) From 97ee5a7a9b0c568f65ab7caf87cea5cc246158bc Mon Sep 17 00:00:00 2001 From: AlejandroCN7 Date: Tue, 14 Mar 2023 12:08:13 +0000 Subject: [PATCH 11/15] Fixed info keys in callbacks --- sinergym/utils/callbacks.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sinergym/utils/callbacks.py b/sinergym/utils/callbacks.py index 038a3ee7fe..c65c8d139f 100644 --- a/sinergym/utils/callbacks.py +++ b/sinergym/utils/callbacks.py @@ -82,7 +82,7 @@ def _on_step(self) -> bool: variables = self.training_env.get_attr('variables')[0]['action'] action = None # sinergym action received inner its own setpoints range - action_ = info['action_'] + action_ = info['action'] try: # network output clipped with gym action space action = self.locals['clipped_actions'][-1] @@ -115,10 +115,10 @@ def _on_step(self) -> bool: except KeyError: print('Algorithm reward key in locals dict unknown') - self.ep_powers.append(info['total_power']) - self.ep_term_comfort.append(info['comfort_penalty']) - self.ep_term_energy.append(info['total_power_no_units']) - if (info['comfort_penalty'] != 0): + self.ep_powers.append(info['total_energy']) + self.ep_term_comfort.append(info['reward_comfort']) + self.ep_term_energy.append(info['reward_energy']) + if (info['reward_comfort'] != 0): self.num_comfort_violation += 1 self.ep_timesteps += 1 From 7fa48efc42aa6f67609be5764df5c0361727b3b6 Mon Sep 17 00:00:00 2001 From: AlejandroCN7 Date: Tue, 14 Mar 2023 12:09:33 +0000 Subject: [PATCH 12/15] Fixed info keys in evaluation.py --- sinergym/utils/evaluation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sinergym/utils/evaluation.py b/sinergym/utils/evaluation.py index 1330816837..99d93aeb2a 100644 --- a/sinergym/utils/evaluation.py +++ b/sinergym/utils/evaluation.py @@ -76,10 +76,10 @@ def evaluate_policy(model: "base_class.BaseAlgorithm", obs, state=state, deterministic=deterministic) obs, reward, done, info = env.step(action) episode_reward += reward - episode_power += info[0]['total_power'] - episode_power_penalty += 
info[0]['total_power_no_units'] - episode_comfort_penalty += info[0]['comfort_penalty'] - if info[0]['comfort_penalty'] != 0: + episode_power += info[0]['total_energy'] + episode_power_penalty += info[0]['reward_energy'] + episode_comfort_penalty += info[0]['reward_comfort'] + if info[0]['reward_comfort'] != 0: episode_steps_comfort_violation += 1 if callback is not None: callback(locals(), globals()) From 61e4df76045fbae68045de94d46788600f89bd65 Mon Sep 17 00:00:00 2001 From: AlejandroCN7 Date: Tue, 14 Mar 2023 14:43:43 +0000 Subject: [PATCH 13/15] Updated tests for stable baselines 3 --- tests/test_stable_baselines.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/test_stable_baselines.py b/tests/test_stable_baselines.py index 7143f512c8..321e37ba39 100644 --- a/tests/test_stable_baselines.py +++ b/tests/test_stable_baselines.py @@ -56,13 +56,13 @@ def test_stable_PPO(env_name, request): # Check model works - obs = env.reset() + obs, info = env.reset() a, _ = model.predict(obs) - obs, reward, done, info = env.step(a) + obs, reward, terminated, truncated, info = env.step(a) assert reward is not None and reward < 0 assert a is not None - assert isinstance(done, bool) + assert isinstance(terminated, bool) assert info['timestep'] == 1 env.close() @@ -102,13 +102,13 @@ def test_stable_A2C(env_name, request): # Check model works - obs = env.reset() + obs, info = env.reset() a, _ = model.predict(obs) - obs, reward, done, info = env.step(a) + obs, reward, terminated, truncated, info = env.step(a) assert reward is not None and reward < 0 assert a is not None - assert isinstance(done, bool) + assert isinstance(terminated, bool) assert info['timestep'] == 1 env.close() @@ -172,13 +172,13 @@ def test_stable_DQN(env_name, request): # Check model works - obs = env.reset() + obs, info = env.reset() a, _ = model.predict(obs) - obs, reward, done, info = env.step(a) + obs, reward, terminated, truncated, info = env.step(a) assert reward is not None and reward < 0 assert a is not None - assert isinstance(done, bool) + assert isinstance(terminated, bool) assert info['timestep'] == 1 env.close() @@ -225,13 +225,13 @@ def test_stable_DDPG(env_name, request): # Check model works - obs = env.reset() + obs, info = env.reset() a, _ = model.predict(obs) - obs, reward, done, info = env.step(a) + obs, reward, terminated, truncated, info = env.step(a) assert reward is not None and reward < 0 assert a is not None - assert isinstance(done, bool) + assert isinstance(terminated, bool) assert info['timestep'] == 1 env.close() @@ -272,13 +272,13 @@ def test_stable_SAC(env_name, request): # Check model works - obs = env.reset() + obs, info = env.reset() a, _ = model.predict(obs) - obs, reward, done, info = env.step(a) + obs, reward, terminated, truncated, info = env.step(a) assert reward is not None and reward < 0 assert a is not None - assert isinstance(done, bool) + assert isinstance(terminated, bool) assert info['timestep'] == 1 env.close() From 6ec96f7486056c83c9554bad745295bdbb230cc3 Mon Sep 17 00:00:00 2001 From: AlejandroCN7 Date: Tue, 14 Mar 2023 15:14:11 +0000 Subject: [PATCH 14/15] Updated JSON structure and DRL_battery.py: reward and reward parameters are not mandatory now, all environments parameters can be overwritten optionally if it is desired --- scripts/DRL_battery.py | 31 ++++++++++++++-------- scripts/DRL_battery_example.json | 45 ++++++++++++++------------------ 2 files changed, 40 insertions(+), 36 deletions(-) diff --git a/scripts/DRL_battery.py 
b/scripts/DRL_battery.py index 5dd3cea61b..7ad67b4e58 100644 --- a/scripts/DRL_battery.py +++ b/scripts/DRL_battery.py @@ -76,7 +76,8 @@ mlflow.log_param('environment', conf['environment']) mlflow.log_param('episodes', conf['episodes']) mlflow.log_param('algorithm', conf['algorithm']['name']) - mlflow.log_param('reward', conf['reward']['class']) + # Environment parameters overwriten + mlflow.log_params(conf.get('env_params')) # Optional mlflow.log_param('tensorboard', conf.get('tensorboard', False)) mlflow.log_param( @@ -105,27 +106,35 @@ # algorithm params mlflow.log_params(conf['algorithm'].get('parameters')) - # reward params - mlflow.log_params(conf['reward'].get('parameters')) + + # --------------------- Overwrite environment parameters --------------------- # + env_params = {} + # Transform required str's into Callables + if conf.get('env_params'): + if conf['env_params'].get('reward'): + conf['env_params']['reward'] = eval(conf['env_params']['reward']) + if conf['env_params'].get('observation_space'): + conf['env_params']['observation_space'] = eval( + conf['env_params']['observation_space']) + if conf['env_params'].get('action_space'): + conf['env_params']['observation_space'] = eval( + conf['env_params']['action_space']) + + env_params = conf['env_params'] # ---------------------------------------------------------------------------- # - # Environment construction (with reward specified) # + # Environment construction # # ---------------------------------------------------------------------------- # - reward = eval(conf['reward']['class']) - reward_kwargs = conf['reward']['parameters'] - env = gym.make( conf['environment'], - reward=reward, - reward_kwargs=reward_kwargs) + ** env_params) # env for evaluation if is enabled eval_env = None if conf.get('evaluation'): eval_env = gym.make( conf['environment'], - reward=reward, - reward_kwargs=reward_kwargs) + ** env_params) # ---------------------------------------------------------------------------- # # Wrappers # diff --git a/scripts/DRL_battery_example.json b/scripts/DRL_battery_example.json index 1119752fd6..5bd4689aeb 100644 --- a/scripts/DRL_battery_example.json +++ b/scripts/DRL_battery_example.json @@ -1,6 +1,25 @@ { "id": "PRUEBAALEX", "environment": "Eplus-5Zone-hot-continuous-stochastic-v1", + "env_params": { + "reward": "LinearReward", + "reward_kwargs": { + "temperature_variable": [ + "Zone Air Temperature(SPACE1-1)", + "Zone Air Temperature(SPACE1-2)" + ], + "energy_variable": "Facility Total HVAC Electricity Demand Rate(Whole Building)", + "range_comfort_winter": [ + 20.0, + 23.5 + ], + "range_comfort_summer": [ + 23.0, + 26.0 + ] + }, + "act_repeat": 1 + }, "episodes": 5, "algorithm": { "name": "SB3-PPO", @@ -22,24 +41,6 @@ }, "seed": 3, "model": null, - "reward": { - "class": "LinearReward", - "parameters": { - "temperature_variable": [ - "Zone Air Temperature(SPACE1-1)", - "Zone Air Temperature(SPACE1-2)" - ], - "energy_variable": "Facility Total HVAC Electricity Demand Rate(Whole Building)", - "range_comfort_winter": [ - 20.0, - 23.5 - ], - "range_comfort_summer": [ - 23.0, - 26.0 - ] - } - }, "wrappers": { "NormalizeObservation": { "ranges": "sinergym.utils.constants.RANGES_5ZONE" @@ -53,11 +54,5 @@ "eval_freq": 2, "eval_length": 1 }, - "tensorboard": "./tensorboard_log/", - "cloud": { - "remote_store": "bucket_example", - "mlflow_store": true, - "group_name": "example_group", - "auto_delete": true - } + "tensorboard": "./tensorboard_log/" } \ No newline at end of file From 43eff3553dd8a556d4f23ede6d68e5c3e6625771 
Mon Sep 17 00:00:00 2001 From: AlejandroCN7 Date: Tue, 14 Mar 2023 15:24:18 +0000 Subject: [PATCH 15/15] Updated JSON example fields order and updated documentation --- .../pages/deep-reinforcement-learning.rst | 14 ++++--- scripts/DRL_battery_example.json | 40 +++++++++---------- 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/docs/source/pages/deep-reinforcement-learning.rst b/docs/source/pages/deep-reinforcement-learning.rst index e285ff2dd4..d0a3b843d5 100644 --- a/docs/source/pages/deep-reinforcement-learning.rst +++ b/docs/source/pages/deep-reinforcement-learning.rst @@ -186,15 +186,19 @@ your own experiments are: ``DRL_battery.py`` has a unique parameter to be able to execute it; ``-conf``. This parameter is a str to indicate the JSON file in which there are allocated all information about the experiment you want to execute. You can see the -JSON structure in `sinergym/scripts/DRL_battery_example.json `__: +JSON structure example in `sinergym/scripts/DRL_battery_example.json `__: -* The **obligatory** parameters are: environment, train episodes, - algorithm (and parameters) and reward function (and parameters). +* The **obligatory** parameters are: environment, episodes, + algorithm (and parameters of the algorithm which don't have + default values). -* The **optional** parameters are: seed, model to load (before training), +* The **optional** parameters are: All environment parameters (if it is specified + will be overwrite the default environment value) seed, model to load (before training), experiment ID, wrappers to use (respecting the order), training evaluation, tensorboard functionality and cloud options. - + +* The name of the fields must be like in example mentioned. Otherwise, the experiment + will return an error. **************** Mlflow diff --git a/scripts/DRL_battery_example.json b/scripts/DRL_battery_example.json index 5bd4689aeb..50dd24033b 100644 --- a/scripts/DRL_battery_example.json +++ b/scripts/DRL_battery_example.json @@ -1,25 +1,6 @@ { - "id": "PRUEBAALEX", + "id": "ExperimentExample", "environment": "Eplus-5Zone-hot-continuous-stochastic-v1", - "env_params": { - "reward": "LinearReward", - "reward_kwargs": { - "temperature_variable": [ - "Zone Air Temperature(SPACE1-1)", - "Zone Air Temperature(SPACE1-2)" - ], - "energy_variable": "Facility Total HVAC Electricity Demand Rate(Whole Building)", - "range_comfort_winter": [ - 20.0, - 23.5 - ], - "range_comfort_summer": [ - 23.0, - 26.0 - ] - }, - "act_repeat": 1 - }, "episodes": 5, "algorithm": { "name": "SB3-PPO", @@ -39,6 +20,25 @@ "max_grad_norm": 0.5 } }, + "env_params": { + "reward": "LinearReward", + "reward_kwargs": { + "temperature_variable": [ + "Zone Air Temperature(SPACE1-1)", + "Zone Air Temperature(SPACE1-2)" + ], + "energy_variable": "Facility Total HVAC Electricity Demand Rate(Whole Building)", + "range_comfort_winter": [ + 20.0, + 23.5 + ], + "range_comfort_summer": [ + 23.0, + 26.0 + ] + }, + "act_repeat": 1 + }, "seed": 3, "model": null, "wrappers": {