From 52df871dab046efb7a1083b3012b87c00a08b8dd Mon Sep 17 00:00:00 2001
From: AlejandroCN7
Date: Wed, 1 Mar 2023 10:00:28 +0000
Subject: [PATCH 01/15] Documentation web shields fixed
---
docs/source/_templates/shields.html | 1 -
1 file changed, 1 deletion(-)
diff --git a/docs/source/_templates/shields.html b/docs/source/_templates/shields.html
index a7fca64d49..6ab8b5e2dd 100644
--- a/docs/source/_templates/shields.html
+++ b/docs/source/_templates/shields.html
@@ -1,4 +1,3 @@
-
From 836bca01cf329935ebc87684514981a385a9b94e Mon Sep 17 00:00:00 2001
From: AlejandroCN7
Date: Thu, 2 Mar 2023 14:46:27 +0000
Subject: [PATCH 02/15] Added a json example for DRL_battery.py
---
scripts/DRL_battery_example.json | 69 ++++++++++++++++++++++++++++++++
1 file changed, 69 insertions(+)
create mode 100644 scripts/DRL_battery_example.json
diff --git a/scripts/DRL_battery_example.json b/scripts/DRL_battery_example.json
new file mode 100644
index 0000000000..2126501fe9
--- /dev/null
+++ b/scripts/DRL_battery_example.json
@@ -0,0 +1,69 @@
+{
+ "id": "PRUEBAALEX",
+ "environment": "Eplus-5Zone-hot-continuous-stochastic-v1",
+ "algorithm": {
+ "name": "SB3-PPO",
+ "log_interval": 100,
+ "parameters": {
+ "policy": "MlpPolicy",
+ "verbose": 1,
+ "learning_rate": 0.0003,
+ "n_steps": 2048,
+ "batch_size": 64,
+ "n_epochs": 10,
+ "gamma": 0.99,
+ "gae_lambda": 0.95,
+ "clip_range": 0.2,
+ "ent_coef": 0,
+ "vf_coef": 0.5,
+ "max_grad_norm": 0.5
+ }
+ },
+ "episodes": 5,
+ "seed": 3,
+ "model": null,
+ "reward": {
+ "class": "LinearReward",
+ "parameters": {
+ "temperature_variable": [
+ "Zone Air Temperature(SPACE1-1)",
+ "Zone Air Temperature(SPACE1-2)"
+ ],
+ "energy_variable": "Facility Total HVAC Electricity Demand Rate(Whole Building)",
+ "range_comfort_winter": [
+ 20.0,
+ 23.5
+ ],
+ "range_comfort_summer": [
+ 23.0,
+ 26.0
+ ]
+ }
+ },
+ "wrappers": [
+ {
+ "class": "NormalizeObservation",
+ "parameters": {
+ "ranges": "sinergym.utils.constants.RANGES_5ZONE"
+ }
+ },
+ {
+ "class": "LoggerWrapper",
+ "parameters": {
+ "logger_class": "sinergym.utils.logger.CSVLogger",
+ "flag": true
+ }
+ }
+ ],
+ "evaluation": {
+ "eval_freq": 2,
+ "eval_length": 1
+ },
+ "tensorboard": "./tensorboard_log/",
+ "cloud": {
+ "remote_store": "bucket_example",
+ "mlflow_store": true,
+ "group_name": "example_group",
+ "autodelete": true
+ }
+}
\ No newline at end of file
From d3de2e77a54287cdd7d83e42e9bf67b1927b7939 Mon Sep 17 00:00:00 2001
From: AlejandroCN7
Date: Thu, 2 Mar 2023 14:48:03 +0000
Subject: [PATCH 03/15] First approximation to adapt DRL_battery.py to a json
configuration
---
scripts/DRL_battery.py | 528 +++++++++++++----------------------------
1 file changed, 163 insertions(+), 365 deletions(-)
diff --git a/scripts/DRL_battery.py b/scripts/DRL_battery.py
index da02908537..82c20bcb79 100644
--- a/scripts/DRL_battery.py
+++ b/scripts/DRL_battery.py
@@ -1,21 +1,23 @@
import argparse
+import sys
import os
+import json
from datetime import datetime
import gymnasium as gym
import mlflow
import numpy as np
import tensorboard
-from stable_baselines3 import A2C, DDPG, DQN, PPO, SAC, TD3
-from stable_baselines3.common.callbacks import CallbackList
-from stable_baselines3.common.logger import configure
-from stable_baselines3.common.noise import NormalActionNoise
-from stable_baselines3.common.vec_env import DummyVecEnv
+# from stable_baselines3 import A2C, DDPG, DQN, PPO, SAC, TD3
+# from stable_baselines3.common.callbacks import CallbackList
+# from stable_baselines3.common.logger import configure
+# from stable_baselines3.common.noise import NormalActionNoise
+# from stable_baselines3.common.vec_env import DummyVecEnv
import sinergym
import sinergym.utils.gcloud as gcloud
-from sinergym.utils.callbacks import LoggerCallback, LoggerEvalCallback
-from sinergym.utils.constants import RANGES_5ZONE, RANGES_DATACENTER, RANGES_IW
+# from sinergym.utils.callbacks import LoggerCallback, LoggerEvalCallback
+from sinergym.utils.constants import *
from sinergym.utils.rewards import *
from sinergym.utils.wrappers import (LoggerWrapper, MultiObsWrapper,
NormalizeObservation)
@@ -24,176 +26,34 @@
# Parameters definition #
# ---------------------------------------------------------------------------- #
parser = argparse.ArgumentParser()
-# commons arguments for battery
parser.add_argument(
- '--environment',
- '-env',
+ '--configuration',
+ '-conf',
required=True,
type=str,
- dest='environment',
- help='Environment name of simulation (see sinergym/__init__.py).')
-parser.add_argument(
- '--model',
- '-mod',
- type=str,
- default=None,
- dest='model',
- help='Path where model is stored, only when you want a model to continue training it.')
-parser.add_argument(
- '--episodes',
- '-ep',
- type=int,
- default=1,
- dest='episodes',
- help='Number of episodes for training.')
-parser.add_argument(
- '--algorithm',
- '-alg',
- type=str,
- default='PPO',
- dest='algorithm',
- help='Algorithm used to train (possible values: PPO, A2C, DQN, DDPG, SAC, TD3).')
-parser.add_argument(
- '--reward',
- '-rw',
- type=str,
- default='linear',
- dest='reward',
- help='Reward function used by model, by default is linear (possible values: linear, exponential).')
-parser.add_argument(
- '--energy_weight',
- '-rew',
- type=float,
- dest='energy_weight',
- help='Reward energy weight with compatible rewards types.')
-parser.add_argument(
- '--normalization',
- '-norm',
- action='store_true',
- dest='normalization',
- help='Apply normalization to observations if this flag is specified.')
-parser.add_argument(
- '--multiobs',
- '-mobs',
- action='store_true',
- dest='multiobs',
- help='Apply Multi observations if this flag is specified.')
-parser.add_argument(
- '--logger',
- '-log',
- action='store_true',
- dest='logger',
- help='Apply Sinergym CSVLogger class if this flag is specified.')
-parser.add_argument(
- '--tensorboard',
- '-tens',
- type=str,
- default=None,
- dest='tensorboard',
- help='Tensorboard path for logging (if not specified, tensorboard log will not be stored).')
-parser.add_argument(
- '--evaluation',
- '-eval',
- action='store_true',
- dest='evaluation',
- help='Evaluation is processed during training with this flag (save best model online).')
-parser.add_argument(
- '--eval_freq',
- '-evalf',
- type=int,
- default=2,
- dest='eval_freq',
- help='Episodes executed before applying evaluation (if evaluation flag is not specified, this value is useless).')
-parser.add_argument(
- '--eval_length',
- '-evall',
- type=int,
- default=2,
- dest='eval_length',
- help='Episodes executed during evaluation (if evaluation flag is not specified, this value is useless).')
-parser.add_argument(
- '--log_interval',
- '-inter',
- type=int,
- default=1,
- dest='log_interval',
- help='model training log_interval parameter. See documentation since this value is different in every algorithm.')
-parser.add_argument(
- '--seed',
- '-sd',
- type=int,
- default=None,
- dest='seed',
- help='Seed used to algorithm training.')
-parser.add_argument(
- '--id',
- '-id',
- type=str,
- default=None,
- dest='id',
- help='Custom experiment identifier.')
-parser.add_argument(
- '--remote_store',
- '-sto',
- action='store_true',
- dest='remote_store',
- help='Determine if sinergym output will be sent to a Google Cloud Storage Bucket.')
-parser.add_argument(
- '--bucket_name',
- '-buc',
- type=str,
- default='experiments-storage',
- dest='bucket_name',
- help='Name of the bucket where experiments output will be stored if experiment is configured to that.')
-parser.add_argument(
- '--mlflow_store',
- '-mlflow',
- action='store_true',
- dest='mlflow_store',
- help='Determine if sinergym output will be sent to a mlflow artifact storage')
-parser.add_argument(
- '--group_name',
- '-group',
- type=str,
- dest='group_name',
- help='This field indicate instance group name')
-parser.add_argument(
- '--auto_delete',
- '-del',
- action='store_true',
- dest='auto_delete',
- help='If is a GCE instance and this flag is active, that instance will be removed from GCP.')
-
-parser.add_argument('--learning_rate', '-lr', type=float, default=.0003)
-parser.add_argument('--n_steps', '-n', type=int, default=2048)
-parser.add_argument('--batch_size', '-bs', type=int, default=64)
-parser.add_argument('--n_epochs', '-ne', type=int, default=10)
-parser.add_argument('--gamma', '-g', type=float, default=.99)
-parser.add_argument('--gae_lambda', '-gl', type=float, default=.95)
-parser.add_argument('--ent_coef', '-ec', type=float, default=0)
-parser.add_argument('--vf_coef', '-v', type=float, default=.5)
-parser.add_argument('--max_grad_norm', '-m', type=float, default=.5)
-parser.add_argument('--buffer_size', '-bfs', type=int, default=1000000)
-parser.add_argument('--learning_starts', '-ls', type=int, default=100)
-parser.add_argument('--tau', '-tu', type=float, default=0.005)
-parser.add_argument('--gradient_steps', '-gs', type=int, default=1)
-parser.add_argument('--clip_range', '-cr', type=float, default=.2)
-parser.add_argument('--sigma', '-sig', type=float, default=0.1)
-parser.add_argument('--rms_prop_eps', '-rpe', type=float, default=1e-5)
-
+ dest='configuration',
+ help='Path to experiment configuration (JSON file)'
+)
args = parser.parse_args()
# ------------------------------------------------------------------------------#
+# ---------------------------------------------------------------------------- #
+# Read json parameters #
+# ---------------------------------------------------------------------------- #
+
+with open(args.configuration) as json_conf:
+ conf = json.load(json_conf)
+
# ---------------------------------------------------------------------------- #
# Register run name #
# ---------------------------------------------------------------------------- #
experiment_date = datetime.today().strftime('%Y-%m-%d_%H:%M')
-name = args.algorithm + '-' + args.environment + \
- '-episodes-' + str(args.episodes)
-if args.seed:
- name += '-seed-' + str(args.seed)
-if args.id:
- name += '-id-' + str(args.id)
+name = conf['algorithm']['name'] + '-' + conf['environment'] + \
+ '-episodes-' + str(conf['episodes'])
+if conf.get('seed'):
+ name += '-seed-' + str(conf['seed'])
+if conf.get('id'):
+ name += '-id-' + str(conf['id'])
name += '_' + experiment_date
# ---------------------------------------------------------------------------- #
@@ -210,92 +70,72 @@
# MLflow track
with mlflow.start_run(run_name=name):
# Log experiment params
+ # sinergym and python versions
mlflow.log_param('sinergym-version', sinergym.__version__)
+ mlflow.log_param('python-version', sys.version)
- mlflow.log_param('env', args.environment)
- mlflow.log_param('episodes', args.episodes)
- mlflow.log_param('algorithm', args.algorithm)
- mlflow.log_param('reward', args.reward)
- mlflow.log_param('normalization', bool(args.normalization))
- mlflow.log_param('multi-observations', bool(args.multiobs))
- mlflow.log_param('logger', bool(args.logger))
- mlflow.log_param('tensorboard', args.tensorboard)
- mlflow.log_param('evaluation', bool(args.evaluation))
- mlflow.log_param('evaluation-frequency', args.eval_freq)
- mlflow.log_param('evaluation-length', args.eval_length)
- mlflow.log_param('log-interval', args.log_interval)
- mlflow.log_param('seed', args.seed)
- mlflow.log_param('remote-store', bool(args.remote_store))
+ mlflow.log_param('environment', conf['environment'])
+ mlflow.log_param('episodes', conf['episodes'])
+ mlflow.log_param('algorithm', conf['algorithm']['name'])
+ mlflow.log_param('reward', conf['reward']['class'])
+ mlflow.log_param(
+ 'normalization', bool(
+ conf.get('wrappers').get('class') == 'NormalizeObservation'))
+ mlflow.log_param(
+ 'multi-observations',
+ bool(
+ conf.get('wrappers').get('class') == 'MultiObsWrapper'))
+ mlflow.log_param(
+ 'logger', bool(
+ conf.get('wrappers').get('class') == 'LoggerWrapper'))
+ mlflow.log_param('tensorboard', conf.get('tensorboard'))
+ mlflow.log_param('evaluation', bool(conf.get('evaluation')))
+ mlflow.log_param('evaluation-frequency',
+ conf.get('evaluation').get('eval_freq'))
+ mlflow.log_param(
+ 'evaluation-length',
+ conf.get('evaluation').get('eval_length'))
+ mlflow.log_param('log-interval', conf['algorithm'].get('log_interval'))
+ mlflow.log_param('seed', conf.get('seed'))
+ mlflow.log_param(
+ 'remote-store',
+ bool(
+ conf.get('cloud').get('remote_store')))
- mlflow.log_param('learning-rate', args.learning_rate)
- mlflow.log_param('n-steps', args.n_steps)
- mlflow.log_param('batch-size', args.batch_size)
- mlflow.log_param('n-epochs', args.n_epochs)
- mlflow.log_param('gamma', args.gamma)
- mlflow.log_param('gae-lambda', args.gae_lambda)
- mlflow.log_param('ent-coef', args.ent_coef)
- mlflow.log_param('vf-coef', args.vf_coef)
- mlflow.log_param('max-grad-norm', args.max_grad_norm)
- mlflow.log_param('buffer-size', args.buffer_size)
- mlflow.log_param('learning-starts', args.learning_starts)
- mlflow.log_param('tau', args.tau)
- mlflow.log_param('gradient-steps', args.gradient_steps)
- mlflow.log_param('clip-range', args.clip_range)
- mlflow.log_param('sigma', args.sigma)
- mlflow.log_param('rms_prop_eps', args.rms_prop_eps)
- mlflow.log_param('id', args.id)
+ # algorithm params
+ mlflow.log_params(conf['algorithm'].get('parameters'))
+ # reward params
+ mlflow.log_params(conf['reward'].get('parameters'))
# ---------------------------------------------------------------------------- #
# Environment construction (with reward specified) #
# ---------------------------------------------------------------------------- #
- if args.reward == 'linear':
- reward = LinearReward
- elif args.reward == 'exponential':
- reward = ExpReward
- else:
- raise RuntimeError(
- 'Reward function [{}] specified is not registered.'.format(
- args.reward))
+ reward = eval(conf['reward']['class'])
+ reward_kwargs = conf['reward']['parameters']
- env = gym.make(args.environment, reward=reward)
- if hasattr(env.reward_fn, 'W_energy') and args.energy_weight is not None:
- env.reward_fn.W_energy = args.energy_weight
+ env = gym.make(
+ args.environment,
+ reward=reward,
+ reward_kwargs=reward_kwargs)
# env for evaluation if is enabled
eval_env = None
- if args.evaluation:
- eval_env = gym.make(args.environment, reward=reward)
- if hasattr(eval_env.reward_fn, 'W_energy') and args.energy_weight:
- eval_env.reward_fn.W_energy = args.energy_weight
+ if conf.get('evaluation'):
+ eval_env = gym.make(
+ args.environment,
+ reward=reward,
+ reward_kwargs=reward_kwargs)
# ---------------------------------------------------------------------------- #
# Wrappers #
# ---------------------------------------------------------------------------- #
- if args.normalization:
- # dictionary ranges to use
- norm_range = None
- env_type = args.environment.split('-')[1]
- if env_type == 'datacenter':
- norm_range = RANGES_DATACENTER
- elif env_type == '5Zone':
- norm_range = RANGES_5ZONE
- elif env_type == 'IWMullion':
- norm_range = RANGES_IW
- else:
- raise NameError(
- 'Normalization cant be use on environment :"{}", check environment name or disable normalization'.format(
- args.environment))
- env = NormalizeObservation(env, ranges=norm_range)
+ if conf.get('wrappers'):
+ wrappers = conf['wrappers']
+ for wrapper in wrappers:
+ wrapper_class = eval(wrapper['class'])
+ env = wrapper_class(env, **wrapper['parameters'])
if eval_env is not None:
- eval_env = NormalizeObservation(eval_env, ranges=norm_range)
- if args.logger:
- env = LoggerWrapper(env)
- if eval_env is not None:
- eval_env = LoggerWrapper(eval_env)
- if args.multiobs:
- env = MultiObsWrapper(env)
- if eval_env is not None:
- eval_env = MultiObsWrapper(eval_env)
+ eval_env = wrapper_class(eval_env, **wrapper['parameters'])
# ---------------------------------------------------------------------------- #
# Defining model (algorithm) #
@@ -306,141 +146,96 @@
# --------------------------------------------------------#
# DQN #
# --------------------------------------------------------#
- if args.algorithm == 'DQN':
- model = DQN('MlpPolicy', env, verbose=1,
- learning_rate=args.learning_rate,
- buffer_size=args.buffer_size,
- learning_starts=args.learning_starts,
- batch_size=args.batch_size,
- tau=args.tau,
- gamma=args.gamma,
- train_freq=4,
- gradient_steps=args.gradient_steps,
- target_update_interval=10000,
- exploration_fraction=.1,
- exploration_initial_eps=1.0,
- exploration_final_eps=.05,
- max_grad_norm=args.max_grad_norm,
- seed=args.seed,
- tensorboard_log=args.tensorboard)
+ if conf['algorithm']['name'] == 'SB3-DQN':
+
+ model = DQN(env=env,
+ seed=conf.get('seed', None),
+ tensorboard_log=conf.get('tensorboard', None),
+ ** conf['algorithm']['parameters'])
# --------------------------------------------------------#
# DDPG #
# --------------------------------------------------------#
- elif args.algorithm == 'DDPG':
- if args.sigma:
- # noise objects for DDPG
- n_actions = env.action_space.shape[-1]
- action_noise = NormalActionNoise(mean=np.zeros(
- n_actions), sigma=0.1 * np.ones(n_actions))
-
- model = DDPG("MlpPolicy",
- env,
- action_noise=action_noise,
- verbose=1,
- seed=args.seed,
- tensorboard_log=args.tensorboard)
+ elif conf['algorithm']['name'] == 'SB3-DDPG':
+ model = DDPG(env,
+ seed=conf.get('seed', None),
+ tensorboard_log=conf.get('tensorboard', None),
+ ** conf['algorithm']['parameters'])
# --------------------------------------------------------#
# A2C #
# --------------------------------------------------------#
- elif args.algorithm == 'A2C':
- model = A2C('MlpPolicy', env, verbose=1,
- learning_rate=args.learning_rate,
- n_steps=args.n_steps,
- gamma=args.gamma,
- gae_lambda=args.gae_lambda,
- ent_coef=args.ent_coef,
- vf_coef=args.vf_coef,
- max_grad_norm=args.max_grad_norm,
- rms_prop_eps=args.rms_prop_eps,
- seed=args.seed,
- tensorboard_log=args.tensorboard)
+ elif conf['algorithm']['name'] == 'SB3-A2C':
+ model = A2C(env,
+ seed=conf.get('seed', None),
+ tensorboard_log=conf.get('tensorboard', None),
+ ** conf['algorithm']['parameters'])
# --------------------------------------------------------#
# PPO #
# --------------------------------------------------------#
- elif args.algorithm == 'PPO':
- model = PPO('MlpPolicy', env, verbose=1,
- learning_rate=args.learning_rate,
- n_steps=args.n_steps,
- batch_size=args.batch_size,
- n_epochs=args.n_epochs,
- gamma=args.gamma,
- gae_lambda=args.gae_lambda,
- clip_range=args.clip_range,
- ent_coef=args.ent_coef,
- vf_coef=args.vf_coef,
- max_grad_norm=args.max_grad_norm,
- seed=args.seed,
- tensorboard_log=args.tensorboard)
+ elif conf['algorithm']['name'] == 'SB3-PPO':
+ model = PPO(env,
+ seed=conf.get('seed', None),
+ tensorboard_log=conf.get('tensorboard', None),
+ ** conf['algorithm']['parameters'])
# --------------------------------------------------------#
# SAC #
# --------------------------------------------------------#
- elif args.algorithm == 'SAC':
- model = SAC(policy='MlpPolicy',
- env=env,
- seed=args.seed,
- learning_rate=args.learning_rate,
- buffer_size=args.buffer_size,
- batch_size=args.batch_size,
- tau=args.tau,
- gamma=args.gamma,
- tensorboard_log=args.tensorboard)
+ elif conf['algorithm']['name'] == 'SB3-SAC':
+ model = SAC(env,
+ seed=conf.get('seed', None),
+ tensorboard_log=conf.get('tensorboard', None),
+ ** conf['algorithm']['parameters'])
# --------------------------------------------------------#
# TD3 #
# --------------------------------------------------------#
- elif args.algorithm == 'TD3':
- model = TD3(policy='MlpPolicy',
- env=env, seed=args.seed,
- tensorboard_log=args.tensorboard,
- learning_rate=args.learning_rate,
- buffer_size=args.buffer_size,
- batch_size=args.batch_size,
- tau=args.tau,
- gamma=args.gamma,
- train_freq=(1, 'episode'),
- action_noise=None,
- replay_buffer_class=None,
- replay_buffer_kwargs=None,
- optimize_memory_usage=False,
- policy_delay=2,
- target_policy_noise=0.2,
- target_noise_clip=0.5,
- create_eval_env=False,
- policy_kwargs=None,
- verbose=0,
- device='auto',
- _init_setup_model=True)
+ elif conf['algorithm']['name'] == 'SB3-TD3':
+ model = TD3(env,
+ seed=conf.get('seed', None),
+ tensorboard_log=conf.get('tensorboard', None),
+ ** conf['algorithm']['parameters'])
# --------------------------------------------------------#
# Error #
# --------------------------------------------------------#
else:
raise RuntimeError(
- F'Algorithm specified [{args.algorithm}] is not registered.')
+ F'Algorithm specified [{conf["algorithm"]["name"]} ] is not registered.')
else:
model_path = ''
- if 'gs://' in args.model:
+ if 'gs://' in conf['model']:
# Download from given bucket (gcloud configured with privileges)
client = gcloud.init_storage_client()
- bucket_name = args.model.split('/')[2]
- model_path = args.model.split(bucket_name + '/')[-1]
+ bucket_name = conf['model'].split('/')[2]
+ model_path = conf['model'].split(bucket_name + '/')[-1]
gcloud.read_from_bucket(client, bucket_name, model_path)
model_path = './' + model_path
else:
- model_path = args.model
+ model_path = conf['model']
model = None
- if args.algorithm == 'DQN':
- model = DQN.load(model_path, tensorboard_log=args.tensorboard)
- elif args.algorithm == 'DDPG':
- model = DDPG.load(model_path, tensorboard_log=args.tensorboard)
- elif args.algorithm == 'A2C':
- model = A2C.load(model_path, tensorboard_log=args.tensorboard)
- elif args.algorithm == 'PPO':
- model = PPO.load(model_path, tensorboard_log=args.tensorboard)
- elif args.algorithm == 'SAC':
- model = SAC.load(model_path, tensorboard_log=args.tensorboard)
- elif args.algorithm == 'TD3':
- model = TD3.load(model_path, tensorboard_log=args.tensorboard)
+ if conf['algorithm']['name'] == 'SB3-DQN':
+ model = DQN.load(
+ model_path, tensorboard_log=conf.get(
+ 'tensorboard', None))
+ elif conf['algorithm']['name'] == 'SB3-DDPG':
+ model = DDPG.load(
+ model_path, tensorboard_log=conf.get(
+ 'tensorboard', None))
+ elif conf['algorithm']['name'] == 'SB3-A2C':
+ model = A2C.load(
+ model_path, tensorboard_log=conf.get(
+ 'tensorboard', None))
+ elif conf['algorithm']['name'] == 'SB3-PPO':
+ model = PPO.load(
+ model_path, tensorboard_log=conf.get(
+ 'tensorboard', None))
+ elif conf['algorithm']['name'] == 'SB3-SAC':
+ model = SAC.load(
+ model_path, tensorboard_log=conf.get(
+ 'tensorboard', None))
+ elif conf['algorithm']['name'] == 'SB3-TD3':
+ model = TD3.load(
+ model_path, tensorboard_log=conf.get(
+ 'tensorboard', None))
else:
raise RuntimeError('Algorithm specified is not registered.')
@@ -451,7 +246,7 @@
# ---------------------------------------------------------------------------- #
n_timesteps_episode = env.simulator._eplus_one_epi_len / \
env.simulator._eplus_run_stepsize
- timesteps = args.episodes * n_timesteps_episode - 1
+ timesteps = conf['episodes'] * n_timesteps_episode - 1
# ---------------------------------------------------------------------------- #
# CALLBACKS #
@@ -459,24 +254,24 @@
callbacks = []
# Set up Evaluation and saving best model
- if args.evaluation:
+ if conf.get('evaluation'):
eval_callback = LoggerEvalCallback(
eval_env,
best_model_save_path='best_model/' + name,
log_path='best_model/' + name + '/',
eval_freq=n_timesteps_episode *
- args.eval_freq,
+ conf['evaluation']['eval_freq'],
deterministic=True,
render=False,
- n_eval_episodes=args.eval_length)
+ n_eval_episodes=conf['evaluation']['eval_length'])
callbacks.append(eval_callback)
# Set up tensorboard logger
- if args.tensorboard:
- log_callback = LoggerCallback(sinergym_logger=bool(args.logger))
+ if conf.get('tensorboard'):
+ log_callback = LoggerCallback()
callbacks.append(log_callback)
# lets change default dir for TensorboardFormatLogger only
- tb_path = args.tensorboard + '/' + name
+ tb_path = conf['tensorboard'] + '/' + name
new_logger = configure(tb_path, ["tensorboard"])
model.set_logger(new_logger)
@@ -488,7 +283,7 @@
model.learn(
total_timesteps=timesteps,
callback=callback,
- log_interval=args.log_interval)
+ log_interval=conf['algorithm']['log_interval'])
model.save(env.simulator._env_working_dir_parent + '/' + name)
# If the algorithm doesn't reset or close the environment, this script will do it in
@@ -500,46 +295,48 @@
# ---------------------------------------------------------------------------- #
# Mlflow artifacts storege #
# ---------------------------------------------------------------------------- #
- if args.mlflow_store:
+ if conf.get('cloud').get('mlflow_store'):
# Code for send output and tensorboard to mlflow artifacts.
mlflow.log_artifacts(
local_dir=env.simulator._env_working_dir_parent,
artifact_path=name)
- if args.evaluation:
+ if conf.get('evaluation'):
mlflow.log_artifacts(
local_dir='best_model/' + name,
artifact_path='best_model/' + name)
# If tensorboard is active (in local) we should send to mlflow
- if args.tensorboard and 'gs://' + args.bucket_name not in args.tensorboard:
+ if conf.get('tensorboard') and 'gs://' + \
+ conf['cloud']['remote_store'] not in conf['tensorboard']:
mlflow.log_artifacts(
- local_dir=args.tensorboard + '/' + name,
- artifact_path=os.path.abspath(args.tensorboard).split('/')[-1] + '/' + name)
+ local_dir=conf['tensorboard'] + '/' + name,
+ artifact_path=os.path.abspath(conf['tensorboard']).split('/')[-1] + '/' + name)
# ---------------------------------------------------------------------------- #
# Google Cloud Bucket Storage #
# ---------------------------------------------------------------------------- #
- if args.remote_store:
+ if conf.get('cloud').get('remote_store'):
# Initiate Google Cloud client
client = gcloud.init_storage_client()
# Code for send output and tensorboard to common resource here.
gcloud.upload_to_bucket(
client,
src_path=env.simulator._env_working_dir_parent,
- dest_bucket_name=args.bucket_name,
+ dest_bucket_name=conf['cloud']['remote_store'],
dest_path=name)
- if args.evaluation:
+ if conf.get('evaluation'):
gcloud.upload_to_bucket(
client,
src_path='best_model/' + name + '/',
- dest_bucket_name=args.bucket_name,
+ dest_bucket_name=conf['cloud']['remote_store'],
dest_path='best_model/' + name + '/')
# If tensorboard is active (in local) we should send to bucket
- if args.tensorboard and 'gs://' + args.bucket_name not in args.tensorboard:
+ if conf['tensorboard'] and 'gs://' + \
+ conf['cloud']['remote_store'] not in conf['tensorboard']:
gcloud.upload_to_bucket(
client,
- src_path=args.tensorboard + '/' + name + '/',
- dest_bucket_name=args.bucket_name,
- dest_path=os.path.abspath(args.tensorboard).split('/')[-1] + '/' + name + '/')
+ src_path=conf['tensorboard'] + '/' + name + '/',
+ dest_bucket_name=conf['cloud']['remote_store'],
+ dest_path=os.path.abspath(conf['tensorboard']).split('/')[-1] + '/' + name + '/')
# gcloud.upload_to_bucket(
# client,
# src_path='mlruns/',
@@ -552,6 +349,7 @@
# ---------------------------------------------------------------------------- #
# Autodelete option if is a cloud resource #
# ---------------------------------------------------------------------------- #
- if args.group_name and args.auto_delete:
+ if args.group_name and conf['cloud']['auto_delete']:
token = gcloud.get_service_account_token()
- gcloud.delete_instance_MIG_from_container(args.group_name, token)
+ gcloud.delete_instance_MIG_from_container(
+ conf['cloud']['group_name'], token)
From d46244278ce296730917d8ea13a275b8b316bb7e Mon Sep 17 00:00:00 2001
From: AlejandroCN7
Date: Fri, 3 Mar 2023 11:53:51 +0000
Subject: [PATCH 04/15] Update version from 2.2.4 to 2.2.5
---
sinergym/version.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sinergym/version.txt b/sinergym/version.txt
index 530cdd91a2..21bb5e156f 100644
--- a/sinergym/version.txt
+++ b/sinergym/version.txt
@@ -1 +1 @@
-2.2.4
+2.2.5
From ce298feda194a048468951e61b5e30d88a2e52a0 Mon Sep 17 00:00:00 2001
From: AlejandroCN7
Date: Fri, 3 Mar 2023 11:54:37 +0000
Subject: [PATCH 05/15] Enhanced JSON structure in DRL_battery example
---
scripts/DRL_battery_example.json | 24 +++++++++---------------
1 file changed, 9 insertions(+), 15 deletions(-)
diff --git a/scripts/DRL_battery_example.json b/scripts/DRL_battery_example.json
index 2126501fe9..1119752fd6 100644
--- a/scripts/DRL_battery_example.json
+++ b/scripts/DRL_battery_example.json
@@ -1,6 +1,7 @@
{
"id": "PRUEBAALEX",
"environment": "Eplus-5Zone-hot-continuous-stochastic-v1",
+ "episodes": 5,
"algorithm": {
"name": "SB3-PPO",
"log_interval": 100,
@@ -19,7 +20,6 @@
"max_grad_norm": 0.5
}
},
- "episodes": 5,
"seed": 3,
"model": null,
"reward": {
@@ -40,21 +40,15 @@
]
}
},
- "wrappers": [
- {
- "class": "NormalizeObservation",
- "parameters": {
- "ranges": "sinergym.utils.constants.RANGES_5ZONE"
- }
+ "wrappers": {
+ "NormalizeObservation": {
+ "ranges": "sinergym.utils.constants.RANGES_5ZONE"
},
- {
- "class": "LoggerWrapper",
- "parameters": {
- "logger_class": "sinergym.utils.logger.CSVLogger",
- "flag": true
- }
+ "LoggerWrapper": {
+ "logger_class": "sinergym.utils.logger.CSVLogger",
+ "flag": true
}
- ],
+ },
"evaluation": {
"eval_freq": 2,
"eval_length": 1
@@ -64,6 +58,6 @@
"remote_store": "bucket_example",
"mlflow_store": true,
"group_name": "example_group",
- "autodelete": true
+ "auto_delete": true
}
}
\ No newline at end of file
From 60e591dede6b0b6dafbda7f36333300682d05894 Mon Sep 17 00:00:00 2001
From: AlejandroCN7
Date: Fri, 3 Mar 2023 11:55:06 +0000
Subject: [PATCH 06/15] Re-adapted DRL_battery to new JSON changes and fixed
some bugs
---
scripts/DRL_battery.py | 215 +++++++++++++++++++++--------------------
1 file changed, 108 insertions(+), 107 deletions(-)
diff --git a/scripts/DRL_battery.py b/scripts/DRL_battery.py
index 82c20bcb79..d37d213db8 100644
--- a/scripts/DRL_battery.py
+++ b/scripts/DRL_battery.py
@@ -8,15 +8,15 @@
import mlflow
import numpy as np
import tensorboard
-# from stable_baselines3 import A2C, DDPG, DQN, PPO, SAC, TD3
-# from stable_baselines3.common.callbacks import CallbackList
-# from stable_baselines3.common.logger import configure
-# from stable_baselines3.common.noise import NormalActionNoise
-# from stable_baselines3.common.vec_env import DummyVecEnv
+from stable_baselines3 import A2C, DDPG, DQN, PPO, SAC, TD3
+from stable_baselines3.common.callbacks import CallbackList
+from stable_baselines3.common.logger import configure
+from stable_baselines3.common.noise import NormalActionNoise
+from stable_baselines3.common.vec_env import DummyVecEnv
import sinergym
import sinergym.utils.gcloud as gcloud
-# from sinergym.utils.callbacks import LoggerCallback, LoggerEvalCallback
+from sinergym.utils.callbacks import LoggerCallback, LoggerEvalCallback
from sinergym.utils.constants import *
from sinergym.utils.rewards import *
from sinergym.utils.wrappers import (LoggerWrapper, MultiObsWrapper,
@@ -73,34 +73,36 @@
# sinergym and python versions
mlflow.log_param('sinergym-version', sinergym.__version__)
mlflow.log_param('python-version', sys.version)
-
+ # Main
mlflow.log_param('environment', conf['environment'])
mlflow.log_param('episodes', conf['episodes'])
mlflow.log_param('algorithm', conf['algorithm']['name'])
mlflow.log_param('reward', conf['reward']['class'])
+ # Optional
+ mlflow.log_param('tensorboard', conf.get('tensorboard', False))
mlflow.log_param(
- 'normalization', bool(
- conf.get('wrappers').get('class') == 'NormalizeObservation'))
- mlflow.log_param(
- 'multi-observations',
- bool(
- conf.get('wrappers').get('class') == 'MultiObsWrapper'))
- mlflow.log_param(
- 'logger', bool(
- conf.get('wrappers').get('class') == 'LoggerWrapper'))
- mlflow.log_param('tensorboard', conf.get('tensorboard'))
- mlflow.log_param('evaluation', bool(conf.get('evaluation')))
- mlflow.log_param('evaluation-frequency',
- conf.get('evaluation').get('eval_freq'))
- mlflow.log_param(
- 'evaluation-length',
- conf.get('evaluation').get('eval_length'))
- mlflow.log_param('log-interval', conf['algorithm'].get('log_interval'))
- mlflow.log_param('seed', conf.get('seed'))
- mlflow.log_param(
- 'remote-store',
- bool(
- conf.get('cloud').get('remote_store')))
+ 'log-interval',
+ conf['algorithm'].get(
+ 'log_interval',
+ False))
+ mlflow.log_param('seed', conf.get('seed', False))
+ if conf.get('cloud', False):
+ mlflow.log_param(
+ 'remote-store',
+ conf['cloud'].get(
+ 'remote_store',
+ False))
+ if conf.get('wrappers'):
+ for key in conf['wrappers']:
+ mlflow.log_param(key, True)
+ mlflow.log_param('evaluation', bool(conf.get('evaluation', False)))
+ if conf.get('evaluation'):
+ mlflow.log_param(
+ 'evaluation-frequency',
+ conf['evaluation'].get('eval_freq'))
+ mlflow.log_param(
+ 'evaluation-length',
+ conf['evaluation'].get('eval_length'))
# algorithm params
mlflow.log_params(conf['algorithm'].get('parameters'))
@@ -114,7 +116,7 @@
reward_kwargs = conf['reward']['parameters']
env = gym.make(
- args.environment,
+ conf['environment'],
reward=reward,
reward_kwargs=reward_kwargs)
@@ -122,7 +124,7 @@
eval_env = None
if conf.get('evaluation'):
eval_env = gym.make(
- args.environment,
+ conf['environment'],
reward=reward,
reward_kwargs=reward_kwargs)
@@ -130,74 +132,78 @@
# Wrappers #
# ---------------------------------------------------------------------------- #
if conf.get('wrappers'):
- wrappers = conf['wrappers']
- for wrapper in wrappers:
- wrapper_class = eval(wrapper['class'])
- env = wrapper_class(env, **wrapper['parameters'])
- if eval_env is not None:
- eval_env = wrapper_class(eval_env, **wrapper['parameters'])
+ for key, parameters in conf['wrappers']:
+ wrapper_class = eval(key)
+ # parse str parameters to sinergym variables
+ for name, value in parameters:
+ if 'sinergym.' in name:
+ parameters[name] = eval(value)
+ env = wrapper_class(env=env, ** parameters)
+ if eval_env is not None:
+ eval_env = wrapper_class(env=eval_env, ** parameters)
# ---------------------------------------------------------------------------- #
# Defining model (algorithm) #
# ---------------------------------------------------------------------------- #
-
- if args.model is None:
+ algorithm_name = conf['algorithm']['name']
+ algorithm_parameters = conf['algorithm']['parameters']
+ if conf.get('model') is None:
# --------------------------------------------------------#
# DQN #
# --------------------------------------------------------#
- if conf['algorithm']['name'] == 'SB3-DQN':
+ if algorithm_name == 'SB3-DQN':
model = DQN(env=env,
seed=conf.get('seed', None),
tensorboard_log=conf.get('tensorboard', None),
- ** conf['algorithm']['parameters'])
+ ** algorithm_parameters)
# --------------------------------------------------------#
# DDPG #
# --------------------------------------------------------#
- elif conf['algorithm']['name'] == 'SB3-DDPG':
+ elif algorithm_name == 'SB3-DDPG':
model = DDPG(env,
seed=conf.get('seed', None),
tensorboard_log=conf.get('tensorboard', None),
- ** conf['algorithm']['parameters'])
+ ** algorithm_parameters)
# --------------------------------------------------------#
# A2C #
# --------------------------------------------------------#
- elif conf['algorithm']['name'] == 'SB3-A2C':
+ elif algorithm_name == 'SB3-A2C':
model = A2C(env,
seed=conf.get('seed', None),
tensorboard_log=conf.get('tensorboard', None),
- ** conf['algorithm']['parameters'])
+ ** algorithm_parameters)
# --------------------------------------------------------#
# PPO #
# --------------------------------------------------------#
- elif conf['algorithm']['name'] == 'SB3-PPO':
+ elif algorithm_name == 'SB3-PPO':
model = PPO(env,
seed=conf.get('seed', None),
tensorboard_log=conf.get('tensorboard', None),
- ** conf['algorithm']['parameters'])
+ ** algorithm_parameters)
# --------------------------------------------------------#
# SAC #
# --------------------------------------------------------#
- elif conf['algorithm']['name'] == 'SB3-SAC':
+ elif algorithm_name == 'SB3-SAC':
model = SAC(env,
seed=conf.get('seed', None),
tensorboard_log=conf.get('tensorboard', None),
- ** conf['algorithm']['parameters'])
+ ** algorithm_parameters)
# --------------------------------------------------------#
# TD3 #
# --------------------------------------------------------#
- elif conf['algorithm']['name'] == 'SB3-TD3':
+ elif algorithm_name == 'SB3-TD3':
model = TD3(env,
seed=conf.get('seed', None),
tensorboard_log=conf.get('tensorboard', None),
- ** conf['algorithm']['parameters'])
+ ** algorithm_parameters)
# --------------------------------------------------------#
# Error #
# --------------------------------------------------------#
else:
raise RuntimeError(
- F'Algorithm specified [{conf["algorithm"]["name"]} ] is not registered.')
+ F'Algorithm specified [{algorithm_name} ] is not registered.')
else:
model_path = ''
@@ -212,27 +218,27 @@
model_path = conf['model']
model = None
- if conf['algorithm']['name'] == 'SB3-DQN':
+ if algorithm_name == 'SB3-DQN':
model = DQN.load(
model_path, tensorboard_log=conf.get(
'tensorboard', None))
- elif conf['algorithm']['name'] == 'SB3-DDPG':
+ elif algorithm_name == 'SB3-DDPG':
model = DDPG.load(
model_path, tensorboard_log=conf.get(
'tensorboard', None))
- elif conf['algorithm']['name'] == 'SB3-A2C':
+ elif algorithm_name == 'SB3-A2C':
model = A2C.load(
model_path, tensorboard_log=conf.get(
'tensorboard', None))
- elif conf['algorithm']['name'] == 'SB3-PPO':
+ elif algorithm_name == 'SB3-PPO':
model = PPO.load(
model_path, tensorboard_log=conf.get(
'tensorboard', None))
- elif conf['algorithm']['name'] == 'SB3-SAC':
+ elif algorithm_name == 'SB3-SAC':
model = SAC.load(
model_path, tensorboard_log=conf.get(
'tensorboard', None))
- elif conf['algorithm']['name'] == 'SB3-TD3':
+ elif algorithm_name == 'SB3-TD3':
model = TD3.load(
model_path, tensorboard_log=conf.get(
'tensorboard', None))
@@ -293,55 +299,48 @@
env.close()
# ---------------------------------------------------------------------------- #
- # Mlflow artifacts storege #
+ # Mlflow artifacts storege and Google Cloud Bucket Storage #
# ---------------------------------------------------------------------------- #
- if conf.get('cloud').get('mlflow_store'):
- # Code for send output and tensorboard to mlflow artifacts.
- mlflow.log_artifacts(
- local_dir=env.simulator._env_working_dir_parent,
- artifact_path=name)
- if conf.get('evaluation'):
- mlflow.log_artifacts(
- local_dir='best_model/' + name,
- artifact_path='best_model/' + name)
- # If tensorboard is active (in local) we should send to mlflow
- if conf.get('tensorboard') and 'gs://' + \
- conf['cloud']['remote_store'] not in conf['tensorboard']:
- mlflow.log_artifacts(
- local_dir=conf['tensorboard'] + '/' + name,
- artifact_path=os.path.abspath(conf['tensorboard']).split('/')[-1] + '/' + name)
-
- # ---------------------------------------------------------------------------- #
- # Google Cloud Bucket Storage #
- # ---------------------------------------------------------------------------- #
- if conf.get('cloud').get('remote_store'):
- # Initiate Google Cloud client
- client = gcloud.init_storage_client()
- # Code for send output and tensorboard to common resource here.
- gcloud.upload_to_bucket(
- client,
- src_path=env.simulator._env_working_dir_parent,
- dest_bucket_name=conf['cloud']['remote_store'],
- dest_path=name)
- if conf.get('evaluation'):
- gcloud.upload_to_bucket(
- client,
- src_path='best_model/' + name + '/',
- dest_bucket_name=conf['cloud']['remote_store'],
- dest_path='best_model/' + name + '/')
- # If tensorboard is active (in local) we should send to bucket
- if conf['tensorboard'] and 'gs://' + \
- conf['cloud']['remote_store'] not in conf['tensorboard']:
+ if conf.get('cloud'):
+ if conf['cloud'].get('remote_store'):
+ # Initiate Google Cloud client
+ client = gcloud.init_storage_client()
+ # Code for send output and tensorboard to common resource here.
gcloud.upload_to_bucket(
client,
- src_path=conf['tensorboard'] + '/' + name + '/',
+ src_path=env.simulator._env_working_dir_parent,
dest_bucket_name=conf['cloud']['remote_store'],
- dest_path=os.path.abspath(conf['tensorboard']).split('/')[-1] + '/' + name + '/')
- # gcloud.upload_to_bucket(
- # client,
- # src_path='mlruns/',
- # dest_bucket_name=args.bucket_name,
- # dest_path='mlruns/')
+ dest_path=name)
+ # Code for send output and tensorboard to mlflow artifacts.
+ mlflow.log_artifacts(
+ local_dir=env.simulator._env_working_dir_parent,
+ artifact_path=name)
+ if conf.get('evaluation'):
+ gcloud.upload_to_bucket(
+ client,
+ src_path='best_model/' + name + '/',
+ dest_bucket_name=conf['cloud']['remote_store'],
+ dest_path='best_model/' + name + '/')
+ mlflow.log_artifacts(
+ local_dir='best_model/' + name,
+ artifact_path='best_model/' + name)
+ # If tensorboard is active (in local) we should send to mlflow
+ if conf.get('tensorboard') and 'gs://' + \
+ conf['cloud']['remote_store'] not in conf.get('tensorboard'):
+ gcloud.upload_to_bucket(
+ client,
+ src_path=conf['tensorboard'] + '/' + name + '/',
+ dest_bucket_name=conf['cloud']['remote_store'],
+ dest_path=os.path.abspath(conf['tensorboard']).split('/')[-1] + '/' + name + '/')
+ mlflow.log_artifacts(
+ local_dir=conf['tensorboard'] + '/' + name,
+ artifact_path=os.path.abspath(conf['tensorboard']).split('/')[-1] + '/' + name)
+
+ # gcloud.upload_to_bucket(
+ # client,
+ # src_path='mlruns/',
+ # dest_bucket_name=conf['cloud']['remote_store'],
+ # dest_path='mlruns/')
# End mlflow run
mlflow.end_run()
@@ -349,7 +348,9 @@
# ---------------------------------------------------------------------------- #
# Autodelete option if is a cloud resource #
# ---------------------------------------------------------------------------- #
- if args.group_name and conf['cloud']['auto_delete']:
- token = gcloud.get_service_account_token()
- gcloud.delete_instance_MIG_from_container(
- conf['cloud']['group_name'], token)
+ if conf.get('cloud'):
+ if conf['cloud'].get(
+ 'remote_store') and conf['cloud'].get('auto_delete'):
+ token = gcloud.get_service_account_token()
+ gcloud.delete_instance_MIG_from_container(
+ conf['cloud']['group_name'], token)
From cd59621cae603228559bed038680df17c2c81159 Mon Sep 17 00:00:00 2001
From: AlejandroCN7
Date: Fri, 3 Mar 2023 14:11:10 +0000
Subject: [PATCH 07/15] Added documentation about json file for DRL_battery.py
---
.../pages/deep-reinforcement-learning.rst | 20 ++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/docs/source/pages/deep-reinforcement-learning.rst b/docs/source/pages/deep-reinforcement-learning.rst
index acd1978334..e285ff2dd4 100644
--- a/docs/source/pages/deep-reinforcement-learning.rst
+++ b/docs/source/pages/deep-reinforcement-learning.rst
@@ -160,7 +160,7 @@ How use
You can try your own experiments and benefit from this functionality.
`sinergym/scripts/DRL_battery.py `__
is a example code to use it. You can use ``DRL_battery.py`` directly from
-your local computer specifying ``--tensorboard`` flag in execution.
+your local computer or using Google Cloud Platform.
The most **important information** you must keep in mind when you try
your own experiments are:
@@ -179,13 +179,23 @@ your own experiments are:
specify train ``timesteps``, ``callbacks`` and ``log_interval``
as we commented in type algorithms (On and Off Policy).
-* ``DRL_battery.py`` requires some **extra arguments** to being
- executed like ``-env`` and ``-ep``.
-
* You can execute **Curriculum Learning**, you only have to
- add ``--model`` field with a valid model path, this script
+  add a ``model`` field with a valid model path; this script
will load the model and execute to train.
+``DRL_battery.py`` has a unique parameter to be able to execute it; ``-conf``.
+This parameter is a str to indicate the JSON file in which there are allocated
+all information about the experiment you want to execute. You can see the
+JSON structure in `sinergym/scripts/DRL_battery_example.json `__:
+
+* The **obligatory** parameters are: environment, train episodes,
+ algorithm (and parameters) and reward function (and parameters).
+
+* The **optional** parameters are: seed, model to load (before training),
+ experiment ID, wrappers to use (respecting the order), training evaluation,
+ tensorboard functionality and cloud options.
+
+
****************
Mlflow
****************
From 1b015dc2ff27a5c0a826d226f61ec275632d3f61 Mon Sep 17 00:00:00 2001
From: AlejandroCN7
Date: Mon, 6 Mar 2023 10:40:08 +0000
Subject: [PATCH 08/15] Fix isort bug
---
requirements.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/requirements.txt b/requirements.txt
index 8ff728010d..04c124ff0f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
autopep8
eppy
gymnasium
-isort[requirements_deprecated_finder,pipfile_deprecated_finder]
+isort
numpy
opyplus
pandas
From 576e5df1eb2612a144c2a8263ee74f886ca8aa1a Mon Sep 17 00:00:00 2001
From: AlejandroCN7
Date: Fri, 10 Mar 2023 11:05:27 +0000
Subject: [PATCH 09/15] Updated Sinergym version from 2.2.5 to 2.2.6
---
sinergym/version.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sinergym/version.txt b/sinergym/version.txt
index 21bb5e156f..bda8fbec15 100644
--- a/sinergym/version.txt
+++ b/sinergym/version.txt
@@ -1 +1 @@
-2.2.5
+2.2.6
From 5c24af529b1c32c2982cab9bad9a3e0bb96eff0a Mon Sep 17 00:00:00 2001
From: AlejandroCN7
Date: Tue, 14 Mar 2023 11:14:14 +0000
Subject: [PATCH 10/15] Fixed multiple bugs in new DRL_battery.py structure
---
scripts/DRL_battery.py | 29 +++++++++++++++--------------
1 file changed, 15 insertions(+), 14 deletions(-)
diff --git a/scripts/DRL_battery.py b/scripts/DRL_battery.py
index d37d213db8..5dd3cea61b 100644
--- a/scripts/DRL_battery.py
+++ b/scripts/DRL_battery.py
@@ -8,19 +8,18 @@
import mlflow
import numpy as np
import tensorboard
-from stable_baselines3 import A2C, DDPG, DQN, PPO, SAC, TD3
+from stable_baselines3 import *
from stable_baselines3.common.callbacks import CallbackList
from stable_baselines3.common.logger import configure
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.vec_env import DummyVecEnv
-
import sinergym
import sinergym.utils.gcloud as gcloud
from sinergym.utils.callbacks import LoggerCallback, LoggerEvalCallback
from sinergym.utils.constants import *
from sinergym.utils.rewards import *
-from sinergym.utils.wrappers import (LoggerWrapper, MultiObsWrapper,
- NormalizeObservation)
+from sinergym.utils.wrappers import *
+from sinergym.utils.logger import *
# ---------------------------------------------------------------------------- #
# Parameters definition #
@@ -132,12 +131,14 @@
# Wrappers #
# ---------------------------------------------------------------------------- #
if conf.get('wrappers'):
- for key, parameters in conf['wrappers']:
+ for key, parameters in conf['wrappers'].items():
wrapper_class = eval(key)
- # parse str parameters to sinergym variables
- for name, value in parameters:
- if 'sinergym.' in name:
- parameters[name] = eval(value)
+ for name, value in parameters.items():
+ # parse str parameters to sinergym Callable or Objects if it is
+ # required
+ if isinstance(value, str):
+ if 'sinergym.' in value:
+ parameters[name] = eval(value)
env = wrapper_class(env=env, ** parameters)
if eval_env is not None:
eval_env = wrapper_class(env=eval_env, ** parameters)
@@ -162,7 +163,7 @@
# DDPG #
# --------------------------------------------------------#
elif algorithm_name == 'SB3-DDPG':
- model = DDPG(env,
+ model = DDPG(env=env,
seed=conf.get('seed', None),
tensorboard_log=conf.get('tensorboard', None),
** algorithm_parameters)
@@ -170,7 +171,7 @@
# A2C #
# --------------------------------------------------------#
elif algorithm_name == 'SB3-A2C':
- model = A2C(env,
+ model = A2C(env=env,
seed=conf.get('seed', None),
tensorboard_log=conf.get('tensorboard', None),
** algorithm_parameters)
@@ -178,7 +179,7 @@
# PPO #
# --------------------------------------------------------#
elif algorithm_name == 'SB3-PPO':
- model = PPO(env,
+ model = PPO(env=env,
seed=conf.get('seed', None),
tensorboard_log=conf.get('tensorboard', None),
** algorithm_parameters)
@@ -186,7 +187,7 @@
# SAC #
# --------------------------------------------------------#
elif algorithm_name == 'SB3-SAC':
- model = SAC(env,
+ model = SAC(env=env,
seed=conf.get('seed', None),
tensorboard_log=conf.get('tensorboard', None),
** algorithm_parameters)
@@ -194,7 +195,7 @@
# TD3 #
# --------------------------------------------------------#
elif algorithm_name == 'SB3-TD3':
- model = TD3(env,
+ model = TD3(env=env,
seed=conf.get('seed', None),
tensorboard_log=conf.get('tensorboard', None),
** algorithm_parameters)
From 97ee5a7a9b0c568f65ab7caf87cea5cc246158bc Mon Sep 17 00:00:00 2001
From: AlejandroCN7
Date: Tue, 14 Mar 2023 12:08:13 +0000
Subject: [PATCH 11/15] Fixed info keys in callbacks
---
sinergym/utils/callbacks.py | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/sinergym/utils/callbacks.py b/sinergym/utils/callbacks.py
index 038a3ee7fe..c65c8d139f 100644
--- a/sinergym/utils/callbacks.py
+++ b/sinergym/utils/callbacks.py
@@ -82,7 +82,7 @@ def _on_step(self) -> bool:
variables = self.training_env.get_attr('variables')[0]['action']
action = None
# sinergym action received inner its own setpoints range
- action_ = info['action_']
+ action_ = info['action']
try:
# network output clipped with gym action space
action = self.locals['clipped_actions'][-1]
@@ -115,10 +115,10 @@ def _on_step(self) -> bool:
except KeyError:
print('Algorithm reward key in locals dict unknown')
- self.ep_powers.append(info['total_power'])
- self.ep_term_comfort.append(info['comfort_penalty'])
- self.ep_term_energy.append(info['total_power_no_units'])
- if (info['comfort_penalty'] != 0):
+ self.ep_powers.append(info['total_energy'])
+ self.ep_term_comfort.append(info['reward_comfort'])
+ self.ep_term_energy.append(info['reward_energy'])
+ if (info['reward_comfort'] != 0):
self.num_comfort_violation += 1
self.ep_timesteps += 1
From 7fa48efc42aa6f67609be5764df5c0361727b3b6 Mon Sep 17 00:00:00 2001
From: AlejandroCN7
Date: Tue, 14 Mar 2023 12:09:33 +0000
Subject: [PATCH 12/15] Fixed info keys in evaluation.py
---
sinergym/utils/evaluation.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/sinergym/utils/evaluation.py b/sinergym/utils/evaluation.py
index 1330816837..99d93aeb2a 100644
--- a/sinergym/utils/evaluation.py
+++ b/sinergym/utils/evaluation.py
@@ -76,10 +76,10 @@ def evaluate_policy(model: "base_class.BaseAlgorithm",
obs, state=state, deterministic=deterministic)
obs, reward, done, info = env.step(action)
episode_reward += reward
- episode_power += info[0]['total_power']
- episode_power_penalty += info[0]['total_power_no_units']
- episode_comfort_penalty += info[0]['comfort_penalty']
- if info[0]['comfort_penalty'] != 0:
+ episode_power += info[0]['total_energy']
+ episode_power_penalty += info[0]['reward_energy']
+ episode_comfort_penalty += info[0]['reward_comfort']
+ if info[0]['reward_comfort'] != 0:
episode_steps_comfort_violation += 1
if callback is not None:
callback(locals(), globals())
From 61e4df76045fbae68045de94d46788600f89bd65 Mon Sep 17 00:00:00 2001
From: AlejandroCN7
Date: Tue, 14 Mar 2023 14:43:43 +0000
Subject: [PATCH 13/15] Updated tests for stable baselines 3
---
tests/test_stable_baselines.py | 30 +++++++++++++++---------------
1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/tests/test_stable_baselines.py b/tests/test_stable_baselines.py
index 7143f512c8..321e37ba39 100644
--- a/tests/test_stable_baselines.py
+++ b/tests/test_stable_baselines.py
@@ -56,13 +56,13 @@ def test_stable_PPO(env_name, request):
# Check model works
- obs = env.reset()
+ obs, info = env.reset()
a, _ = model.predict(obs)
- obs, reward, done, info = env.step(a)
+ obs, reward, terminated, truncated, info = env.step(a)
assert reward is not None and reward < 0
assert a is not None
- assert isinstance(done, bool)
+ assert isinstance(terminated, bool)
assert info['timestep'] == 1
env.close()
@@ -102,13 +102,13 @@ def test_stable_A2C(env_name, request):
# Check model works
- obs = env.reset()
+ obs, info = env.reset()
a, _ = model.predict(obs)
- obs, reward, done, info = env.step(a)
+ obs, reward, terminated, truncated, info = env.step(a)
assert reward is not None and reward < 0
assert a is not None
- assert isinstance(done, bool)
+ assert isinstance(terminated, bool)
assert info['timestep'] == 1
env.close()
@@ -172,13 +172,13 @@ def test_stable_DQN(env_name, request):
# Check model works
- obs = env.reset()
+ obs, info = env.reset()
a, _ = model.predict(obs)
- obs, reward, done, info = env.step(a)
+ obs, reward, terminated, truncated, info = env.step(a)
assert reward is not None and reward < 0
assert a is not None
- assert isinstance(done, bool)
+ assert isinstance(terminated, bool)
assert info['timestep'] == 1
env.close()
@@ -225,13 +225,13 @@ def test_stable_DDPG(env_name, request):
# Check model works
- obs = env.reset()
+ obs, info = env.reset()
a, _ = model.predict(obs)
- obs, reward, done, info = env.step(a)
+ obs, reward, terminated, truncated, info = env.step(a)
assert reward is not None and reward < 0
assert a is not None
- assert isinstance(done, bool)
+ assert isinstance(terminated, bool)
assert info['timestep'] == 1
env.close()
@@ -272,13 +272,13 @@ def test_stable_SAC(env_name, request):
# Check model works
- obs = env.reset()
+ obs, info = env.reset()
a, _ = model.predict(obs)
- obs, reward, done, info = env.step(a)
+ obs, reward, terminated, truncated, info = env.step(a)
assert reward is not None and reward < 0
assert a is not None
- assert isinstance(done, bool)
+ assert isinstance(terminated, bool)
assert info['timestep'] == 1
env.close()
From 6ec96f7486056c83c9554bad745295bdbb230cc3 Mon Sep 17 00:00:00 2001
From: AlejandroCN7
Date: Tue, 14 Mar 2023 15:14:11 +0000
Subject: [PATCH 14/15] Updated JSON structure and DRL_battery.py: reward and
 reward parameters are no longer mandatory; all environment parameters can be
 optionally overwritten if desired
---
scripts/DRL_battery.py | 31 ++++++++++++++--------
scripts/DRL_battery_example.json | 45 ++++++++++++++------------------
2 files changed, 40 insertions(+), 36 deletions(-)
diff --git a/scripts/DRL_battery.py b/scripts/DRL_battery.py
index 5dd3cea61b..7ad67b4e58 100644
--- a/scripts/DRL_battery.py
+++ b/scripts/DRL_battery.py
@@ -76,7 +76,8 @@
mlflow.log_param('environment', conf['environment'])
mlflow.log_param('episodes', conf['episodes'])
mlflow.log_param('algorithm', conf['algorithm']['name'])
- mlflow.log_param('reward', conf['reward']['class'])
+    # Environment parameters overwritten
+ mlflow.log_params(conf.get('env_params'))
# Optional
mlflow.log_param('tensorboard', conf.get('tensorboard', False))
mlflow.log_param(
@@ -105,27 +106,35 @@
# algorithm params
mlflow.log_params(conf['algorithm'].get('parameters'))
- # reward params
- mlflow.log_params(conf['reward'].get('parameters'))
+
+ # --------------------- Overwrite environment parameters --------------------- #
+ env_params = {}
+ # Transform required str's into Callables
+ if conf.get('env_params'):
+ if conf['env_params'].get('reward'):
+ conf['env_params']['reward'] = eval(conf['env_params']['reward'])
+ if conf['env_params'].get('observation_space'):
+ conf['env_params']['observation_space'] = eval(
+ conf['env_params']['observation_space'])
+ if conf['env_params'].get('action_space'):
+            conf['env_params']['action_space'] = eval(
+ conf['env_params']['action_space'])
+
+ env_params = conf['env_params']
# ---------------------------------------------------------------------------- #
- # Environment construction (with reward specified) #
+ # Environment construction #
# ---------------------------------------------------------------------------- #
- reward = eval(conf['reward']['class'])
- reward_kwargs = conf['reward']['parameters']
-
env = gym.make(
conf['environment'],
- reward=reward,
- reward_kwargs=reward_kwargs)
+ ** env_params)
# env for evaluation if is enabled
eval_env = None
if conf.get('evaluation'):
eval_env = gym.make(
conf['environment'],
- reward=reward,
- reward_kwargs=reward_kwargs)
+ ** env_params)
# ---------------------------------------------------------------------------- #
# Wrappers #
diff --git a/scripts/DRL_battery_example.json b/scripts/DRL_battery_example.json
index 1119752fd6..5bd4689aeb 100644
--- a/scripts/DRL_battery_example.json
+++ b/scripts/DRL_battery_example.json
@@ -1,6 +1,25 @@
{
"id": "PRUEBAALEX",
"environment": "Eplus-5Zone-hot-continuous-stochastic-v1",
+ "env_params": {
+ "reward": "LinearReward",
+ "reward_kwargs": {
+ "temperature_variable": [
+ "Zone Air Temperature(SPACE1-1)",
+ "Zone Air Temperature(SPACE1-2)"
+ ],
+ "energy_variable": "Facility Total HVAC Electricity Demand Rate(Whole Building)",
+ "range_comfort_winter": [
+ 20.0,
+ 23.5
+ ],
+ "range_comfort_summer": [
+ 23.0,
+ 26.0
+ ]
+ },
+ "act_repeat": 1
+ },
"episodes": 5,
"algorithm": {
"name": "SB3-PPO",
@@ -22,24 +41,6 @@
},
"seed": 3,
"model": null,
- "reward": {
- "class": "LinearReward",
- "parameters": {
- "temperature_variable": [
- "Zone Air Temperature(SPACE1-1)",
- "Zone Air Temperature(SPACE1-2)"
- ],
- "energy_variable": "Facility Total HVAC Electricity Demand Rate(Whole Building)",
- "range_comfort_winter": [
- 20.0,
- 23.5
- ],
- "range_comfort_summer": [
- 23.0,
- 26.0
- ]
- }
- },
"wrappers": {
"NormalizeObservation": {
"ranges": "sinergym.utils.constants.RANGES_5ZONE"
@@ -53,11 +54,5 @@
"eval_freq": 2,
"eval_length": 1
},
- "tensorboard": "./tensorboard_log/",
- "cloud": {
- "remote_store": "bucket_example",
- "mlflow_store": true,
- "group_name": "example_group",
- "auto_delete": true
- }
+ "tensorboard": "./tensorboard_log/"
}
\ No newline at end of file
From 43eff3553dd8a556d4f23ede6d68e5c3e6625771 Mon Sep 17 00:00:00 2001
From: AlejandroCN7
Date: Tue, 14 Mar 2023 15:24:18 +0000
Subject: [PATCH 15/15] Updated JSON example fields order and updated
documentation
---
.../pages/deep-reinforcement-learning.rst | 14 ++++---
scripts/DRL_battery_example.json | 40 +++++++++----------
2 files changed, 29 insertions(+), 25 deletions(-)
diff --git a/docs/source/pages/deep-reinforcement-learning.rst b/docs/source/pages/deep-reinforcement-learning.rst
index e285ff2dd4..d0a3b843d5 100644
--- a/docs/source/pages/deep-reinforcement-learning.rst
+++ b/docs/source/pages/deep-reinforcement-learning.rst
@@ -186,15 +186,19 @@ your own experiments are:
``DRL_battery.py`` has a unique parameter to be able to execute it; ``-conf``.
This parameter is a str to indicate the JSON file in which there are allocated
all information about the experiment you want to execute. You can see the
-JSON structure in `sinergym/scripts/DRL_battery_example.json `__:
+JSON structure example in `sinergym/scripts/DRL_battery_example.json `__:
-* The **obligatory** parameters are: environment, train episodes,
- algorithm (and parameters) and reward function (and parameters).
+* The **obligatory** parameters are: environment, episodes,
+ algorithm (and parameters of the algorithm which don't have
+ default values).
-* The **optional** parameters are: seed, model to load (before training),
+* The **optional** parameters are: all environment parameters (if specified,
+  they will overwrite the default environment values), seed, model to load (before training),
experiment ID, wrappers to use (respecting the order), training evaluation,
tensorboard functionality and cloud options.
-
+
+* The name of the fields must match those in the example mentioned. Otherwise, the experiment
+ will return an error.
****************
Mlflow
diff --git a/scripts/DRL_battery_example.json b/scripts/DRL_battery_example.json
index 5bd4689aeb..50dd24033b 100644
--- a/scripts/DRL_battery_example.json
+++ b/scripts/DRL_battery_example.json
@@ -1,25 +1,6 @@
{
- "id": "PRUEBAALEX",
+ "id": "ExperimentExample",
"environment": "Eplus-5Zone-hot-continuous-stochastic-v1",
- "env_params": {
- "reward": "LinearReward",
- "reward_kwargs": {
- "temperature_variable": [
- "Zone Air Temperature(SPACE1-1)",
- "Zone Air Temperature(SPACE1-2)"
- ],
- "energy_variable": "Facility Total HVAC Electricity Demand Rate(Whole Building)",
- "range_comfort_winter": [
- 20.0,
- 23.5
- ],
- "range_comfort_summer": [
- 23.0,
- 26.0
- ]
- },
- "act_repeat": 1
- },
"episodes": 5,
"algorithm": {
"name": "SB3-PPO",
@@ -39,6 +20,25 @@
"max_grad_norm": 0.5
}
},
+ "env_params": {
+ "reward": "LinearReward",
+ "reward_kwargs": {
+ "temperature_variable": [
+ "Zone Air Temperature(SPACE1-1)",
+ "Zone Air Temperature(SPACE1-2)"
+ ],
+ "energy_variable": "Facility Total HVAC Electricity Demand Rate(Whole Building)",
+ "range_comfort_winter": [
+ 20.0,
+ 23.5
+ ],
+ "range_comfort_summer": [
+ 23.0,
+ 26.0
+ ]
+ },
+ "act_repeat": 1
+ },
"seed": 3,
"model": null,
"wrappers": {