[RLlib]: Rename input_evaluation to off_policy_estimation_methods (#25107)
Changes from all commits: e3e499d, 2dcf6aa, b65a7a3, 347cde5, 9b989ce, 8f602f2
First file in the diff:
@@ -15,12 +15,8 @@
 from ray.rllib.evaluation.collectors.sample_collector import SampleCollector
 from ray.rllib.evaluation.collectors.simple_list_collector import SimpleListCollector
 from ray.rllib.models import MODEL_DEFAULTS
-from ray.rllib.offline.estimators.importance_sampling import ImportanceSampling
-from ray.rllib.offline.estimators.weighted_importance_sampling import (
-    WeightedImportanceSampling,
-)
 from ray.rllib.utils import deep_update, merge_dicts
-from ray.rllib.utils.deprecation import DEPRECATED_VALUE
+from ray.rllib.utils.deprecation import DEPRECATED_VALUE, deprecation_warning
 from ray.rllib.utils.typing import (
     EnvConfigDict,
     EnvType,
@@ -170,10 +166,7 @@ def __init__(self, trainer_class=None):
         self.input_ = "sampler"
         self.input_config = {}
         self.actions_in_input_normalized = False
-        self.input_evaluation = [
-            ImportanceSampling,
-            WeightedImportanceSampling,
-        ]
+        self.off_policy_estimation_methods = []
         self.postprocess_inputs = False
         self.shuffle_buffer_size = 0
         self.output = None

Review comment: Any reason why we want to change this default?

Review comment: Got it. It's not needed here as we do this on the eval worker track. Please ignore my comment above.

Review comment: Actually, I'm not sure. Like, we can keep it for backwards compatibility, but if the user doesn't explicitly ask for it, should it be enabled? We can discuss this further in the offline eval worker PR.
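Since the default is now an empty list, anyone who relied on the implicit IS/WIS estimation has to opt in explicitly. A minimal sketch of what that could look like, assuming the estimator classes are still importable from the paths removed in the import hunk above and that the config class is importable as `TrainerConfig` from `ray.rllib.agents.trainer_config` (both import paths are assumptions, not taken from this diff):

    from ray.rllib.agents.trainer_config import TrainerConfig  # import path assumed
    from ray.rllib.offline.estimators.importance_sampling import ImportanceSampling
    from ray.rllib.offline.estimators.weighted_importance_sampling import (
        WeightedImportanceSampling,
    )

    config = TrainerConfig()
    # Explicitly restore the previous default behavior under the new setting name.
    config.offline_data(
        off_policy_estimation_methods=[ImportanceSampling, WeightedImportanceSampling]
    )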
@@ -236,6 +229,7 @@ def __init__(self, trainer_class=None):
         self.prioritized_replay_alpha = DEPRECATED_VALUE
         self.prioritized_replay_beta = DEPRECATED_VALUE
         self.prioritized_replay_eps = DEPRECATED_VALUE
+        self.input_evaluation = DEPRECATED_VALUE

     def to_dict(self) -> TrainerConfigDict:
         """Converts all settings into a legacy config dict for backward compatibility.
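Pre-setting the attribute to `DEPRECATED_VALUE` follows the usual sentinel pattern: the config starts out with a placeholder so later validation can tell whether a user actually assigned the deprecated key. A generic sketch of the idea (not RLlib's actual implementation; RLlib defines a comparable constant in `ray.rllib.utils.deprecation`):

    DEPRECATED_VALUE = -1  # placeholder sentinel, never a meaningful setting

    class Config:
        def __init__(self):
            # Deprecated key: present only so old code that sets it keeps
            # working long enough to be detected.
            self.input_evaluation = DEPRECATED_VALUE

        def validate(self):
            # Anything other than the sentinel means the user touched the old key.
            if self.input_evaluation != DEPRECATED_VALUE:
                raise ValueError(
                    "`input_evaluation` is deprecated; use "
                    "`off_policy_estimation_methods` instead."
                )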
@@ -862,6 +856,7 @@ def offline_data(
         input_config=None,
         actions_in_input_normalized=None,
         input_evaluation=None,
+        off_policy_estimation_methods=None,
         postprocess_inputs=None,
         shuffle_buffer_size=None,
         output=None,
@@ -906,7 +901,8 @@ def offline_data(
                 are already normalized (between -1.0 and 1.0). This is usually the case
                 when the offline file has been generated by another RLlib algorithm
                 (e.g. PPO or SAC), while "normalize_actions" was set to True.
-            input_evaluation: Specify how to evaluate the current policy.
+            input_evaluation: DEPRECATED: Use `off_policy_estimation_methods` instead!
+            off_policy_estimation_methods: Specify how to evaluate the current policy.
                 This only has an effect when reading offline experiences
                 ("input" is not "sampler").
                 Available options:
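For completeness, here is a sketch of how the renamed setting might read in a legacy-style config dict; the dict key is assumed to mirror the new attribute name, and the short string form "is" is borrowed from the example file further below:

    config = {
        # Off-policy estimation only takes effect when experiences are read
        # from offline data, i.e. "input" is not "sampler".
        "input": "/tmp/my-offline-data",          # path is illustrative
        "off_policy_estimation_methods": ["is"],  # importance sampling
    }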
@@ -945,7 +941,16 @@ def offline_data(
         if actions_in_input_normalized is not None:
             self.actions_in_input_normalized = actions_in_input_normalized
         if input_evaluation is not None:
-            self.input_evaluation = input_evaluation
+            deprecation_warning(
+                old="offline_data(input_evaluation={})".format(input_evaluation),
+                new="offline_data(off_policy_estimation_methods={})".format(
+                    input_evaluation
+                ),
+                error=True,
+            )
+            self.off_policy_estimation_methods = input_evaluation
+        if off_policy_estimation_methods is not None:
+            self.off_policy_estimation_methods = off_policy_estimation_methods
         if postprocess_inputs is not None:
             self.postprocess_inputs = postprocess_inputs
         if shuffle_buffer_size is not None:

Review comment: Very nice!
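Because the warning is raised with `error=True`, the old keyword no longer degrades gracefully and the assignment right after it is effectively unreachable. A rough sketch of a caller's migration path, assuming `deprecation_warning(..., error=True)` raises a `ValueError` in this Ray version and reusing the assumed `TrainerConfig` import path from the earlier sketch:

    from ray.rllib.agents.trainer_config import TrainerConfig  # import path assumed

    config = TrainerConfig()
    try:
        # Old spelling: now fails hard instead of being forwarded silently.
        config.offline_data(input_evaluation=["is"])
    except ValueError:
        # New spelling (values shown are illustrative).
        config.offline_data(off_policy_estimation_methods=["is"])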
Second file in the diff:
@@ -134,9 +134,6 @@ def on_train_result(self, *, trainer, result, **kwargs):
         # Evaluate every other training iteration (together
         # with every other call to Trainer.train()).
         "evaluation_interval": args.evaluation_interval,
-        "evaluation_config": {
-            "input_evaluation": ["is"],
-        },
         # Run for n episodes/timesteps (properly distribute load amongst
         # all eval workers). The longer it takes to evaluate, the more sense
         # it makes to use `evaluation_parallel_to_training=True`.

Review comment: Any reason you removed this here?

Review comment: Commenting for future reference, but that line of code wasn't actually doing anything, since eval workers don't read from offline data. Will be fixed in another PR soon.

Review comment: Yes, makes sense. Sampler -> No evaluation. Cool, I think that also answers the question on the default value of `off_policy_estimation_methods` above.
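To illustrate the reviewers' point: evaluation workers sample from the live environment (the "sampler" input) and never read offline data, so an off-policy-estimation override inside `evaluation_config` had no effect. The evaluation section of the example therefore reduces to roughly the following sketch (other keys elided; the interval value is made up so the snippet stands alone):

    config = {
        # Evaluate every other training iteration (together with every other
        # call to Trainer.train()).
        "evaluation_interval": 2,
        # No "evaluation_config"/"input_evaluation" override anymore: with the
        # "sampler" input on eval workers, such a setting would be ignored.
    }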
Review comment: Awesome!