From 4e424522ab45ae4b145226ae01782f813ddd0863 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Cmatrixbalto=E2=80=9D?= <“ilfangliu@gmail.com”>
Date: Fri, 13 Sep 2024 03:17:17 -0400
Subject: [PATCH] eepy

---
Review notes: restored the line structure of this patch; corrected the
copy-pasted "gridrowdoor" header comments in the coffee and doorknobs
configs; ended each new file with a newline. The blob ids on the "index"
lines predate these edits.

 scripts/configs/RLBRIDGE_rwdshape_coffee.yaml | 35 +++++++++++++++++++
 .../configs/RLBRIDGE_rwdshape_doorknobs.yaml  | 35 +++++++++++++++++++
 .../RLBRIDGE_rwdshape_gridrowdoor.yaml        | 34 ++++++++++++++++++
 3 files changed, 104 insertions(+)
 create mode 100644 scripts/configs/RLBRIDGE_rwdshape_coffee.yaml
 create mode 100644 scripts/configs/RLBRIDGE_rwdshape_doorknobs.yaml
 create mode 100644 scripts/configs/RLBRIDGE_rwdshape_gridrowdoor.yaml

diff --git a/scripts/configs/RLBRIDGE_rwdshape_coffee.yaml b/scripts/configs/RLBRIDGE_rwdshape_coffee.yaml
new file mode 100644
index 000000000..cd9f5c8c3
--- /dev/null
+++ b/scripts/configs/RLBRIDGE_rwdshape_coffee.yaml
@@ -0,0 +1,35 @@
+# rl_bridge_approach experiments in coffee env (rl_rwd_shape enabled).
+---
+APPROACHES:
+  rl_rwd_shape:
+    NAME: "rl_bridge_policy"
+    FLAGS:
+      explorer: "maple_q"
+      bilevel_plan_without_sim: True
+      mlp_regressor_max_itr: 100000
+      segmenter: "oracle"
+      demonstrator: "oracle"
+      sesame_max_skeletons_optimized: 50
+      use_obj_centric: True
+      rl_rwd_shape: True
+
+ENVS:
+  RLBRIDGE_coffee:
+    NAME: "coffee"
+ARGS:
+  - "debug"
+FLAGS:
+  max_initial_demos: 0
+  sampler_learner: "oracle"
+  strips_learner: "oracle"
+  num_online_learning_cycles: 100
+  num_test_tasks: 10
+  num_train_tasks: 1
+  interactive_num_requests_per_cycle: 5
+  online_nsrt_learning_requests_per_cycle: 5
+  max_num_steps_interaction_request: 100
+  timeout: 1000
+  active_sampler_learning_num_samples: 100
+  same_levels: True
+START_SEED: 0
+NUM_SEEDS: 8
diff --git a/scripts/configs/RLBRIDGE_rwdshape_doorknobs.yaml b/scripts/configs/RLBRIDGE_rwdshape_doorknobs.yaml
new file mode 100644
index 000000000..8cdf74f27
--- /dev/null
+++ b/scripts/configs/RLBRIDGE_rwdshape_doorknobs.yaml
@@ -0,0 +1,35 @@
+# rl_bridge_approach experiments in doorknobs env (rl_rwd_shape enabled).
+---
+APPROACHES:
+  rl_rwd_shape:
+    NAME: "rl_bridge_policy"
+    FLAGS:
+      explorer: "maple_q"
+      bilevel_plan_without_sim: True
+      mlp_regressor_max_itr: 100000
+      segmenter: "oracle"
+      demonstrator: "oracle"
+      sesame_max_skeletons_optimized: 50
+      use_obj_centric: True
+      rl_rwd_shape: True
+
+ENVS:
+  RLBRIDGE_doorknobs:
+    NAME: "doorknobs"
+ARGS:
+  - "debug"
+FLAGS:
+  max_initial_demos: 0
+  sampler_learner: "oracle"
+  strips_learner: "oracle"
+  num_online_learning_cycles: 100
+  num_test_tasks: 10
+  num_train_tasks: 1
+  interactive_num_requests_per_cycle: 5
+  online_nsrt_learning_requests_per_cycle: 5
+  max_num_steps_interaction_request: 100
+  timeout: 1000
+  active_sampler_learning_num_samples: 10
+  same_levels: True
+START_SEED: 0
+NUM_SEEDS: 8
diff --git a/scripts/configs/RLBRIDGE_rwdshape_gridrowdoor.yaml b/scripts/configs/RLBRIDGE_rwdshape_gridrowdoor.yaml
new file mode 100644
index 000000000..d7cc4e6d1
--- /dev/null
+++ b/scripts/configs/RLBRIDGE_rwdshape_gridrowdoor.yaml
@@ -0,0 +1,34 @@
+# rl_bridge_approach experiments in gridrowdoor env.
+---
+APPROACHES:
+  rl_rwd_shape:
+    NAME: "maple_q"  # NOTE(review): siblings use "rl_bridge_policy"; confirm
+    FLAGS:
+      explorer: "maple_q"
+      bilevel_plan_without_sim: True
+      mlp_regressor_max_itr: 100000
+      segmenter: "oracle"
+      demonstrator: "oracle"
+      sesame_max_skeletons_optimized: 50
+      use_obj_centric: False
+      rl_rwd_shape: True
+ENVS:
+  RLBRIDGE_gridrowdoor:
+    NAME: "grid_row_door"
+ARGS:
+  - "debug"
+FLAGS:
+  max_initial_demos: 0
+  sampler_learner: "oracle"
+  strips_learner: "oracle"
+  num_online_learning_cycles: 100
+  num_test_tasks: 10
+  num_train_tasks: 1
+  interactive_num_requests_per_cycle: 5
+  online_nsrt_learning_requests_per_cycle: 5
+  max_num_steps_interaction_request: 100
+  timeout: 1000
+  active_sampler_learning_num_samples: 100
+  same_levels: True
+START_SEED: 0
+NUM_SEEDS: 8