From 4e424522ab45ae4b145226ae01782f813ddd0863 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Cmatrixbalto=E2=80=9D?= <“ilfangliu@gmail.com”>
Date: Fri, 13 Sep 2024 03:17:17 -0400
Subject: [PATCH] Add RLBRIDGE_rwdshape configs for coffee, doorknobs, gridrowdoor

---
 scripts/configs/RLBRIDGE_rwdshape_coffee.yaml | 35 +++++++++++++++++++
 .../configs/RLBRIDGE_rwdshape_doorknobs.yaml  | 35 +++++++++++++++++++
 .../RLBRIDGE_rwdshape_gridrowdoor.yaml        | 34 ++++++++++++++++++
 3 files changed, 104 insertions(+)
 create mode 100644 scripts/configs/RLBRIDGE_rwdshape_coffee.yaml
 create mode 100644 scripts/configs/RLBRIDGE_rwdshape_doorknobs.yaml
 create mode 100644 scripts/configs/RLBRIDGE_rwdshape_gridrowdoor.yaml

diff --git a/scripts/configs/RLBRIDGE_rwdshape_coffee.yaml b/scripts/configs/RLBRIDGE_rwdshape_coffee.yaml
new file mode 100644
index 000000000..cd9f5c8c3
--- /dev/null
+++ b/scripts/configs/RLBRIDGE_rwdshape_coffee.yaml
@@ -0,0 +1,35 @@
+# rl_bridge_approach experiments in coffee env.
+---
+APPROACHES:
+  rl_rwd_shape:
+    NAME: "rl_bridge_policy"
+    FLAGS:
+      explorer: "maple_q"
+      bilevel_plan_without_sim: True
+      mlp_regressor_max_itr: 100000
+      segmenter: "oracle"
+      demonstrator: "oracle"
+      sesame_max_skeletons_optimized: 50
+      use_obj_centric: True
+      rl_rwd_shape: True
+
+ENVS:
+  RLBRIDGE_coffee:
+    NAME: "coffee"
+ARGS:
+  - "debug"
+FLAGS:
+  max_initial_demos: 0
+  sampler_learner: "oracle"
+  strips_learner: "oracle"
+  num_online_learning_cycles: 100
+  num_test_tasks: 10
+  num_train_tasks: 1
+  interactive_num_requests_per_cycle: 5
+  online_nsrt_learning_requests_per_cycle: 5
+  max_num_steps_interaction_request: 100
+  timeout: 1000
+  active_sampler_learning_num_samples: 100
+  same_levels: True
+START_SEED: 0
+NUM_SEEDS: 8
diff --git a/scripts/configs/RLBRIDGE_rwdshape_doorknobs.yaml b/scripts/configs/RLBRIDGE_rwdshape_doorknobs.yaml
new file mode 100644
index 000000000..8cdf74f27
--- /dev/null
+++ b/scripts/configs/RLBRIDGE_rwdshape_doorknobs.yaml
@@ -0,0 +1,35 @@
+# rl_bridge_approach experiments in doorknobs env.
+---
+APPROACHES:
+  rl_rwd_shape:
+    NAME: "rl_bridge_policy"
+    FLAGS:
+      explorer: "maple_q"
+      bilevel_plan_without_sim: True
+      mlp_regressor_max_itr: 100000
+      segmenter: "oracle"
+      demonstrator: "oracle"
+      sesame_max_skeletons_optimized: 50
+      use_obj_centric: True
+      rl_rwd_shape: True
+
+ENVS:
+  RLBRIDGE_doorknobs:
+    NAME: "doorknobs"
+ARGS:
+  - "debug"
+FLAGS:
+  max_initial_demos: 0
+  sampler_learner: "oracle"
+  strips_learner: "oracle"
+  num_online_learning_cycles: 100
+  num_test_tasks: 10
+  num_train_tasks: 1
+  interactive_num_requests_per_cycle: 5
+  online_nsrt_learning_requests_per_cycle: 5
+  max_num_steps_interaction_request: 100
+  timeout: 1000
+  active_sampler_learning_num_samples: 10
+  same_levels: True
+START_SEED: 0
+NUM_SEEDS: 8
diff --git a/scripts/configs/RLBRIDGE_rwdshape_gridrowdoor.yaml b/scripts/configs/RLBRIDGE_rwdshape_gridrowdoor.yaml
new file mode 100644
index 000000000..d7cc4e6d1
--- /dev/null
+++ b/scripts/configs/RLBRIDGE_rwdshape_gridrowdoor.yaml
@@ -0,0 +1,34 @@
+# rl_bridge_approach experiments in gridrowdoor env.
+---
+APPROACHES:
+  rl_rwd_shape:
+    NAME: "maple_q"
+    FLAGS:
+      explorer: "maple_q"
+      bilevel_plan_without_sim: True
+      mlp_regressor_max_itr: 100000
+      segmenter: "oracle"
+      demonstrator: "oracle"
+      sesame_max_skeletons_optimized: 50
+      use_obj_centric: False
+      rl_rwd_shape: True
+ENVS:
+  RLBRIDGE_gridrowdoor:
+    NAME: "grid_row_door"
+ARGS:
+  - "debug"
+FLAGS:
+  max_initial_demos: 0
+  sampler_learner: "oracle"
+  strips_learner: "oracle"
+  num_online_learning_cycles: 100
+  num_test_tasks: 10
+  num_train_tasks: 1
+  interactive_num_requests_per_cycle: 5
+  online_nsrt_learning_requests_per_cycle: 5
+  max_num_steps_interaction_request: 100
+  timeout: 1000
+  active_sampler_learning_num_samples: 100
+  same_levels: True
+START_SEED: 0
+NUM_SEEDS: 8