diff --git a/doc_generation/build/doctrees/Data.doctree b/doc_generation/build/doctrees/Data.doctree
index bd9d00f..0de623b 100644
Binary files a/doc_generation/build/doctrees/Data.doctree and b/doc_generation/build/doctrees/Data.doctree differ
diff --git a/doc_generation/build/doctrees/Model.doctree b/doc_generation/build/doctrees/Model.doctree
index 6f7d89e..3f13b80 100644
Binary files a/doc_generation/build/doctrees/Model.doctree and b/doc_generation/build/doctrees/Model.doctree differ
diff --git a/doc_generation/build/doctrees/running.doctree b/doc_generation/build/doctrees/running.doctree
index 41511d1..a2f0742 100644
Binary files a/doc_generation/build/doctrees/running.doctree and b/doc_generation/build/doctrees/running.doctree differ
diff --git a/doc_generation/build/doctrees/usage.doctree b/doc_generation/build/doctrees/usage.doctree
index 5481b38..414311c 100644
Binary files a/doc_generation/build/doctrees/usage.doctree and b/doc_generation/build/doctrees/usage.doctree differ
diff --git a/doc_generation/source/running.rst b/doc_generation/source/running.rst
index 5a95217..9d7b378 100644
--- a/doc_generation/source/running.rst
+++ b/doc_generation/source/running.rst
@@ -52,7 +52,7 @@ will automatically download the relevant files and then compile them into the de
 - Organize downloaded data into an unified format, and the resulting historical text dataset is saved as a series of files ``./dataset/dataset_textseq/[century_num]/[year_num].json``
 - Train a series of historical language models ``./dataset/dataset_modelseq/[century_num]`` using the historical text dataset.
 
-**Running The Benchmarks**
+**Running The Benchmarks on the Command Line**
 
 Running
 
@@ -88,7 +88,10 @@ See comments at the beginning of ``run_benchmark.py`` for a detailed usage of th
         [-h | --help]
         [...] #additional arguments will be supplied to the algorithms and the challenges when they are instantiated; only string values are supported
 
-
+**Running The Benchmarks with the Python API**
+You can also run the benchmark with the Python API using the :code:`run_benchmark` function.
+.. autofunction:: run_benchmark.run_benchmark
+It's worth noting that the Python API is more flexible than the command line interface, as you can implement your own Judge and Examinee classes (inheriting from :code:`JudgeBase` and :code:`ExamineeBase` respectively) and pass them to the :code:`run_benchmark` function.
\ No newline at end of file
diff --git a/doc_generation/source/usage.rst b/doc_generation/source/usage.rst
index 0d443a3..7bdafc3 100644
--- a/doc_generation/source/usage.rst
+++ b/doc_generation/source/usage.rst
@@ -1,11 +1,18 @@
 Quickstart: Implementing your own experiment
 ============================================
 
+General Instructions
+--------------------
+
 To run the benchmark for your own alignment algorithm and assess its temporal alignment
-abilities, you have to implement your own algorithm as a subclass of :class:`benchmark.framework.ExamineeBase`. Implement it
-in a new ``.py`` file in ``algorithm``, then refer to :doc:`running` for guidance on how to run the benchmark on the examinee you implemented.
+abilities, you have to implement your own algorithm as a subclass of :class:`benchmark.framework.ExamineeBase`.
+
+You can either implement it in a new ``.py`` file in ``algorithm`` and run it via the command-line interface, or implement a subclass of :class:`benchmark.framework.ExamineeBase` directly in your own script and run it via the Python API. Refer to :doc:`running` for guidance on how to run the benchmark on the examinee you implemented.
+
+Implementation Instructions
+---------------------------
 
-Feeling daunted? Dom't worry. Read through the following descriptions of :class:`benchmark.framework.ExamineeBase`, and you'll understand the overall workings of the examinee class in our framework.
+Feeling daunted? Don't worry. Read through the following descriptions of :class:`benchmark.framework.ExamineeBase`, and you'll understand the overall workings of the examinee class in our framework.
 
 .. autoclass:: benchmark.framework.ExamineeBase
    :members:
diff --git a/docs/Data.html b/docs/Data.html
index be678eb..d65c73a 100644
--- a/docs/Data.html
+++ b/docs/Data.html
@@ -274,12 +274,13 @@
FileNotFoundError – If file is not found in default search path and path is not specified.
 Examples:
-.. code-block:: python
-
-Data(‘c4_demo’, data_type = ‘sft’, data_path = ‘./libs/llama_factory/data/c4_demo.json’)
-Data(‘c4_demo’, data_type = ‘sft’)
-
+Data('c4_demo', data_type = 'sft', data_path = './libs/llama_factory/data/c4_demo.json')
+Data('c4_demo', data_type = 'sft')
+
 Example:
-.. code-block:: python
-
-data.set_key_fields(prompt_field_name=’content’) # for pretraining dataset stored in content field
-data.set_key_fields(prompt_field_name=’instruction’, query_field_name=’input’, response_field_name=’output’) # for QA dataset with system prompt
-
+data.set_key_fields(prompt_field_name='content') # for pretraining dataset stored in content field
+data.set_key_fields(prompt_field_name='instruction', query_field_name='input', response_field_name='output') # for QA dataset with system prompt
+
If collection_path is omitted, we will search for collection_name in directories specified in abstractions_config.json. When file_selection_func is supplied, files will be captured real-time, instead of only when initializing. Only json files will be captured. You may want to exclude undated.json using file_selection_func. That file is huge.
 Example:
-.. code-block:: python
-
-DataFileCollection(collection_name=’histtext_1826_to_2018’,
-data_type=’pretrain’,
-collection_path = ‘./dataset/dataset_text_sequence/’,
-file_selection_func = (lambda path: 1826 <= int(path.split(‘/’)[-1][1:6]) <= 2018))
-
+DataFileCollection(collection_name='histtext_1826_to_2018',
+                   data_type='pretrain',
+                   collection_path = './dataset/dataset_text_sequence/',
+                   file_selection_func = (lambda path: 1826 <= int(path.split('/')[-1][1:6]) <= 2018))
+
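+A complementary sketch of the exclusion advice above (the collection_name here is a hypothetical placeholder, not part of the shipped docstring): a file_selection_func that drops the oversized undated.json while keeping everything else:
+
+DataFileCollection(collection_name='histtext_dated_only',
+                   data_type='pretrain',
+                   collection_path = './dataset/dataset_text_sequence/',
+                   file_selection_func = (lambda path: not path.endswith('undated.json'))) # skip the huge undated.json file
+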
 Examples:
-.. code-block:: python
-
-Model(model_name = ‘Gemma-2B_sft’, is_instruct_finetuned = True, model_path = ‘./output/training_results/Gemma-2B_sft/’)
-Model(model_name = ‘Gemma-2B_sft’, is_instruct_finetuned = True)
-
+Model(model_name = 'Gemma-2B_sft', is_instruct_finetuned = True, model_path = './output/training_results/Gemma-2B_sft/')
+Model(model_name = 'Gemma-2B_sft', is_instruct_finetuned = True)
+
 $ python run_benchmark.py
     --algorithms=ALGO1[,ALGO2[,ALGO3[,...]]]
@@ -323,6 +323,37 @@ Basic Usage
     [...] #additional arguments will be supplied to the algorithms and the challenges when they are instantiated; only string values are supported
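+For instance, a hypothetical invocation (the algorithm and challenge names are illustrative placeholders, and the --challenges flag is assumed to select judges symmetrically to --algorithms; check the full flag listing above for the authoritative options):
+
+$ python run_benchmark.py --algorithms=LifelongDPO --challenges=Coevolve #both flags accept comma-separated lists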
+Running The Benchmarks with the Python API
+You can also run the benchmark with the Python API using the run_benchmark function.
+Run a single benchmarking test on a single examinee and a single judge, and return the results.
+Parameters:
+ExamineeClass (Type[ExamineeBase]) – Necessary, examinee class object representing the algorithm to be evaluated. Can be any subclass of ExamineeBase, including user-implemented ones. Note that this is the class object, not an instance of the class.
+judge (Type[JudgeBase]) – Necessary, judge class object representing the challenge to be evaluated. Can be any subclass of JudgeBase, including user-implemented ones. Note that this is the class object, not an instance of the class.
+kwargs (Dict[str, str]) – Optional, additional arguments to be passed to the examinee and the judge. Pass the same str-typed arguments as you would in the command line.
+Returns:
+A dictionary containing the results of the benchmarking test. The dictionary is in the exact same format as the results of command-line benchmarking.
+Return type:
+Dict[str, Any]
+from progressgym import run_benchmark, CoevolveJudge, LifelongDPOExaminee # if using PyPI package
+results = run_benchmark(LifelongDPOExaminee, CoevolveJudge)
+
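+If you need to forward additional options, the kwargs parameter mirrors the command line. A minimal sketch (the option name below is illustrative, not a documented flag; consult the flag listing at the beginning of run_benchmark.py for real ones):
+
+results = run_benchmark(LifelongDPOExaminee, CoevolveJudge, output_filename='my_run') # str-typed kwargs reach both the examinee and the judge when they are instantiated
+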
+It’s worth noting that the Python API is more flexible than the command line interface, as you can implement your own Judge and Examinee classes (inheriting from JudgeBase and ExamineeBase respectively) and pass them to the run_benchmark function.
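+A minimal sketch of that custom-class route (the import of ExamineeBase from the package root is an assumption; if it is not re-exported there, import it from benchmark.framework instead, and see the ExamineeBase documentation for the actual methods to override):
+
+from progressgym import run_benchmark, CoevolveJudge, ExamineeBase # if using PyPI package
+
+class MyExaminee(ExamineeBase):
+    # Override the relevant ExamineeBase hooks here to implement your own
+    # alignment algorithm; a bare subclass is shown only to illustrate the wiring.
+    pass
+
+results = run_benchmark(MyExaminee, CoevolveJudge) # pass the class object itself, not an instance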