[air] Move to new storage_path API in tests and examples (ray-project#34263)

Following ray-project#33463, this PR updates our tests, examples, and docs to use the new `storage_path` API.

The only locations where we continue to use the `local_dir` argument are tests that specify both a local directory and a remote directory. For these tests, we can move to an environment-variable-based wrapper in the future.
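For reference, a minimal before/after sketch of the pattern this PR rolls out across tests, examples, and docs (the experiment name and bucket path below are placeholders):

    from ray import air, tune

    # Before: the local results directory and the cloud upload location
    # were configured separately.
    run_config = air.RunConfig(
        name="my_experiment",  # placeholder name
        local_dir="~/ray_results",
        sync_config=tune.SyncConfig(upload_dir="s3://my-bucket/path"),  # placeholder bucket
    )

    # After: a single storage_path takes either a local directory or a cloud URI.
    run_config = air.RunConfig(
        name="my_experiment",
        storage_path="s3://my-bucket/path",  # or "~/ray_results" for local-only runs
    )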

Signed-off-by: Kai Fricke <[email protected]>
Signed-off-by: elliottower <[email protected]>
krfricke authored and elliottower committed Apr 22, 2023
1 parent dba4144 commit 074e976
Showing 47 changed files with 236 additions and 251 deletions.
7 changes: 3 additions & 4 deletions doc/source/ray-air/doc_code/tuner.py
@@ -104,7 +104,7 @@

tuner = Tuner(
trainable=trainer,
run_config=RunConfig(name="test_tuner", local_dir="~/ray_results"),
run_config=RunConfig(name="test_tuner", storage_path="~/ray_results"),
param_space=param_space,
tune_config=tune.TuneConfig(
mode="min", metric="loss", num_samples=2, max_concurrent_trials=2
@@ -215,14 +215,13 @@ def get_another_dataset():
# __result_grid_inspection_end__

# __run_config_start__
- from ray import air, tune
+ from ray import air
from ray.air.config import RunConfig

run_config = RunConfig(
name="MyExperiment",
local_dir="./your_log_directory/",
storage_path="s3://...",
verbose=2,
sync_config=tune.SyncConfig(upload_dir="s3://..."),
checkpoint_config=air.CheckpointConfig(checkpoint_frequency=2),
)
# __run_config_end__
2 changes: 1 addition & 1 deletion doc/source/ray-air/examples/batch_forecasting.ipynb
@@ -1411,7 +1411,7 @@
" ),\n",
" run_config=air.RunConfig(\n",
" # Redirect logs to relative path instead of default ~/ray_results/.\n",
" local_dir=\"my_Tune_logs\",\n",
" storage_path=\"my_Tune_logs\",\n",
" # Specify name to make logs easier to find in log path.\n",
" name=\"ptf_nyc\",\n",
" ),\n",
2 changes: 1 addition & 1 deletion doc/source/ray-air/examples/batch_tuning.ipynb
@@ -667,7 +667,7 @@
" param_space=search_space,\n",
" run_config=air.RunConfig(\n",
" # redirect logs to relative path instead of default ~/ray_results/\n",
" local_dir=\"my_Tune_logs\",\n",
" storage_path=\"my_Tune_logs\",\n",
" name=\"batch_tuning\",\n",
" # Set Ray Tune verbosity. Print summary table only with levels 2 or 3.\n",
" verbose=2,\n",
10 changes: 4 additions & 6 deletions doc/source/train/doc_code/key_concepts.py
@@ -102,8 +102,8 @@ def train_fn(config):
run_config = RunConfig(
# Name of the training run (directory name).
name="my_train_run",
- # Directory to store results in (will be local_dir/name).
- local_dir="~/ray_results",
+ # Directory to store results in (will be storage_path/name).
+ storage_path="~/ray_results",
# Low training verbosity.
verbose=1,
)
@@ -125,10 +125,8 @@ def train_fn(config):
from ray.tune import SyncConfig

run_config = RunConfig(
- sync_config=SyncConfig(
- # This will store checkpoints on S3.
- upload_dir="s3://remote-bucket/location"
- )
+ # This will store checkpoints on S3.
+ storage_path="s3://remote-bucket/location"
)
# __sync_config_end__

@@ -290,7 +290,7 @@
"\n",
"run_config = RunConfig(\n",
" name=\"ptl-mnist-example\",\n",
" local_dir=\"/tmp/ray_results\",\n",
" storage_path=\"/tmp/ray_results\",\n",
" checkpoint_config=CheckpointConfig(\n",
" num_to_keep=3,\n",
" checkpoint_score_attribute=\"val_accuracy\",\n",
@@ -355,7 +355,7 @@
"# Set experiment name and checkpoint configs\n",
"run_config = RunConfig(\n",
" name=\"finetune-resnet\",\n",
" local_dir=\"/tmp/ray_results\",\n",
" storage_path=\"/tmp/ray_results\",\n",
" checkpoint_config=checkpoint_config,\n",
")\n"
]
@@ -628,7 +628,7 @@
"## Load the checkpoint for prediction:\n",
"\n",
" \n",
" The metadata and checkpoints have already been saved in the `local_dir` specified in TorchTrainer:"
" The metadata and checkpoints have already been saved in the `storage_path` specified in TorchTrainer:"
]
},
{
6 changes: 3 additions & 3 deletions doc/source/train/faq.rst
@@ -44,8 +44,8 @@ Since this is applicable to all of Ray Train's built-in trainers,
we'll use `FrameworkTrainer` to refer to a generic trainer for the remainder of this answer.

To restore an experiment, first find the experiment directory that your previous
- run was saved to. If you saved locally, this will look like ``{local_dir}/{name}``,
- where ``local_dir`` may be ``~/ray_results``, and ``name`` is something
+ run was saved to. If you saved locally, this will look like ``{storage_path}/{name}``,
+ where ``storage_path`` may be ``~/ray_results``, and ``name`` is something
like ``FrameworkTrainer_2023-xxx``.

Note that these are the same parameters that you pass through :class:`~ray.air.RunConfig`.
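For illustration, a minimal sketch of restoring from that directory (assuming a ``TorchTrainer``-based run on a recent Ray release; the path below is a placeholder):

    from ray.train.torch import TorchTrainer

    # The experiment directory is {storage_path}/{name}, as described above.
    experiment_path = "~/ray_results/TorchTrainer_2023-xxx"  # placeholder path
    restored_trainer = TorchTrainer.restore(experiment_path)
    result = restored_trainer.fit()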
@@ -108,7 +108,7 @@ to determine the existence/validity of the given experiment directory.
scaling_config=air.ScalingConfig(num_workers=2, use_gpu=False),
run_config=air.RunConfig(
name=experiment_name,
local_dir="~/ray_results",
storage_path="~/ray_results",
failure_config=air.FailureConfig(max_failures=3),
stop={"training_iteration": 10},
),
2 changes: 1 addition & 1 deletion doc/source/tune/api/suggestion.rst
@@ -84,7 +84,7 @@ identifier:
),
run_config=air.RunConfig(
name="my-experiment-1",
local_dir="~/my_results",
storage_path="~/my_results",
)
)
results = tuner_1.fit()
13 changes: 5 additions & 8 deletions doc/source/tune/doc_code/faq.py
@@ -223,7 +223,7 @@ def trainable(config):
# __torch_seed_example_end__

# __large_data_start__
- from ray import tune, air
+ from ray import air, tune
import numpy as np


@@ -244,7 +244,7 @@ def f(config, data=None):
# __log_1_start__
tuner = tune.Tuner(
MyTrainableClass,
sync_config=tune.SyncConfig(upload_dir="s3://my-log-dir"),
run_config=air.RunConfig(storage_path="s3://my-log-dir"),
)
tuner.fit()
# __log_1_end__
@@ -268,9 +268,7 @@ def delete(self, remote_dir: str) -> bool:

tuner = tune.Tuner(
MyTrainableClass,
- sync_config=tune.SyncConfig(
- upload_dir="s3://my-log-dir", syncer=CustomSyncer()
- ),
+ run_config=air.RunConfig(storage_path="s3://my-log-dir"),
)
tuner.fit()
# __log_2_end__
@@ -344,7 +342,6 @@ def wait(self):
sync_down_template="aws s3 sync {source} {target}",
delete_template="aws s3 rm {target} --recursive",
),
upload_dir="s3://bucket/path",
)
# __custom_command_syncer_end__

@@ -356,7 +353,7 @@ def wait(self):
tuner = tune.Tuner(
train_fn,
# ...,
sync_config=tune.SyncConfig(upload_dir="s3://your-s3-bucket/durable-trial/"),
run_config=air.RunConfig(storage_path="s3://your-s3-bucket/durable-trial/"),
)
tuner.fit()
# __s3_end__
@@ -367,7 +364,7 @@ def wait(self):
tuner = tune.Tuner(
train_fn,
run_config=air.RunConfig(
local_dir="/path/to/shared/storage",
storage_path="/path/to/shared/storage",
),
sync_config=tune.SyncConfig(
# Do not sync because we are on shared storage
2 changes: 1 addition & 1 deletion doc/source/tune/examples/pbt_guide.ipynb
@@ -199,7 +199,7 @@
" checkpoint_score_attribute=\"mean_accuracy\",\n",
" num_to_keep=4,\n",
" ),\n",
" local_dir=\"/tmp/ray_results\",\n",
" storage_path=\"/tmp/ray_results\",\n",
" ),\n",
" tune_config=tune.TuneConfig(\n",
" scheduler=scheduler,\n",
2 changes: 1 addition & 1 deletion doc/source/tune/examples/tune-aim.ipynb
@@ -262,7 +262,7 @@
" train_function,\n",
" run_config=air.RunConfig(\n",
" callbacks=[AimLoggerCallback()],\n",
" local_dir=\"/tmp/ray_results\",\n",
" storage_path=\"/tmp/ray_results\",\n",
" name=\"aim_example\",\n",
" ),\n",
" param_space={\n",
6 changes: 3 additions & 3 deletions doc/source/tune/examples/tune_analyze_results.ipynb
@@ -36,7 +36,7 @@
"from ray.tune.examples.mnist_pytorch import train_mnist\n",
"from ray.tune import ResultGrid\n",
"\n",
"local_dir = \"/tmp/ray_results\"\n",
"storage_path = \"/tmp/ray_results\"\n",
"exp_name = \"tune_analyzing_results\"\n",
"tuner = tune.Tuner(\n",
" train_mnist,\n",
@@ -52,7 +52,7 @@
" checkpoint_score_attribute=\"mean_accuracy\",\n",
" num_to_keep=5,\n",
" ),\n",
" local_dir=local_dir,\n",
" storage_path=storage_path,\n",
" ),\n",
" tune_config=tune.TuneConfig(mode=\"max\", metric=\"mean_accuracy\", num_samples=3),\n",
")\n",
@@ -91,7 +91,7 @@
}
],
"source": [
"experiment_path = f\"{local_dir}/{exp_name}\"\n",
"experiment_path = f\"{storage_path}/{exp_name}\"\n",
"print(f\"Loading results from {experiment_path}...\")\n",
"\n",
"restored_tuner = tune.Tuner.restore(experiment_path, trainable=train_mnist)\n",
4 changes: 2 additions & 2 deletions doc/source/tune/faq.rst
@@ -476,7 +476,7 @@ not kept open by Ray Tune.
logs and checkpoints will not be synced to the driver, so if you need to access them later, you will have to
transfer them where you need them manually.

- 2. You can use :ref:`cloud checkpointing <tune-cloud-checkpointing>` to save logs and checkpoints to a specified `upload_dir`.
+ 2. You can use :ref:`cloud checkpointing <tune-cloud-checkpointing>` to save logs and checkpoints to a specified `storage_path`.
This is the preferred way to deal with this. All syncing will be taken care of automatically, as all nodes
are able to access the cloud storage. Additionally, your results will be safe, so even when you're working on
pre-emptible instances, you won't lose any of your data.
@@ -592,7 +592,7 @@ be automatically fetched and passed to your trainable as a parameter.
How can I upload my Tune results to cloud storage?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

- If an upload directory is provided, Tune will automatically sync results from the ``local_dir`` to the given directory,
+ If an upload directory is provided, Tune will automatically sync results from the ``RAY_AIR_LOCAL_CACHE_DIR`` to the given directory,
natively supporting standard URIs for systems like S3, gsutil or HDFS. You can add more filesystems by installing
`fs-spec <https://filesystem-spec.readthedocs.io/en/latest/>`_-compatible filesystems e.g. using pip.
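As a concrete sketch of the updated workflow (the bucket URI below is a placeholder):

    from ray import air, tune
    from ray.air import session


    def trainable(config):
        # Report a single metric; logs and checkpoints land under storage_path.
        session.report({"score": config["x"] ** 2})


    tuner = tune.Tuner(
        trainable,
        param_space={"x": tune.uniform(0, 1)},
        run_config=air.RunConfig(
            name="upload_example",  # placeholder experiment name
            storage_path="s3://my-bucket/tune-results",  # placeholder bucket URI
        ),
    )
    tuner.fit()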

18 changes: 8 additions & 10 deletions doc/source/tune/tutorials/tune-distributed.rst
@@ -52,7 +52,7 @@ Analyze your results on TensorBoard by starting TensorBoard on the remote head m
ray exec tune-default.yaml 'tensorboard --logdir=~/ray_results/ --port 6006' --port-forward 6006
- Note that you can customize the directory of results by specifying: ``air.RunConfig(local_dir=..)``, taken in by ``Tuner``. You can then point TensorBoard to that directory to visualize results. You can also use `awless <https://github.com/wallix/awless>`_ for easy cluster management on AWS.
+ Note that you can customize the directory of results by specifying: ``air.RunConfig(storage_path=..)``, taken in by ``Tuner``. You can then point TensorBoard to that directory to visualize results. You can also use `awless <https://github.com/wallix/awless>`_ for easy cluster management on AWS.


Running a Distributed Tune Experiment
@@ -101,8 +101,8 @@ Storage Options in a Distributed Tune Run
-----------------------------------------

In a distributed experiment, you should try to use :ref:`cloud checkpointing <tune-cloud-checkpointing>` to
- reduce synchronization overhead. For this, you just have to specify an ``upload_dir`` in the
- :class:`tune.SyncConfig <ray.tune.SyncConfig>`.
+ reduce synchronization overhead. For this, you just have to specify a remote ``storage_path`` in the
+ :class:`air.RunConfig <ray.air.RunConfig>`.

`my_trainable` is a user-defined :ref:`Tune Trainable <tune_60_seconds_trainables>` in the following example:

@@ -114,10 +114,8 @@ reduce synchronization overhead. For this, you just have to specify an ``upload_
tuner = tune.Tuner(
my_trainable,
run_config=air.RunConfig(
name="experiment_name"
sync_config=tune.SyncConfig(
upload_dir="s3://bucket-name/sub-path/"
)
name="experiment_name",
storage_path="s3://bucket-name/sub-path/",
)
)
tuner.fit()
@@ -214,7 +212,7 @@ To summarize, here are the commands to run:
You should see Tune eventually continue the trials on a different worker node. See the :ref:`Fault Tolerance <tune-fault-tol>` section for more details.

- You can also specify ``sync_config=tune.SyncConfig(upload_dir=...)``, as part of ``air.RunConfig``, which is taken in by ``Tuner``, to sync results with a cloud storage like S3, allowing you to persist results in case you want to start and stop your cluster automatically.
+ You can also specify ``storage_path=...``, as part of ``air.RunConfig``, which is taken in by ``Tuner``, to upload results to cloud storage like S3, allowing you to persist results in case you want to start and stop your cluster automatically.

.. _tune-fault-tol:

@@ -256,8 +254,8 @@ Below are some commonly used commands for submitting experiments. Please see the
# Start a cluster and run an experiment in a detached tmux session,
# and shut down the cluster as soon as the experiment completes.
- # In `tune_experiment.py`, set `tune.SyncConfig(upload_dir="s3://...")`
- # and pass it to `sync_config=...` to persist results
+ # In `tune_experiment.py`, set `air.RunConfig(storage_path="s3://...")`
+ # to persist results
$ ray submit CLUSTER.YAML --tmux --start --stop tune_experiment.py -- --address=localhost:6379
# To start or update your cluster:
4 changes: 2 additions & 2 deletions doc/source/tune/tutorials/tune-output.rst
@@ -21,7 +21,7 @@ Tune will log the results of each trial to a sub-folder under a specified local
tuner = tune.Tuner(trainable, run_config=air.RunConfig(num_samples=2))
results = tuner.fit()
- You can specify the ``local_dir`` and ``trainable_name``:
+ You can specify the ``storage_path`` and ``trainable_name``:

.. code-block:: python
@@ -30,7 +30,7 @@ You can specify the ``local_dir`` and ``trainable_name``:
# Only trial_name is autogenerated.
tuner = tune.Tuner(trainable,
tune_config=tune.TuneConfig(num_samples=2),
- run_config=air.RunConfig(local_dir="./results", name="test_experiment"))
+ run_config=air.RunConfig(storage_path="./results", name="test_experiment"))
results = tuner.fit()