From bdca157ccead64378e826ec691ba26893bfc6849 Mon Sep 17 00:00:00 2001
From: Naomi Pentrel <5212232+npentrel@users.noreply.github.com>
Date: Wed, 23 Oct 2024 11:13:14 +0200
Subject: [PATCH 1/2] DOCS-2887: Support command line args for training scripts

---
 docs/cli.md                                    |  9 +++++----
 docs/how-tos/create-custom-training-scripts.md | 12 ++++++++----
 2 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/docs/cli.md b/docs/cli.md
index 084e46b3cd..0b82a71205 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -1295,8 +1295,8 @@ Use a training script to train an ML model on data.
 
 ```sh {class="command-line" data-prompt="$"}
 viam train submit managed --dataset-id=<dataset-id> --model-org-id=<model-org-id> --model-name=<model-name> --model-type=<model-type> --model-labels=<model-labels> [...named args]
-viam train submit custom from-registry --dataset-id=<dataset-id> --org-id=<org-id> --model-name=<model-name> --script-name=<script-name> --version=<version> [...named args]
-viam train submit custom with-upload --dataset-id=<dataset-id> --org-id=<org-id> --model-name=<model-name> --path=<path> --script-name=<script-name> [...named args]
+viam train submit custom from-registry --dataset-id=<dataset-id> --org-id=<org-id> --model-name=<model-name> --script-name=<script-name> --version=<version> --args=<arg-key>=<arg-value> [...named args]
+viam train submit custom with-upload --dataset-id=<dataset-id> --org-id=<org-id> --model-name=<model-name> --path=<path> --script-name=<script-name> --args=<arg-key>=<arg-value> [...named args]
 viam train get --job-id=<job-id>
 viam train logs --job-id=<job-id>
 viam train cancel --job-id=<job-id>
@@ -1310,10 +1310,10 @@ Examples:
 viam train submit managed --dataset-id=456 --model-org-id=123 --model-name=MyCoolClassifier --model-type=single_label_classification --model-labels=1,2,3
 
 # submit custom training job with an existing training script in the Registry on data in Viam cloud
-viam train submit custom from-registry --dataset-id=<INSERT DATASET ID> --org-id=<INSERT ORG ID> --model-name=MyRegistryModel --model-version=2 --version=1 --script-name=mycompany:MyCustomTrainingScript
+viam train submit custom from-registry --dataset-id=<INSERT DATASET ID> --org-id=<INSERT ORG ID> --model-name=MyRegistryModel --model-version=2 --version=1 --script-name=mycompany:MyCustomTrainingScript --args=num_epochs=3,model_type=multi_label
 
 # submit custom training job with an uploaded training script on data in Viam cloud
-viam train submit custom with-upload --dataset-id=<INSERT DATASET ID> --model-org-id=<INSERT ORG ID> --model-name=MyRegistryModel --model-type=single_label_classification --model-version=2 --version=1 --path=<path-to-tar.gz> --script-name=mycompany:MyCustomTrainingScript
+viam train submit custom with-upload --dataset-id=<INSERT DATASET ID> --model-org-id=<INSERT ORG ID> --model-name=MyRegistryModel --model-type=single_label_classification --model-version=2 --version=1 --path=<path-to-tar.gz> --script-name=mycompany:MyCustomTrainingScript --args=num_epochs=3,labels="'green_square blue_star'"
 
 # get a training job from Viam cloud based on training job ID
 viam train get --job-id=123
@@ -1373,6 +1373,7 @@ viam train list --org-id=123 --job-status=completed
 | `--job-id` | The ID of the training job to get or cancel. You can retrieve this value with `train list`. | `get`, `logs`, `cancel` | **Required** |
 | `--job-status` | Training status to filter for. Can be one of `canceled`, `canceling`, `completed`, `failed`, `in_progress`, `pending`, or `unspecified`. | `list` | **Required** |
 | `--framework` | Framework of the ML training script to upload, can be `tflite`, `tensorflow`, `pytorch`, or `onnx`. | `submit custom with-upload` | Optional |
+| `--args` | Custom arguments to pass to the training script, as comma-separated `key=value` pairs. Example: `num_epochs=3,model_type=multi_label`. To include whitespace in a value, enclose that value in both double and single quotes. Example: `num_epochs=3,labels="'green_square blue_star'"`. | `submit custom from-registry`, `submit custom with-upload` | Optional |
 
 ### `version`
 
diff --git a/docs/how-tos/create-custom-training-scripts.md b/docs/how-tos/create-custom-training-scripts.md
index 75bc8874fc..056d934448 100644
--- a/docs/how-tos/create-custom-training-scripts.md
+++ b/docs/how-tos/create-custom-training-scripts.md
@@ -310,6 +310,8 @@ The script you are creating must take the following command line inputs:
 
 The `parse_args()` function in the template parses your arguments.
 
+You can add additional custom commandline inputs by adding them to the `parse_args()` function.
+
 {{% /expand %}}
 
 {{% expand "Click for more information on parsing annotations from dataset file." %}}
@@ -452,7 +454,7 @@ You can export one of your Viam datasets to test your training script locally.
 
 You can get the dataset id from the dataset page or using the [`viam dataset list`](/cli/#dataset) command:
 
-```sh {class="command-line" data-prompt="$" data-output="1-10"}
+```sh {class="command-line" data-prompt="$"}
 viam dataset export --destination=<destination> --dataset-id=<dataset-id> --include-jsonl=true
 ```
 
@@ -465,8 +467,9 @@ Use the `parse_filenames_and_labels_from_json` and `parse_filenames_and_bboxes_f
 
 Install any required dependencies and run your training script specifying the path to the <FILE>dataset.jsonl</FILE> file from your exported dataset:
 
-```sh {class="command-line" data-prompt="$" data-output="1-10"}
-python3 -m model.training --dataset_file=/path/to/dataset.jsonl --model_output_directory=.
+```sh {class="command-line" data-prompt="$"}
+python3 -m model.training --dataset_file=/path/to/dataset.jsonl \
+    --model_output_directory=. --args=custom_arg=3
 ```
 
 {{% /tablestep %}}
@@ -482,7 +485,7 @@ To be able to use your training script in the Viam platform, you must upload it
 
 Before you can upload your training script to Viam, you have to compress your project folder into a tar.gz file:
 
-```sh {class="command-line" data-prompt="$" data-output="1-10"}
+```sh {class="command-line" data-prompt="$"}
 tar -czvf my-training.tar.gz my-training/
 ```
 
@@ -564,6 +567,7 @@ viam train submit custom from-registry --dataset-id=<INSERT DATASET ID> \
   --org-id=<INSERT ORG ID> --model-name=MyRegistryModel \
   --model-version=2 --version=1 \
-  --script-name=mycompany:MyCustomTrainingScript
+  --script-name=mycompany:MyCustomTrainingScript \
+  --args=custom_arg1=3,custom_arg2="'green_square blue_star'"
 ```
 
 This command submits a training job to the previously uploaded `MyCustomTrainingScript` with another input dataset, which trains `MyRegistryModel` and publishes that to the registry.

From 8353fdb58b93da116061de8fdd7cb0d28b10cc15 Mon Sep 17 00:00:00 2001
From: Naomi Pentrel <5212232+npentrel@users.noreply.github.com>
Date: Thu, 24 Oct 2024 14:30:42 +0200
Subject: [PATCH 2/2] Apply suggestions from code review

Co-authored-by: JessamyT <75634662+JessamyT@users.noreply.github.com>
Co-authored-by: Vignesh P <52717428+vpandiarajan20@users.noreply.github.com>
---
 docs/how-tos/create-custom-training-scripts.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/how-tos/create-custom-training-scripts.md b/docs/how-tos/create-custom-training-scripts.md
index 056d934448..57ecb96553 100644
--- a/docs/how-tos/create-custom-training-scripts.md
+++ b/docs/how-tos/create-custom-training-scripts.md
@@ -310,7 +310,7 @@ The script you are creating must take the following command line inputs:
 
 The `parse_args()` function in the template parses your arguments.
 
-You can add additional custom commandline inputs by adding them to the `parse_args()` function.
+You can define additional custom command line inputs by adding them to the `parse_args()` function.
 
 {{% /expand %}}
 
@@ -452,7 +452,7 @@ You can export one of your Viam datasets to test your training script locally.
 {{% tablestep %}}
 **1. Export your dataset**
 
-You can get the dataset id from the dataset page or using the [`viam dataset list`](/cli/#dataset) command:
+You can get the dataset ID from the dataset page or using the [`viam dataset list`](/cli/#dataset) command:
 
 ```sh {class="command-line" data-prompt="$"}
 viam dataset export --destination=<destination> --dataset-id=<dataset-id> --include-jsonl=true
@@ -469,7 +469,7 @@ Install any required dependencies and run your training script specifying the pa
 
 ```sh {class="command-line" data-prompt="$"}
 python3 -m model.training --dataset_file=/path/to/dataset.jsonl \
-    --model_output_directory=. --args=custom_arg=3
+    --model_output_directory=. --custom_arg=3
 ```
 
 {{% /tablestep %}}