Make the accelerator EP optional for non-onnx pass (#1072)

## Describe your changes
* Originally, `execution_providers` in the accelerator spec was required regardless of whether it was used. Even if the user didn't specify them, the local system would infer the available EPs and autofill them. This is painful for passes that don't use the EP.
* In this PR, if no pass belongs to the ONNX techniques, we do not infer the EP based on the device.
* On the other hand, if the device is not specified, we choose cpu by default.

## Checklist before requesting a review
- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Lint and apply fixes to your code by running `lintrunner -a`
- [ ] Is this a user-facing change? If yes, give a description of this change to be included in the release notes.
- [ ] Is this PR including examples changes? If yes, please remember to update [example documentation](https://github.com/microsoft/Olive/blob/main/docs/source/examples.md) in a follow-up PR.

## (Optional) Issue link
microsoft · Apr 24, 2024 · 3de60e0 · 3de60e0
1 parent feff02b
commit 3de60e0
Show file tree

Hide file tree

Showing 13 changed files with 433 additions and 180 deletions.
diff --git a/examples/open_llama/llama_qlora.json b/examples/open_llama/llama_qlora.json
@@ -14,10 +14,7 @@
             "config": {
                 "accelerators": [
                     {
-                        "device": "gpu",
-                        "execution_providers": [
-                            "CPUExecutionProvider"
-                        ]
+                        "device": "gpu"
                     }
                 ]
             }

diff --git a/examples/open_llama/open_llama_loftq_tinycodes.json b/examples/open_llama/open_llama_loftq_tinycodes.json
@@ -14,10 +14,7 @@
             "config": {
                 "accelerators": [
                     {
-                        "device": "gpu",
-                        "execution_providers": [
-                            "CPUExecutionProvider"
-                        ]
+                        "device": "gpu"
                     }
                 ]
             }

diff --git a/examples/open_llama/open_llama_lora_tinycodes.json b/examples/open_llama/open_llama_lora_tinycodes.json
@@ -14,10 +14,7 @@
             "config": {
                 "accelerators": [
                     {
-                        "device": "gpu",
-                        "execution_providers": [
-                            "CPUExecutionProvider"
-                        ]
+                        "device": "gpu"
                     }
                 ]
             }

diff --git a/examples/open_llama/open_llama_qlora_ort_tinycodes.json b/examples/open_llama/open_llama_qlora_ort_tinycodes.json
@@ -14,10 +14,7 @@
             "config": {
                 "accelerators": [
                     {
-                        "device": "gpu",
-                        "execution_providers": [
-                            "CPUExecutionProvider"
-                        ]
+                        "device": "gpu"
                     }
                 ]
             }

diff --git a/examples/open_llama/open_llama_qlora_tinycodes.json b/examples/open_llama/open_llama_qlora_tinycodes.json
@@ -14,10 +14,7 @@
             "config": {
                 "accelerators": [
                     {
-                        "device": "gpu",
-                        "execution_providers": [
-                            "CPUExecutionProvider"
-                        ]
+                        "device": "gpu"
                     }
                 ]
             }

diff --git a/examples/phi/phi_qlora_tinycodes.json b/examples/phi/phi_qlora_tinycodes.json
@@ -17,10 +17,7 @@
             "config": {
                 "accelerators": [
                     {
-                        "device": "gpu",
-                        "execution_providers": [
-                            "CPUExecutionProvider"
-                        ]
+                        "device": "gpu"
                     }
                 ]
             }

diff --git a/olive/hardware/accelerator.py b/olive/hardware/accelerator.py
diff --git a/olive/systems/docker/utils.py b/olive/systems/docker/utils.py
@@ -52,8 +52,10 @@ def create_evaluate_command(
         f"--output_path {output_path}",
         f"--output_name {output_name}",
         f"--accelerator_type {accelerator.accelerator_type}",
-        f"--execution_provider {accelerator.execution_provider}",
     ]
+    if accelerator.execution_provider:
+        parameters.append(f"--execution_provider {accelerator.execution_provider}")
+
     return f"python {eval_script_path} {' '.join(parameters)}"
 
 

diff --git a/olive/systems/utils/misc.py b/olive/systems/utils/misc.py
@@ -39,9 +39,9 @@ def create_managed_system(system_config: "SystemConfig", accelerator: "Accelerat
 
     # for host system, use the first available accelerator
     if accelerator:
-        accelerator_cfg = [
-            {"device": accelerator.accelerator_type, "execution_providers": [accelerator.execution_provider]}
-        ]
+        accelerator_cfg = [{"device": accelerator.accelerator_type}]
+        if accelerator.execution_provider:
+            accelerator_cfg[0]["execution_providers"] = [accelerator.execution_provider]
     else:
         accelerator_cfg = None
         accelerator = DEFAULT_CPU_ACCELERATOR
@@ -83,6 +83,7 @@ def create_managed_system(system_config: "SystemConfig", accelerator: "Accelerat
     elif system_config.type == SystemType.Docker:
         from olive.systems.docker import DockerSystem
 
+        assert accelerator.execution_provider, "Execution provider must be specified for Docker system"
         dockerfile = PROVIDER_DOCKERFILE_MAPPING.get(accelerator.execution_provider, "Dockerfile.cpu")
         # TODO(myguo): create a temp dir for the build context
         new_system = DockerSystem(
@@ -102,6 +103,7 @@ def create_managed_system(system_config: "SystemConfig", accelerator: "Accelerat
     elif system_config.type == SystemType.AzureML:
         from olive.systems.azureml import AzureMLSystem
 
+        assert accelerator.execution_provider, "Execution provider must be specified for Docker system"
         dockerfile = PROVIDER_DOCKERFILE_MAPPING.get(accelerator.execution_provider, "Dockerfile.cpu")
         temp_dir = tempfile.TemporaryDirectory()  # pylint: disable=consider-using-with
         build_context_path = Path(temp_dir.name)

diff --git a/olive/workflows/run/run.py b/olive/workflows/run/run.py
@@ -119,6 +119,16 @@ def get_pass_extras(pass_type):
         )
 
 
+def get_pass_module_path(pass_type: str, package_config: OlivePackageConfig) -> str:
+    pass_module_config = package_config.passes.get(pass_type)
+    return pass_module_config.module_path
+
+
+def is_execution_provider_required(run_config: RunConfig, package_config: OlivePackageConfig) -> bool:
+    passes = get_used_passes(run_config)
+    return any(get_pass_module_path(p.type, package_config).startswith("olive.passes.onnx") for p in passes)
+
+
 def run_engine(package_config: OlivePackageConfig, run_config: RunConfig, data_root: str = None):
     import onnxruntime as ort
 
@@ -147,6 +157,16 @@ def run_engine(package_config: OlivePackageConfig, run_config: RunConfig, data_r
 
         AzureMLSystem.olive_config = run_config.to_json()
 
+    auto_optimizer_enabled = (
+        not run_config.passes
+        and run_config.auto_optimizer_config is not None
+        and not run_config.auto_optimizer_config.disable_auto_optimizer
+    )
+    if auto_optimizer_enabled:
+        is_ep_required = True
+    else:
+        is_ep_required = is_execution_provider_required(run_config, package_config)
+
     # Register passes since we need to know whether they need to run on target
     used_passes = list(get_used_passes(run_config))
     for pass_config in used_passes:
@@ -166,15 +186,13 @@ def run_engine(package_config: OlivePackageConfig, run_config: RunConfig, data_r
             for pass_config in used_passes
         )
     )
-    accelerator_specs = create_accelerators(engine.target_config, skip_supported_eps_check=target_not_used)
+    accelerator_specs = create_accelerators(
+        engine.target_config, skip_supported_eps_check=target_not_used, is_ep_required=is_ep_required
+    )
 
     pass_list = []
     acc_list = []
-    if (
-        not run_config.passes
-        and run_config.auto_optimizer_config is not None
-        and not run_config.auto_optimizer_config.disable_auto_optimizer
-    ):
+    if auto_optimizer_enabled:
         # For auto optimizer, Olive generates passes and pass_flows for each accelerator
         # that means, the passes and pass_flows might be different for each accelerator
         for acc_spec in accelerator_specs:
@@ -333,7 +351,7 @@ def get_local_ort_packages() -> List[str]:
     return local_ort_packages
 
 
-def get_used_passes(run_config: RunConfig) -> Generator[RunPassConfig, None, None]:
+def get_used_passes(run_config: RunConfig) -> Generator["RunPassConfig", None, None]:
     if run_config.pass_flows:
         passes = set()
         for pass_flow in run_config.pass_flows:

diff --git a/test/unit_test/hardware/test_accelerator.py b/test/unit_test/hardware/test_accelerator.py
@@ -10,7 +10,7 @@
 import pytest
 
 from olive.common.config_utils import validate_config
-from olive.hardware.accelerator import AcceleratorLookup, AcceleratorSpec, create_accelerators, normalize_accelerators
+from olive.hardware.accelerator import AcceleratorLookup, AcceleratorNormalizer, AcceleratorSpec, create_accelerators
 from olive.systems.common import AcceleratorConfig, SystemType
 from olive.systems.python_environment.python_environment_system import PythonEnvironmentSystem
 from olive.systems.system_config import SystemConfig
@@ -248,7 +248,7 @@ def test_create_accelerators(get_available_providers_mock, system_config, expect
                     "execution_providers": ["CUDAExecutionProvider", "CPUExecutionProvider"],
                 }
             ],
-            ["The following execution providers are not supported: ROCMExecutionProvider"],
+            ["The following execution providers are not supported: 'ROCMExecutionProvider'"],
             ["CUDAExecutionProvider", "CPUExecutionProvider"],
         ),
         (
@@ -273,7 +273,7 @@ def test_create_accelerators(get_available_providers_mock, system_config, expect
                     "execution_providers": ["CUDAExecutionProvider", "CPUExecutionProvider"],
                 }
             ],
-            ["The following execution providers are not supported: ROCMExecutionProvider"],
+            ["The following execution providers are not supported: 'ROCMExecutionProvider'"],
             ["CUDAExecutionProvider", "CPUExecutionProvider"],
         ),
     ],
@@ -302,7 +302,7 @@ def test_normalize_accelerators(
         )
         python_mock.start()
 
-    normalized_accs = normalize_accelerators(system_config, skip_supported_eps_check=False)
+    normalized_accs = AcceleratorNormalizer(system_config, skip_supported_eps_check=False).normalize()
     assert len(normalized_accs.config.accelerators) == len(expected_accs)
     for i, acc in enumerate(expected_accs):
         assert normalized_accs.config.accelerators[i].device == acc["device"]
@@ -316,6 +316,32 @@ def test_normalize_accelerators(
         python_mock.stop()
 
 
+@pytest.mark.parametrize(
+    ("system_config", "expected_acc"),
+    [
+        (
+            {
+                "type": "LocalSystem",
+                "config": {"accelerators": [{"device": "cpu", "execution_providers": ["CUDAExecutionProvider"]}]},
+            },
+            ("cpu", ["CPUExecutionProvider"]),
+        ),
+        (
+            {
+                "type": "LocalSystem",
+                "config": {"accelerators": [{"execution_providers": ["QNNExecutionProvider"]}]},
+            },
+            ("npu", ["QNNExecutionProvider"]),
+        ),
+    ],
+)
+def test_normalize_accelerators_skip_ep_check(system_config, expected_acc):
+    system_config = validate_config(system_config, SystemConfig)
+    normalized_accs = AcceleratorNormalizer(system_config, skip_supported_eps_check=True).normalize()
+    assert normalized_accs.config.accelerators[0].device == expected_acc[0]
+    assert normalized_accs.config.accelerators[0].execution_providers == expected_acc[1]
+
+
 @pytest.mark.parametrize(
     ("system_config", "available_providers", "exception", "error_message"),
     [
@@ -387,6 +413,63 @@ def test_create_accelerator_with_error(
         assert error_message in str(exp.value)
 
 
+@pytest.mark.parametrize(
+    ("system_config", "expected_acc_specs"),
+    [
+        # LocalSystem
+        (
+            {
+                "type": "LocalSystem",
+                "config": {"accelerators": [{"device": "cpu", "execution_providers": ["CPUExecutionProvider"]}]},
+            },
+            [("cpu", "CPUExecutionProvider")],
+        ),
+        # doesn't specify the accelerator
+        (
+            {
+                "type": "LocalSystem",
+            },
+            [("cpu", None)],
+        ),
+        # only specify the device
+        (
+            {
+                "type": "LocalSystem",
+                "config": {"accelerators": [{"device": "gpu"}]},
+            },
+            [("gpu", None)],
+        ),
+        # only specify the EP
+        (
+            {
+                "type": "LocalSystem",
+                "config": {"accelerators": [{"execution_providers": ["CPUExecutionProvider"]}]},
+            },
+            [("cpu", None)],
+        ),
+        (
+            {
+                "type": "AzureML",
+                "config": {
+                    "aml_compute": "aml_compute",
+                    "olive_managed_env": False,
+                    "accelerators": [{"device": "gpu"}],
+                },
+            },
+            [("gpu", None)],
+        ),
+    ],
+)
+def test_create_accelerator_without_ep(system_config, expected_acc_specs):
+    system_config = validate_config(system_config, SystemConfig)
+    expected_accelerator_specs = [
+        AcceleratorSpec(accelerator_type=acc_spec[0].lower(), execution_provider=acc_spec[1])
+        for acc_spec in expected_acc_specs
+    ]
+    accelerators = create_accelerators(system_config, skip_supported_eps_check=False, is_ep_required=False)
+    assert accelerators == expected_accelerator_specs
+
+
 def test_accelerator_config():
     acc_cfg1 = AcceleratorConfig.parse_obj({"device": "cpu"})
     assert acc_cfg1.execution_providers is None

diff --git a/test/unit_test/workflows/test_run_config.py b/test/unit_test/workflows/test_run_config.py
@@ -11,7 +11,9 @@
 
 from olive.common.pydantic_v1 import ValidationError
 from olive.data.config import DataConfig
+from olive.package_config import OlivePackageConfig
 from olive.workflows.run.config import INPUT_MODEL_DATA_CONFIG, RunConfig
+from olive.workflows.run.run import get_pass_module_path, is_execution_provider_required
 
 # pylint: disable=attribute-defined-outside-init, unsubscriptable-object
 
@@ -20,6 +22,7 @@ class TestRunConfig:
     # like: Systems/Evaluation/Model and etc.
     @pytest.fixture(autouse=True)
     def setup(self):
+        self.package_config = OlivePackageConfig.parse_file(OlivePackageConfig.get_default_config_path())
         self.user_script_config_file = Path(__file__).parent / "mock_data" / "user_script.json"
 
     @pytest.mark.parametrize(
@@ -150,6 +153,53 @@ def test_deprecated_engine_ep(self):
         errors = e.value.errors()
         assert errors[0]["loc"] == ("engine", "execution_providers")
 
+    @pytest.mark.parametrize(("pass_type", "is_onnx"), [("IncQuantization", True), ("LoRA", False)])
+    def test_get_module_path(self, pass_type, is_onnx):
+        pass_module = get_pass_module_path(pass_type, self.package_config)
+        assert pass_module.startswith("olive.passes.onnx") == is_onnx
+
+    @pytest.mark.parametrize(
+        ("passes", "pass_flows", "is_onnx"),
+        [
+            (None, None, True),
+            (
+                {
+                    "lora": {"type": "LoRA"},
+                },
+                None,
+                False,
+            ),
+            (
+                {
+                    "lora": {"type": "LoRA"},
+                    "quantization": {"type": "IncQuantization"},
+                },
+                None,
+                True,
+            ),
+            (
+                {
+                    "lora": {"type": "LoRA"},
+                    "quantization": {"type": "IncQuantization"},
+                },
+                [["lora"]],
+                False,
+            ),
+        ],
+    )
+    def test_is_execution_provider_required(self, passes, pass_flows, is_onnx):
+        with self.user_script_config_file.open() as f:
+            user_script_config = json.load(f)
+
+        if passes:
+            user_script_config["passes"] = passes
+        if pass_flows:
+            user_script_config["pass_flows"] = pass_flows
+
+        run_config = RunConfig.parse_obj(user_script_config)
+        result = is_execution_provider_required(run_config, self.package_config)
+        assert result == is_onnx
+
 
 class TestDataConfigValidation:
     @pytest.fixture(autouse=True)