From 54a6aef22bdb31abc4574dd5ecdc595e8826928f Mon Sep 17 00:00:00 2001
From: Elizabeth Santorella
Date: Sun, 28 Jul 2024 08:22:27 -0700
Subject: [PATCH 1/5] Move lazy construction of a surrogate from problem to
 runner (#2603)

Summary:
Pull Request resolved: https://github.com/facebook/Ax/pull/2603

Context: Surrogate benchmark problems allow for downloading datasets and constructing a surrogate lazily. Since the surrogates and datasets are only needed for the `Runner`, it makes sense to confine this logic to `SurrogateRunner`. This gives surrogate benchmark problems an interface that is much closer to that of non-surrogate benchmark problems. In the future, we should be able to get down to just one `BenchmarkProblem` class.

This PR:
* Moves lazy construction of surrogates from the `Problem` to the `Runner`.
* Moves corresponding unit tests from the problem's file to the runner's.
* Removes the attribute `noise_stds` from the problem, since it duplicates the same attribute on the runner and doesn't conform to the interface of other benchmark problems.
* Requires `is_noiseless` to be provided at problem initialization, to make surrogate problems have the same interface as other problems, and adds an attribute `SurrogateRunner.is_noiseless` so that this is not difficult to provide.

Differential Revision: D60266288

Reviewed By: saitcakmak
---
 ax/benchmark/problems/surrogate.py            | 108 +++---------------
 ax/benchmark/runners/surrogate.py             |  63 +++++++++-
 .../tests/problems/test_surrogate_problems.py |  26 +----
 .../tests/runners/test_surrogate_runner.py    |  62 +++++++++-
 ax/benchmark/tests/test_benchmark.py          |   1 -
 ax/utils/testing/benchmark_stubs.py           |  22 +++-
 6 files changed, 156 insertions(+), 126 deletions(-)

diff --git a/ax/benchmark/problems/surrogate.py b/ax/benchmark/problems/surrogate.py
index 4f08ecb84f8..c9f39836509 100644
--- a/ax/benchmark/problems/surrogate.py
+++ b/ax/benchmark/problems/surrogate.py
@@ -5,7 +5,7 @@
 
 # pyre-strict
 
-from typing import Callable, Dict, List, Optional, Tuple, Union
+from typing import Dict, List, Optional, Union
 
 from ax.benchmark.metrics.base import BenchmarkMetricBase
 
@@ -14,21 +14,16 @@
     MultiObjectiveOptimizationConfig,
     OptimizationConfig,
 )
-from ax.core.runner import Runner
 from ax.core.search_space import SearchSpace
-from ax.modelbridge.torch import TorchModelBridge
 from ax.utils.common.base import Base
-from ax.utils.common.equality import equality_typechecker
-from ax.utils.common.typeutils import checked_cast, not_none
-from botorch.utils.datasets import SupervisedDataset
 
 
 class SurrogateBenchmarkProblemBase(Base):
     """
     Base class for SOOSurrogateBenchmarkProblem and MOOSurrogateBenchmarkProblem.
 
-    Its `runner` is created lazily, when `runner` is accessed or `set_runner` is
-    called, to defer construction of the surrogate and downloading of datasets.
+    Its `runner` is a `SurrogateRunner`, which allows for the surrogate to be
+    constructed lazily and datasets to be downloaded lazily.
""" def __init__( @@ -38,14 +33,10 @@ def __init__( search_space: SearchSpace, optimization_config: OptimizationConfig, num_trials: int, - outcome_names: List[str], + runner: SurrogateRunner, + is_noiseless: bool, observe_noise_stds: Union[bool, Dict[str, bool]] = False, - noise_stds: Union[float, Dict[str, float]] = 0.0, - get_surrogate_and_datasets: Optional[ - Callable[[], Tuple[TorchModelBridge, List[SupervisedDataset]]] - ] = None, tracking_metrics: Optional[List[BenchmarkMetricBase]] = None, - _runner: Optional[Runner] = None, ) -> None: """Construct a `SurrogateBenchmarkProblemBase` instance. @@ -54,80 +45,31 @@ def __init__( search_space: The search space to optimize over. optimization_config: THe optimization config for the problem. num_trials: The number of trials to run. - outcome_names: The names of the metrics the benchmark problem - produces outcome observations for. + runner: A `SurrogateRunner`, allowing for lazy construction of the + surrogate and datasets. observe_noise_stds: Whether or not to observe the observation noise level for each metric. If True/False, observe the the noise standard deviation for all/no metrics. If a dictionary, specify this for individual metrics (metrics not appearing in the dictionary will be assumed to not provide observation noise levels). - noise_stds: The standard deviation(s) of the observation noise(s). - If a single value is provided, it is used for all metrics. Providing - a dictionary allows specifying different noise levels for different - metrics (metrics not appearing in the dictionary will be assumed to - be noiseless - but not necessarily be known to the problem to be - noiseless). - get_surrogate_and_datasets: A factory function that retunrs the Surrogate - and a list of datasets to be used by the surrogate. tracking_metrics: Additional tracking metrics to compute during the optimization (not used to inform the optimization). """ - if get_surrogate_and_datasets is None and _runner is None: - raise ValueError( - "Either `get_surrogate_and_datasets` or `_runner` required." 
- ) self.name = name self.search_space = search_space self.optimization_config = optimization_config self.num_trials = num_trials - self.outcome_names = outcome_names self.observe_noise_stds = observe_noise_stds - self.noise_stds = noise_stds - self.get_surrogate_and_datasets = get_surrogate_and_datasets self.tracking_metrics: List[BenchmarkMetricBase] = tracking_metrics or [] - self._runner = _runner - - @property - def is_noiseless(self) -> bool: - if self.noise_stds is None: - return True - if isinstance(self.noise_stds, float): - return self.noise_stds == 0.0 - return all(std == 0.0 for std in checked_cast(dict, self.noise_stds).values()) + self.runner = runner + self.is_noiseless = is_noiseless @property def has_ground_truth(self) -> bool: # All surrogate-based problems have a ground truth return True - @equality_typechecker - def __eq__(self, other: Base) -> bool: - if type(other) is not type(self): - return False - - # Checking the whole datasets' equality here would be too expensive to be - # worth it; just check names instead - return self.name == other.name - - def set_runner(self) -> None: - surrogate, datasets = not_none(self.get_surrogate_and_datasets)() - - self._runner = SurrogateRunner( - name=self.name, - surrogate=surrogate, - datasets=datasets, - search_space=self.search_space, - outcome_names=self.outcome_names, - noise_stds=self.noise_stds, - ) - - @property - def runner(self) -> Runner: - if self._runner is None: - self.set_runner() - return not_none(self._runner) - def __repr__(self) -> str: """ Return a string representation that includes only the attributes that @@ -140,7 +82,7 @@ def __repr__(self) -> str: f"num_trials={self.num_trials}, " f"is_noiseless={self.is_noiseless}, " f"observe_noise_stds={self.observe_noise_stds}, " - f"noise_stds={self.noise_stds}, " + f"noise_stds={self.runner.noise_stds}, " f"tracking_metrics={self.tracking_metrics})" ) @@ -161,26 +103,18 @@ def __init__( search_space: SearchSpace, optimization_config: OptimizationConfig, num_trials: int, - outcome_names: List[str], + runner: SurrogateRunner, + is_noiseless: bool, observe_noise_stds: Union[bool, Dict[str, bool]] = False, - noise_stds: Union[float, Dict[str, float]] = 0.0, - get_surrogate_and_datasets: Optional[ - Callable[[], Tuple[TorchModelBridge, List[SupervisedDataset]]] - ] = None, - tracking_metrics: Optional[List[BenchmarkMetricBase]] = None, - _runner: Optional[Runner] = None, ) -> None: super().__init__( name=name, search_space=search_space, optimization_config=optimization_config, num_trials=num_trials, - outcome_names=outcome_names, observe_noise_stds=observe_noise_stds, - noise_stds=noise_stds, - get_surrogate_and_datasets=get_surrogate_and_datasets, - tracking_metrics=tracking_metrics, - _runner=_runner, + runner=runner, + is_noiseless=is_noiseless, ) self.optimal_value = optimal_value @@ -204,26 +138,20 @@ def __init__( search_space: SearchSpace, optimization_config: MultiObjectiveOptimizationConfig, num_trials: int, - outcome_names: List[str], + runner: SurrogateRunner, + is_noiseless: bool, observe_noise_stds: Union[bool, Dict[str, bool]] = False, - noise_stds: Union[float, Dict[str, float]] = 0.0, - get_surrogate_and_datasets: Optional[ - Callable[[], Tuple[TorchModelBridge, List[SupervisedDataset]]] - ] = None, tracking_metrics: Optional[List[BenchmarkMetricBase]] = None, - _runner: Optional[Runner] = None, ) -> None: super().__init__( name=name, search_space=search_space, optimization_config=optimization_config, num_trials=num_trials, - outcome_names=outcome_names, 
observe_noise_stds=observe_noise_stds, - noise_stds=noise_stds, - get_surrogate_and_datasets=get_surrogate_and_datasets, tracking_metrics=tracking_metrics, - _runner=_runner, + runner=runner, + is_noiseless=is_noiseless, ) self.reference_point = reference_point self.optimal_value = optimal_value diff --git a/ax/benchmark/runners/surrogate.py b/ax/benchmark/runners/surrogate.py index f64a5d1dd15..804919f0d68 100644 --- a/ax/benchmark/runners/surrogate.py +++ b/ax/benchmark/runners/surrogate.py @@ -6,7 +6,7 @@ # pyre-strict import warnings -from typing import Any, Dict, Iterable, List, Optional, Set, Union +from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Tuple, Union import torch from ax.benchmark.runners.base import BenchmarkRunner @@ -15,20 +15,27 @@ from ax.core.observation import ObservationFeatures from ax.core.search_space import SearchSpace from ax.modelbridge.torch import TorchModelBridge +from ax.utils.common.base import Base +from ax.utils.common.equality import equality_typechecker from ax.utils.common.serialization import TClassDecoderRegistry, TDecoderRegistry from botorch.utils.datasets import SupervisedDataset +from pyre_extensions import assert_is_instance, none_throws from torch import Tensor class SurrogateRunner(BenchmarkRunner): def __init__( self, + *, name: str, - surrogate: TorchModelBridge, - datasets: List[SupervisedDataset], search_space: SearchSpace, outcome_names: List[str], + surrogate: Optional[TorchModelBridge] = None, + datasets: Optional[List[SupervisedDataset]] = None, noise_stds: Union[float, Dict[str, float]] = 0.0, + get_surrogate_and_datasets: Optional[ + Callable[[], Tuple[TorchModelBridge, List[SupervisedDataset]]] + ] = None, ) -> None: """Runner for surrogate benchmark problems. @@ -45,15 +52,42 @@ def __init__( is added to all outputs. Alternatively, a dictionary mapping outcome names to noise standard deviations can be provided to specify different noise levels for different outputs. + get_surrogate_and_datasets: Function that returns the surrogate and + datasets, to allow for lazy construction. If + `get_surrogate_and_datasets` is not provided, `surrogate` and + `datasets` must be provided, and vice versa. """ + if get_surrogate_and_datasets is None and ( + surrogate is None or datasets is None + ): + raise ValueError( + "If get_surrogate_and_datasets is not provided, surrogate and " + "datasets must be provided, and vice versa." 
+ ) + self.get_surrogate_and_datasets = get_surrogate_and_datasets self.name = name - self.surrogate = surrogate + self._surrogate = surrogate self._outcome_names = outcome_names - self.datasets = datasets + self._datasets = datasets self.search_space = search_space self.noise_stds = noise_stds self.statuses: Dict[int, TrialStatus] = {} + def set_surrogate_and_datasets(self) -> None: + self._surrogate, self._datasets = none_throws(self.get_surrogate_and_datasets)() + + @property + def surrogate(self) -> TorchModelBridge: + if self.get_surrogate_and_datasets is not None: + self.set_surrogate_and_datasets() + return none_throws(self._surrogate) + + @property + def datasets(self) -> List[SupervisedDataset]: + if self.get_surrogate_and_datasets is not None: + self.set_surrogate_and_datasets() + return none_throws(self._datasets) + @property def outcome_names(self) -> List[str]: return self._outcome_names @@ -131,3 +165,22 @@ def deserialize_init_args( class_decoder_registry: Optional[TClassDecoderRegistry] = None, ) -> Dict[str, Any]: return {} + + @property + def is_noiseless(self) -> bool: + if self.noise_stds is None: + return True + if isinstance(self.noise_stds, float): + return self.noise_stds == 0.0 + return all( + std == 0.0 for std in assert_is_instance(self.noise_stds, dict).values() + ) + + @equality_typechecker + def __eq__(self, other: Base) -> bool: + if type(other) is not type(self): + return False + + # Checking the whole datasets' equality here would be too expensive to be + # worth it; just check names instead + return self.name == other.name diff --git a/ax/benchmark/tests/problems/test_surrogate_problems.py b/ax/benchmark/tests/problems/test_surrogate_problems.py index 6d54784e0e0..c9211d57ce3 100644 --- a/ax/benchmark/tests/problems/test_surrogate_problems.py +++ b/ax/benchmark/tests/problems/test_surrogate_problems.py @@ -9,12 +9,15 @@ import numpy as np from ax.benchmark.benchmark import compute_score_trace from ax.benchmark.benchmark_problem import BenchmarkProblemProtocol -from ax.core.runner import Runner from ax.utils.common.testutils import TestCase from ax.utils.testing.benchmark_stubs import get_moo_surrogate, get_soo_surrogate class TestSurrogateProblems(TestCase): + def setUp(self) -> None: + super().setUp() + self.maxDiff = None + def test_conforms_to_protocol(self) -> None: sbp = get_soo_surrogate() self.assertIsInstance(sbp, BenchmarkProblemProtocol) @@ -22,11 +25,9 @@ def test_conforms_to_protocol(self) -> None: mbp = get_moo_surrogate() self.assertIsInstance(mbp, BenchmarkProblemProtocol) - def test_lazy_instantiation(self) -> None: + def test_repr(self) -> None: - # test instantiation from init sbp = get_soo_surrogate() - # test __repr__ method expected_repr = ( "SOOSurrogateBenchmarkProblem(name=test, " @@ -38,23 +39,6 @@ def test_lazy_instantiation(self) -> None: ) self.assertEqual(repr(sbp), expected_repr) - self.assertIsNone(sbp._runner) - # sets runner - self.assertIsInstance(sbp.runner, Runner) - - self.assertIsNotNone(sbp._runner) - self.assertIsNotNone(sbp.runner) - - # repeat for MOO - sbp = get_moo_surrogate() - - self.assertIsNone(sbp._runner) - # sets runner - self.assertIsInstance(sbp.runner, Runner) - - self.assertIsNotNone(sbp._runner) - self.assertIsNotNone(sbp.runner) - def test_compute_score_trace(self) -> None: soo_problem = get_soo_surrogate() score_trace = compute_score_trace( diff --git a/ax/benchmark/tests/runners/test_surrogate_runner.py b/ax/benchmark/tests/runners/test_surrogate_runner.py index 0fdf4e65154..7581e584e79 100644 --- 
a/ax/benchmark/tests/runners/test_surrogate_runner.py +++ b/ax/benchmark/tests/runners/test_surrogate_runner.py @@ -8,22 +8,27 @@ from unittest.mock import MagicMock import torch -from ax.benchmark.problems.surrogate import SurrogateRunner +from ax.benchmark.runners.surrogate import SurrogateRunner from ax.core.parameter import ParameterType, RangeParameter from ax.core.search_space import SearchSpace +from ax.modelbridge.torch import TorchModelBridge from ax.utils.common.testutils import TestCase +from ax.utils.testing.benchmark_stubs import get_soo_surrogate class TestSurrogateRunner(TestCase): - def test_surrogate_runner(self) -> None: - # Construct a search space with log-scale parameters. - search_space = SearchSpace( + def setUp(self) -> None: + super().setUp() + self.search_space = SearchSpace( parameters=[ RangeParameter("x", ParameterType.FLOAT, 0.0, 5.0), RangeParameter("y", ParameterType.FLOAT, 1.0, 10.0, log_scale=True), RangeParameter("z", ParameterType.INT, 1.0, 5.0, log_scale=True), ] ) + + def test_surrogate_runner(self) -> None: + # Construct a search space with log-scale parameters. for noise_std in (0.0, 0.1, {"dummy_metric": 0.2}): with self.subTest(noise_std=noise_std): surrogate = MagicMock() @@ -35,7 +40,7 @@ def test_surrogate_runner(self) -> None: name="test runner", surrogate=surrogate, datasets=[], - search_space=search_space, + search_space=self.search_space, outcome_names=["dummy_metric"], noise_stds=noise_std, ) @@ -43,3 +48,50 @@ def test_surrogate_runner(self) -> None: self.assertIs(runner.surrogate, surrogate) self.assertEqual(runner.outcome_names, ["dummy_metric"]) self.assertEqual(runner.noise_stds, noise_std) + + def test_lazy_instantiation(self) -> None: + runner = get_soo_surrogate().runner + + self.assertIsNone(runner._surrogate) + self.assertIsNone(runner._datasets) + + # Accessing `surrogat` sets datasets and surrogate + self.assertIsInstance(runner.surrogate, TorchModelBridge) + self.assertIsInstance(runner._surrogate, TorchModelBridge) + self.assertIsInstance(runner._datasets, list) + + # Accessing `datasets` also sets datasets and surrogate + runner = get_soo_surrogate().runner + self.assertIsInstance(runner.datasets, list) + self.assertIsInstance(runner._surrogate, TorchModelBridge) + self.assertIsInstance(runner._datasets, list) + + def test_instantiation_raises_with_missing_args(self) -> None: + with self.assertRaisesRegex( + ValueError, "If get_surrogate_and_datasets is not provided, surrogate and " + ): + SurrogateRunner( + name="test runner", + search_space=self.search_space, + outcome_names=[], + noise_stds=0.0, + ) + + def test_equality(self) -> None: + + def _construct_runner(name: str) -> SurrogateRunner: + return SurrogateRunner( + name=name, + surrogate=MagicMock(), + datasets=[], + search_space=self.search_space, + outcome_names=["dummy_metric"], + noise_stds=0.0, + ) + + runner_1 = _construct_runner("test 1") + runner_2 = _construct_runner("test 2") + runner_1a = _construct_runner("test 1") + self.assertEqual(runner_1, runner_1a) + self.assertNotEqual(runner_1, runner_2) + self.assertNotEqual(runner_1, 1) diff --git a/ax/benchmark/tests/test_benchmark.py b/ax/benchmark/tests/test_benchmark.py index dc24816effb..91ce8af336b 100644 --- a/ax/benchmark/tests/test_benchmark.py +++ b/ax/benchmark/tests/test_benchmark.py @@ -300,7 +300,6 @@ def test_replication_sobol_surrogate(self) -> None: ("moo", get_moo_surrogate()), ]: with self.subTest(name, problem=problem): - surrogate, datasets = not_none(problem.get_surrogate_and_datasets)() res 
= benchmark_replication(problem=problem, method=method, seed=0) self.assertEqual( diff --git a/ax/utils/testing/benchmark_stubs.py b/ax/utils/testing/benchmark_stubs.py index 170334b0258..aca50f03259 100644 --- a/ax/utils/testing/benchmark_stubs.py +++ b/ax/utils/testing/benchmark_stubs.py @@ -20,6 +20,7 @@ MOOSurrogateBenchmarkProblem, SOOSurrogateBenchmarkProblem, ) +from ax.benchmark.runners.surrogate import SurrogateRunner from ax.core.experiment import Experiment from ax.core.optimization_config import ( MultiObjectiveOptimizationConfig, @@ -110,6 +111,12 @@ def get_soo_surrogate() -> SOOSurrogateBenchmarkProblem: data=experiment.lookup_data(), transforms=[], ) + runner = SurrogateRunner( + name="test", + search_space=experiment.search_space, + outcome_names=["branin"], + get_surrogate_and_datasets=lambda: (surrogate, []), + ) return SOOSurrogateBenchmarkProblem( name="test", search_space=experiment.search_space, @@ -117,10 +124,10 @@ def get_soo_surrogate() -> SOOSurrogateBenchmarkProblem: OptimizationConfig, experiment.optimization_config ), num_trials=6, - outcome_names=["branin"], observe_noise_stds=True, - get_surrogate_and_datasets=lambda: (surrogate, []), optimal_value=0.0, + runner=runner, + is_noiseless=runner.is_noiseless, ) @@ -133,6 +140,13 @@ def get_moo_surrogate() -> MOOSurrogateBenchmarkProblem: data=experiment.lookup_data(), transforms=[], ) + + runner = SurrogateRunner( + name="test", + search_space=experiment.search_space, + outcome_names=["branin_a", "branin_b"], + get_surrogate_and_datasets=lambda: (surrogate, []), + ) return MOOSurrogateBenchmarkProblem( name="test", search_space=experiment.search_space, @@ -140,11 +154,11 @@ def get_moo_surrogate() -> MOOSurrogateBenchmarkProblem: MultiObjectiveOptimizationConfig, experiment.optimization_config ), num_trials=10, - outcome_names=["branin_a", "branin_b"], observe_noise_stds=True, - get_surrogate_and_datasets=lambda: (surrogate, []), optimal_value=1.0, reference_point=[], + runner=runner, + is_noiseless=runner.is_noiseless, ) From 3d6f0e627b48c4a33a1642dffe4a335524700992 Mon Sep 17 00:00:00 2001 From: Elizabeth Santorella Date: Sun, 28 Jul 2024 08:22:27 -0700 Subject: [PATCH 2/5] Require all problems to have an `optimal_value`; simplify Problem inheritance (#2601) Summary: Pull Request resolved: https://github.com/facebook/Ax/pull/2601 Context: * Problems that lack an `optimal_value` get a NaN score and cannot really be used for benchmarking except when aggregated. This is a nasty "gotcha." We don't always know the optimum, but IMO it is better to guess. * The inheritance structure of benchmark problems is too complex and will make subsequent refactors harder. This PR: * Adds an `optimal_value` requirement to `BenchmarkProblem` and makes `BenchmarkProblem` the base class to `SurrogateBenchmarkProblem`, enabling `BenchmarkProblem` to be the only type annotation needed. Therefore, the type annotation `BenchmarkProblemProtocol` is no longer necessary. It will be removed in the next PR. * No longer allows for giving an NaN score to benchmarks where the problem lacks an optimal value, because this won't happen. * Updates a lot of annotations. * Raises an exception for constrained multi-objective problems. * Use dataclasses to cut down on code. 
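To illustrate the last point, here is a minimal sketch of the keyword-only dataclass pattern the refactor relies on (illustrative names and simplified types, not the actual `BenchmarkProblem` fields; `kw_only` requires Python 3.10+):

```python
from dataclasses import dataclass, field
from typing import Dict, List, Union


# With `kw_only=True`, a required field such as `optimal_value` may follow
# fields that have defaults, and subclasses can add required fields without
# re-declaring an `__init__`.
@dataclass(kw_only=True)
class ProblemSketch:
    name: str
    num_trials: int
    observe_noise_stds: Union[bool, Dict[str, bool]] = False
    tracking_metrics: List[str] = field(default_factory=list)
    optimal_value: float  # required; no default


@dataclass(kw_only=True)
class MOOProblemSketch(ProblemSketch):
    reference_point: List[float]


problem = MOOProblemSketch(
    name="toy", num_trials=10, optimal_value=1.0, reference_point=[0.0, 0.0]
)
```

The real classes carry more fields (`search_space`, `optimization_config`, `runner`, `is_noiseless`), but the mechanics are the same: construction is keyword-only and `optimal_value` has no default.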
Differential Revision: D60145193 Reviewed By: saitcakmak --- ax/benchmark/benchmark_problem.py | 271 +++++++----------- ax/benchmark/problems/surrogate.py | 137 +-------- .../problems/test_mixed_integer_problems.py | 3 - .../tests/problems/test_surrogate_problems.py | 8 +- ax/benchmark/tests/test_benchmark.py | 9 +- ax/benchmark/tests/test_benchmark_problem.py | 32 ++- ax/storage/json_store/encoders.py | 1 + ax/utils/testing/benchmark_stubs.py | 5 +- 8 files changed, 150 insertions(+), 316 deletions(-) diff --git a/ax/benchmark/benchmark_problem.py b/ax/benchmark/benchmark_problem.py index b3f8e29531a..327f74a5b24 100644 --- a/ax/benchmark/benchmark_problem.py +++ b/ax/benchmark/benchmark_problem.py @@ -10,8 +10,18 @@ # `BenchmarkProblem` as return type annotation, used for serialization and rendering # in the UI. -import abc -from typing import Any, Dict, List, Optional, Protocol, runtime_checkable, Type, Union +from dataclasses import dataclass, field +from typing import ( + Any, + Dict, + List, + Optional, + Protocol, + runtime_checkable, + Type, + TypeVar, + Union, +) from ax.benchmark.metrics.base import BenchmarkMetricBase @@ -30,10 +40,21 @@ from ax.core.types import ComparisonOp from ax.utils.common.base import Base from ax.utils.common.typeutils import checked_cast -from botorch.test_functions.base import BaseTestProblem, ConstrainedBaseTestProblem -from botorch.test_functions.multi_objective import MultiObjectiveTestProblem +from botorch.test_functions.base import ( + BaseTestProblem, + ConstrainedBaseTestProblem, + MultiObjectiveTestProblem, +) from botorch.test_functions.synthetic import SyntheticTestFunction +TBenchmarkProblem = TypeVar("TBenchmarkProblem", bound="BenchmarkProblem") +TSingleObjectiveBenchmarkProblem = TypeVar( + "TSingleObjectiveBenchmarkProblem", bound="SingleObjectiveBenchmarkProblem" +) +TMultiObjectiveBenchmarkProblem = TypeVar( + "TMultiObjectiveBenchmarkProblem", bound="MultiObjectiveBenchmarkProblem" +) + def _get_name( test_problem: BaseTestProblem, @@ -70,10 +91,7 @@ class BenchmarkProblemProtocol(Protocol): bool, Dict[str, bool] ] # Whether we observe the observation noise level has_ground_truth: bool # if True, evals (w/o synthetic noise) are determinstic - - @abc.abstractproperty - def runner(self) -> Runner: - pass # pragma: no cover + runner: Runner @runtime_checkable @@ -81,46 +99,68 @@ class BenchmarkProblemWithKnownOptimum(Protocol): optimal_value: float +@dataclass(kw_only=True, repr=True) class BenchmarkProblem(Base): - """Benchmark problem, represented in terms of Ax search space, optimization - config, and runner. + """ + Problem against which different methods can be benchmarked. + + Defines how data is generated, the objective (via the OptimizationConfig), + and the SearchSpace. + + Args: + name: Can be generated programmatically with `_get_name`. + optimization_config: Defines the objective of optimization. + num_trials: Number of optimization iterations to run. BatchTrials count + as one trial. + observe_noise_stds: If boolean, whether the standard deviation of the + observation noise is observed for all metrics. If a dictionary, + whether noise levels are observed on a per-metric basis. + has_ground_truth: Whether the Runner produces underlying ground truth + values, which are not observed in real noisy problems but may be + known in benchmarks. + tracking_metrics: Tracking metrics are not optimized, and for the + purpose of benchmarking, they will not be fit. The ground truth may + be provided as `tracking_metrics`. 
+ optimal_value: The best ground-truth objective value. Hypervolume for + multi-objective problems. If the best value is not known, it is + conventional to set it to a value that is almost certainly better + than the best value, so that a benchmark's score will not exceed 100%. + search_space: The search space. + runner: The Runner that will be used to generate data for the problem, + including any ground-truth data stored as tracking metrics. """ - def __init__( - self, - name: str, - search_space: SearchSpace, - optimization_config: OptimizationConfig, - runner: Runner, - num_trials: int, - is_noiseless: bool = False, - observe_noise_stds: Union[bool, Dict[str, bool]] = False, - has_ground_truth: bool = False, - tracking_metrics: Optional[List[BenchmarkMetricBase]] = None, - ) -> None: - self.name = name - self.search_space = search_space - self.optimization_config = optimization_config - self._runner = runner - self.num_trials = num_trials - self.is_noiseless = is_noiseless - self.observe_noise_stds = observe_noise_stds - self.has_ground_truth = has_ground_truth - self.tracking_metrics: List[BenchmarkMetricBase] = tracking_metrics or [] - - @property - def runner(self) -> Runner: - return self._runner + name: str + optimization_config: OptimizationConfig + num_trials: int + observe_noise_stds: Union[bool, Dict[str, bool]] = False + has_ground_truth: bool = True + tracking_metrics: List[BenchmarkMetricBase] = field(default_factory=list) + optimal_value: float + + search_space: SearchSpace = field(repr=False) + runner: Runner = field(repr=False) + is_noiseless: bool + + +@dataclass(kw_only=True, repr=True) +class SingleObjectiveBenchmarkProblem(BenchmarkProblem): + """ + Benchmark problem with a single objective. + + For argument descriptions, see `BenchmarkProblem`; it additionally takes a + `Runner`. + """ @classmethod - def from_botorch( - cls, - test_problem_class: Type[BaseTestProblem], + def from_botorch_synthetic( + cls: Type[TSingleObjectiveBenchmarkProblem], + test_problem_class: Type[SyntheticTestFunction], test_problem_kwargs: Dict[str, Any], lower_is_better: bool, num_trials: int, observe_noise_sd: bool = False, - ) -> "BenchmarkProblem": + ) -> TSingleObjectiveBenchmarkProblem: """ Create a BenchmarkProblem from a BoTorch BaseTestProblem using specialized Metrics and Runners. The test problem's result will be @@ -199,7 +239,11 @@ def from_botorch( objective=objective, outcome_constraints=outcome_constraints, ) - + optimal_value = ( + test_problem.max_hv + if isinstance(test_problem, MultiObjectiveTestProblem) + else test_problem.optimal_value + ) return cls( name=name, search_space=search_space, @@ -213,155 +257,49 @@ def from_botorch( observe_noise_stds=observe_noise_sd, is_noiseless=test_problem.noise_std in (None, 0.0), has_ground_truth=True, # all synthetic problems have ground truth - ) - - def __repr__(self) -> str: - """ - Return a string representation that includes only the attributes that - print nicely and contain information likely to be useful. - """ - return ( - f"{self.__class__.__name__}(" - f"name={self.name}, " - f"optimization_config={self.optimization_config}, " - f"num_trials={self.num_trials}, " - f"is_noiseless={self.is_noiseless}, " - f"observe_noise_stds={self.observe_noise_stds}, " - f"has_ground_truth={self.has_ground_truth}, " - f"tracking_metrics={self.tracking_metrics})" - ) - - -class SingleObjectiveBenchmarkProblem(BenchmarkProblem): - """The most basic BenchmarkProblem, with a single objective and a known optimal - value. 
- """ - - def __init__( - self, - optimal_value: float, - *, - name: str, - search_space: SearchSpace, - optimization_config: OptimizationConfig, - runner: Runner, - num_trials: int, - is_noiseless: bool = False, - observe_noise_stds: Union[bool, Dict[str, bool]] = False, - has_ground_truth: bool = False, - tracking_metrics: Optional[List[BenchmarkMetricBase]] = None, - ) -> None: - super().__init__( - name=name, - search_space=search_space, - optimization_config=optimization_config, - runner=runner, - num_trials=num_trials, - is_noiseless=is_noiseless, - observe_noise_stds=observe_noise_stds, - has_ground_truth=has_ground_truth, - tracking_metrics=tracking_metrics, - ) - self.optimal_value = optimal_value - - @classmethod - def from_botorch_synthetic( - cls, - test_problem_class: Type[SyntheticTestFunction], - test_problem_kwargs: Dict[str, Any], - lower_is_better: bool, - num_trials: int, - observe_noise_sd: bool = False, - ) -> "SingleObjectiveBenchmarkProblem": - """Create a BenchmarkProblem from a BoTorch BaseTestProblem using specialized - Metrics and Runners. The test problem's result will be computed on the Runner - and retrieved by the Metric. - """ - - # pyre-fixme [45]: Invalid class instantiation - test_problem = test_problem_class(**test_problem_kwargs) - - problem = BenchmarkProblem.from_botorch( - test_problem_class=test_problem_class, - test_problem_kwargs=test_problem_kwargs, - lower_is_better=lower_is_better, - num_trials=num_trials, - observe_noise_sd=observe_noise_sd, - ) - - dim = test_problem_kwargs.get("dim", None) - name = _get_name( - test_problem=test_problem, observe_noise_sd=observe_noise_sd, dim=dim - ) - - return cls( - name=name, - search_space=problem.search_space, - optimization_config=problem.optimization_config, - runner=problem.runner, - num_trials=num_trials, - is_noiseless=problem.is_noiseless, - observe_noise_stds=problem.observe_noise_stds, - has_ground_truth=problem.has_ground_truth, - optimal_value=test_problem.optimal_value, + optimal_value=optimal_value, ) +@dataclass(kw_only=True, repr=True) class MultiObjectiveBenchmarkProblem(BenchmarkProblem): """ A `BenchmarkProblem` that supports multiple objectives. For multi-objective problems, `optimal_value` indicates the maximum hypervolume attainable with the given `reference_point`. + + For argument descriptions, see `BenchmarkProblem`; it additionally takes a `runner` + and a `reference_point`. """ - def __init__( - self, - optimal_value: float, - reference_point: List[float], - *, - name: str, - search_space: SearchSpace, - optimization_config: OptimizationConfig, - runner: Runner, - num_trials: int, - is_noiseless: bool = False, - observe_noise_stds: Union[bool, Dict[str, bool]] = False, - has_ground_truth: bool = False, - tracking_metrics: Optional[List[BenchmarkMetricBase]] = None, - ) -> None: - self.optimal_value = optimal_value - self.reference_point = reference_point - super().__init__( - name=name, - search_space=search_space, - optimization_config=optimization_config, - runner=runner, - num_trials=num_trials, - is_noiseless=is_noiseless, - observe_noise_stds=observe_noise_stds, - has_ground_truth=has_ground_truth, - tracking_metrics=tracking_metrics, - ) + reference_point: List[float] + optimization_config: MultiObjectiveOptimizationConfig @classmethod def from_botorch_multi_objective( - cls, + cls: Type[TMultiObjectiveBenchmarkProblem], test_problem_class: Type[MultiObjectiveTestProblem], test_problem_kwargs: Dict[str, Any], # TODO: Figure out whether we should use `lower_is_better` here. 
num_trials: int, observe_noise_sd: bool = False, - ) -> "MultiObjectiveBenchmarkProblem": + ) -> TMultiObjectiveBenchmarkProblem: """Create a BenchmarkProblem from a BoTorch BaseTestProblem using specialized Metrics and Runners. The test problem's result will be computed on the Runner once per trial and each Metric will retrieve its own result by index. """ + if issubclass(test_problem_class, ConstrainedBaseTestProblem): + raise NotImplementedError( + "Constrained multi-objective problems are not supported." + ) # pyre-fixme [45]: Invalid class instantiation test_problem = test_problem_class(**test_problem_kwargs) - problem = BenchmarkProblem.from_botorch( + problem = SingleObjectiveBenchmarkProblem.from_botorch_synthetic( + # pyre-fixme [6]: Passing a multi-objective problem where a + # single-objective problem is expected. test_problem_class=test_problem_class, test_problem_kwargs=test_problem_kwargs, lower_is_better=True, # Seems like we always assume minimization for MOO? @@ -369,10 +307,7 @@ def from_botorch_multi_objective( observe_noise_sd=observe_noise_sd, ) - dim = test_problem_kwargs.get("dim", None) - name = _get_name( - test_problem=test_problem, observe_noise_sd=observe_noise_sd, dim=dim - ) + name = problem.name n_obj = test_problem.num_objectives if not observe_noise_sd: @@ -420,7 +355,3 @@ def from_botorch_multi_objective( optimal_value=test_problem.max_hv, reference_point=test_problem._ref_point, ) - - @property - def maximum_hypervolume(self) -> float: - return self.optimal_value diff --git a/ax/benchmark/problems/surrogate.py b/ax/benchmark/problems/surrogate.py index c9f39836509..a165216b252 100644 --- a/ax/benchmark/problems/surrogate.py +++ b/ax/benchmark/problems/surrogate.py @@ -5,20 +5,17 @@ # pyre-strict -from typing import Dict, List, Optional, Union +from dataclasses import dataclass, field +from typing import List -from ax.benchmark.metrics.base import BenchmarkMetricBase +from ax.benchmark.benchmark_problem import BenchmarkProblem from ax.benchmark.runners.surrogate import SurrogateRunner -from ax.core.optimization_config import ( - MultiObjectiveOptimizationConfig, - OptimizationConfig, -) -from ax.core.search_space import SearchSpace -from ax.utils.common.base import Base +from ax.core.optimization_config import MultiObjectiveOptimizationConfig -class SurrogateBenchmarkProblemBase(Base): +@dataclass(kw_only=True) +class SurrogateBenchmarkProblemBase(BenchmarkProblem): """ Base class for SOOSurrogateBenchmarkProblem and MOOSurrogateBenchmarkProblem. @@ -26,132 +23,20 @@ class SurrogateBenchmarkProblemBase(Base): constructed lazily and datasets to be downloaded lazily. """ - def __init__( - self, - *, - name: str, - search_space: SearchSpace, - optimization_config: OptimizationConfig, - num_trials: int, - runner: SurrogateRunner, - is_noiseless: bool, - observe_noise_stds: Union[bool, Dict[str, bool]] = False, - tracking_metrics: Optional[List[BenchmarkMetricBase]] = None, - ) -> None: - """Construct a `SurrogateBenchmarkProblemBase` instance. - - Args: - name: The name of the benchmark problem. - search_space: The search space to optimize over. - optimization_config: THe optimization config for the problem. - num_trials: The number of trials to run. - runner: A `SurrogateRunner`, allowing for lazy construction of the - surrogate and datasets. - observe_noise_stds: Whether or not to observe the observation noise - level for each metric. If True/False, observe the the noise standard - deviation for all/no metrics. 
If a dictionary, specify this for - individual metrics (metrics not appearing in the dictionary will - be assumed to not provide observation noise levels). - tracking_metrics: Additional tracking metrics to compute during the - optimization (not used to inform the optimization). - """ - - self.name = name - self.search_space = search_space - self.optimization_config = optimization_config - self.num_trials = num_trials - self.observe_noise_stds = observe_noise_stds - self.tracking_metrics: List[BenchmarkMetricBase] = tracking_metrics or [] - self.runner = runner - self.is_noiseless = is_noiseless - - @property - def has_ground_truth(self) -> bool: - # All surrogate-based problems have a ground truth - return True - - def __repr__(self) -> str: - """ - Return a string representation that includes only the attributes that - print nicely and contain information likely to be useful. - """ - return ( - f"{self.__class__.__name__}(" - f"name={self.name}, " - f"optimization_config={self.optimization_config}, " - f"num_trials={self.num_trials}, " - f"is_noiseless={self.is_noiseless}, " - f"observe_noise_stds={self.observe_noise_stds}, " - f"noise_stds={self.runner.noise_stds}, " - f"tracking_metrics={self.tracking_metrics})" - ) + runner: SurrogateRunner = field(repr=False) class SOOSurrogateBenchmarkProblem(SurrogateBenchmarkProblemBase): - """ - Has the same attributes/properties as a `MultiObjectiveBenchmarkProblem`, - but its runner is not constructed until needed, to allow for deferring - constructing the surrogate and downloading data. The surrogate is only - defined when `runner` is accessed or `set_runner` is called. - """ - - def __init__( - self, - optimal_value: float, - *, - name: str, - search_space: SearchSpace, - optimization_config: OptimizationConfig, - num_trials: int, - runner: SurrogateRunner, - is_noiseless: bool, - observe_noise_stds: Union[bool, Dict[str, bool]] = False, - ) -> None: - super().__init__( - name=name, - search_space=search_space, - optimization_config=optimization_config, - num_trials=num_trials, - observe_noise_stds=observe_noise_stds, - runner=runner, - is_noiseless=is_noiseless, - ) - self.optimal_value = optimal_value + pass +@dataclass(kw_only=True) class MOOSurrogateBenchmarkProblem(SurrogateBenchmarkProblemBase): """ Has the same attributes/properties as a `MultiObjectiveBenchmarkProblem`, but its runner is not constructed until needed, to allow for deferring - constructing the surrogate and downloading data. The surrogate is only - defined when `runner` is accessed or `set_runner` is called. + constructing the surrogate and downloading data. 
""" optimization_config: MultiObjectiveOptimizationConfig - - def __init__( - self, - optimal_value: float, - reference_point: List[float], - *, - name: str, - search_space: SearchSpace, - optimization_config: MultiObjectiveOptimizationConfig, - num_trials: int, - runner: SurrogateRunner, - is_noiseless: bool, - observe_noise_stds: Union[bool, Dict[str, bool]] = False, - tracking_metrics: Optional[List[BenchmarkMetricBase]] = None, - ) -> None: - super().__init__( - name=name, - search_space=search_space, - optimization_config=optimization_config, - num_trials=num_trials, - observe_noise_stds=observe_noise_stds, - tracking_metrics=tracking_metrics, - runner=runner, - is_noiseless=is_noiseless, - ) - self.reference_point = reference_point - self.optimal_value = optimal_value + reference_point: List[float] diff --git a/ax/benchmark/tests/problems/test_mixed_integer_problems.py b/ax/benchmark/tests/problems/test_mixed_integer_problems.py index 717beb3aabd..fa6cb400515 100644 --- a/ax/benchmark/tests/problems/test_mixed_integer_problems.py +++ b/ax/benchmark/tests/problems/test_mixed_integer_problems.py @@ -58,9 +58,6 @@ def test_problems(self) -> None: ).test_problem._bounds, expected_bounds, ) - print(f"{name=}") - print(f"{problem.optimal_value=}") - print(f"{problem_cls().optimal_value=}") self.assertGreaterEqual(problem.optimal_value, problem_cls().optimal_value) # Test that they match correctly to the original problems. diff --git a/ax/benchmark/tests/problems/test_surrogate_problems.py b/ax/benchmark/tests/problems/test_surrogate_problems.py index c9211d57ce3..81b772eaae8 100644 --- a/ax/benchmark/tests/problems/test_surrogate_problems.py +++ b/ax/benchmark/tests/problems/test_surrogate_problems.py @@ -16,6 +16,7 @@ class TestSurrogateProblems(TestCase): def setUp(self) -> None: super().setUp() + # print max output so errors in 'repr' can be fully shown self.maxDiff = None def test_conforms_to_protocol(self) -> None: @@ -30,12 +31,13 @@ def test_repr(self) -> None: sbp = get_soo_surrogate() expected_repr = ( - "SOOSurrogateBenchmarkProblem(name=test, " + "SOOSurrogateBenchmarkProblem(name='test', " "optimization_config=OptimizationConfig(objective=Objective(metric_name=" '"branin", ' "minimize=False), " - "outcome_constraints=[]), num_trials=6, is_noiseless=True, " - "observe_noise_stds=True, noise_stds=0.0, tracking_metrics=[])" + "outcome_constraints=[]), num_trials=6, " + "observe_noise_stds=True, has_ground_truth=True, " + "tracking_metrics=[], optimal_value=0.0, is_noiseless=True)" ) self.assertEqual(repr(sbp), expected_repr) diff --git a/ax/benchmark/tests/test_benchmark.py b/ax/benchmark/tests/test_benchmark.py index 91ce8af336b..e5a184e9494 100644 --- a/ax/benchmark/tests/test_benchmark.py +++ b/ax/benchmark/tests/test_benchmark.py @@ -27,6 +27,7 @@ from ax.benchmark.metrics.base import GroundTruthMetricMixin from ax.benchmark.metrics.benchmark import BenchmarkMetric, GroundTruthBenchmarkMetric from ax.benchmark.problems.registry import get_problem +from ax.core.optimization_config import MultiObjectiveOptimizationConfig from ax.modelbridge.generation_strategy import GenerationNode, GenerationStrategy from ax.modelbridge.model_spec import ModelSpec from ax.modelbridge.registry import Models @@ -36,7 +37,6 @@ from ax.utils.common.testutils import TestCase from ax.utils.common.typeutils import checked_cast, not_none from ax.utils.testing.benchmark_stubs import ( - get_constrained_multi_objective_benchmark_problem, get_moo_surrogate, get_multi_objective_benchmark_problem, 
get_single_objective_benchmark_problem, @@ -162,9 +162,10 @@ def test_make_ground_truth_optimization_config(self) -> None: gt_opt_cfg = make_ground_truth_optimization_config(experiment) self.assertIs(gt_opt_cfg.objective.metric, gt_metric) - # Test behavior with MOO problem and outcome constraints - problem = get_constrained_multi_objective_benchmark_problem( - observe_noise_sd=False + # Test behavior with MOO problem + problem = get_multi_objective_benchmark_problem(observe_noise_sd=False) + self.assertIsInstance( + problem.optimization_config, MultiObjectiveOptimizationConfig ) experiment = _create_benchmark_experiment( problem=problem, method_name="test_method" diff --git a/ax/benchmark/tests/test_benchmark_problem.py b/ax/benchmark/tests/test_benchmark_problem.py index b6de743528c..4df3f267202 100644 --- a/ax/benchmark/tests/test_benchmark_problem.py +++ b/ax/benchmark/tests/test_benchmark_problem.py @@ -16,7 +16,7 @@ from ax.core.types import ComparisonOp from ax.utils.common.testutils import TestCase from ax.utils.common.typeutils import checked_cast -from botorch.test_functions.multi_objective import BraninCurrin +from botorch.test_functions.multi_objective import BraninCurrin, ConstrainedBraninCurrin from botorch.test_functions.synthetic import ( Ackley, ConstrainedGramacy, @@ -27,6 +27,11 @@ class TestBenchmarkProblem(TestCase): + def setUp(self) -> None: + # Print full output, so that any differences in 'repr' output are shown + self.maxDiff = None + super().setUp() + def test_single_objective_from_botorch(self) -> None: for botorch_test_problem in [Ackley(), ConstrainedHartmann(dim=6)]: test_problem = SingleObjectiveBenchmarkProblem.from_botorch_synthetic( @@ -77,15 +82,16 @@ def test_single_objective_from_botorch(self) -> None: test_problem.optimization_config.outcome_constraints, [] ) expected_repr = ( - "SingleObjectiveBenchmarkProblem(name=Ackley, " + "SingleObjectiveBenchmarkProblem(name='Ackley', " "optimization_config=OptimizationConfig(objective=Objective(" 'metric_name="Ackley", ' "minimize=True), outcome_constraints=[]), " "num_trials=1, " - "is_noiseless=True, " "observe_noise_stds=False, " "has_ground_truth=True, " - "tracking_metrics=[])" + "tracking_metrics=[], " + "optimal_value=0.0, " + "is_noiseless=True)" ) else: outcome_constraint = ( @@ -96,16 +102,17 @@ def test_single_objective_from_botorch(self) -> None: self.assertFalse(outcome_constraint.relative) self.assertEqual(outcome_constraint.bound, 0.0) expected_repr = ( - "SingleObjectiveBenchmarkProblem(name=ConstrainedHartmann, " + "SingleObjectiveBenchmarkProblem(name='ConstrainedHartmann', " "optimization_config=OptimizationConfig(objective=Objective(" 'metric_name="ConstrainedHartmann", minimize=True), ' "outcome_constraints=[OutcomeConstraint(constraint_slack_0" " >= 0.0)]), " "num_trials=1, " - "is_noiseless=True, " "observe_noise_stds=False, " "has_ground_truth=True, " - "tracking_metrics=[])" + "tracking_metrics=[], " + "optimal_value=-3.32237, " + "is_noiseless=True)" ) self.assertEqual(repr(test_problem), expected_repr) @@ -197,6 +204,17 @@ def test_moo_from_botorch(self) -> None: self.assertEqual(branin_currin_problem.optimal_value, test_problem._max_hv) self.assertEqual(branin_currin_problem.reference_point, test_problem._ref_point) + def test_moo_from_botorch_constrained(self) -> None: + with self.assertRaisesRegex( + NotImplementedError, + "Constrained multi-objective problems are not supported.", + ): + MultiObjectiveBenchmarkProblem.from_botorch_multi_objective( + 
test_problem_class=ConstrainedBraninCurrin, + test_problem_kwargs={}, + num_trials=1, + ) + def test_maximization_problem(self) -> None: test_problem = SingleObjectiveBenchmarkProblem.from_botorch_synthetic( test_problem_class=Cosine8, diff --git a/ax/storage/json_store/encoders.py b/ax/storage/json_store/encoders.py index e22f641427d..dbfb20311e2 100644 --- a/ax/storage/json_store/encoders.py +++ b/ax/storage/json_store/encoders.py @@ -147,6 +147,7 @@ def benchmark_problem_to_dict(benchmark_problem: BenchmarkProblem) -> Dict[str, "observe_noise_stds": benchmark_problem.observe_noise_stds, "has_ground_truth": benchmark_problem.has_ground_truth, "tracking_metrics": benchmark_problem.tracking_metrics, + "optimal_value": benchmark_problem.optimal_value, } diff --git a/ax/utils/testing/benchmark_stubs.py b/ax/utils/testing/benchmark_stubs.py index aca50f03259..d092963c3a1 100644 --- a/ax/utils/testing/benchmark_stubs.py +++ b/ax/utils/testing/benchmark_stubs.py @@ -11,7 +11,6 @@ import numpy as np from ax.benchmark.benchmark_method import BenchmarkMethod from ax.benchmark.benchmark_problem import ( - BenchmarkProblem, MultiObjectiveBenchmarkProblem, SingleObjectiveBenchmarkProblem, ) @@ -44,8 +43,8 @@ from botorch.test_functions.synthetic import Branin -def get_benchmark_problem() -> BenchmarkProblem: - return BenchmarkProblem.from_botorch( +def get_benchmark_problem() -> SingleObjectiveBenchmarkProblem: + return SingleObjectiveBenchmarkProblem.from_botorch_synthetic( test_problem_class=Branin, test_problem_kwargs={}, lower_is_better=True, From aa53efdc0a3f3c09014c4dbb2a2096bafa3de175 Mon Sep 17 00:00:00 2001 From: Elizabeth Santorella Date: Sun, 28 Jul 2024 08:24:14 -0700 Subject: [PATCH 3/5] Remove `BenchmarkProblemWithKnownOptimum` and `BenchmarkProtocol` type annotations (#2602) Summary: Pull Request resolved: https://github.com/facebook/Ax/pull/2602 These are no longer necessary. See previous PR for context. This change reaps them and updates type annotations. Differential Revision: D60146081 Reviewed By: saitcakmak --- ax/benchmark/benchmark.py | 21 ++++------ ax/benchmark/benchmark_problem.py | 39 +------------------ .../tests/problems/test_surrogate_problems.py | 8 ++-- 3 files changed, 13 insertions(+), 55 deletions(-) diff --git a/ax/benchmark/benchmark.py b/ax/benchmark/benchmark.py index 029889722bd..d32c3ed8d3f 100644 --- a/ax/benchmark/benchmark.py +++ b/ax/benchmark/benchmark.py @@ -27,10 +27,7 @@ import numpy as np from ax.benchmark.benchmark_method import BenchmarkMethod -from ax.benchmark.benchmark_problem import ( - BenchmarkProblemProtocol, - BenchmarkProblemWithKnownOptimum, -) +from ax.benchmark.benchmark_problem import BenchmarkProblem from ax.benchmark.benchmark_result import AggregatedBenchmarkResult, BenchmarkResult from ax.benchmark.metrics.base import BenchmarkMetricBase, GroundTruthMetricMixin from ax.core.experiment import Experiment @@ -53,16 +50,14 @@ def compute_score_trace( optimization_trace: np.ndarray, num_baseline_trials: int, - problem: BenchmarkProblemProtocol, + problem: BenchmarkProblem, ) -> np.ndarray: """Computes a score trace from the optimization trace.""" # Use the first GenerationStep's best found point as baseline. Sometimes (ex. in # a timeout) the first GenerationStep will not have not completed and we will not # have enough trials; in this case we do not score. 
- if (len(optimization_trace) <= num_baseline_trials) or not isinstance( - problem, BenchmarkProblemWithKnownOptimum - ): + if len(optimization_trace) <= num_baseline_trials: return np.full(len(optimization_trace), np.nan) optimum = problem.optimal_value baseline = optimization_trace[num_baseline_trials - 1] @@ -77,7 +72,7 @@ def compute_score_trace( def _create_benchmark_experiment( - problem: BenchmarkProblemProtocol, method_name: str + problem: BenchmarkProblem, method_name: str ) -> Experiment: """Creates an empty experiment for the given problem and method. @@ -117,7 +112,7 @@ def _create_benchmark_experiment( def benchmark_replication( - problem: BenchmarkProblemProtocol, + problem: BenchmarkProblem, method: BenchmarkMethod, seed: int, ) -> BenchmarkResult: @@ -192,7 +187,7 @@ def benchmark_replication( def benchmark_one_method_problem( - problem: BenchmarkProblemProtocol, + problem: BenchmarkProblem, method: BenchmarkMethod, seeds: Iterable[int], ) -> AggregatedBenchmarkResult: @@ -205,7 +200,7 @@ def benchmark_one_method_problem( def benchmark_multiple_problems_methods( - problems: Iterable[BenchmarkProblemProtocol], + problems: Iterable[BenchmarkProblem], methods: Iterable[BenchmarkMethod], seeds: Iterable[int], ) -> List[AggregatedBenchmarkResult]: @@ -222,7 +217,7 @@ def benchmark_multiple_problems_methods( def make_ground_truth_metrics( - problem: BenchmarkProblemProtocol, + problem: BenchmarkProblem, include_tracking_metrics: bool = True, ) -> Dict[str, Metric]: """Makes a ground truth version for each metric defined on the problem. diff --git a/ax/benchmark/benchmark_problem.py b/ax/benchmark/benchmark_problem.py index 327f74a5b24..b1543b7f01b 100644 --- a/ax/benchmark/benchmark_problem.py +++ b/ax/benchmark/benchmark_problem.py @@ -11,17 +11,7 @@ # in the UI. from dataclasses import dataclass, field -from typing import ( - Any, - Dict, - List, - Optional, - Protocol, - runtime_checkable, - Type, - TypeVar, - Union, -) +from typing import Any, Dict, List, Optional, Type, TypeVar, Union from ax.benchmark.metrics.base import BenchmarkMetricBase @@ -72,33 +62,6 @@ def _get_name( return f"{base_name}{observed_noise}{dim_str}" -@runtime_checkable -class BenchmarkProblemProtocol(Protocol): - """ - Specifies the interface any benchmark problem must adhere to. - - Classes implementing this interface include BenchmarkProblem, - SurrogateBenchmarkProblem, and MOOSurrogateBenchmarkProblem. 
- """ - - name: str - search_space: SearchSpace - optimization_config: OptimizationConfig - num_trials: int - tracking_metrics: List[BenchmarkMetricBase] - is_noiseless: bool # If True, evaluations are deterministic - observe_noise_stds: Union[ - bool, Dict[str, bool] - ] # Whether we observe the observation noise level - has_ground_truth: bool # if True, evals (w/o synthetic noise) are determinstic - runner: Runner - - -@runtime_checkable -class BenchmarkProblemWithKnownOptimum(Protocol): - optimal_value: float - - @dataclass(kw_only=True, repr=True) class BenchmarkProblem(Base): """ diff --git a/ax/benchmark/tests/problems/test_surrogate_problems.py b/ax/benchmark/tests/problems/test_surrogate_problems.py index 81b772eaae8..c9c2a334096 100644 --- a/ax/benchmark/tests/problems/test_surrogate_problems.py +++ b/ax/benchmark/tests/problems/test_surrogate_problems.py @@ -8,7 +8,7 @@ import numpy as np from ax.benchmark.benchmark import compute_score_trace -from ax.benchmark.benchmark_problem import BenchmarkProblemProtocol +from ax.benchmark.benchmark_problem import BenchmarkProblem from ax.utils.common.testutils import TestCase from ax.utils.testing.benchmark_stubs import get_moo_surrogate, get_soo_surrogate @@ -19,12 +19,12 @@ def setUp(self) -> None: # print max output so errors in 'repr' can be fully shown self.maxDiff = None - def test_conforms_to_protocol(self) -> None: + def test_conforms_to_api(self) -> None: sbp = get_soo_surrogate() - self.assertIsInstance(sbp, BenchmarkProblemProtocol) + self.assertIsInstance(sbp, BenchmarkProblem) mbp = get_moo_surrogate() - self.assertIsInstance(mbp, BenchmarkProblemProtocol) + self.assertIsInstance(mbp, BenchmarkProblem) def test_repr(self) -> None: From 79627e7c70390ea91e2b2be2279b4c76e7429a79 Mon Sep 17 00:00:00 2001 From: Elizabeth Santorella Date: Sun, 28 Jul 2024 08:29:26 -0700 Subject: [PATCH 4/5] Get rid of benchmark problem class constructors (#2605) Summary: Pull Request resolved: https://github.com/facebook/Ax/pull/2605 Context: Benchmark problems are sometimes created with class methods `SingleObjectiveBenchmarkProblem.from_botorch`, `SingleObjectiveBenchmarkProblem.from_botorch_synthetic`, and `MultiObjectiveBenchmarkProblem.from_botorch_multi_objective`, the former two now being identical. This creates the need for some tricky type annotations; to me, replacing these with functions is obviously cleaner. This will make it easier to consolidate classes in the future. This PR: * Replaces `SingleObjectiveBenchmarkProblem.from_botorch` and `SingleObjectiveBenchmarkProblem.from_botorch_synthetic` with `create_single_objective_problem_from_botorch` * Replaces `MultiObjectiveBenchmarkProblem.from_botorch_multi_objective` with `create_multi_objective_problem_from_botorch` Differential Revision: D60284484 Reviewed By: saitcakmak --- ax/benchmark/benchmark_problem.py | 368 +++++++++---------- ax/benchmark/problems/registry.py | 42 +-- ax/benchmark/tests/test_benchmark.py | 4 +- ax/benchmark/tests/test_benchmark_problem.py | 31 +- ax/utils/testing/benchmark_stubs.py | 10 +- 5 files changed, 216 insertions(+), 239 deletions(-) diff --git a/ax/benchmark/benchmark_problem.py b/ax/benchmark/benchmark_problem.py index b1543b7f01b..5d8766efe40 100644 --- a/ax/benchmark/benchmark_problem.py +++ b/ax/benchmark/benchmark_problem.py @@ -5,13 +5,8 @@ # pyre-strict -# NOTE: Do not add `from __future__ import annotations` to this file. 
Adding -# `annotations` postpones evaluation of types and will break FBLearner's usage of -# `BenchmarkProblem` as return type annotation, used for serialization and rendering -# in the UI. - from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional, Type, TypeVar, Union +from typing import Any, Dict, List, Optional, Type, Union from ax.benchmark.metrics.base import BenchmarkMetricBase @@ -37,14 +32,6 @@ ) from botorch.test_functions.synthetic import SyntheticTestFunction -TBenchmarkProblem = TypeVar("TBenchmarkProblem", bound="BenchmarkProblem") -TSingleObjectiveBenchmarkProblem = TypeVar( - "TSingleObjectiveBenchmarkProblem", bound="SingleObjectiveBenchmarkProblem" -) -TMultiObjectiveBenchmarkProblem = TypeVar( - "TMultiObjectiveBenchmarkProblem", bound="MultiObjectiveBenchmarkProblem" -) - def _get_name( test_problem: BaseTestProblem, @@ -106,122 +93,116 @@ class BenchmarkProblem(Base): is_noiseless: bool -@dataclass(kw_only=True, repr=True) class SingleObjectiveBenchmarkProblem(BenchmarkProblem): - """ - Benchmark problem with a single objective. + """A `BenchmarkProblem` that supports a single objective.""" - For argument descriptions, see `BenchmarkProblem`; it additionally takes a - `Runner`. - """ + pass - @classmethod - def from_botorch_synthetic( - cls: Type[TSingleObjectiveBenchmarkProblem], - test_problem_class: Type[SyntheticTestFunction], - test_problem_kwargs: Dict[str, Any], - lower_is_better: bool, - num_trials: int, - observe_noise_sd: bool = False, - ) -> TSingleObjectiveBenchmarkProblem: - """ - Create a BenchmarkProblem from a BoTorch BaseTestProblem using - specialized Metrics and Runners. The test problem's result will be - computed on the Runner and retrieved by the Metric. - - Args: - test_problem_class: The BoTorch test problem class which will be used - to define the `search_space`, `optimization_config`, and `runner`. - test_problem_kwargs: Keyword arguments used to instantiate the - `test_problem_class`. - num_trials: Simply the `num_trials` of the `BenchmarkProblem` created. - observe_noise_sd: Whether the standard deviation of the observation noise is - observed or not (in which case it must be inferred by the model). - This is separate from whether synthetic noise is added to the - problem, which is controlled by the `noise_std` of the test problem. - """ - - # pyre-fixme [45]: Invalid class instantiation - test_problem = test_problem_class(**test_problem_kwargs) - is_constrained = isinstance(test_problem, ConstrainedBaseTestProblem) - - search_space = SearchSpace( - parameters=[ - RangeParameter( - name=f"x{i}", - parameter_type=ParameterType.FLOAT, - lower=lower, - upper=upper, - ) - for i, (lower, upper) in enumerate(test_problem._bounds) - ] - ) - dim = test_problem_kwargs.get("dim", None) - name = _get_name( - test_problem=test_problem, observe_noise_sd=observe_noise_sd, dim=dim - ) +def create_single_objective_problem_from_botorch( + test_problem_class: Type[SyntheticTestFunction], + test_problem_kwargs: Dict[str, Any], + lower_is_better: bool, + num_trials: int, + observe_noise_sd: bool = False, +) -> SingleObjectiveBenchmarkProblem: + """ + Create a BenchmarkProblem from a BoTorch BaseTestProblem using + specialized Metrics and Runners. The test problem's result will be + computed on the Runner and retrieved by the Metric. - # TODO: Support constrained MOO problems. + Args: + test_problem_class: The BoTorch test problem class which will be used + to define the `search_space`, `optimization_config`, and `runner`. 
+ test_problem_kwargs: Keyword arguments used to instantiate the + `test_problem_class`. + num_trials: Simply the `num_trials` of the `BenchmarkProblem` created. + observe_noise_sd: Whether the standard deviation of the observation noise is + observed or not (in which case it must be inferred by the model). + This is separate from whether synthetic noise is added to the + problem, which is controlled by the `noise_std` of the test problem. + """ + # pyre-fixme [45]: Invalid class instantiation + test_problem = test_problem_class(**test_problem_kwargs) + is_constrained = isinstance(test_problem, ConstrainedBaseTestProblem) + + search_space = SearchSpace( + parameters=[ + RangeParameter( + name=f"x{i}", + parameter_type=ParameterType.FLOAT, + lower=lower, + upper=upper, + ) + for i, (lower, upper) in enumerate(test_problem._bounds) + ] + ) - objective = Objective( - metric=BenchmarkMetric( - name=name, - lower_is_better=lower_is_better, - observe_noise_sd=observe_noise_sd, - outcome_index=0, - ), - minimize=lower_is_better, - ) + dim = test_problem_kwargs.get("dim", None) + name = _get_name( + test_problem=test_problem, observe_noise_sd=observe_noise_sd, dim=dim + ) - outcome_names = [name] - outcome_constraints = [] - - # NOTE: Currently we don't support the case where only some of the - # outcomes have noise levels observed. - - if is_constrained: - for i in range(test_problem.num_constraints): - outcome_name = f"constraint_slack_{i}" - outcome_constraints.append( - OutcomeConstraint( - metric=BenchmarkMetric( - name=outcome_name, - lower_is_better=False, # positive slack = feasible - observe_noise_sd=observe_noise_sd, - outcome_index=i, - ), - op=ComparisonOp.GEQ, - bound=0.0, - relative=False, - ) - ) - outcome_names.append(outcome_name) + # TODO: Support constrained MOO problems. - optimization_config = OptimizationConfig( - objective=objective, - outcome_constraints=outcome_constraints, - ) - optimal_value = ( - test_problem.max_hv - if isinstance(test_problem, MultiObjectiveTestProblem) - else test_problem.optimal_value - ) - return cls( + objective = Objective( + metric=BenchmarkMetric( name=name, - search_space=search_space, - optimization_config=optimization_config, - runner=BotorchTestProblemRunner( - test_problem_class=test_problem_class, - test_problem_kwargs=test_problem_kwargs, - outcome_names=outcome_names, - ), - num_trials=num_trials, - observe_noise_stds=observe_noise_sd, - is_noiseless=test_problem.noise_std in (None, 0.0), - has_ground_truth=True, # all synthetic problems have ground truth - optimal_value=optimal_value, - ) + lower_is_better=lower_is_better, + observe_noise_sd=observe_noise_sd, + outcome_index=0, + ), + minimize=lower_is_better, + ) + + outcome_names = [name] + outcome_constraints = [] + + # NOTE: Currently we don't support the case where only some of the + # outcomes have noise levels observed. 
+ + if is_constrained: + for i in range(test_problem.num_constraints): + outcome_name = f"constraint_slack_{i}" + outcome_constraints.append( + OutcomeConstraint( + metric=BenchmarkMetric( + name=outcome_name, + lower_is_better=False, # positive slack = feasible + observe_noise_sd=observe_noise_sd, + outcome_index=i, + ), + op=ComparisonOp.GEQ, + bound=0.0, + relative=False, + ) + ) + outcome_names.append(outcome_name) + + optimization_config = OptimizationConfig( + objective=objective, + outcome_constraints=outcome_constraints, + ) + optimal_value = ( + test_problem.max_hv + if isinstance(test_problem, MultiObjectiveTestProblem) + else test_problem.optimal_value + ) + return SingleObjectiveBenchmarkProblem( + name=name, + search_space=search_space, + optimization_config=optimization_config, + runner=BotorchTestProblemRunner( + test_problem_class=test_problem_class, + test_problem_kwargs=test_problem_kwargs, + outcome_names=outcome_names, + ), + num_trials=num_trials, + observe_noise_stds=observe_noise_sd, + is_noiseless=test_problem.noise_std in (None, 0.0), + has_ground_truth=True, # all synthetic problems have ground truth + optimal_value=optimal_value, + ) @dataclass(kw_only=True, repr=True) @@ -239,82 +220,79 @@ class MultiObjectiveBenchmarkProblem(BenchmarkProblem): reference_point: List[float] optimization_config: MultiObjectiveOptimizationConfig - @classmethod - def from_botorch_multi_objective( - cls: Type[TMultiObjectiveBenchmarkProblem], - test_problem_class: Type[MultiObjectiveTestProblem], - test_problem_kwargs: Dict[str, Any], - # TODO: Figure out whether we should use `lower_is_better` here. - num_trials: int, - observe_noise_sd: bool = False, - ) -> TMultiObjectiveBenchmarkProblem: - """Create a BenchmarkProblem from a BoTorch BaseTestProblem using specialized - Metrics and Runners. The test problem's result will be computed on the Runner - once per trial and each Metric will retrieve its own result by index. - """ - if issubclass(test_problem_class, ConstrainedBaseTestProblem): - raise NotImplementedError( - "Constrained multi-objective problems are not supported." - ) - # pyre-fixme [45]: Invalid class instantiation - test_problem = test_problem_class(**test_problem_kwargs) +def create_multi_objective_problem_from_botorch( + test_problem_class: Type[MultiObjectiveTestProblem], + test_problem_kwargs: Dict[str, Any], + # TODO: Figure out whether we should use `lower_is_better` here. + num_trials: int, + observe_noise_sd: bool = False, +) -> MultiObjectiveBenchmarkProblem: + """Create a BenchmarkProblem from a BoTorch BaseTestProblem using specialized + Metrics and Runners. The test problem's result will be computed on the Runner + once per trial and each Metric will retrieve its own result by index. + """ + if issubclass(test_problem_class, ConstrainedBaseTestProblem): + raise NotImplementedError( + "Constrained multi-objective problems are not supported." + ) - problem = SingleObjectiveBenchmarkProblem.from_botorch_synthetic( - # pyre-fixme [6]: Passing a multi-objective problem where a - # single-objective problem is expected. - test_problem_class=test_problem_class, - test_problem_kwargs=test_problem_kwargs, - lower_is_better=True, # Seems like we always assume minimization for MOO? 
- num_trials=num_trials, + # pyre-fixme [45]: Invalid class instantiation + test_problem = test_problem_class(**test_problem_kwargs) + + problem = create_single_objective_problem_from_botorch( + # pyre-fixme [6]: Passing a multi-objective problem where a + # single-objective problem is expected. + test_problem_class=test_problem_class, + test_problem_kwargs=test_problem_kwargs, + lower_is_better=True, # Seems like we always assume minimization for MOO? + num_trials=num_trials, + observe_noise_sd=observe_noise_sd, + ) + + name = problem.name + + n_obj = test_problem.num_objectives + if not observe_noise_sd: + noise_sds = [None] * n_obj + elif isinstance(test_problem.noise_std, list): + noise_sds = test_problem.noise_std + else: + noise_sds = [checked_cast(float, test_problem.noise_std or 0.0)] * n_obj + + metrics = [ + BenchmarkMetric( + name=f"{name}_{i}", + lower_is_better=True, observe_noise_sd=observe_noise_sd, + outcome_index=i, ) - - name = problem.name - - n_obj = test_problem.num_objectives - if not observe_noise_sd: - noise_sds = [None] * n_obj - elif isinstance(test_problem.noise_std, list): - noise_sds = test_problem.noise_std - else: - noise_sds = [checked_cast(float, test_problem.noise_std or 0.0)] * n_obj - - metrics = [ - BenchmarkMetric( - name=f"{name}_{i}", - lower_is_better=True, - observe_noise_sd=observe_noise_sd, - outcome_index=i, + for i, noise_sd in enumerate(noise_sds) + ] + optimization_config = MultiObjectiveOptimizationConfig( + objective=MultiObjective( + objectives=[Objective(metric=metric, minimize=True) for metric in metrics] + ), + objective_thresholds=[ + ObjectiveThreshold( + metric=metric, + bound=test_problem.ref_point[i].item(), + relative=False, + op=ComparisonOp.LEQ, ) - for i, noise_sd in enumerate(noise_sds) - ] - optimization_config = MultiObjectiveOptimizationConfig( - objective=MultiObjective( - objectives=[ - Objective(metric=metric, minimize=True) for metric in metrics - ] - ), - objective_thresholds=[ - ObjectiveThreshold( - metric=metric, - bound=test_problem.ref_point[i].item(), - relative=False, - op=ComparisonOp.LEQ, - ) - for i, metric in enumerate(metrics) - ], - ) - - return cls( - name=name, - search_space=problem.search_space, - optimization_config=optimization_config, - runner=problem.runner, - num_trials=num_trials, - is_noiseless=problem.is_noiseless, - observe_noise_stds=observe_noise_sd, - has_ground_truth=problem.has_ground_truth, - optimal_value=test_problem.max_hv, - reference_point=test_problem._ref_point, - ) + for i, metric in enumerate(metrics) + ], + ) + + return MultiObjectiveBenchmarkProblem( + name=name, + search_space=problem.search_space, + optimization_config=optimization_config, + runner=problem.runner, + num_trials=num_trials, + is_noiseless=problem.is_noiseless, + observe_noise_stds=observe_noise_sd, + has_ground_truth=problem.has_ground_truth, + optimal_value=test_problem.max_hv, + reference_point=test_problem._ref_point, + ) diff --git a/ax/benchmark/problems/registry.py b/ax/benchmark/problems/registry.py index 4c6521af1a6..a7bc652a7b9 100644 --- a/ax/benchmark/problems/registry.py +++ b/ax/benchmark/problems/registry.py @@ -11,8 +11,8 @@ from ax.benchmark.benchmark_problem import ( BenchmarkProblem, - MultiObjectiveBenchmarkProblem, - SingleObjectiveBenchmarkProblem, + create_multi_objective_problem_from_botorch, + create_single_objective_problem_from_botorch, ) from ax.benchmark.problems.hd_embedding import embed_higher_dimension from ax.benchmark.problems.hpo.torchvision import 
PyTorchCNNTorchvisionBenchmarkProblem @@ -29,7 +29,7 @@ class BenchmarkProblemRegistryEntry: BENCHMARK_PROBLEM_REGISTRY = { "ackley4": BenchmarkProblemRegistryEntry( - factory_fn=SingleObjectiveBenchmarkProblem.from_botorch_synthetic, + factory_fn=create_single_objective_problem_from_botorch, factory_kwargs={ "test_problem_class": synthetic.Ackley, "test_problem_kwargs": {"dim": 4}, @@ -39,7 +39,7 @@ class BenchmarkProblemRegistryEntry: }, ), "branin": BenchmarkProblemRegistryEntry( - factory_fn=SingleObjectiveBenchmarkProblem.from_botorch_synthetic, + factory_fn=create_single_objective_problem_from_botorch, factory_kwargs={ "test_problem_class": synthetic.Branin, "test_problem_kwargs": {}, @@ -49,7 +49,7 @@ class BenchmarkProblemRegistryEntry: }, ), "branin_currin": BenchmarkProblemRegistryEntry( - factory_fn=MultiObjectiveBenchmarkProblem.from_botorch_multi_objective, + factory_fn=create_multi_objective_problem_from_botorch, factory_kwargs={ "test_problem_class": BraninCurrin, "test_problem_kwargs": {}, @@ -59,7 +59,7 @@ class BenchmarkProblemRegistryEntry: ), "branin_currin30": BenchmarkProblemRegistryEntry( factory_fn=lambda n, num_trials: embed_higher_dimension( - problem=MultiObjectiveBenchmarkProblem.from_botorch_multi_objective( + problem=create_multi_objective_problem_from_botorch( test_problem_class=BraninCurrin, test_problem_kwargs={}, num_trials=num_trials, @@ -70,7 +70,7 @@ class BenchmarkProblemRegistryEntry: factory_kwargs={"n": 30, "num_trials": 30}, ), "griewank4": BenchmarkProblemRegistryEntry( - factory_fn=SingleObjectiveBenchmarkProblem.from_botorch_synthetic, + factory_fn=create_single_objective_problem_from_botorch, factory_kwargs={ "test_problem_class": synthetic.Griewank, "test_problem_kwargs": {"dim": 4}, @@ -80,7 +80,7 @@ class BenchmarkProblemRegistryEntry: }, ), "hartmann3": BenchmarkProblemRegistryEntry( - factory_fn=SingleObjectiveBenchmarkProblem.from_botorch_synthetic, + factory_fn=create_single_objective_problem_from_botorch, factory_kwargs={ "test_problem_class": synthetic.Hartmann, "test_problem_kwargs": {"dim": 3}, @@ -90,7 +90,7 @@ class BenchmarkProblemRegistryEntry: }, ), "hartmann6": BenchmarkProblemRegistryEntry( - factory_fn=SingleObjectiveBenchmarkProblem.from_botorch_synthetic, + factory_fn=create_single_objective_problem_from_botorch, factory_kwargs={ "test_problem_class": synthetic.Hartmann, "test_problem_kwargs": {"dim": 6}, @@ -101,7 +101,7 @@ class BenchmarkProblemRegistryEntry: ), "hartmann30": BenchmarkProblemRegistryEntry( factory_fn=lambda n, num_trials: embed_higher_dimension( - problem=SingleObjectiveBenchmarkProblem.from_botorch_synthetic( + problem=create_single_objective_problem_from_botorch( test_problem_class=synthetic.Hartmann, test_problem_kwargs={"dim": 6}, lower_is_better=True, @@ -131,7 +131,7 @@ class BenchmarkProblemRegistryEntry: factory_kwargs={"num_trials": 50, "observe_noise_sd": False}, ), "levy4": BenchmarkProblemRegistryEntry( - factory_fn=SingleObjectiveBenchmarkProblem.from_botorch_synthetic, + factory_fn=create_single_objective_problem_from_botorch, factory_kwargs={ "test_problem_class": synthetic.Levy, "test_problem_kwargs": {"dim": 4}, @@ -141,7 +141,7 @@ class BenchmarkProblemRegistryEntry: }, ), "powell4": BenchmarkProblemRegistryEntry( - factory_fn=SingleObjectiveBenchmarkProblem.from_botorch_synthetic, + factory_fn=create_single_objective_problem_from_botorch, factory_kwargs={ "test_problem_class": synthetic.Powell, "test_problem_kwargs": {"dim": 4}, @@ -151,7 +151,7 @@ class BenchmarkProblemRegistryEntry: 
}, ), "rosenbrock4": BenchmarkProblemRegistryEntry( - factory_fn=SingleObjectiveBenchmarkProblem.from_botorch_synthetic, + factory_fn=create_single_objective_problem_from_botorch, factory_kwargs={ "test_problem_class": synthetic.Rosenbrock, "test_problem_kwargs": {"dim": 4}, @@ -161,7 +161,7 @@ class BenchmarkProblemRegistryEntry: }, ), "six_hump_camel": BenchmarkProblemRegistryEntry( - factory_fn=SingleObjectiveBenchmarkProblem.from_botorch_synthetic, + factory_fn=create_single_objective_problem_from_botorch, factory_kwargs={ "test_problem_class": synthetic.SixHumpCamel, "test_problem_kwargs": {}, @@ -171,7 +171,7 @@ class BenchmarkProblemRegistryEntry: }, ), "three_hump_camel": BenchmarkProblemRegistryEntry( - factory_fn=SingleObjectiveBenchmarkProblem.from_botorch_synthetic, + factory_fn=create_single_objective_problem_from_botorch, factory_kwargs={ "test_problem_class": synthetic.ThreeHumpCamel, "test_problem_kwargs": {}, @@ -182,7 +182,7 @@ class BenchmarkProblemRegistryEntry: ), # Problems where we observe the noise level "branin_observed_noise": BenchmarkProblemRegistryEntry( - factory_fn=SingleObjectiveBenchmarkProblem.from_botorch_synthetic, + factory_fn=create_single_objective_problem_from_botorch, factory_kwargs={ "test_problem_class": synthetic.Branin, "test_problem_kwargs": {}, @@ -192,7 +192,7 @@ class BenchmarkProblemRegistryEntry: }, ), "branin_currin_observed_noise": BenchmarkProblemRegistryEntry( - factory_fn=MultiObjectiveBenchmarkProblem.from_botorch_multi_objective, + factory_fn=create_multi_objective_problem_from_botorch, factory_kwargs={ "test_problem_class": BraninCurrin, "test_problem_kwargs": {}, @@ -202,7 +202,7 @@ class BenchmarkProblemRegistryEntry: ), "branin_currin30_observed_noise": BenchmarkProblemRegistryEntry( factory_fn=lambda n, num_trials: embed_higher_dimension( - problem=MultiObjectiveBenchmarkProblem.from_botorch_multi_objective( + problem=create_multi_objective_problem_from_botorch( test_problem_class=BraninCurrin, test_problem_kwargs={}, num_trials=num_trials, @@ -213,7 +213,7 @@ class BenchmarkProblemRegistryEntry: factory_kwargs={"n": 30, "num_trials": 30}, ), "hartmann6_observed_noise": BenchmarkProblemRegistryEntry( - factory_fn=SingleObjectiveBenchmarkProblem.from_botorch_synthetic, + factory_fn=create_single_objective_problem_from_botorch, factory_kwargs={ "test_problem_class": synthetic.Hartmann, "test_problem_kwargs": {"dim": 6}, @@ -224,7 +224,7 @@ class BenchmarkProblemRegistryEntry: ), "hartmann30_observed_noise": BenchmarkProblemRegistryEntry( factory_fn=lambda n, num_trials: embed_higher_dimension( - problem=SingleObjectiveBenchmarkProblem.from_botorch_synthetic( + problem=create_single_objective_problem_from_botorch( test_problem_class=synthetic.Hartmann, test_problem_kwargs={"dim": 6}, lower_is_better=True, @@ -240,7 +240,7 @@ class BenchmarkProblemRegistryEntry: factory_kwargs={"num_trials": 25, "observe_noise_sd": True}, ), "constrained_gramacy_observed_noise": BenchmarkProblemRegistryEntry( - factory_fn=SingleObjectiveBenchmarkProblem.from_botorch_synthetic, + factory_fn=create_single_objective_problem_from_botorch, factory_kwargs={ "test_problem_class": synthetic.ConstrainedGramacy, "test_problem_kwargs": {}, diff --git a/ax/benchmark/tests/test_benchmark.py b/ax/benchmark/tests/test_benchmark.py index e5a184e9494..3d0ae2eeda3 100644 --- a/ax/benchmark/tests/test_benchmark.py +++ b/ax/benchmark/tests/test_benchmark.py @@ -21,7 +21,7 @@ BenchmarkMethod, get_benchmark_scheduler_options, ) -from ax.benchmark.benchmark_problem 
import SingleObjectiveBenchmarkProblem +from ax.benchmark.benchmark_problem import create_single_objective_problem_from_botorch from ax.benchmark.benchmark_result import BenchmarkResult from ax.benchmark.methods.modular_botorch import get_sobol_botorch_modular_acquisition from ax.benchmark.metrics.base import GroundTruthMetricMixin @@ -439,7 +439,7 @@ def test_benchmark_multiple_problems_methods(self) -> None: self.assertTrue((agg.score_trace[col] <= 100).all()) def test_timeout(self) -> None: - problem = SingleObjectiveBenchmarkProblem.from_botorch_synthetic( + problem = create_single_objective_problem_from_botorch( test_problem_class=Branin, test_problem_kwargs={}, lower_is_better=True, diff --git a/ax/benchmark/tests/test_benchmark_problem.py b/ax/benchmark/tests/test_benchmark_problem.py index 4df3f267202..640e5f721be 100644 --- a/ax/benchmark/tests/test_benchmark_problem.py +++ b/ax/benchmark/tests/test_benchmark_problem.py @@ -8,15 +8,18 @@ from typing import List, Optional, Union from ax.benchmark.benchmark_problem import ( - MultiObjectiveBenchmarkProblem, - SingleObjectiveBenchmarkProblem, + create_multi_objective_problem_from_botorch, + create_single_objective_problem_from_botorch, ) from ax.benchmark.metrics.benchmark import BenchmarkMetric from ax.benchmark.runners.botorch_test import BotorchTestProblemRunner from ax.core.types import ComparisonOp from ax.utils.common.testutils import TestCase from ax.utils.common.typeutils import checked_cast -from botorch.test_functions.multi_objective import BraninCurrin, ConstrainedBraninCurrin +from ax.utils.testing.benchmark_stubs import ( + get_constrained_multi_objective_benchmark_problem, +) +from botorch.test_functions.multi_objective import BraninCurrin from botorch.test_functions.synthetic import ( Ackley, ConstrainedGramacy, @@ -34,7 +37,7 @@ def setUp(self) -> None: def test_single_objective_from_botorch(self) -> None: for botorch_test_problem in [Ackley(), ConstrainedHartmann(dim=6)]: - test_problem = SingleObjectiveBenchmarkProblem.from_botorch_synthetic( + test_problem = create_single_objective_problem_from_botorch( test_problem_class=botorch_test_problem.__class__, test_problem_kwargs={}, lower_is_better=True, @@ -131,7 +134,7 @@ def test_constrained_from_botorch( objective_noise_std: Optional[float], constraint_noise_std: Optional[Union[float, List[float]]], ) -> None: - ax_problem = SingleObjectiveBenchmarkProblem.from_botorch_synthetic( + ax_problem = create_single_objective_problem_from_botorch( test_problem_class=ConstrainedGramacy, test_problem_kwargs={ "noise_std": objective_noise_std, @@ -167,12 +170,10 @@ def test_constrained_from_botorch( def test_moo_from_botorch(self) -> None: test_problem = BraninCurrin() - branin_currin_problem = ( - MultiObjectiveBenchmarkProblem.from_botorch_multi_objective( - test_problem_class=test_problem.__class__, - test_problem_kwargs={}, - num_trials=1, - ) + branin_currin_problem = create_multi_objective_problem_from_botorch( + test_problem_class=test_problem.__class__, + test_problem_kwargs={}, + num_trials=1, ) # Test search space @@ -209,14 +210,10 @@ def test_moo_from_botorch_constrained(self) -> None: NotImplementedError, "Constrained multi-objective problems are not supported.", ): - MultiObjectiveBenchmarkProblem.from_botorch_multi_objective( - test_problem_class=ConstrainedBraninCurrin, - test_problem_kwargs={}, - num_trials=1, - ) + get_constrained_multi_objective_benchmark_problem() def test_maximization_problem(self) -> None: - test_problem = 
SingleObjectiveBenchmarkProblem.from_botorch_synthetic( + test_problem = create_single_objective_problem_from_botorch( test_problem_class=Cosine8, lower_is_better=False, num_trials=1, diff --git a/ax/utils/testing/benchmark_stubs.py b/ax/utils/testing/benchmark_stubs.py index d092963c3a1..663b25dcb13 100644 --- a/ax/utils/testing/benchmark_stubs.py +++ b/ax/utils/testing/benchmark_stubs.py @@ -11,6 +11,8 @@ import numpy as np from ax.benchmark.benchmark_method import BenchmarkMethod from ax.benchmark.benchmark_problem import ( + create_multi_objective_problem_from_botorch, + create_single_objective_problem_from_botorch, MultiObjectiveBenchmarkProblem, SingleObjectiveBenchmarkProblem, ) @@ -44,7 +46,7 @@ def get_benchmark_problem() -> SingleObjectiveBenchmarkProblem: - return SingleObjectiveBenchmarkProblem.from_botorch_synthetic( + return create_single_objective_problem_from_botorch( test_problem_class=Branin, test_problem_kwargs={}, lower_is_better=True, @@ -57,7 +59,7 @@ def get_single_objective_benchmark_problem( num_trials: int = 4, test_problem_kwargs: Optional[Dict[str, Any]] = None, ) -> SingleObjectiveBenchmarkProblem: - return SingleObjectiveBenchmarkProblem.from_botorch_synthetic( + return create_single_objective_problem_from_botorch( test_problem_class=Branin, test_problem_kwargs=test_problem_kwargs or {}, lower_is_better=True, @@ -69,7 +71,7 @@ def get_single_objective_benchmark_problem( def get_multi_objective_benchmark_problem( observe_noise_sd: bool = False, num_trials: int = 4 ) -> MultiObjectiveBenchmarkProblem: - return MultiObjectiveBenchmarkProblem.from_botorch_multi_objective( + return create_multi_objective_problem_from_botorch( test_problem_class=BraninCurrin, test_problem_kwargs={}, num_trials=num_trials, @@ -80,7 +82,7 @@ def get_multi_objective_benchmark_problem( def get_constrained_multi_objective_benchmark_problem( observe_noise_sd: bool = False, num_trials: int = 4 ) -> MultiObjectiveBenchmarkProblem: - return MultiObjectiveBenchmarkProblem.from_botorch_multi_objective( + return create_multi_objective_problem_from_botorch( test_problem_class=ConstrainedBraninCurrin, test_problem_kwargs={}, num_trials=num_trials, From 646cef4aa80b2460315003ad9e3ac9e8cf38b013 Mon Sep 17 00:00:00 2001 From: Elizabeth Santorella Date: Sun, 28 Jul 2024 09:06:32 -0700 Subject: [PATCH 5/5] Get rid of `SingleObjectiveBenchmarkProblem` (#2606) Summary: Pull Request resolved: https://github.com/facebook/Ax/pull/2606 The class is not adding anything. We should check if problems are single-objective by looking at the type of their `optimization_config`. 
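As an illustrative sketch only (not code added by this diff; the helper name `is_single_objective` is hypothetical), the `optimization_config`-based check described above could look like:

    from ax.benchmark.benchmark_problem import BenchmarkProblem
    from ax.core.optimization_config import MultiObjectiveOptimizationConfig


    def is_single_objective(problem: BenchmarkProblem) -> bool:
        # Hypothetical helper, not part of this diff: treat a problem as
        # single-objective unless its optimization_config is the
        # multi-objective variant.
        return not isinstance(
            problem.optimization_config, MultiObjectiveOptimizationConfig
        )
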
This PR: * Removes `SingleObjectiveBenchmarkProblem` * Replaces all references to it with `BenchmarkProblem` * Removes `get_benchmark_problem`, which is redundant with `get_single_objective_benchmark_problem` Reviewed By: dme65 Differential Revision: D60285191 --- ax/benchmark/benchmark_problem.py | 17 ++++----- ax/benchmark/problems/hpo/pytorch_cnn.py | 4 +-- .../synthetic/discretized/mixed_integer.py | 12 +++---- .../problems/synthetic/hss/jenatton.py | 6 ++-- ax/benchmark/tests/test_benchmark_problem.py | 4 +-- ax/storage/json_store/encoders.py | 36 ++----------------- ax/storage/json_store/registry.py | 4 --- .../json_store/tests/test_json_store.py | 3 +- ax/utils/testing/benchmark_stubs.py | 13 ++----- 9 files changed, 25 insertions(+), 74 deletions(-) diff --git a/ax/benchmark/benchmark_problem.py b/ax/benchmark/benchmark_problem.py index 5d8766efe40..d7cf78cc46a 100644 --- a/ax/benchmark/benchmark_problem.py +++ b/ax/benchmark/benchmark_problem.py @@ -93,23 +93,18 @@ class BenchmarkProblem(Base): is_noiseless: bool -class SingleObjectiveBenchmarkProblem(BenchmarkProblem): - """A `BenchmarkProblem` that supports a single objective.""" - - pass - - def create_single_objective_problem_from_botorch( test_problem_class: Type[SyntheticTestFunction], test_problem_kwargs: Dict[str, Any], lower_is_better: bool, num_trials: int, observe_noise_sd: bool = False, -) -> SingleObjectiveBenchmarkProblem: +) -> BenchmarkProblem: """ - Create a BenchmarkProblem from a BoTorch BaseTestProblem using - specialized Metrics and Runners. The test problem's result will be - computed on the Runner and retrieved by the Metric. + Create a `BenchmarkProblem` whose `optimization_config` is a + `SingleObjectiveOptimizationConfig` a BoTorch SyntheticTestFunction using + specialized Metrics and Runners for benchmarking. The test problem's result + will be computed on the Runner and retrieved by the Metric. 
Args: test_problem_class: The BoTorch test problem class which will be used @@ -188,7 +183,7 @@ def create_single_objective_problem_from_botorch( if isinstance(test_problem, MultiObjectiveTestProblem) else test_problem.optimal_value ) - return SingleObjectiveBenchmarkProblem( + return BenchmarkProblem( name=name, search_space=search_space, optimization_config=optimization_config, diff --git a/ax/benchmark/problems/hpo/pytorch_cnn.py b/ax/benchmark/problems/hpo/pytorch_cnn.py index 0a5db5dbb8b..15da5b9d30e 100644 --- a/ax/benchmark/problems/hpo/pytorch_cnn.py +++ b/ax/benchmark/problems/hpo/pytorch_cnn.py @@ -9,7 +9,7 @@ import pandas as pd import torch -from ax.benchmark.benchmark_problem import SingleObjectiveBenchmarkProblem +from ax.benchmark.benchmark_problem import BenchmarkProblem from ax.core.base_trial import BaseTrial, TrialStatus from ax.core.data import Data from ax.core.metric import Metric, MetricFetchE, MetricFetchResult @@ -26,7 +26,7 @@ from torch.utils.data import DataLoader, Dataset -class PyTorchCNNBenchmarkProblem(SingleObjectiveBenchmarkProblem): +class PyTorchCNNBenchmarkProblem(BenchmarkProblem): @equality_typechecker def __eq__(self, other: Base) -> bool: if not isinstance(other, PyTorchCNNBenchmarkProblem): diff --git a/ax/benchmark/problems/synthetic/discretized/mixed_integer.py b/ax/benchmark/problems/synthetic/discretized/mixed_integer.py index b7bcaeb9080..4f72e18fffb 100644 --- a/ax/benchmark/problems/synthetic/discretized/mixed_integer.py +++ b/ax/benchmark/problems/synthetic/discretized/mixed_integer.py @@ -20,7 +20,7 @@ from typing import Dict, List, Optional, Tuple, Type, Union -from ax.benchmark.benchmark_problem import SingleObjectiveBenchmarkProblem +from ax.benchmark.benchmark_problem import BenchmarkProblem from ax.benchmark.metrics.benchmark import BenchmarkMetric from ax.benchmark.runners.botorch_test import BotorchTestProblemRunner from ax.core.objective import Objective @@ -47,7 +47,7 @@ def _get_problem_from_common_inputs( num_trials: int, optimal_value: float, test_problem_bounds: Optional[List[Tuple[float, float]]] = None, -) -> SingleObjectiveBenchmarkProblem: +) -> BenchmarkProblem: """This is a helper that deduplicates common bits of the below problems. Args: @@ -111,7 +111,7 @@ def _get_problem_from_common_inputs( outcome_names=[metric_name], modified_bounds=bounds, ) - return SingleObjectiveBenchmarkProblem( + return BenchmarkProblem( name=benchmark_name + ("_observed_noise" if observe_noise_sd else ""), search_space=search_space, optimization_config=optimization_config, @@ -128,7 +128,7 @@ def get_discrete_hartmann( num_trials: int = 50, observe_noise_sd: bool = False, bounds: Optional[List[Tuple[float, float]]] = None, -) -> SingleObjectiveBenchmarkProblem: +) -> BenchmarkProblem: """6D Hartmann problem where first 4 dimensions are discretized.""" dim_int = 4 if bounds is None: @@ -160,7 +160,7 @@ def get_discrete_ackley( num_trials: int = 50, observe_noise_sd: bool = False, bounds: Optional[List[Tuple[float, float]]] = None, -) -> SingleObjectiveBenchmarkProblem: +) -> BenchmarkProblem: """13D Ackley problem where first 10 dimensions are discretized. This also restricts Ackley evaluation bounds to [0, 1]. 
@@ -193,7 +193,7 @@ def get_discrete_rosenbrock( num_trials: int = 50, observe_noise_sd: bool = False, bounds: Optional[List[Tuple[float, float]]] = None, -) -> SingleObjectiveBenchmarkProblem: +) -> BenchmarkProblem: """10D Rosenbrock problem where first 6 dimensions are discretized.""" dim_int = 6 if bounds is None: diff --git a/ax/benchmark/problems/synthetic/hss/jenatton.py b/ax/benchmark/problems/synthetic/hss/jenatton.py index f424db30904..f545ac39400 100644 --- a/ax/benchmark/problems/synthetic/hss/jenatton.py +++ b/ax/benchmark/problems/synthetic/hss/jenatton.py @@ -5,7 +5,7 @@ # pyre-strict -from ax.benchmark.benchmark_problem import SingleObjectiveBenchmarkProblem +from ax.benchmark.benchmark_problem import BenchmarkProblem from ax.benchmark.metrics.jenatton import JenattonMetric from ax.core.objective import Objective from ax.core.optimization_config import OptimizationConfig @@ -17,7 +17,7 @@ def get_jenatton_benchmark_problem( num_trials: int = 50, observe_noise_sd: bool = False, -) -> SingleObjectiveBenchmarkProblem: +) -> BenchmarkProblem: search_space = HierarchicalSearchSpace( parameters=[ ChoiceParameter( @@ -65,7 +65,7 @@ def get_jenatton_benchmark_problem( name = "Jenatton" + ("_observed_noise" if observe_noise_sd else "") - return SingleObjectiveBenchmarkProblem( + return BenchmarkProblem( name=name, search_space=search_space, optimization_config=optimization_config, diff --git a/ax/benchmark/tests/test_benchmark_problem.py b/ax/benchmark/tests/test_benchmark_problem.py index 640e5f721be..cdd5a931a80 100644 --- a/ax/benchmark/tests/test_benchmark_problem.py +++ b/ax/benchmark/tests/test_benchmark_problem.py @@ -85,7 +85,7 @@ def test_single_objective_from_botorch(self) -> None: test_problem.optimization_config.outcome_constraints, [] ) expected_repr = ( - "SingleObjectiveBenchmarkProblem(name='Ackley', " + "BenchmarkProblem(name='Ackley', " "optimization_config=OptimizationConfig(objective=Objective(" 'metric_name="Ackley", ' "minimize=True), outcome_constraints=[]), " @@ -105,7 +105,7 @@ def test_single_objective_from_botorch(self) -> None: self.assertFalse(outcome_constraint.relative) self.assertEqual(outcome_constraint.bound, 0.0) expected_repr = ( - "SingleObjectiveBenchmarkProblem(name='ConstrainedHartmann', " + "BenchmarkProblem(name='ConstrainedHartmann', " "optimization_config=OptimizationConfig(objective=Objective(" 'metric_name="ConstrainedHartmann", minimize=True), ' "outcome_constraints=[OutcomeConstraint(constraint_slack_0" diff --git a/ax/storage/json_store/encoders.py b/ax/storage/json_store/encoders.py index dbfb20311e2..bbd36271aa9 100644 --- a/ax/storage/json_store/encoders.py +++ b/ax/storage/json_store/encoders.py @@ -14,7 +14,6 @@ from ax.benchmark.benchmark_problem import ( BenchmarkProblem, MultiObjectiveBenchmarkProblem, - SingleObjectiveBenchmarkProblem, ) from ax.benchmark.problems.hpo.torchvision import PyTorchCNNTorchvisionBenchmarkProblem from ax.core import ObservationFeatures @@ -155,38 +154,9 @@ def multi_objective_benchmark_problem_to_dict( moo_benchmark_problem: MultiObjectiveBenchmarkProblem, ) -> Dict[str, Any]: """Convert Ax multi-objective benchmark problem to a dictionary.""" - return { - "__type": moo_benchmark_problem.__class__.__name__, - "name": moo_benchmark_problem.name, - "search_space": moo_benchmark_problem.search_space, - "optimization_config": moo_benchmark_problem.optimization_config, - "runner": moo_benchmark_problem.runner, - "num_trials": moo_benchmark_problem.num_trials, - "is_noiseless": 
moo_benchmark_problem.is_noiseless, - "observe_noise_stds": moo_benchmark_problem.observe_noise_stds, - "has_ground_truth": moo_benchmark_problem.has_ground_truth, - "tracking_metrics": moo_benchmark_problem.tracking_metrics, - "optimal_value": moo_benchmark_problem.optimal_value, - "reference_point": moo_benchmark_problem.reference_point, - } - - -def single_objective_benchmark_problem_to_dict( - soo_benchmark_problem: SingleObjectiveBenchmarkProblem, -) -> Dict[str, Any]: - return { - "__type": soo_benchmark_problem.__class__.__name__, - "name": soo_benchmark_problem.name, - "search_space": soo_benchmark_problem.search_space, - "optimization_config": soo_benchmark_problem.optimization_config, - "runner": soo_benchmark_problem.runner, - "num_trials": soo_benchmark_problem.num_trials, - "is_noiseless": soo_benchmark_problem.is_noiseless, - "observe_noise_stds": soo_benchmark_problem.observe_noise_stds, - "has_ground_truth": soo_benchmark_problem.has_ground_truth, - "tracking_metrics": soo_benchmark_problem.tracking_metrics, - "optimal_value": soo_benchmark_problem.optimal_value, - } + result = benchmark_problem_to_dict(moo_benchmark_problem) + result["reference_point"] = moo_benchmark_problem.reference_point + return result def trial_to_dict(trial: Trial) -> Dict[str, Any]: diff --git a/ax/storage/json_store/registry.py b/ax/storage/json_store/registry.py index 9813e14fd7d..393cddb8e32 100644 --- a/ax/storage/json_store/registry.py +++ b/ax/storage/json_store/registry.py @@ -14,7 +14,6 @@ from ax.benchmark.benchmark_problem import ( BenchmarkProblem, MultiObjectiveBenchmarkProblem, - SingleObjectiveBenchmarkProblem, ) from ax.benchmark.benchmark_result import AggregatedBenchmarkResult, BenchmarkResult from ax.benchmark.metrics.benchmark import BenchmarkMetric, GroundTruthBenchmarkMetric @@ -157,7 +156,6 @@ runner_to_dict, scalarized_objective_to_dict, search_space_to_dict, - single_objective_benchmark_problem_to_dict, sum_parameter_constraint_to_dict, surrogate_to_dict, threshold_early_stopping_strategy_to_dict, @@ -256,7 +254,6 @@ ScalarizedObjective: scalarized_objective_to_dict, SearchSpace: search_space_to_dict, SingleDiagnosticBestModelSelector: best_model_selector_to_dict, - SingleObjectiveBenchmarkProblem: single_objective_benchmark_problem_to_dict, HierarchicalSearchSpace: search_space_to_dict, SumConstraint: sum_parameter_constraint_to_dict, Surrogate: surrogate_to_dict, @@ -382,7 +379,6 @@ "SchedulerOptions": SchedulerOptions, "SearchSpace": SearchSpace, "SingleDiagnosticBestModelSelector": SingleDiagnosticBestModelSelector, - "SingleObjectiveBenchmarkProblem": SingleObjectiveBenchmarkProblem, "SklearnDataset": SklearnDataset, "SklearnMetric": SklearnMetric, "SklearnModelType": SklearnModelType, diff --git a/ax/storage/json_store/tests/test_json_store.py b/ax/storage/json_store/tests/test_json_store.py index 3540cd3bcf3..ff56180c988 100644 --- a/ax/storage/json_store/tests/test_json_store.py +++ b/ax/storage/json_store/tests/test_json_store.py @@ -46,7 +46,6 @@ from ax.utils.common.testutils import TestCase from ax.utils.testing.benchmark_stubs import ( get_aggregated_benchmark_result, - get_benchmark_problem, get_benchmark_result, get_multi_objective_benchmark_problem, get_single_objective_benchmark_problem, @@ -142,7 +141,7 @@ ("AugmentedHartmannMetric", get_augmented_hartmann_metric), ("BatchTrial", get_batch_trial), ("BenchmarkMethod", get_sobol_gpei_benchmark_method), - ("BenchmarkProblem", get_benchmark_problem), + ("BenchmarkProblem", 
get_single_objective_benchmark_problem), ("BenchmarkResult", get_benchmark_result), ("BoTorchModel", get_botorch_model), ("BoTorchModel", get_botorch_model_with_default_acquisition_class), diff --git a/ax/utils/testing/benchmark_stubs.py b/ax/utils/testing/benchmark_stubs.py index 663b25dcb13..7614df239b5 100644 --- a/ax/utils/testing/benchmark_stubs.py +++ b/ax/utils/testing/benchmark_stubs.py @@ -11,10 +11,10 @@ import numpy as np from ax.benchmark.benchmark_method import BenchmarkMethod from ax.benchmark.benchmark_problem import ( + BenchmarkProblem, create_multi_objective_problem_from_botorch, create_single_objective_problem_from_botorch, MultiObjectiveBenchmarkProblem, - SingleObjectiveBenchmarkProblem, ) from ax.benchmark.benchmark_result import AggregatedBenchmarkResult, BenchmarkResult from ax.benchmark.problems.surrogate import ( @@ -45,20 +45,11 @@ from botorch.test_functions.synthetic import Branin -def get_benchmark_problem() -> SingleObjectiveBenchmarkProblem: - return create_single_objective_problem_from_botorch( - test_problem_class=Branin, - test_problem_kwargs={}, - lower_is_better=True, - num_trials=4, - ) - - def get_single_objective_benchmark_problem( observe_noise_sd: bool = False, num_trials: int = 4, test_problem_kwargs: Optional[Dict[str, Any]] = None, -) -> SingleObjectiveBenchmarkProblem: +) -> BenchmarkProblem: return create_single_objective_problem_from_botorch( test_problem_class=Branin, test_problem_kwargs=test_problem_kwargs or {},