Migrate Jenatton to use BenchmarkRunner and BenchmarkMetric #2676

Closed
wants to merge 3 commits
69 changes: 1 addition & 68 deletions ax/benchmark/metrics/jenatton.py
@@ -5,78 +5,11 @@

# pyre-strict

from __future__ import annotations
from typing import Optional

from typing import Any, Optional

import numpy as np
import pandas as pd
from ax.benchmark.metrics.base import BenchmarkMetricBase, GroundTruthMetricMixin
from ax.core.base_trial import BaseTrial
from ax.core.data import Data
from ax.core.metric import MetricFetchE, MetricFetchResult
from ax.utils.common.result import Err, Ok
from ax.utils.common.typeutils import not_none


class JenattonMetric(BenchmarkMetricBase):
"""Jenatton metric for hierarchical search spaces."""

has_ground_truth: bool = True

def __init__(
self,
name: str = "jenatton",
noise_std: float = 0.0,
observe_noise_sd: bool = False,
) -> None:
super().__init__(name=name)
self.noise_std = noise_std
self.observe_noise_sd = observe_noise_sd
self.lower_is_better = True

def fetch_trial_data(self, trial: BaseTrial, **kwargs: Any) -> MetricFetchResult:
try:
mean = [
jenatton_test_function(**arm.parameters) # pyre-ignore [6]
for _, arm in trial.arms_by_name.items()
]
if self.noise_std != 0:
mean = [m + self.noise_std * np.random.randn() for m in mean]
df = pd.DataFrame(
{
"arm_name": [name for name, _ in trial.arms_by_name.items()],
"metric_name": self.name,
"mean": mean,
"sem": self.noise_std if self.observe_noise_sd else None,
"trial_index": trial.index,
}
)
return Ok(value=Data(df=df))

except Exception as e:
return Err(
MetricFetchE(message=f"Failed to fetch {self.name}", exception=e)
)

def make_ground_truth_metric(self) -> GroundTruthJenattonMetric:
return GroundTruthJenattonMetric(original_metric=self)


class GroundTruthJenattonMetric(JenattonMetric, GroundTruthMetricMixin):
def __init__(self, original_metric: JenattonMetric) -> None:
"""
Args:
original_metric: The original JenattonMetric to which this metric
corresponds.
"""
super().__init__(
name=self.get_ground_truth_name(original_metric),
noise_std=0.0,
observe_noise_sd=False,
)


def jenatton_test_function(
x1: Optional[int] = None,
x2: Optional[int] = None,
… (unchanged lines below not shown)
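
With the custom `JenattonMetric` removed, this module keeps only the pure `jenatton_test_function`; evaluation and noise handling move to the runner, and the problem now uses the generic `BenchmarkMetric` (next file). A minimal sketch of the replacement, assuming — as the rest of this diff suggests — that `BenchmarkMetric` reads the outcomes produced by the `BenchmarkRunner` from trial run metadata rather than re-evaluating the test function itself:

from ax.benchmark.metrics.benchmark import BenchmarkMetric

# Generic replacement for the deleted JenattonMetric, constructed with the
# same arguments used in get_jenatton_benchmark_problem below.
metric = BenchmarkMetric(
    name="Jenatton",
    observe_noise_sd=False,
    lower_is_better=True,
)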
56 changes: 47 additions & 9 deletions ax/benchmark/problems/synthetic/hss/jenatton.py
@@ -5,18 +5,52 @@

# pyre-strict

from dataclasses import dataclass
from typing import Optional

import torch
from ax.benchmark.benchmark_problem import BenchmarkProblem
from ax.benchmark.metrics.jenatton import JenattonMetric
from ax.benchmark.metrics.benchmark import BenchmarkMetric
from ax.benchmark.metrics.jenatton import jenatton_test_function
from ax.benchmark.runners.botorch_test import (
ParamBasedTestProblem,
ParamBasedTestProblemRunner,
)
from ax.core.objective import Objective
from ax.core.optimization_config import OptimizationConfig
from ax.core.parameter import ChoiceParameter, ParameterType, RangeParameter
from ax.core.search_space import HierarchicalSearchSpace
from ax.runners.synthetic import SyntheticRunner
from ax.core.types import TParameterization


@dataclass(kw_only=True)
class Jenatton(ParamBasedTestProblem):
r"""Jenatton test function for hierarchical search spaces.

This function is taken from:

R. Jenatton, C. Archambeau, J. González, and M. Seeger. Bayesian
optimization with tree-structured dependencies. ICML 2017.
"""

noise_std: Optional[float] = None
negate: bool = False
num_objectives: int = 1
optimal_value: float = 0.1
_is_constrained: bool = False

def evaluate_true(self, params: TParameterization) -> torch.Tensor:
# pyre-fixme: Incompatible parameter type [6]: In call
# `jenatton_test_function`, for 1st positional argument, expected
# `Optional[float]` but got `Union[None, bool, float, int, str]`.
value = jenatton_test_function(**params)
return torch.tensor(value)


def get_jenatton_benchmark_problem(
num_trials: int = 50,
observe_noise_sd: bool = False,
noise_std: float = 0.0,
) -> BenchmarkProblem:
search_space = HierarchicalSearchSpace(
parameters=[
@@ -55,24 +89,28 @@ def get_jenatton_benchmark_problem(
),
]
)
name = "Jenatton" + ("_observed_noise" if observe_noise_sd else "")

optimization_config = OptimizationConfig(
objective=Objective(
metric=JenattonMetric(observe_noise_sd=observe_noise_sd),
metric=BenchmarkMetric(
name=name, observe_noise_sd=observe_noise_sd, lower_is_better=True
),
minimize=True,
)
)

name = "Jenatton" + ("_observed_noise" if observe_noise_sd else "")

return BenchmarkProblem(
name=name,
search_space=search_space,
optimization_config=optimization_config,
runner=SyntheticRunner(),
runner=ParamBasedTestProblemRunner(
test_problem_class=Jenatton,
test_problem_kwargs={"noise_std": noise_std},
outcome_names=[name],
),
num_trials=num_trials,
is_noiseless=True,
is_noiseless=noise_std == 0.0,
observe_noise_stds=observe_noise_sd,
has_ground_truth=True,
optimal_value=0.1,
optimal_value=Jenatton.optimal_value,
)
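
A short usage sketch of the updated factory, based only on the signature above; attribute access on the returned `BenchmarkProblem` (e.g. `name`, `optimal_value`) mirrors the constructor arguments and is assumed here:

from ax.benchmark.problems.synthetic.hss.jenatton import (
    get_jenatton_benchmark_problem,
)

# Noiseless variant (default): the runner adds no noise and the problem
# is marked is_noiseless=True.
problem = get_jenatton_benchmark_problem(num_trials=30)

# Noisy variant: ParamBasedTestProblemRunner adds Gaussian noise with
# std 0.1 to the ground-truth values; the name gains the
# "_observed_noise" suffix because observe_noise_sd=True.
noisy = get_jenatton_benchmark_problem(
    num_trials=30, noise_std=0.1, observe_noise_sd=True
)

print(problem.name)           # "Jenatton"
print(noisy.name)             # "Jenatton_observed_noise"
print(problem.optimal_value)  # 0.1 (Jenatton.optimal_value)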
117 changes: 57 additions & 60 deletions ax/benchmark/runners/base.py
@@ -6,12 +6,14 @@
# pyre-strict

from abc import ABC, abstractmethod
from collections.abc import Iterable
from math import sqrt
from typing import Any, Optional, Union
from typing import Any, Union

import torch
from ax.core.arm import Arm
from ax.core.base_trial import BaseTrial

from ax.core.base_trial import BaseTrial, TrialStatus
from ax.core.batch_trial import BatchTrial
from ax.core.runner import Runner
from ax.core.trial import Trial
@@ -21,45 +23,41 @@


class BenchmarkRunner(Runner, ABC):

@property
@abstractmethod
def outcome_names(self) -> list[str]:
"""The names of the outcomes of the problem (in the order of the outcomes)."""
pass # pragma: no cover
"""
A Runner that produces both observed and ground-truth values.

Observed values equal ground-truth values plus noise, with the noise added
according to the standard deviations returned by `get_noise_stds()`.

This runner does require that every benchmark has a ground truth, which
won't necessarily be true for real-world problems. Such problems fall into
two categories:
- If they are deterministic, they can be used with this runner by
viewing them as noiseless problems where the observed values are the
ground truth. The observed values will be used for tracking the
progress of optimization.
- If they are not deterministic, they are not supported. It is not
conceptually clear how to benchmark such problems, so we decided to
not over-engineer for that before such a use case arrives.
"""

outcome_names: list[str]

def get_Y_true(self, arm: Arm) -> Tensor:
"""Function returning the ground truth values for a given arm. The
synthetic noise is added as part of the Runner's `run()` method.
For problems that do not have a ground truth, the Runner must
implement the `get_Y_Ystd()` method instead."""
raise NotImplementedError(
"Must implement method `get_Y_true()` for Runner "
f"{self.__class__.__name__} as it does not implement a "
"`get_Y_Ystd()` method."
)
"""
Return the ground truth values for a given arm.

Synthetic noise is added as part of the Runner's `run()` method.
"""
...

@abstractmethod
def get_noise_stds(self) -> Union[None, float, dict[str, float]]:
"""Function returning the standard errors for the synthetic noise
to be applied to the observed values. For problems that do not have
a ground truth, the Runner must implement the `get_Y_Ystd()` method
instead."""
raise NotImplementedError(
"Must implement method `get_Y_Ystd()` for Runner "
f"{self.__class__.__name__} as it does not implement a "
"`get_noise_stds()` method."
)

def get_Y_Ystd(self, arm: Arm) -> tuple[Tensor, Optional[Tensor]]:
"""Function returning the observed values and their standard errors
for a given arm. This function is unused for problems that have a
ground truth (in this case `get_Y_true()` is used), and is required
for problems that do not have a ground truth."""
raise NotImplementedError(
"Must implement method `get_Y_Ystd()` for Runner "
f"{self.__class__.__name__} as it does not implement a "
"`get_Y_true()` method."
)
"""
Return the standard errors for the synthetic noise to be applied to the
observed values.
"""
...

def run(self, trial: BaseTrial) -> dict[str, Any]:
"""Run the trial by evaluating its parameterization(s).
@@ -110,33 +108,32 @@ def run(self, trial: BaseTrial) -> dict[str, Any]:
)

for arm in trial.arms:
try:
# Case where we do have a ground truth
Y_true = self.get_Y_true(arm)
Ys_true[arm.name] = Y_true.tolist()
if noise_stds is None:
# No noise, so just return the true outcome.
Ystds[arm.name] = [0.0] * len(Y_true)
Ys[arm.name] = Y_true.tolist()
else:
# We can scale the noise std by the inverse of the relative sample
# budget allocation to each arm. This works b/c (i) we assume that
# observations per unit sample budget are i.i.d. and (ii) the
# normalized weights sum to one.
std = noise_stds_tsr.to(Y_true) / sqrt(nlzd_arm_weights[arm])
Ystds[arm.name] = std.tolist()
Ys[arm.name] = (Y_true + std * torch.randn_like(Y_true)).tolist()
except NotImplementedError:
# Case where we don't have a ground truth.
Y, Ystd = self.get_Y_Ystd(arm)
Ys[arm.name] = Y.tolist()
Ystds[arm.name] = Ystd.tolist() if Ystd is not None else None
# Case where we do have a ground truth
Y_true = self.get_Y_true(arm)
Ys_true[arm.name] = Y_true.tolist()
if noise_stds is None:
# No noise, so just return the true outcome.
Ystds[arm.name] = [0.0] * len(Y_true)
Ys[arm.name] = Y_true.tolist()
else:
# We can scale the noise std by the inverse of the relative sample
# budget allocation to each arm. This works b/c (i) we assume that
# observations per unit sample budget are i.i.d. and (ii) the
# normalized weights sum to one.
std = noise_stds_tsr.to(Y_true) / sqrt(nlzd_arm_weights[arm])
Ystds[arm.name] = std.tolist()
Ys[arm.name] = (Y_true + std * torch.randn_like(Y_true)).tolist()

run_metadata = {
"Ys": Ys,
"Ystds": Ystds,
"outcome_names": self.outcome_names,
"Ys_true": Ys_true,
}
if Ys_true: # only add key if we actually have a ground truth
run_metadata["Ys_true"] = Ys_true
return run_metadata

# This will need to be updated once asynchronous benchmarks are supported.
def poll_trial_status(
self, trials: Iterable[BaseTrial]
) -> dict[TrialStatus, set[int]]:
return {TrialStatus.COMPLETED: {t.index for t in trials}}
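
For batch trials, the `run()` method above scales the noise standard deviation by the inverse square root of each arm's normalized weight. A standalone illustration of that arithmetic with made-up numbers (not part of the diff):

from math import sqrt

noise_std = 1.0
# Two arms with weights 3 and 1; normalized weights sum to one, so the
# arms receive 0.75 and 0.25 of the sample budget respectively.
nlzd_arm_weights = {"arm_a": 0.75, "arm_b": 0.25}

# Observations per unit budget are assumed i.i.d., so an arm that gets a
# fraction w of the budget has standard error noise_std / sqrt(w).
effective_stds = {
    name: noise_std / sqrt(w) for name, w in nlzd_arm_weights.items()
}
# {"arm_a": 1.1547, "arm_b": 2.0} -- the less-sampled arm is noisier.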