From 9b8df90a3394a09bde4f65274e699fb07f2ca41f Mon Sep 17 00:00:00 2001
From: David Eriksson <deriksson@meta.com>
Date: Wed, 28 Dec 2022 19:07:19 -0800
Subject: [PATCH] Make it possible to infer noise (#1345)

Summary:
Pull Request resolved: https://github.com/facebook/Ax/pull/1345

We currently set the noise to 0 for all benchmark problems, even though in practice we often infer it. This diff adds an `infer_noise` flag that controls whether a problem reports a fixed noise value or `None`; `None` triggers inferring the noise in the model. `infer_noise=True` is now the default for all problems, and the registry additionally gains `*_fixed_noise` variants (created with `infer_noise=False`) for the problems that should keep a fixed noise value.

Reviewed By: saitcakmak, Balandat

Differential Revision: D39413742

fbshipit-source-id: 70075940a43379bc4cf2b35366bb3e00bdd24947
---
 ax/benchmark/benchmark_problem.py             | 21 ++++--
 ax/benchmark/problems/hpo/pytorch_cnn.py      | 15 +++-
 ax/benchmark/problems/hpo/torchvision.py      | 12 ++-
 ax/benchmark/problems/registry.py             | 73 ++++++++++++++++++-
 ax/benchmark/problems/surrogate.py            |  9 ++-
 .../problems/synthetic/hss/jenatton.py        |  6 +-
 ax/metrics/botorch_test_problem.py            |  4 +-
 ax/metrics/jenatton.py                        |  4 +-
 ax/storage/json_store/decoder.py              |  4 +-
 ax/storage/json_store/encoders.py             |  1 +
 ax/utils/testing/benchmark_stubs.py           |  4 +-
 11 files changed, 125 insertions(+), 28 deletions(-)

diff --git a/ax/benchmark/benchmark_problem.py b/ax/benchmark/benchmark_problem.py
index 79cbbb87bec..3ab904b329b 100644
--- a/ax/benchmark/benchmark_problem.py
+++ b/ax/benchmark/benchmark_problem.py
@@ -43,6 +43,7 @@ class BenchmarkProblem(Base):
     optimization_config: OptimizationConfig
     runner: Runner
     num_trials: int
+    infer_noise: bool
     tracking_metrics: List[Metric] = field(default_factory=list)
 
     @classmethod
@@ -51,6 +52,7 @@ def from_botorch(
         test_problem_class: Type[BaseTestProblem],
         test_problem_kwargs: Dict[str, Any],
         num_trials: int,
+        infer_noise: bool = True,
     ) -> "BenchmarkProblem":
         """Create a BenchmarkProblem from a BoTorch BaseTestProblem using specialized
         Metrics and Runners. The test problem's result will be computed on the Runner
@@ -76,7 +78,7 @@ def from_botorch(
             objective=Objective(
                 metric=BotorchTestProblemMetric(
                     name=f"{test_problem.__class__.__name__}",
-                    noise_sd=(test_problem.noise_std or 0),
+                    noise_sd=None if infer_noise else (test_problem.noise_std or 0),
                 ),
                 minimize=True,
             )
@@ -91,6 +93,7 @@ def from_botorch(
                 test_problem_kwargs=test_problem_kwargs,
             ),
             num_trials=num_trials,
+            infer_noise=infer_noise,
         )
 
 
@@ -102,10 +105,8 @@ class SingleObjectiveBenchmarkProblem(BenchmarkProblem):
 
     optimal_value: float = field()
 
-    # pyre-fixme[2]: Parameter must be annotated.
-    def __init__(self, optimal_value: float, **kwargs) -> None:
+    def __init__(self, optimal_value: float, **kwargs: Any) -> None:
         super().__init__(**kwargs)
-
         object.__setattr__(self, "optimal_value", optimal_value)
 
     @classmethod
@@ -114,6 +115,7 @@ def from_botorch_synthetic(
         test_problem_class: Type[SyntheticTestFunction],
         test_problem_kwargs: Dict[str, Any],
         num_trials: int,
+        infer_noise: bool = True,
     ) -> "SingleObjectiveBenchmarkProblem":
         """Create a BenchmarkProblem from a BoTorch BaseTestProblem using specialized
         Metrics and Runners. The test problem's result will be computed on the Runner
@@ -127,6 +129,7 @@ def from_botorch_synthetic(
             test_problem_class=test_problem_class,
             test_problem_kwargs=test_problem_kwargs,
             num_trials=num_trials,
+            infer_noise=infer_noise,
         )
 
         return cls(
@@ -135,6 +138,7 @@ def from_botorch_synthetic(
             optimization_config=problem.optimization_config,
             runner=problem.runner,
             num_trials=num_trials,
+            infer_noise=infer_noise,
             optimal_value=test_problem.optimal_value,
         )
 
@@ -153,11 +157,9 @@ def __init__(
         self,
         maximum_hypervolume: float,
         reference_point: List[float],
-        # pyre-fixme[2]: Parameter must be annotated.
-        **kwargs,
+        **kwargs: Any,
     ) -> None:
         super().__init__(**kwargs)
-
         object.__setattr__(self, "maximum_hypervolume", maximum_hypervolume)
         object.__setattr__(self, "reference_point", reference_point)
 
@@ -167,6 +169,7 @@ def from_botorch_multi_objective(
         test_problem_class: Type[MultiObjectiveTestProblem],
         test_problem_kwargs: Dict[str, Any],
         num_trials: int,
+        infer_noise: bool = True,
     ) -> "MultiObjectiveBenchmarkProblem":
         """Create a BenchmarkProblem from a BoTorch BaseTestProblem using specialized
         Metrics and Runners. The test problem's result will be computed on the Runner
@@ -180,12 +183,13 @@ def from_botorch_multi_objective(
             test_problem_class=test_problem_class,
             test_problem_kwargs=test_problem_kwargs,
             num_trials=num_trials,
+            infer_noise=infer_noise,
         )
 
         metrics = [
             BotorchTestProblemMetric(
                 name=f"{test_problem.__class__.__name__}_{i}",
-                noise_sd=(test_problem.noise_std or 0),
+                noise_sd=None if infer_noise else (test_problem.noise_std or 0),
                 index=i,
             )
             for i in range(test_problem.num_objectives)
@@ -218,6 +222,7 @@ def from_botorch_multi_objective(
             optimization_config=optimization_config,
             runner=problem.runner,
             num_trials=num_trials,
+            infer_noise=infer_noise,
             maximum_hypervolume=test_problem.max_hv,
             reference_point=test_problem._ref_point,
         )
diff --git a/ax/benchmark/problems/hpo/pytorch_cnn.py b/ax/benchmark/problems/hpo/pytorch_cnn.py
index 4c808eded4c..251ae703345 100644
--- a/ax/benchmark/problems/hpo/pytorch_cnn.py
+++ b/ax/benchmark/problems/hpo/pytorch_cnn.py
@@ -36,7 +36,12 @@ def __eq__(self, other: Base) -> bool:
 
     @classmethod
     def from_datasets(
-        cls, name: str, num_trials: int, train_set: Dataset, test_set: Dataset
+        cls,
+        name: str,
+        num_trials: int,
+        train_set: Dataset,
+        test_set: Dataset,
+        infer_noise: bool = True,
     ) -> "PyTorchCNNBenchmarkProblem":
         optimal_value = 1
 
@@ -73,7 +78,7 @@ def from_datasets(
         )
         optimization_config = OptimizationConfig(
             objective=Objective(
-                metric=PyTorchCNNMetric(),
+                metric=PyTorchCNNMetric(infer_noise=infer_noise),
                 minimize=False,
             )
         )
@@ -87,12 +92,14 @@ def from_datasets(
             optimization_config=optimization_config,
             runner=runner,
             num_trials=num_trials,
+            infer_noise=infer_noise,
         )
 
 
 class PyTorchCNNMetric(Metric):
-    def __init__(self) -> None:
+    def __init__(self, infer_noise: bool = True) -> None:
         super().__init__(name="accuracy")
+        self.infer_noise = infer_noise
 
     def fetch_trial_data(self, trial: BaseTrial, **kwargs: Any) -> MetricFetchResult:
         try:
@@ -105,7 +112,7 @@ def fetch_trial_data(self, trial: BaseTrial, **kwargs: Any) -> MetricFetchResult
                     "arm_name": [name for name, _ in trial.arms_by_name.items()],
                     "metric_name": self.name,
                     "mean": accuracy,
-                    "sem": 0,
+                    "sem": None if self.infer_noise else 0,
                     "trial_index": trial.index,
                 }
             )
diff --git a/ax/benchmark/problems/hpo/torchvision.py b/ax/benchmark/problems/hpo/torchvision.py
index 0bc2c26242b..ffcb5a5782a 100644
--- a/ax/benchmark/problems/hpo/torchvision.py
+++ b/ax/benchmark/problems/hpo/torchvision.py
@@ -25,7 +25,10 @@
 class PyTorchCNNTorchvisionBenchmarkProblem(PyTorchCNNBenchmarkProblem):
     @classmethod
     def from_dataset_name(
-        cls, name: str, num_trials: int
+        cls,
+        name: str,
+        num_trials: int,
+        infer_noise: bool = True,
     ) -> "PyTorchCNNTorchvisionBenchmarkProblem":
         if name not in _REGISTRY:
             raise UserInputError(
@@ -49,7 +52,11 @@ def from_dataset_name(
         )
 
         problem = cls.from_datasets(
-            name=name, num_trials=num_trials, train_set=train_set, test_set=test_set
+            name=name,
+            num_trials=num_trials,
+            train_set=train_set,
+            test_set=test_set,
+            infer_noise=infer_noise,
         )
         runner = PyTorchCNNTorchvisionRunner(
             name=name, train_set=train_set, test_set=test_set
@@ -61,6 +68,7 @@ def from_dataset_name(
             optimization_config=problem.optimization_config,
             runner=runner,
             num_trials=num_trials,
+            infer_noise=infer_noise,
             optimal_value=problem.optimal_value,
         )
 
diff --git a/ax/benchmark/problems/registry.py b/ax/benchmark/problems/registry.py
index 1a9eedec336..52e0236b33f 100644
--- a/ax/benchmark/problems/registry.py
+++ b/ax/benchmark/problems/registry.py
@@ -31,6 +31,7 @@ class BenchmarkProblemRegistryEntry:
             "test_problem_class": Ackley,
             "test_problem_kwargs": {},
             "num_trials": 50,
+            "infer_noise": True,
         },
     ),
     "branin": BenchmarkProblemRegistryEntry(
@@ -39,6 +40,7 @@ class BenchmarkProblemRegistryEntry:
             "test_problem_class": Branin,
             "test_problem_kwargs": {},
             "num_trials": 30,
+            "infer_noise": True,
         },
     ),
     "branin_currin": BenchmarkProblemRegistryEntry(
@@ -47,12 +49,16 @@ class BenchmarkProblemRegistryEntry:
             "test_problem_class": BraninCurrin,
             "test_problem_kwargs": {},
             "num_trials": 30,
+            "infer_noise": True,
         },
     ),
     "branin_currin30": BenchmarkProblemRegistryEntry(
         factory_fn=lambda n: embed_higher_dimension(
             problem=MultiObjectiveBenchmarkProblem.from_botorch_multi_objective(
-                test_problem_class=BraninCurrin, test_problem_kwargs={}, num_trials=100
+                test_problem_class=BraninCurrin,
+                test_problem_kwargs={},
+                num_trials=100,
+                infer_noise=True,
             ),
             total_dimensionality=n,
         ),
@@ -64,6 +70,7 @@ class BenchmarkProblemRegistryEntry:
             "test_problem_class": Hartmann,
             "test_problem_kwargs": {"dim": 6},
             "num_trials": 50,
+            "infer_noise": True,
         },
     ),
     "hartmann30": BenchmarkProblemRegistryEntry(
@@ -72,6 +79,7 @@ class BenchmarkProblemRegistryEntry:
                 test_problem_class=Hartmann,
                 test_problem_kwargs={"dim": 6},
                 num_trials=100,
+                infer_noise=True,
             ),
             total_dimensionality=n,
         ),
@@ -79,15 +87,15 @@ class BenchmarkProblemRegistryEntry:
     ),
     "hpo_pytorch_cnn_MNIST": BenchmarkProblemRegistryEntry(
         factory_fn=PyTorchCNNTorchvisionBenchmarkProblem.from_dataset_name,
-        factory_kwargs={"name": "MNIST", "num_trials": 50},
+        factory_kwargs={"name": "MNIST", "num_trials": 50, "infer_noise": True},
     ),
     "hpo_pytorch_cnn_FashionMNIST": BenchmarkProblemRegistryEntry(
         factory_fn=PyTorchCNNTorchvisionBenchmarkProblem.from_dataset_name,
-        factory_kwargs={"name": "FashionMNIST", "num_trials": 50},
+        factory_kwargs={"name": "FashionMNIST", "num_trials": 50, "infer_noise": True},
     ),
     "jenatton": BenchmarkProblemRegistryEntry(
         factory_fn=get_jenatton_benchmark_problem,
-        factory_kwargs={"num_trials": 50},
+        factory_kwargs={"num_trials": 50, "infer_noise": True},
     ),
     "powell": BenchmarkProblemRegistryEntry(
         factory_fn=SingleObjectiveBenchmarkProblem.from_botorch_synthetic,
@@ -95,8 +103,65 @@ class BenchmarkProblemRegistryEntry:
             "test_problem_class": Powell,
             "test_problem_kwargs": {},
             "num_trials": 50,
+            "infer_noise": True,
+        },
+    ),
+    # Problems without inferred noise
+    "branin_fixed_noise": BenchmarkProblemRegistryEntry(
+        factory_fn=SingleObjectiveBenchmarkProblem.from_botorch_synthetic,
+        factory_kwargs={
+            "test_problem_class": Branin,
+            "test_problem_kwargs": {},
+            "num_trials": 30,
+            "infer_noise": False,
+        },
+    ),
+    "branin_currin_fixed_noise": BenchmarkProblemRegistryEntry(
+        factory_fn=MultiObjectiveBenchmarkProblem.from_botorch_multi_objective,
+        factory_kwargs={
+            "test_problem_class": BraninCurrin,
+            "test_problem_kwargs": {},
+            "num_trials": 30,
+            "infer_noise": False,
+        },
+    ),
+    "branin_currin30_fixed_noise": BenchmarkProblemRegistryEntry(
+        factory_fn=lambda n: embed_higher_dimension(
+            problem=MultiObjectiveBenchmarkProblem.from_botorch_multi_objective(
+                test_problem_class=BraninCurrin,
+                test_problem_kwargs={},
+                num_trials=100,
+                infer_noise=False,
+            ),
+            total_dimensionality=n,
+        ),
+        factory_kwargs={"n": 30},
+    ),
+    "hartmann6_fixed_noise": BenchmarkProblemRegistryEntry(
+        factory_fn=SingleObjectiveBenchmarkProblem.from_botorch_synthetic,
+        factory_kwargs={
+            "test_problem_class": Hartmann,
+            "test_problem_kwargs": {"dim": 6},
+            "num_trials": 50,
+            "infer_noise": False,
         },
     ),
+    "hartmann30_fixed_noise": BenchmarkProblemRegistryEntry(
+        factory_fn=lambda n: embed_higher_dimension(
+            problem=SingleObjectiveBenchmarkProblem.from_botorch_synthetic(
+                test_problem_class=Hartmann,
+                test_problem_kwargs={"dim": 6},
+                num_trials=100,
+                infer_noise=False,
+            ),
+            total_dimensionality=n,
+        ),
+        factory_kwargs={"n": 30},
+    ),
+    "jenatton_fixed_noise": BenchmarkProblemRegistryEntry(
+        factory_fn=get_jenatton_benchmark_problem,
+        factory_kwargs={"num_trials": 50, "infer_noise": False},
+    ),
 }
 
 
diff --git a/ax/benchmark/problems/surrogate.py b/ax/benchmark/problems/surrogate.py
index f616c76e81c..b7eeb029e8a 100644
--- a/ax/benchmark/problems/surrogate.py
+++ b/ax/benchmark/problems/surrogate.py
@@ -5,8 +5,6 @@
 
 from typing import Any, Dict, Iterable, List, Set
 
-import numpy as np
-
 import pandas as pd
 import torch
 from ax.benchmark.benchmark_problem import SingleObjectiveBenchmarkProblem
@@ -45,6 +43,7 @@ def from_surrogate(
         minimize: bool,
         optimal_value: float,
         num_trials: int,
+        infer_noise: bool = True,
     ) -> "SurrogateBenchmarkProblem":
         return SurrogateBenchmarkProblem(
             name=name,
@@ -63,12 +62,14 @@ def from_surrogate(
             ),
             optimal_value=optimal_value,
             num_trials=num_trials,
+            infer_noise=infer_noise,
         )
 
 
 class SurrogateMetric(Metric):
-    def __init__(self) -> None:
+    def __init__(self, infer_noise: bool = True) -> None:
         super().__init__(name="prediction")
+        self.infer_noise = infer_noise
 
     # pyre-fixme[2]: Parameter must be annotated.
     def fetch_trial_data(self, trial: BaseTrial, **kwargs) -> MetricFetchResult:
@@ -82,7 +83,7 @@ def fetch_trial_data(self, trial: BaseTrial, **kwargs) -> MetricFetchResult:
                     "arm_name": [name for name, _ in trial.arms_by_name.items()],
                     "metric_name": self.name,
                     "mean": prediction,
-                    "sem": np.nan,
+                    "sem": None if self.infer_noise else 0,
                     "trial_index": trial.index,
                 }
             )
diff --git a/ax/benchmark/problems/synthetic/hss/jenatton.py b/ax/benchmark/problems/synthetic/hss/jenatton.py
index fa255b79bc9..b75a1b6e4a8 100644
--- a/ax/benchmark/problems/synthetic/hss/jenatton.py
+++ b/ax/benchmark/problems/synthetic/hss/jenatton.py
@@ -14,6 +14,7 @@
 
 def get_jenatton_benchmark_problem(
     num_trials: int = 50,
+    infer_noise: bool = True,
 ) -> SingleObjectiveBenchmarkProblem:
     search_space = HierarchicalSearchSpace(
         parameters=[
@@ -54,7 +55,9 @@ def get_jenatton_benchmark_problem(
     )
 
     optimization_config = OptimizationConfig(
-        objective=Objective(metric=JenattonMetric(), minimize=True)
+        objective=Objective(
+            metric=JenattonMetric(infer_noise=infer_noise), minimize=True
+        )
     )
 
     return SingleObjectiveBenchmarkProblem(
@@ -63,5 +66,6 @@ def get_jenatton_benchmark_problem(
         optimization_config=optimization_config,
         runner=SyntheticRunner(),
         num_trials=num_trials,
+        infer_noise=infer_noise,
         optimal_value=0.1,
     )
diff --git a/ax/metrics/botorch_test_problem.py b/ax/metrics/botorch_test_problem.py
index d7d2743fed3..8a5c2cb253b 100644
--- a/ax/metrics/botorch_test_problem.py
+++ b/ax/metrics/botorch_test_problem.py
@@ -19,7 +19,9 @@ class BotorchTestProblemMetric(Metric):
     proper value from the resulting tensor given its index.
     """
 
-    def __init__(self, name: str, noise_sd: float, index: Optional[int] = None) -> None:
+    def __init__(
+        self, name: str, noise_sd: Optional[float] = None, index: Optional[int] = None
+    ) -> None:
         super().__init__(name=name)
         self.noise_sd = noise_sd
         self.index = index
diff --git a/ax/metrics/jenatton.py b/ax/metrics/jenatton.py
index be6d5e90776..8acf2939650 100644
--- a/ax/metrics/jenatton.py
+++ b/ax/metrics/jenatton.py
@@ -17,8 +17,10 @@ class JenattonMetric(Metric):
     def __init__(
         self,
         name: str = "jenatton",
+        infer_noise: bool = True,
     ) -> None:
         super().__init__(name=name)
+        self.infer_noise = infer_noise
 
     @staticmethod
     def _f(
@@ -52,7 +54,7 @@ def fetch_trial_data(self, trial: BaseTrial, **kwargs: Any) -> MetricFetchResult
                     "arm_name": [name for name, _ in trial.arms_by_name.items()],
                     "metric_name": self.name,
                     "mean": mean,
-                    "sem": 0,
+                    "sem": None if self.infer_noise else 0,
                     "trial_index": trial.index,
                 }
             )
diff --git a/ax/storage/json_store/decoder.py b/ax/storage/json_store/decoder.py
index cdd599a7a46..eb676984322 100644
--- a/ax/storage/json_store/decoder.py
+++ b/ax/storage/json_store/decoder.py
@@ -205,7 +205,9 @@ def object_from_json(
             )
         elif _class == TorchvisionBenchmarkProblem:
             return TorchvisionBenchmarkProblem.from_dataset_name(  # pragma: no cover
-                name=object_json["name"], num_trials=object_json["num_trials"]
+                name=object_json["name"],
+                num_trials=object_json["num_trials"],
+                infer_noise=object_json["infer_noise"],
             )
         elif issubclass(_class, SerializationMixin):
             return _class(**_class.deserialize_init_args(args=object_json))
diff --git a/ax/storage/json_store/encoders.py b/ax/storage/json_store/encoders.py
index dc993ab8b9d..ed4bc30822f 100644
--- a/ax/storage/json_store/encoders.py
+++ b/ax/storage/json_store/encoders.py
@@ -575,6 +575,7 @@ def pytorch_cnn_torchvision_benchmark_problem_to_dict(
         "__type": problem.__class__.__name__,
         "name": not_none(re.compile("(?<=::).*").search(problem.name)).group(),
         "num_trials": problem.num_trials,
+        "infer_noise": problem.infer_noise,
     }
 
 
diff --git a/ax/utils/testing/benchmark_stubs.py b/ax/utils/testing/benchmark_stubs.py
index bb817b293d7..f7b3cb2e07e 100644
--- a/ax/utils/testing/benchmark_stubs.py
+++ b/ax/utils/testing/benchmark_stubs.py
@@ -19,7 +19,7 @@
 from ax.service.scheduler import SchedulerOptions
 from ax.utils.common.constants import Keys
 from botorch.acquisition.monte_carlo import qNoisyExpectedImprovement
-from botorch.models.gp_regression import FixedNoiseGP
+from botorch.models.gp_regression import SingleTaskGP
 from botorch.test_functions.multi_objective import BraninCurrin
 from botorch.test_functions.synthetic import Branin
 
@@ -66,7 +66,7 @@ def get_sobol_gpei_benchmark_method() -> BenchmarkMethod:
                     model=Models.BOTORCH_MODULAR,
                     num_trials=-1,
                     model_kwargs={
-                        "surrogate": Surrogate(FixedNoiseGP),
+                        "surrogate": Surrogate(SingleTaskGP),
                         "botorch_acqf_class": qNoisyExpectedImprovement,
                     },
                     model_gen_kwargs={