diff --git a/docs/source/getting_started/index.rst b/docs/source/getting_started/index.rst index bab1afe..ab635c4 100644 --- a/docs/source/getting_started/index.rst +++ b/docs/source/getting_started/index.rst @@ -198,8 +198,15 @@ model. Which shows that the emulator can predict the model with high accuracy. +Note that the `predict_values()` method also returns the estimated +variance of the values it returns. In cases where the variance is +not required, the method `predict_values_no_error()` can instead be +used. In some cases, this latter function can be much faster than the +one additionally returning the variances. + + .. image:: predict_vs_model.png This covers the most basic way to use SWIFT-Emulator and should give a good baseline for using some of the -additional features it offers. \ No newline at end of file +additional features it offers. diff --git a/docs/source/index.rst b/docs/source/index.rst index bda262f..429b7e4 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -29,7 +29,7 @@ It includes functionality to: SWIFT that processed by VELOCIraptor and the swift-pipeline. Information about `SWIFT` can be found -`here `_, Information about +`here `_, Information about `VELOCIraptor` can be found `here `_ and tnformation about the `SWIFT-pipeline` can be found diff --git a/swiftemulator/emulators/base.py b/swiftemulator/emulators/base.py index 276f242..e27ffb7 100644 --- a/swiftemulator/emulators/base.py +++ b/swiftemulator/emulators/base.py @@ -80,7 +80,8 @@ def predict_values( self, independent: np.array, model_parameters: Dict[str, float] ) -> np.array: """ - Predict values from the trained emulator contained within this object. + Predict values and the associated variance from the trained emulator contained + within this object. 
Parameters ---------- @@ -124,6 +125,52 @@ def predict_values( raise NotImplementedError + def predict_values_no_error( + self, independent: np.array, model_parameters: Dict[str, float] + ) -> np.array: + """ + Predict values from the trained emulator contained within this object. + In cases where the error estimates are not required, this method can be + significantly faster than predict_values(). + + Parameters + ---------- + + independent, np.array + Independent continuous variables to evaluate the emulator + at. If the emulator is discrete, these are only allowed to be + the discrete independent variables that the emulator was trained at + (disregarding the additional 'independent' model parameters, below.) + + model_parameters: Dict[str, float] + The point in model parameter space to create predicted + values at. + + Returns + ------- + + dependent_predictions, np.array + Array of predictions, if the emulator is a function f, these + are the predicted values of f(independent) evaluated at the position + of the input ``model_parameters``. + + Raises + ------ + + AttributeError + When the model has not been trained before trying to make a + prediction, or when attempting to evaluate the model at + disallowed independent variables. + """ + + if self.emulator is None: + raise AttributeError( + "Please train the emulator with fit_model before attempting " + "to make predictions." + ) + + raise NotImplementedError + def interactive_plot( self, x: np.array, @@ -140,10 +187,10 @@ def interactive_plot( the parameter values will be used instead. If no reference data is passed to be overplotted then the plot will display a line which corresponds to the predictions for the initial parameter values. - + Parameters ---------- - + x: np.array Array of data for which the emulator should make predictions. @@ -164,6 +211,7 @@ def interactive_plot( Array containing y-values of reference data to plot. 
Must be the same shape as x_data """ + import matplotlib.pyplot as plt from matplotlib.widgets import Slider diff --git a/swiftemulator/emulators/gaussian_process.py b/swiftemulator/emulators/gaussian_process.py index 3db612e..b9a1e2c 100644 --- a/swiftemulator/emulators/gaussian_process.py +++ b/swiftemulator/emulators/gaussian_process.py @@ -231,7 +231,8 @@ def predict_values( self, independent: np.array, model_parameters: Dict[str, float] ) -> np.array: """ - Predict values from the trained emulator contained within this object. + Predict values and the associated variance from the trained emulator + contained within this object. Parameters ---------- @@ -253,7 +254,14 @@ def predict_values( of the input model_parameters. dependent_prediction_errors, np.array - Errors on the model predictions. + Errors (variances) on the model predictions. + + Raises + ------ + + AttributeError + When the model has not been trained before trying to make a + prediction. """ if self.emulator is None: @@ -279,3 +287,62 @@ def predict_values( ) return model, errors + + def predict_values_no_error( + self, independent: np.array, model_parameters: Dict[str, float] + ) -> np.array: + """ + Predict values from the trained emulator contained within this object. + In cases where the error estimates are not required, this method is + significantly faster than predict_values(). + + Parameters + ---------- + + independent, np.array + Independent continuous variables to evaluate the emulator + at. + + model_parameters: Dict[str, float] + The point in model parameter space to create predicted + values at. + + Returns + ------- + + dependent_predictions, np.array + Array of predictions, if the emulator is a function f, these + are the predicted values of f(independent) evaluated at the position + of the input model_parameters. + + Raises + ------ + + AttributeError + When the model has not been trained before trying to make a + prediction. 
+ """ + + if self.emulator is None: + raise AttributeError( + "Please train the emulator with fit_model before attempting " + "to make predictions." + ) + + model_parameter_array = np.array( + [model_parameters[parameter] for parameter in self.parameter_order] + ) + + t = np.empty( + (len(independent), len(model_parameter_array) + 1), dtype=np.float32 + ) + + for line, value in enumerate(independent): + t[line][0] = value + t[line][1:] = model_parameter_array + + model = self.emulator.predict( + y=self.dependent_variables, t=t, return_cov=False, return_var=False + ) + + return model diff --git a/swiftemulator/emulators/gaussian_process_bins.py b/swiftemulator/emulators/gaussian_process_bins.py index 90380ef..6bfcd3a 100644 --- a/swiftemulator/emulators/gaussian_process_bins.py +++ b/swiftemulator/emulators/gaussian_process_bins.py @@ -269,7 +269,8 @@ def predict_values( self, independent: np.array, model_parameters: Dict[str, float] ) -> np.array: """ - Predict values from the trained emulator contained within this object. + Predict values and the associated variance from the trained emulator contained + within this object. Parameters ---------- @@ -365,3 +366,97 @@ def predict_values( np.array(dependent_predictions), np.array(dependent_prediction_errors), ) + + def predict_values_no_error( + self, independent: np.array, model_parameters: Dict[str, float] + ) -> np.array: + """ + Predict values from the trained emulator contained within this object. + In cases where the error estimates are not required, this method is + significantly faster than predict_values(). + + Parameters + ---------- + + independent, np.array + Independent continuous variables to evaluate the emulator + at. If the emulator is discrete, these are only allowed to be + the discrete independent variables that the emulator was trained at + (disregarding the additional 'independent' model parameters, below). + These can be found in this object in the ``bin_centers`` attribute. 
+ + model_parameters: Dict[str, float] + The point in model parameter space to create predicted + values at. + + Returns + ------- + + dependent_predictions, np.array + Array of predictions, if the emulator is a function f, these + are the predicted values of f(independent) evaluated at the position + of the input ``model_parameters``. + + Raises + ------ + + AttributeError + When the model has not been trained before trying to make a + prediction, or when attempting to evaluate the model at + disallowed independent variables. + """ + + if self.bin_gaussian_process is None: + raise AttributeError( + "Please train the emulator with fit_model before attempting " + "to make predictions." + ) + + # First calculate which indices in bin_centers (and hence + # bin_gaussian_processes) correspond to the requested ``independent`` + # variables. + + array_centers = np.array(self.bin_centers) + gpe_ordering = [] + + for requested_independent_variable in independent: + try: + gpe_ordering.append( + np.where(array_centers == requested_independent_variable)[0][0] + ) + except IndexError: + raise AttributeError( + f"Requested independent variable {independent} not valid, " + f"this instance of GPE Bins is only valid at {array_centers}.", + ) + + model_parameter_array = np.array( + [model_parameters[parameter] for parameter in self.parameter_order] + ) + + # George must predict a value for more than one point at a time, so + # generate two fake points either side of the one of interest. 
+ model_parameter_array_sample = np.append( + 0.98 * model_parameter_array, model_parameter_array + ) + model_parameter_array_sample = np.append( + model_parameter_array_sample, 1.02 * model_parameter_array + ).reshape(3, len(model_parameter_array)) + + dependent_predictions = [] + + for emulator_index in gpe_ordering: + gp = self.bin_gaussian_process[emulator_index] + model_values = self.bin_model_values[emulator_index] + + model = gp.predict( + y=model_values["dependent"], + t=model_parameter_array_sample, + return_cov=False, + return_var=False, + ) + + # Remove fake points required to ensure george returns a prediction. + dependent_predictions.append(model[1]) + + return np.array(dependent_predictions) diff --git a/swiftemulator/emulators/gaussian_process_mcmc.py b/swiftemulator/emulators/gaussian_process_mcmc.py index 82a3891..736e566 100644 --- a/swiftemulator/emulators/gaussian_process_mcmc.py +++ b/swiftemulator/emulators/gaussian_process_mcmc.py @@ -341,7 +341,8 @@ def predict_values( model_parameters: Dict[str, float], ) -> np.array: """ - Predict values from the trained emulator contained within this object. + Predict values and the associated variance from the trained emulator contained + within this object. Parameters ---------- @@ -364,6 +365,16 @@ def predict_values( dependent_prediction_errors, np.array Variance on the model predictions. + + Raises + ------ + + AttributeError + When the model has not been trained before trying to make a + prediction. + + ValueError + When the number of subsamples is larger than the number of samples. """ if self.emulator is None: @@ -420,3 +431,75 @@ def predict_values( variance += hyper_variance return model, variance + + def predict_values_no_error( + self, + independent: np.array, + model_parameters: Dict[str, float], + ) -> np.array: + """ + Predict values from the trained emulator contained within this object. 
+ In cases where the error estimates are not required, this method is + significantly faster than predict_values(). + + Parameters + ---------- + + independent, np.array + Independent continuous variables to evaluate the emulator + at. + + model_parameters: Dict[str, float] + The point in model parameter space to create predicted + values at. + + Returns + ------- + + dependent_predictions, np.array + Array of predictions, if the emulator is a function f, these + are the predicted values of f(independent) evaluated at the position + of the input model_parameters. + + Raises + ------ + + AttributeError + When the model has not been trained before trying to make a + prediction. + + ValueError + When the number of subsamples is larger than the number of samples. + """ + + if self.emulator is None: + raise AttributeError( + "Please train the emulator with fit_model before attempting " + "to make predictions." + ) + + if ( + len(self.hyperparameter_samples[:, 0]) < self.samples_for_error + and self.use_hyperparameter_error + ): + raise ValueError( + "Number of subsamples must be less then the total number of samples" + ) + + model_parameter_array = np.array( + [model_parameters[parameter] for parameter in self.parameter_order] + ) + + t = np.empty( + (len(independent), len(model_parameter_array) + 1), dtype=np.float32 + ) + + for line, value in enumerate(independent): + t[line][0] = value + t[line][1:] = model_parameter_array + + model = self.emulator.predict( + y=self.dependent_variables, t=t, return_cov=False, return_var=False + ) + + return model diff --git a/swiftemulator/emulators/gaussian_process_one_dim.py b/swiftemulator/emulators/gaussian_process_one_dim.py index 439ddbf..1defae7 100644 --- a/swiftemulator/emulators/gaussian_process_one_dim.py +++ b/swiftemulator/emulators/gaussian_process_one_dim.py @@ -228,8 +228,8 @@ def grad_negative_log_likelihood(p): def predict_values(self, model_parameters: Dict[str, float]) -> np.array: """ - Predict a value from the 
trained emulator contained within this object. - returns the value at the input model parameters. + Predict a value and associated variance from the trained emulator contained within + this object. Returns the value at the input model parameters. Parameters ---------- @@ -247,7 +247,14 @@ def predict_values(self, model_parameters: Dict[str, float]) -> np.array: of the input model_parameters. dependent_prediction_error, float - Error on the model prediction. + Error (variance) on the model prediction. + + Raises + ------ + + AttributeError + When the model has not been trained before trying to make a + prediction. """ if self.emulator is None: @@ -271,3 +278,53 @@ def predict_values(self, model_parameters: Dict[str, float]) -> np.array: ) return model[0], errors[0] + + def predict_values_no_error(self, model_parameters: Dict[str, float]) -> np.array: + """ + Predict a value, without its associated variance, from the trained emulator + contained within this object. Returns the value at the input model parameters. + + Parameters + ---------- + + model_parameters: Dict[str, float] + The point in model parameter space to create predicted + values at. + + Returns + ------- + + dependent_prediction, float + Value of predictions, if the emulator is a function f, this + is the predicted value of f(independent) evaluated at the position + of the input model_parameters. + + Raises + ------ + + AttributeError + When the model has not been trained before trying to make a + prediction. + """ + + if self.emulator is None: + raise AttributeError( + "Please train the emulator with fit_model before attempting " + "to make predictions." 
+ ) + + model_parameter_array = np.array( + [model_parameters[parameter] for parameter in self.parameter_order] + ) + + # Create a fake duplicate as george always needs two points to predict + t = np.empty((2, len(model_parameter_array)), dtype=np.float32) + + t[0] = model_parameter_array + t[1] = model_parameter_array + + model = self.emulator.predict( + y=self.dependent_variables, t=t, return_cov=False, return_var=False + ) + + return model[0] diff --git a/swiftemulator/emulators/linear_model.py b/swiftemulator/emulators/linear_model.py index 671e409..eafe28b 100644 --- a/swiftemulator/emulators/linear_model.py +++ b/swiftemulator/emulators/linear_model.py @@ -166,7 +166,8 @@ def predict_values( self, independent: np.array, model_parameters: Dict[str, float] ) -> np.array: """ - Predict values from the trained emulator contained within this object. + Predict values and the associated variance from the trained emulator contained + within this object. Parameters ---------- @@ -190,6 +191,13 @@ def predict_values( dependent_prediction_errors, np.array Errors on the model predictions. For the linear model these are all zeroes, as the errors are unconstrained. + + Raises + ------ + + AttributeError + When the model has not been trained before trying to make a + prediction. """ if self.emulator is None: @@ -213,3 +221,60 @@ def predict_values( model = self.emulator.predict(X=t) return model, np.zeros_like(model) + + def predict_values_no_error( + self, independent: np.array, model_parameters: Dict[str, float] + ) -> np.array: + """ + Predict values from the trained emulator contained within this object. + Unlike predict_values(), this does not create and return the all-zero + error array that accompanies the linear model predictions. + + Parameters + ---------- + + independent, np.array + Independent continuous variables to evaluate the emulator + at. + + model_parameters: Dict[str, float] + The point in model parameter space to create predicted + values at. 
+ + Returns + ------- + + dependent_predictions, np.array + Array of predictions, if the emulator is a function f, these + are the predicted values of f(independent) evaluated at the position + of the input model_parameters. + + Raises + ------ + + AttributeError + When the model has not been trained before trying to make a + prediction. + """ + + if self.emulator is None: + raise AttributeError( + "Please train the emulator with fit_model before attempting " + "to make predictions." + ) + + model_parameter_array = np.array( + [model_parameters[parameter] for parameter in self.parameter_order] + ) + + t = np.empty( + (len(independent), len(model_parameter_array) + 1), dtype=np.float32 + ) + + for line, value in enumerate(independent): + t[line][0] = value + t[line][1:] = model_parameter_array + + model = self.emulator.predict(X=t) + + return model diff --git a/swiftemulator/emulators/multi_gaussian_process.py b/swiftemulator/emulators/multi_gaussian_process.py index 3802a4f..1a93d2c 100644 --- a/swiftemulator/emulators/multi_gaussian_process.py +++ b/swiftemulator/emulators/multi_gaussian_process.py @@ -184,7 +184,8 @@ def predict_values( model_parameters: Dict[str, float], ) -> np.array: """ - Predict values from the trained emulator contained within this object. + Predict values and the associated variance from the trained emulator contained + within this object. Parameters ---------- @@ -208,6 +209,14 @@ def predict_values( dependent_prediction_errors, np.array Errors on the model predictions. + Raises + ------ + + AttributeError + When the model has not been trained before trying to make a + prediction, or when attempting to evaluate the model at + disallowed independent variables. 
+ Notes ----- @@ -320,3 +329,134 @@ def predict_values( ] return dependent_predictions, dependent_prediction_errors + + def predict_values_no_error( + self, + independent: np.array, + model_parameters: Dict[str, float], + ) -> np.array: + """ + Predict values from the trained emulator contained within this object. + In cases where the error estimates are not required, this method is + significantly faster than predict_values(). + + Parameters + ---------- + + independent, np.array + Independent continuous variables to evaluate the emulator + at. + + model_parameters: Dict[str, float] + The point in model parameter space to create predicted + values at. + + Returns + ------- + + dependent_predictions, np.array + Array of predictions, if the emulator is a function f, these + are the predicted values of f(independent) evaluated at the position + of the input model_parameters. + + Raises + ------ + + AttributeError + When the model has not been trained before trying to make a + prediction, or when attempting to evaluate the model at + disallowed independent variables. + + Notes + ----- + + This will use the originally defined regions and overlaps will + be calculated by using the weighted linear sum corresponding + to the independent variable's distance to the adjacent boundary. + No prediction errors are calculated by this method. + """ + + if self.emulators is None: + raise AttributeError( + "Please train the emulator with fit_model before attempting " + "to make predictions." + ) + + # First, do individual predictions. 
+ + inputs = [] + output = [] + + for index, (low, high) in enumerate(self.independent_regions): + mask = np.logical_and( + ( + independent > low + if low is not None + else np.ones_like(independent).astype(bool) + ), + ( + independent < high + if high is not None + else np.ones_like(independent).astype(bool) + ), + ) + + predicted = self.emulators[index].predict_values_no_error( + independent=independent[mask], model_parameters=model_parameters + ) + + inputs.append(list(independent[mask])) + output.append(list(predicted)) + + # Now that we've predicted it all, we need to explicitly deal + # with overlap and non-overlap. + + overlap_ranges = {} + + for index in range(1, len(self.independent_regions)): + left = self.independent_regions[index][0] + right = self.independent_regions[index - 1][1] + + if right is None or left is None: + continue + elif right > left: + overlap_ranges[index - 1] = [left, right] + + dependent_predictions = np.empty_like(independent) + + current_emulator = 0 + + for index, x in enumerate(independent): + if x not in inputs[current_emulator]: + current_emulator += 1 + + # Is it in the prior overlap? + low, high = overlap_ranges.get(current_emulator - 1, [float("inf")] * 2) + + if low <= x <= high: + # We have already counted this independent variable. + continue + + # Is it in this emulator's overlap? 
+ low, high = overlap_ranges.get(current_emulator, [float("inf")] * 2) + + if low <= x <= high: + # Blend both emulators' predictions, weighted by distance to the overlap edges. + dependent_index_left = inputs[current_emulator].index(x) + dependent_index_right = inputs[current_emulator + 1].index(x) + + left_weight = (high - x) / (high - low) + right_weight = (x - low) / (high - low) + + dependent_left = output[current_emulator][dependent_index_left] + dependent_right = output[current_emulator + 1][dependent_index_right] + + dependent_predictions[index] = ( + dependent_left * left_weight + dependent_right * right_weight + ) + else: + # Easy! + dependent_index = inputs[current_emulator].index(x) + dependent_predictions[index] = output[current_emulator][dependent_index] + + return dependent_predictions diff --git a/tests/test_emulator_bins.py b/tests/test_emulator_bins.py index a138fc4..76b03cf 100644 --- a/tests/test_emulator_bins.py +++ b/tests/test_emulator_bins.py @@ -66,3 +66,4 @@ def test_basic_emulator_generator(): ) gpe.predict_values([0, 7], {"x": 1.5, "y": 1.0}) + gpe.predict_values_no_error([0, 7], {"x": 1.5, "y": 1.0}) diff --git a/tests/test_emulator_multiple.py b/tests/test_emulator_multiple.py index 1163393..d775235 100644 --- a/tests/test_emulator_multiple.py +++ b/tests/test_emulator_multiple.py @@ -122,3 +122,4 @@ def test_basic_emulator_generator_multiple(): ) gpe.predict_values(np.array([0.2, 0.9, 9.9, 5.0]), {"x": 0.5, "y": 1}) + gpe.predict_values_no_error(np.array([0.2, 0.9, 9.9, 5.0]), {"x": 0.5, "y": 1})