diff --git a/CHANGELOG.md b/CHANGELOG.md index 078596306..9294fb5a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,7 +26,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - - - Fix native networks to work with generated future equals to horizon ([#936](https://github.com/tinkoff-ai/etna/pull/936)) -- +- Fix `SARIMAXModel` to work with exogenous data on `pmdarima>=2.0` ([#940](https://github.com/tinkoff-ai/etna/pull/940)) - ## [1.12.0] - 2022-09-05 diff --git a/etna/libs/pmdarima_utils/arima.py b/etna/libs/pmdarima_utils/arima.py index ae7bdb3be..6149e319c 100644 --- a/etna/libs/pmdarima_utils/arima.py +++ b/etna/libs/pmdarima_utils/arima.py @@ -21,16 +21,51 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -# Note: Copied from pmdarima package (https://github.com/blue-yonder/tsfresh/blob/https://github.com/alkaline-ml/pmdarima/blob/v1.8.5/pmdarima/arima/arima.py) import numpy as np import numpy.polynomial.polynomial as np_polynomial -from sklearn.utils.validation import check_array from pmdarima.utils import diff from pmdarima.utils import diff_inv -from pmdarima.utils import check_endog +from sklearn.utils.validation import check_array, column_or_1d +DTYPE = np.float64 + +# Note: Copied from pmdarima package (https://github.com/alkaline-ml/pmdarima/blob/v1.8.5/pmdarima/utils/array.py) +def check_endog(y, dtype=DTYPE, copy=True, force_all_finite=False): + """Wrapper for ``check_array`` and ``column_or_1d`` from sklearn + + Parameters + ---------- + y : array-like, shape=(n_samples,) + The 1d endogenous array. + + dtype : string, type or None (default=np.float64) + Data type of result. If None, the dtype of the input is preserved. + If "numeric", dtype is preserved unless array.dtype is object. + + copy : bool, optional (default=True) + Whether a forced copy will be triggered. If copy=False, a copy might + still be triggered by a conversion. 
+ + force_all_finite : bool, optional (default=False) + Whether to raise an error on np.inf and np.nan in an array. The + possibilities are: + + - True: Force all values of array to be finite. + - False: accept both np.inf and np.nan in array. + + Returns + ------- + y : np.ndarray, shape=(n_samples,) + A 1d numpy ndarray + """ + return column_or_1d( + check_array(y, ensure_2d=False, force_all_finite=force_all_finite, + copy=copy, dtype=dtype)) # type: np.ndarray + + +# Note: Copied from pmdarima package (https://github.com/alkaline-ml/pmdarima/blob/v1.8.5/pmdarima/arima/arima.py) def ARMAtoMA(ar, ma, max_deg): r""" Convert ARMA coefficients to infinite MA coefficients. @@ -92,7 +127,7 @@ def ARMAtoMA(ar, ma, max_deg): return ema -# Note: Originally copied from pmdarima package (https://github.com/blue-yonder/tsfresh/blob/https://github.com/alkaline-ml/pmdarima/blob/v1.8.5/pmdarima/arima/arima.py) +# Note: Copied from pmdarima package (https://github.com/alkaline-ml/pmdarima/blob/v1.8.5/pmdarima/arima/arima.py) def seasonal_prediction_with_confidence(arima_res, start, end, diff --git a/etna/libs/pmdarima_utils/arima.pyi b/etna/libs/pmdarima_utils/arima.pyi index 22309ef77..0b2a802c8 100644 --- a/etna/libs/pmdarima_utils/arima.pyi +++ b/etna/libs/pmdarima_utils/arima.pyi @@ -2,6 +2,13 @@ from typing import Any from numpy import ndarray +def check_endog( + y: Any, + dtype: Any, + copy: bool, + force_all_finite: bool, + ) -> ndarray: ... 
+ def ARMAtoMA( ar: ndarray, ma: ndarray, diff --git a/etna/models/autoarima.py b/etna/models/autoarima.py index bb6641de4..bef5691fb 100644 --- a/etna/models/autoarima.py +++ b/etna/models/autoarima.py @@ -44,7 +44,8 @@ def __init__( super().__init__() def _get_fit_results(self, endog: pd.Series, exog: pd.DataFrame) -> SARIMAXResultsWrapper: - model = pm.auto_arima(endog, X=exog, **self.kwargs) + endog_np = endog.values + model = pm.auto_arima(endog_np, X=exog, **self.kwargs) return model.arima_res_ diff --git a/etna/models/sarimax.py b/etna/models/sarimax.py index 1b5a9d880..be8752d63 100644 --- a/etna/models/sarimax.py +++ b/etna/models/sarimax.py @@ -324,7 +324,7 @@ def __init__( super().__init__() def _get_fit_results(self, endog: pd.Series, exog: pd.DataFrame): - # make it a numpy array for forgetting about indices, it is necessary for _seasonal_prediction_with_confidence + # make it a numpy array for forgetting about indices, it is necessary for seasonal_prediction_with_confidence endog_np = endog.values model = SARIMAX( endog=endog_np,