Skip to content

Commit

Permalink
Merge branch 'main' into feature/docs_faq_api_call
Browse files Browse the repository at this point in the history
  • Loading branch information
elephaint authored Aug 30, 2024
2 parents 85546d3 + f44938a commit 955df2e
Show file tree
Hide file tree
Showing 4 changed files with 502 additions and 174 deletions.
105 changes: 72 additions & 33 deletions nbs/docs/capabilities/forecast/02_exogenous_variables.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -48,27 +48,16 @@
"source": [
"# Add exogenous variables\n",
"\n",
"To model with exogenous features, include them in the DataFrame you pass to the `forecast` method. Provide the future values of these exogenous features over the forecast horizon using the `X_df` parameter."
"To model with exogenous features, you have two options:\n",
"1. Use historical exogenous variables: include these variables in the DataFrame you pass to the `forecast` method\n",
"2. Use future exogenous variables: include these variables in the DataFrame you pass to the `forecast` method and provide the future values of these exogenous features over the forecast horizon using the `X_df` parameter."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"[![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Nixtla/nixtla/blob/main/nbs/docs/capabilities/forecast/02_exogenous_variables.ipynb)"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"#| echo: false\n",
"if not IN_COLAB:\n",
Expand Down Expand Up @@ -120,25 +109,47 @@
" nixtla_client = NixtlaClient()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Historical exogenous variables"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Read data\n",
"df = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/electricity-short-with-ex-vars.csv')\n",
"\n",
"# Forecast\n",
"forecast_df = nixtla_client.forecast(\n",
" df=df, \n",
" h=24,\n",
" id_col='unique_id',\n",
" target_col='y',\n",
" time_col='ds'\n",
")\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Future exogenous variables"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:nixtla.nixtla_client:Validating inputs...\n",
"INFO:nixtla.nixtla_client:Preprocessing dataframes...\n",
"INFO:nixtla.nixtla_client:Inferred freq: H\n",
"INFO:nixtla.nixtla_client:Using the following exogenous variables: Exogenous1, Exogenous2, day_0, day_1, day_2, day_3, day_4, day_5, day_6\n",
"INFO:nixtla.nixtla_client:Calling Forecast Endpoint...\n"
]
}
],
"outputs": [],
"source": [
"# Read data\n",
"import numpy as np\n",
"df = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/electricity-short-with-ex-vars.csv')\n",
"\n",
"# Load the future value of exogenous variables over the forecast horizon\n",
Expand All @@ -155,6 +166,39 @@
")\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3. Historical and future exogenous variables"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Read data\n",
"df = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/electricity-short-with-ex-vars.csv')\n",
"\n",
"# Load the future value of exogenous variables over the forecast horizon\n",
"future_ex_vars_df = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/electricity-short-future-ex-vars.csv')\n",
"\n",
"# We will only use 2 exogenous of future_ex_vars_df. The columns not included in future_ex_vars_df will be considered as historical exogenous variables, as the future values have not been supplied.\n",
"future_ex_vars_df = future_ex_vars_df[[\"unique_id\", \"ds\", \"Exogenous1\", \"Exogenous2\"]]\n",
"\n",
"# Forecast\n",
"forecast_df = nixtla_client.forecast(\n",
" df=df, \n",
" X_df=future_ex_vars_df, \n",
" h=24,\n",
" id_col='unique_id',\n",
" target_col='y',\n",
" time_col='ds'\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand All @@ -176,11 +220,6 @@
"source": [
"For more details on using exogenous features with TimeGPT, read our in-depth tutorials on [Exogenous variables](https://docs.nixtla.io/docs/tutorials-exogenous_variables) and on [Categorical variables](https://docs.nixtla.io/docs/tutorials-categorical_variables)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
}
],
"metadata": {
Expand Down
395 changes: 338 additions & 57 deletions nbs/docs/tutorials/01_exogenous_variables.ipynb

Large diffs are not rendered by default.

96 changes: 44 additions & 52 deletions nbs/nixtla_client.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -419,33 +419,28 @@
" time_col: str,\n",
" target_col: str,\n",
") -> Tuple[DFType, Optional[DFType]]:\n",
" exogs_df = [c for c in df.columns if c not in (id_col, time_col, target_col)]\n",
"\n",
" exog_list = [c for c in df.columns if c not in (id_col, time_col, target_col)]\n",
"\n",
" if X_df is None:\n",
" if exogs_df:\n",
" warnings.warn(\n",
" f'`df` contains the following exogenous features: {exogs_df}, '\n",
" 'but `X_df` was not provided. They will be ignored.'\n",
" )\n",
" df = df[[id_col, time_col, target_col]]\n",
" df = df[[id_col, time_col, target_col, *exog_list]]\n",
" return df, None\n",
" exogs_X = [c for c in X_df.columns if c not in (id_col, time_col)]\n",
" missing_df = set(exogs_X) - set(exogs_df)\n",
" if missing_df:\n",
"\n",
" futr_exog_list = [c for c in X_df.columns if c not in (id_col, time_col)]\n",
" hist_exog_list = list(set(exog_list) - set(futr_exog_list))\n",
"\n",
" # Capture case where future exogenous are provided in X_df that are not in df\n",
" missing_futr = set(futr_exog_list) - set(exog_list)\n",
" if missing_futr:\n",
" raise ValueError(\n",
" 'The following exogenous features are present in `X_df` '\n",
" f'but not in `df`: {missing_df}.'\n",
" \"The following exogenous features are present in `X_df` \"\n",
" f\"but not in `df`: {missing_futr}.\"\n",
" )\n",
" missing_X_df = set(exogs_df) - set(exogs_X)\n",
" if missing_X_df:\n",
" warnings.warn(\n",
" 'The following exogenous features are present in `df` '\n",
" f'but not in `X_df`: {missing_X_df}. They will be ignored'\n",
" )\n",
" exogs_df = [c for c in exogs_df if c in exogs_X]\n",
" df = df[[id_col, time_col, target_col, *exogs_df]]\n",
" if exogs_df != exogs_X:\n",
" # rearrange columns\n",
" X_df = X_df[[id_col, time_col, *exogs_df]]\n",
"\n",
" # Make sure df and X_df are in right order\n",
" df = df[[id_col, time_col, target_col, *futr_exog_list, *hist_exog_list]]\n",
" X_df = X_df[[id_col, time_col, *futr_exog_list]]\n",
"\n",
" return df, X_df\n",
"\n",
"def _validate_input_size(\n",
Expand Down Expand Up @@ -530,10 +525,12 @@
" df=X_df, id_col=id_col, time_col=time_col, target_col=None,\n",
" )\n",
" X_future = processed_X.data.T\n",
" futr_cols = [c for c in X_df.columns if c not in (id_col, time_col)]\n",
" else:\n",
" X_future = None\n",
" futr_cols = None\n",
" x_cols = [c for c in df.columns if c not in (id_col, time_col, target_col)]\n",
" return processed, X_future, x_cols\n",
" return processed, X_future, x_cols, futr_cols\n",
"\n",
"def _forecast_payload_to_in_sample(payload):\n",
" in_sample_payload = {\n",
Expand Down Expand Up @@ -681,7 +678,16 @@
" def ensure_contiguous_arrays(d: Dict[str, Any]) -> None:\n",
" for k, v in d.items():\n",
" if isinstance(v, np.ndarray):\n",
" d[k] = np.ascontiguousarray(v)\n",
" if np.issubdtype(v.dtype, np.floating):\n",
" v_cont = np.ascontiguousarray(v, dtype=np.float32)\n",
" d[k] = np.nan_to_num(v_cont, \n",
" nan=np.nan, \n",
" posinf=np.finfo(np.float32).max, \n",
" neginf=np.finfo(np.float32).min,\n",
" copy=False)\n",
" else:\n",
" d[k] = np.ascontiguousarray(v)\n",
"\n",
" elif isinstance(v, dict):\n",
" ensure_contiguous_arrays(v) \n",
"\n",
Expand Down Expand Up @@ -737,7 +743,7 @@
" offsets = [0] + [sum(p['series']['sizes']) for p in payloads[:-1]]\n",
" resp['idxs'] = np.hstack(\n",
" [\n",
" np.array(res['idxs']) + offset\n",
" np.array(res['idxs'], dtype=np.int64) + offset\n",
" for res, offset in zip(results, offsets)\n",
" ]\n",
" )\n",
Expand Down Expand Up @@ -980,7 +986,7 @@
" )\n",
"\n",
" logger.info('Preprocessing dataframes...')\n",
" processed, X_future, x_cols = _preprocess(\n",
" processed, X_future, x_cols, futr_cols = _preprocess(\n",
" df=df,\n",
" X_df=X_df,\n",
" h=h,\n",
Expand All @@ -1003,7 +1009,13 @@
" processed = _tail(processed, new_input_size)\n",
" if processed.data.shape[1] > 1:\n",
" X = processed.data[:, 1:].T\n",
" logger.info(f'Using the following exogenous features: {x_cols}')\n",
" if futr_cols is not None:\n",
" hist_exog_set= set(x_cols) - set(futr_cols)\n",
" if hist_exog_set:\n",
" logger.info(f'Using historical exogenous features: {list(hist_exog_set)}')\n",
" logger.info(f'Using future exogenous features: {futr_cols}')\n",
" else:\n",
" logger.info(f'Using historical exogenous features: {x_cols}')\n",
" else:\n",
" X = None\n",
"\n",
Expand Down Expand Up @@ -1173,7 +1185,7 @@
" model_input_size, model_horizon = self._get_model_params(model, standard_freq)\n",
"\n",
" logger.info('Preprocessing dataframes...')\n",
" processed, _, x_cols = _preprocess(\n",
" processed, _, x_cols, _ = _preprocess(\n",
" df=df,\n",
" X_df=None,\n",
" h=0,\n",
Expand Down Expand Up @@ -1358,7 +1370,7 @@
" step_size = h\n",
"\n",
" logger.info('Preprocessing dataframes...')\n",
" processed, _, x_cols = _preprocess(\n",
" processed, _, x_cols, _ = _preprocess(\n",
" df=df,\n",
" X_df=None,\n",
" h=0,\n",
Expand Down Expand Up @@ -1428,8 +1440,8 @@
" resp = self._make_partitioned_requests(client, 'v2/cross_validation', payloads)\n",
"\n",
" # assemble result\n",
" idxs = np.array(resp['idxs'])\n",
" sizes = np.array(resp['sizes'])\n",
" idxs = np.array(resp['idxs'], dtype=np.int64)\n",
" sizes = np.array(resp['sizes'], dtype=np.int64)\n",
" window_starts = np.arange(0, sizes.sum(), h)\n",
" cutoff_idxs = np.repeat(idxs[window_starts] - 1, h)\n",
" out = type(df)(\n",
Expand Down Expand Up @@ -2462,26 +2474,6 @@
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"# test for showing the correct warning if X_df is missing but df has exogenous columns\n",
"df = generate_series(n_series=2, min_length=5, max_length=20, n_static_features=3)\n",
"missing_exogenous = df.columns.drop(['unique_id', 'ds', 'y']).tolist()\n",
"expected_warning = (\n",
" f'`df` contains the following exogenous features: {missing_exogenous}, '\n",
" 'but `X_df` was not provided. They will be ignored.' \n",
")\n",
"\n",
"with warnings.catch_warnings(record=True) as w:\n",
" forecasts = nixtla_client.forecast(df, h=5)\n",
" assert any(expected_warning in str(warning.message) for warning in w)"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down
Loading

0 comments on commit 955df2e

Please sign in to comment.