From 1ee263683f7c3e5e7b32e3f5ec87d3cbb15605bf Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Sat, 4 Apr 2020 10:51:19 +0200 Subject: [PATCH 1/7] fix bug in trendline in the case of missing values --- packages/python/plotly/plotly/express/_core.py | 10 +++++++--- .../plotly/tests/test_core/test_px/test_trendline.py | 12 ++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 613920d05f..9a9ea82dd0 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -241,18 +241,22 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref): sorted_trace_data = trace_data.sort_values(by=args["x"]) y = sorted_trace_data[args["y"]] x = sorted_trace_data[args["x"]] - trace_patch["x"] = x + # trace_patch["x"] = x if x.dtype.type == np.datetime64: x = x.astype(int) / 10 ** 9 # convert to unix epoch seconds if attr_value == "lowess": - trendline = sm.nonparametric.lowess(y, x) + # missing ='drop' is the default value for lowess but not for OLS (None) + # we force it here in case statsmodels change their defaults + trendline = sm.nonparametric.lowess(y, x, missing='drop') + trace_patch["x"] = trendline[:, 0] trace_patch["y"] = trendline[:, 1] hover_header = "LOWESS trendline

" elif attr_value == "ols": - fit_results = sm.OLS(y.values, sm.add_constant(x.values)).fit() + fit_results = sm.OLS(y.values, sm.add_constant(x.values), missing='drop').fit() trace_patch["y"] = fit_results.predict() + trace_patch["x"] = x[np.logical_not(np.logical_or(np.isnan(y), np.isnan(x)))] hover_header = "OLS trendline
" hover_header += "%s = %g * %s + %g
" % ( args["y"], diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py new file mode 100644 index 0000000000..5760735fff --- /dev/null +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py @@ -0,0 +1,12 @@ +import plotly.express as px +import numpy as np + + +def test_trendline_nan_values(): + df = px.data.gapminder().query("continent == 'Oceania'") + start_date = 1970 + df['pop'][df['year'] < start_date] = np.nan + fig = px.scatter(df, x='year', y='pop', color='country', trendline='ols') + country_numbers = len(fig['data']) // 2 + for trendline in fig['data'][1::2]: + assert trendline.x[0] >= start_date From 94557b2a03ae8ccf8fc66c14b432f1d2552dcdcc Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Sat, 4 Apr 2020 10:54:26 +0200 Subject: [PATCH 2/7] paint it black --- packages/python/plotly/plotly/express/_core.py | 10 +++++++--- .../plotly/tests/test_core/test_px/test_trendline.py | 8 ++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 9a9ea82dd0..9f0afd3c5d 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -249,14 +249,18 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref): if attr_value == "lowess": # missing ='drop' is the default value for lowess but not for OLS (None) # we force it here in case statsmodels change their defaults - trendline = sm.nonparametric.lowess(y, x, missing='drop') + trendline = sm.nonparametric.lowess(y, x, missing="drop") trace_patch["x"] = trendline[:, 0] trace_patch["y"] = trendline[:, 1] hover_header = "LOWESS trendline

" elif attr_value == "ols": - fit_results = sm.OLS(y.values, sm.add_constant(x.values), missing='drop').fit() + fit_results = sm.OLS( + y.values, sm.add_constant(x.values), missing="drop" + ).fit() trace_patch["y"] = fit_results.predict() - trace_patch["x"] = x[np.logical_not(np.logical_or(np.isnan(y), np.isnan(x)))] + trace_patch["x"] = x[ + np.logical_not(np.logical_or(np.isnan(y), np.isnan(x))) + ] hover_header = "OLS trendline
" hover_header += "%s = %g * %s + %g
" % ( args["y"], diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py index 5760735fff..5204390856 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py @@ -5,8 +5,8 @@ def test_trendline_nan_values(): df = px.data.gapminder().query("continent == 'Oceania'") start_date = 1970 - df['pop'][df['year'] < start_date] = np.nan - fig = px.scatter(df, x='year', y='pop', color='country', trendline='ols') - country_numbers = len(fig['data']) // 2 - for trendline in fig['data'][1::2]: + df["pop"][df["year"] < start_date] = np.nan + fig = px.scatter(df, x="year", y="pop", color="country", trendline="ols") + country_numbers = len(fig["data"]) // 2 + for trendline in fig["data"][1::2]: assert trendline.x[0] >= start_date From 67467043e9f198f56d00875b63a5c99e63456d8a Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Sat, 4 Apr 2020 11:10:29 +0200 Subject: [PATCH 3/7] added statsmodels to dependencies for CI --- .circleci/create_conda_optional_env.sh | 2 +- packages/python/plotly/tox.ini | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.circleci/create_conda_optional_env.sh b/.circleci/create_conda_optional_env.sh index c27cd43db3..17f3c999af 100755 --- a/.circleci/create_conda_optional_env.sh +++ b/.circleci/create_conda_optional_env.sh @@ -16,7 +16,7 @@ if [ ! -d $HOME/miniconda/envs/circle_optional ]; then # Create environment # PYTHON_VERSION=2.7 or 3.5 $HOME/miniconda/bin/conda create -n circle_optional --yes python=$PYTHON_VERSION \ -requests nbformat six retrying psutil pandas decorator pytest mock nose poppler xarray scikit-image ipython jupyter ipykernel ipywidgets +requests nbformat six retrying psutil pandas decorator pytest mock nose poppler xarray scikit-image ipython jupyter ipykernel ipywidgets statsmodels # Install orca into environment $HOME/miniconda/bin/conda install --yes -n circle_optional -c plotly plotly-orca==1.3.1 diff --git a/packages/python/plotly/tox.ini b/packages/python/plotly/tox.ini index 6297713789..12f9089a14 100644 --- a/packages/python/plotly/tox.ini +++ b/packages/python/plotly/tox.ini @@ -59,6 +59,7 @@ deps= pytest==3.5.1 pandas==0.24.2 xarray==0.10.9 + statsmodels==0.11.1 backports.tempfile==1.0 optional: --editable=file:///{toxinidir}/../plotly-geo optional: numpy==1.16.5 From 8dec372e8e33b3c5e2c917c042bf769cba7b693c Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Sat, 4 Apr 2020 11:22:04 +0200 Subject: [PATCH 4/7] version for py2 --- packages/python/plotly/tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/python/plotly/tox.ini b/packages/python/plotly/tox.ini index 12f9089a14..99e29f470b 100644 --- a/packages/python/plotly/tox.ini +++ b/packages/python/plotly/tox.ini @@ -59,7 +59,7 @@ deps= pytest==3.5.1 pandas==0.24.2 xarray==0.10.9 - statsmodels==0.11.1 + statsmodels==0.10.2 backports.tempfile==1.0 optional: --editable=file:///{toxinidir}/../plotly-geo optional: numpy==1.16.5 From dcc879ab364d88913cd6ade3e57a5352a7b41ea9 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Mon, 27 Apr 2020 17:44:16 +0200 Subject: [PATCH 5/7] Update packages/python/plotly/plotly/express/_core.py Co-Authored-By: Nicolas Kruchten --- packages/python/plotly/plotly/express/_core.py | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 9f0afd3c5d..4047f71a3e 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -241,7 +241,6 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref): sorted_trace_data = trace_data.sort_values(by=args["x"]) y = sorted_trace_data[args["y"]] x = sorted_trace_data[args["x"]] - # trace_patch["x"] = x if x.dtype.type == np.datetime64: x = x.astype(int) / 10 ** 9 # convert to unix epoch seconds From 08b9d97cb2e198df35fcd0fbc9f7fc34cb51e937 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Mon, 27 Apr 2020 18:03:33 +0200 Subject: [PATCH 6/7] extended test to lowess, and more precise check of attribute length --- .../plotly/tests/test_core/test_px/test_trendline.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py index 5204390856..428e359eac 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py @@ -6,7 +6,10 @@ def test_trendline_nan_values(): df = px.data.gapminder().query("continent == 'Oceania'") start_date = 1970 df["pop"][df["year"] < start_date] = np.nan - fig = px.scatter(df, x="year", y="pop", color="country", trendline="ols") - country_numbers = len(fig["data"]) // 2 - for trendline in fig["data"][1::2]: - assert trendline.x[0] >= start_date + modes = ["ols", "lowess"] + for mode in modes: + fig = px.scatter(df, x="year", y="pop", color="country", trendline=mode) + country_numbers = len(fig["data"]) // 2 + for trendline in fig["data"][1::2]: + assert trendline.x[0] >= start_date + assert len(trendline.x) == len(trendline.y) From 8409887396985b83c139cbc97ea46a9d08fcb3d3 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Mon, 27 Apr 2020 21:37:46 +0200 Subject: [PATCH 7/7] Update packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py --- .../plotly/plotly/tests/test_core/test_px/test_trendline.py | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py index 428e359eac..4c151148c1 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py +++ b/packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py @@ -9,7 +9,6 @@ def test_trendline_nan_values(): modes = ["ols", "lowess"] for mode in modes: fig = px.scatter(df, x="year", y="pop", color="country", trendline=mode) - country_numbers = len(fig["data"]) // 2 for trendline in fig["data"][1::2]: assert trendline.x[0] >= start_date assert len(trendline.x) == len(trendline.y)