From c6084173f2eadf312bf309bb35396a14fcb510e7 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Thu, 15 Jun 2023 08:12:53 -0400 Subject: [PATCH 1/6] Make transformed_data public --- altair/vegalite/v5/api.py | 24 ++++++++++++------------ tests/test_transformed_data.py | 6 +++--- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/altair/vegalite/v5/api.py b/altair/vegalite/v5/api.py index 9e97e3dff..d01449ab1 100644 --- a/altair/vegalite/v5/api.py +++ b/altair/vegalite/v5/api.py @@ -2658,7 +2658,7 @@ def to_dict( validate=validate, format=format, ignore=ignore, context=context ) - def _transformed_data( + def transformed_data( self, row_limit: Optional[int] = None, exclude: Optional[Iterable[str]] = None, @@ -2859,7 +2859,7 @@ def __init__( **kwds, ) - def _transformed_data( + def transformed_data( self, row_limit: Optional[int] = None, exclude: Optional[Iterable[str]] = None, @@ -2970,7 +2970,7 @@ def __or__(self, other): copy |= other return copy - def _transformed_data( + def transformed_data( self, row_limit: Optional[int] = None, exclude: Optional[Iterable[str]] = None, @@ -2992,7 +2992,7 @@ def _transformed_data( list of DataFrame Transformed data for each subplot as a list of DataFrames """ - from altair.utils._transformed_data import transformed_data + from altair.utils.transformed_data import transformed_data return transformed_data(self, row_limit=row_limit, exclude=exclude) @@ -3067,7 +3067,7 @@ def __or__(self, other): copy |= other return copy - def _transformed_data( + def transformed_data( self, row_limit: Optional[int] = None, exclude: Optional[Iterable[str]] = None, @@ -3089,7 +3089,7 @@ def _transformed_data( list of DataFrame Transformed data for each subplot as a list of DataFrames """ - from altair.utils._transformed_data import transformed_data + from altair.utils.transformed_data import transformed_data return transformed_data(self, row_limit=row_limit, exclude=exclude) @@ -3164,7 +3164,7 @@ def __and__(self, other): copy &= other 
return copy - def _transformed_data( + def transformed_data( self, row_limit: Optional[int] = None, exclude: Optional[Iterable[str]] = None, @@ -3186,7 +3186,7 @@ def _transformed_data( list of DataFrame Transformed data for each subplot as a list of DataFrames """ - from altair.utils._transformed_data import transformed_data + from altair.utils.transformed_data import transformed_data return transformed_data(self, row_limit=row_limit, exclude=exclude) @@ -3260,7 +3260,7 @@ def __init__(self, data=Undefined, layer=(), **kwargs): for prop in combined_dict: self[prop] = combined_dict[prop] - def _transformed_data( + def transformed_data( self, row_limit: Optional[int] = None, exclude: Optional[Iterable[str]] = None, @@ -3282,7 +3282,7 @@ def _transformed_data( list of DataFrame Transformed data for each layer as a list of DataFrames """ - from altair.utils._transformed_data import transformed_data + from altair.utils.transformed_data import transformed_data return transformed_data(self, row_limit=row_limit, exclude=exclude) @@ -3375,7 +3375,7 @@ def __init__( data=data, spec=spec, facet=facet, params=params, **kwargs ) - def _transformed_data( + def transformed_data( self, row_limit: Optional[int] = None, exclude: Optional[Iterable[str]] = None, @@ -3397,7 +3397,7 @@ def _transformed_data( DataFrame Transformed data as a DataFrame """ - from altair.utils._transformed_data import transformed_data + from altair.utils.transformed_data import transformed_data return transformed_data(self, row_limit=row_limit, exclude=exclude) diff --git a/tests/test_transformed_data.py b/tests/test_transformed_data.py index 1604f2026..928b539f5 100644 --- a/tests/test_transformed_data.py +++ b/tests/test_transformed_data.py @@ -60,7 +60,7 @@ def test_primitive_chart_examples(filename, rows, cols): source = pkgutil.get_data(examples_methods_syntax.__name__, filename) chart = eval_block(source) - df = chart._transformed_data() + df = chart.transformed_data() assert len(df) == rows assert 
set(cols).issubset(set(df.columns)) @@ -101,7 +101,7 @@ def test_compound_chart_examples(filename, all_rows, all_cols): chart = eval_block(source) print(chart) - dfs = chart._transformed_data() + dfs = chart.transformed_data() assert len(dfs) == len(all_rows) for df, rows, cols in zip(dfs, all_rows, all_cols): assert len(df) == rows @@ -119,7 +119,7 @@ def test_transformed_data_exclude(): ) chart = (bar + rule + some_annotation).properties(width=600) - datasets = chart._transformed_data(exclude=["some_annotation"]) + datasets = chart.transformed_data(exclude=["some_annotation"]) assert len(datasets) == 2 assert len(datasets[0]) == 52 From 3fa7e786b4cf966b712125f0c481748ca90c3f60 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Thu, 15 Jun 2023 08:51:49 -0400 Subject: [PATCH 2/6] Add ipython sphinx extension to evaluate Python snippets that don't render Altair charts --- doc/conf.py | 2 ++ pyproject.toml | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/conf.py b/doc/conf.py index 78a4b7a1a..7d8ab8165 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -41,6 +41,8 @@ "sphinxext.schematable", "sphinx_copybutton", "sphinx_design", + 'IPython.sphinxext.ipython_console_highlighting', + 'IPython.sphinxext.ipython_directive' ] altair_plot_links = {"editor": True, "source": False, "export": False} diff --git a/pyproject.toml b/pyproject.toml index 10e3768b4..5476405fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,7 +85,8 @@ doc = [ "geopandas", "myst-parser", "sphinx_copybutton", - "sphinx-design" + "sphinx-design", + "ipython" ] [tool.hatch.version] From ed90271c113042bef446d27182fd6a3ca351a577 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Thu, 15 Jun 2023 08:52:38 -0400 Subject: [PATCH 3/6] Add initial transformed_data documentation section --- doc/user_guide/transform/index.rst | 57 ++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/doc/user_guide/transform/index.rst b/doc/user_guide/transform/index.rst index 
7084ca50b..ee71d3845 100644 --- a/doc/user_guide/transform/index.rst +++ b/doc/user_guide/transform/index.rst @@ -47,6 +47,63 @@ Transform Method :ref:`user-guide-window-transform` :meth:`~Chart.transform_window` Compute a windowed aggregation ========================================= ========================================= ================================================================================ +Accessing Transformed Data +~~~~~~~~~~~~~~~~~~~~~~~~~~ +When charts are displayed, data transformations are performed in the browser by +the Vega JavaScript library. To make the transformed data available in Python, +Altair provides the :meth:`~Chart.transformed_data` Chart method which integrates with +`VegaFusion `_ to evaluate these data transformations in +the Python kernel. + +First, install VegaFusion with the embed extras enabled. + +.. code-block:: none + + pip install "vegafusion[embed]" + +Then create an Altair chart and call the :meth:`~Chart.transformed_data` method +to extract a pandas DataFrame containing the transformed data. + +.. ipython:: python + + import altair as alt + from vega_datasets import data + + cars = data.cars.url + chart = alt.Chart(cars).mark_bar().encode( + y='Cylinders:O', + x='mean_acc:Q' + ).transform_aggregate( + mean_acc='mean(Acceleration)', + groupby=["Cylinders"] + ) + chart.transformed_data() + +The :meth:`~Chart.transformed_data` method currently supports most, but not all, +of Altair's transforms. See the table below. 
+ +========================================= ========= +Transform Supported +========================================= ========= +:ref:`user-guide-aggregate-transform` ✔ +:ref:`user-guide-bin-transform` ✔ +:ref:`user-guide-calculate-transform` ✔ +:ref:`user-guide-density-transform` +:ref:`user-guide-filter-transform` ✔ +:ref:`user-guide-flatten-transform` +:ref:`user-guide-fold-transform` ✔ +:ref:`user-guide-impute-transform` ✔ +:ref:`user-guide-joinaggregate-transform` ✔ +:ref:`user-guide-loess-transform` +:ref:`user-guide-lookup-transform` +:ref:`user-guide-pivot-transform` ✔ +:ref:`user-guide-quantile-transform` +:ref:`user-guide-regression-transform` +:ref:`user-guide-sample-transform` +:ref:`user-guide-stack-transform` ✔ +:ref:`user-guide-timeunit-transform` ✔ +:ref:`user-guide-window-transform` ✔ +========================================= ========= .. toctree:: :hidden: From 40ab881442b94249cde06867bd11800b96a07bbf Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Thu, 15 Jun 2023 09:02:01 -0400 Subject: [PATCH 4/6] fix imports --- altair/vegalite/v5/api.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/altair/vegalite/v5/api.py b/altair/vegalite/v5/api.py index d01449ab1..218bf0e79 100644 --- a/altair/vegalite/v5/api.py +++ b/altair/vegalite/v5/api.py @@ -2992,7 +2992,7 @@ def transformed_data( list of DataFrame Transformed data for each subplot as a list of DataFrames """ - from altair.utils.transformed_data import transformed_data + from altair.utils._transformed_data import transformed_data return transformed_data(self, row_limit=row_limit, exclude=exclude) @@ -3089,7 +3089,7 @@ def transformed_data( list of DataFrame Transformed data for each subplot as a list of DataFrames """ - from altair.utils.transformed_data import transformed_data + from altair.utils._transformed_data import transformed_data return transformed_data(self, row_limit=row_limit, exclude=exclude) @@ -3186,7 +3186,7 @@ def transformed_data( list of DataFrame 
Transformed data for each subplot as a list of DataFrames """ - from altair.utils.transformed_data import transformed_data + from altair.utils._transformed_data import transformed_data return transformed_data(self, row_limit=row_limit, exclude=exclude) @@ -3282,7 +3282,7 @@ def transformed_data( list of DataFrame Transformed data for each layer as a list of DataFrames """ - from altair.utils.transformed_data import transformed_data + from altair.utils._transformed_data import transformed_data return transformed_data(self, row_limit=row_limit, exclude=exclude) @@ -3397,7 +3397,7 @@ def transformed_data( DataFrame Transformed data as a DataFrame """ - from altair.utils.transformed_data import transformed_data + from altair.utils._transformed_data import transformed_data return transformed_data(self, row_limit=row_limit, exclude=exclude) From 5c8196c2b0c44635cceaab5003afe6fad69da3a3 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Fri, 16 Jun 2023 09:26:39 -0400 Subject: [PATCH 5/6] Mention approach of displaying transformed data with text marks --- doc/user_guide/transform/index.rst | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/doc/user_guide/transform/index.rst b/doc/user_guide/transform/index.rst index ee71d3845..85841a12d 100644 --- a/doc/user_guide/transform/index.rst +++ b/doc/user_guide/transform/index.rst @@ -50,10 +50,16 @@ Transform Method Accessing Transformed Data ~~~~~~~~~~~~~~~~~~~~~~~~~~ When charts are displayed, data transformations are performed in the browser by -the Vega JavaScript library. To make the transformed data available in Python, -Altair provides the :meth:`~Chart.transformed_data` Chart method which integrates with -`VegaFusion `_ to evaluate these data transformations in -the Python kernel. +the Vega JavaScript library. It's often helpful to inspect transformed data +results in the process of building a chart. 
One approach is to display the +transformed data results in a table composed of :ref:`Text<user-guide-text-marks>` +marks as in the :ref:`gallery_scatter_linked_table` gallery example. + +While this approach works, it's somewhat cumbersome, and still does not make it +possible to access the transformed data from Python. To make transformed data +results available in Python, Altair provides the :meth:`~Chart.transformed_data` +Chart method which integrates with `VegaFusion <https://vegafusion.io/>`_ +to evaluate data transformations in the Python kernel. First, install VegaFusion with the embed extras enabled. From be59e62d7a1995774bf4ac017200ce76883df1c9 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Fri, 16 Jun 2023 18:45:47 -0400 Subject: [PATCH 6/6] Remove ipython directive and use `altair-plot::` directive with `:output: repr` --- doc/conf.py | 2 -- doc/user_guide/transform/index.rst | 3 ++- pyproject.toml | 3 +-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 7d8ab8165..78a4b7a1a 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -41,8 +41,6 @@ "sphinxext.schematable", "sphinx_copybutton", "sphinx_design", - 'IPython.sphinxext.ipython_console_highlighting', - 'IPython.sphinxext.ipython_directive' ] altair_plot_links = {"editor": True, "source": False, "export": False} diff --git a/doc/user_guide/transform/index.rst b/doc/user_guide/transform/index.rst index 85841a12d..90ec75182 100644 --- a/doc/user_guide/transform/index.rst +++ b/doc/user_guide/transform/index.rst @@ -70,7 +70,8 @@ First, install VegaFusion with the embed extras enabled. Then create an Altair chart and call the :meth:`~Chart.transformed_data` method to extract a pandas DataFrame containing the transformed data. -.. ipython:: python +.. 
altair-plot:: + :output: repr import altair as alt from vega_datasets import data diff --git a/pyproject.toml b/pyproject.toml index 5476405fc..10e3768b4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,8 +85,7 @@ doc = [ "geopandas", "myst-parser", "sphinx_copybutton", - "sphinx-design", - "ipython" + "sphinx-design" ] [tool.hatch.version]