Fix accidental requirement of Pandas 1.5. Bump minimum Pandas version to 0.25. Run tests with it (#3130)

* Add test with lowest supported Pandas version
* Pin numpy to 1.17.5 and pandas to 0.25.3
* Wrap signature in TYPE_CHECKING for older pandas
* pyarrow doesn't work with pandas <1.0, so uninstall it for Python 3.8 job instead of 3.9
* Remove compatibility function infer_dtype which was needed for Pandas < 0.21
* Remove conversion of categorical dtype columns
* Add changelog entry
* Re-add missing skipna argument to infer_dtype

---------

Co-authored-by: Jon Mease <[email protected]>
vega · Aug 1, 2023 · bd89d2f · bd89d2f
1 parent d32d053
commit bd89d2f
Show file tree

Hide file tree

Showing 5 changed files with 24 additions and 37 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -31,11 +31,18 @@ jobs:
       - name: Maybe uninstall optional dependencies
         # We uninstall pyarrow and vegafusion for one job to test that we have not
         # accidentally introduced a hard dependency on these libraries.
-        # Uninstalling for Python 3.9 is an arbitrary choice.
+        # Uninstalling for Python 3.8 is an arbitrary choice.
         # Also see https://github.com/altair-viz/altair/pull/3114
-        if: ${{ matrix.python-version }}=="3.9"
+        if: ${{ matrix.python-version == '3.8' }}
         run: |
           pip uninstall -y pyarrow vegafusion vegafusion-python-embed
+      - name: Maybe install lowest supported Pandas version
+        # We install the lowest supported Pandas version for one job to test that
+        # it still works. We downgrade to the oldest versions of pandas and numpy that
+        # ship Python 3.8 wheels, so this step only runs for the Python 3.8 job.
+        if: ${{ matrix.python-version == '3.8' }}
+        run: |
+          pip install pandas==0.25.3 numpy==1.17.5
       - name: Test that schema generation has no effect
         run: |
           python tools/generate_schema_wrapper.py

diff --git a/altair/utils/core.py b/altair/utils/core.py
@@ -15,7 +15,7 @@
 import jsonschema
 import pandas as pd
 import numpy as np
-from pandas.core.interchange.dataframe_protocol import Column as PandasColumn
+from pandas.api.types import infer_dtype
 
 from altair.utils.schemapi import SchemaBase
 from altair.utils._dfi_types import Column, DtypeKind, DataFrame as DfiDataFrame
@@ -25,13 +25,10 @@
 else:
     from typing_extensions import ParamSpec
 
-from typing import Literal, Protocol
+from typing import Literal, Protocol, TYPE_CHECKING
 
-try:
-    from pandas.api.types import infer_dtype as _infer_dtype
-except ImportError:
-    # Import for pandas < 0.20.0
-    from pandas.lib import infer_dtype as _infer_dtype  # type: ignore[no-redef]
+if TYPE_CHECKING:
+    from pandas.core.interchange.dataframe_protocol import Column as PandasColumn
 
 _V = TypeVar("_V")
 _P = ParamSpec("_P")
@@ -42,26 +39,6 @@ def __dataframe__(self, *args, **kwargs) -> DfiDataFrame:
         ...
 
 
-def infer_dtype(value: object) -> str:
-    """Infer the dtype of the value.
-
-    This is a compatibility function for pandas infer_dtype,
-    with skipna=False regardless of the pandas version.
-    """
-    if not hasattr(infer_dtype, "_supports_skipna"):
-        try:
-            _infer_dtype([1], skipna=False)
-        except TypeError:
-            # pandas < 0.21.0 don't support skipna keyword
-            infer_dtype._supports_skipna = False  # type: ignore[attr-defined]
-        else:
-            infer_dtype._supports_skipna = True  # type: ignore[attr-defined]
-    if infer_dtype._supports_skipna:  # type: ignore[attr-defined]
-        return _infer_dtype(value, skipna=False)
-    else:
-        return _infer_dtype(value)
-
-
 TYPECODE_MAP = {
     "ordinal": "O",
     "nominal": "N",
@@ -214,7 +191,7 @@ def infer_vegalite_type(
     ----------
     data: object
     """
-    typ = infer_dtype(data)
+    typ = infer_dtype(data, skipna=False)
 
     if typ in [
         "floating",
@@ -348,9 +325,9 @@ def to_list_if_array(val):
     for col_name, dtype in df.dtypes.items():
         dtype_name = str(dtype)
         if dtype_name == "category":
-            # Work around bug in to_json for categorical types in older versions of pandas
-            # https://github.com/pydata/pandas/issues/10778
-            # https://github.com/altair-viz/altair/pull/2170
+            # Work around a bug in older versions of pandas, where to_json does
+            # not properly convert NaN values in categorical columns to null.
+            # We can probably remove this part once we require Pandas >= 1.0
             col = df[col_name].astype(object)
             df[col_name] = col.where(col.notnull(), None)
         elif dtype_name == "string":
@@ -630,7 +607,7 @@ def parse_shorthand(
 
 
 def infer_vegalite_type_for_dfi_column(
-    column: Union[Column, PandasColumn],
+    column: Union[Column, "PandasColumn"],
 ) -> Union[_InferredVegaLiteType, Tuple[_InferredVegaLiteType, list]]:
     from pyarrow.interchange.from_dataframe import column_to_array
 

diff --git a/doc/releases/changes.rst b/doc/releases/changes.rst
@@ -18,7 +18,8 @@ Bug Fixes
 
 Backward-Incompatible Changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- Drop support for Python 3.7 which is end-of-life (#3100).
+- Drop support for Python 3.7 which is end-of-life (#3100)
+- Increase minimum required Pandas version to 0.25 (#3130)
 
 Version 5.0.1 (released May 26, 2023)
 -------------------------------------

diff --git a/pyproject.toml b/pyproject.toml
@@ -18,9 +18,11 @@ authors = [ {name = "Vega-Altair Contributors"} ]
 dependencies = [
     "typing_extensions>=4.0.1; python_version<\"3.11\"",
     "jinja2",
+    # If you update the minimum required jsonschema version, also update it in build.yml
     "jsonschema>=3.0",
     "numpy",
-    "pandas>=0.18",
+    # If you update the minimum required pandas version, also update it in build.yml
+    "pandas>=0.25",
     "toolz"
 ]
 description = "Vega-Altair: A declarative statistical visualization library for Python."

diff --git a/tests/utils/test_core.py b/tests/utils/test_core.py
@@ -74,7 +74,7 @@ class StrokeWidthValue(ValueChannel, schemapi.SchemaBase):
     ],
 )
 def test_infer_dtype(value, expected_type):
-    assert infer_dtype(value) == expected_type
+    assert infer_dtype(value, skipna=False) == expected_type
 
 
 def test_parse_shorthand():