Skip to content

Commit

Permalink
Fix accidental requirement of Pandas 1.5. Bump minimum Pandas version…
Browse files Browse the repository at this point in the history
… to 0.25. Run tests with it (#3130)

* Add test with lowest supported Pandas version

* pin numpy to 1.17.5 and pandas to 0.25.3

* Wrap signature in TYPE_CHECKING for older pandas

* pyarrow doesn't work with pandas <1.0, so uninstall it for Python 3.8 job instead of 3.9

* Remove compatibility function infer_dtype which was needed for Pandas < 0.21

* Remove conversion of categorical dtype columns

* Add changelog entry

* Re-add missing skipna argument to infer_dtype

---------

Co-authored-by: Jon Mease <[email protected]>
  • Loading branch information
binste and jonmmease authored Aug 1, 2023
1 parent d32d053 commit bd89d2f
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 37 deletions.
11 changes: 9 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,18 @@ jobs:
- name: Maybe uninstall optional dependencies
# We uninstall pyarrow and vegafusion for one job to test that we have not
# accidentally introduced a hard dependency on these libraries.
# Uninstalling for Python 3.9 is an arbitrary choice.
# Uninstalling for Python 3.8 is an arbitrary choice.
# Also see https://github.com/altair-viz/altair/pull/3114
if: ${{ matrix.python-version }}=="3.9"
if: ${{ matrix.python-version == '3.8' }}
run: |
pip uninstall -y pyarrow vegafusion vegafusion-python-embed
- name: Maybe install lowest supported Pandas version
# We install the lowest supported Pandas version for one job to test that
# it still works. The pinned pandas and numpy releases are the oldest ones that
# ship Python 3.8 wheels, so this step only runs for Python 3.8.
if: ${{ matrix.python-version == '3.8' }}
run: |
pip install pandas==0.25.3 numpy==1.17.5
- name: Test that schema generation has no effect
run: |
python tools/generate_schema_wrapper.py
Expand Down
41 changes: 9 additions & 32 deletions altair/utils/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import jsonschema
import pandas as pd
import numpy as np
from pandas.core.interchange.dataframe_protocol import Column as PandasColumn
from pandas.api.types import infer_dtype

from altair.utils.schemapi import SchemaBase
from altair.utils._dfi_types import Column, DtypeKind, DataFrame as DfiDataFrame
Expand All @@ -25,13 +25,10 @@
else:
from typing_extensions import ParamSpec

from typing import Literal, Protocol
from typing import Literal, Protocol, TYPE_CHECKING

try:
from pandas.api.types import infer_dtype as _infer_dtype
except ImportError:
# Import for pandas < 0.20.0
from pandas.lib import infer_dtype as _infer_dtype # type: ignore[no-redef]
if TYPE_CHECKING:
from pandas.core.interchange.dataframe_protocol import Column as PandasColumn

_V = TypeVar("_V")
_P = ParamSpec("_P")
Expand All @@ -42,26 +39,6 @@ def __dataframe__(self, *args, **kwargs) -> DfiDataFrame:
...


def infer_dtype(value: object) -> str:
    """Infer the dtype of the value.

    This is a compatibility function for pandas infer_dtype,
    with skipna=False regardless of the pandas version.

    Parameters
    ----------
    value : object
        The value(s) handed through to pandas' ``infer_dtype``.

    Returns
    -------
    str
        The type name reported by pandas' ``infer_dtype``.
    """
    # Probe once whether the installed pandas accepts the ``skipna`` keyword
    # and cache the answer as an attribute on this function, so later calls
    # skip the probe.
    if not hasattr(infer_dtype, "_supports_skipna"):
        try:
            _infer_dtype([1], skipna=False)
        except TypeError:
            # pandas < 0.21.0 doesn't support the skipna keyword
            infer_dtype._supports_skipna = False  # type: ignore[attr-defined]
        else:
            infer_dtype._supports_skipna = True  # type: ignore[attr-defined]
    if infer_dtype._supports_skipna:  # type: ignore[attr-defined]
        return _infer_dtype(value, skipna=False)
    return _infer_dtype(value)


TYPECODE_MAP = {
"ordinal": "O",
"nominal": "N",
Expand Down Expand Up @@ -214,7 +191,7 @@ def infer_vegalite_type(
----------
data: object
"""
typ = infer_dtype(data)
typ = infer_dtype(data, skipna=False)

if typ in [
"floating",
Expand Down Expand Up @@ -348,9 +325,9 @@ def to_list_if_array(val):
for col_name, dtype in df.dtypes.items():
dtype_name = str(dtype)
if dtype_name == "category":
# Work around bug in to_json for categorical types in older versions of pandas
# https://github.com/pydata/pandas/issues/10778
# https://github.com/altair-viz/altair/pull/2170
# Work around a bug in older versions of pandas, where to_json does not
# properly convert NaN values in categorical columns to null.
# We can probably remove this part once we require Pandas >= 1.0
col = df[col_name].astype(object)
df[col_name] = col.where(col.notnull(), None)
elif dtype_name == "string":
Expand Down Expand Up @@ -630,7 +607,7 @@ def parse_shorthand(


def infer_vegalite_type_for_dfi_column(
column: Union[Column, PandasColumn],
column: Union[Column, "PandasColumn"],
) -> Union[_InferredVegaLiteType, Tuple[_InferredVegaLiteType, list]]:
from pyarrow.interchange.from_dataframe import column_to_array

Expand Down
3 changes: 2 additions & 1 deletion doc/releases/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ Bug Fixes

Backward-Incompatible Changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Drop support for Python 3.7 which is end-of-life (#3100).
- Drop support for Python 3.7 which is end-of-life (#3100)
- Increase minimum required Pandas version to 0.25 (#3130)

Version 5.0.1 (released May 26, 2023)
-------------------------------------
Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ authors = [ {name = "Vega-Altair Contributors"} ]
dependencies = [
"typing_extensions>=4.0.1; python_version<\"3.11\"",
"jinja2",
# If you update the minimum required jsonschema version, also update it in build.yml
"jsonschema>=3.0",
"numpy",
"pandas>=0.18",
# If you update the minimum required pandas version, also update it in build.yml
"pandas>=0.25",
"toolz"
]
description = "Vega-Altair: A declarative statistical visualization library for Python."
Expand Down
2 changes: 1 addition & 1 deletion tests/utils/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class StrokeWidthValue(ValueChannel, schemapi.SchemaBase):
],
)
def test_infer_dtype(value, expected_type):
assert infer_dtype(value) == expected_type
assert infer_dtype(value, skipna=False) == expected_type


def test_parse_shorthand():
Expand Down

0 comments on commit bd89d2f

Please sign in to comment.