From 8fb0823f4b141cdee8675587810d0a70332f654f Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 21 Aug 2024 17:43:24 -0500 Subject: [PATCH 1/3] Drop Python 3.9 support --- README.md | 2 +- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- cpp/cmake/thirdparty/get_arrow.cmake | 2 +- dependencies.yaml | 6 +----- python/cudf/CMakeLists.txt | 2 +- python/cudf/pyproject.toml | 3 +-- python/cudf_kafka/pyproject.toml | 2 +- python/cudf_polars/pyproject.toml | 5 ++--- python/custreamz/pyproject.toml | 3 +-- python/dask_cudf/pyproject.toml | 3 +-- python/pylibcudf/CMakeLists.txt | 2 +- python/pylibcudf/pyproject.toml | 3 +-- 13 files changed, 14 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index fd8b0365807..f1b010394d6 100644 --- a/README.md +++ b/README.md @@ -89,7 +89,7 @@ conda install -c rapidsai -c conda-forge -c nvidia \ We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD of our latest development branch. -Note: cuDF is supported only on Linux, and with Python versions 3.9 and later. +Note: cuDF is supported only on Linux, and with Python versions 3.10 and later. See the [RAPIDS installation guide](https://docs.rapids.ai/install) for more OS and version info. diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 018162bd848..09dabda71ad 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -76,7 +76,7 @@ dependencies: - pytest-xdist - pytest<8 - python-confluent-kafka>=1.9.0,<1.10.0a0 -- python>=3.9,<3.12 +- python>=3.10,<3.12 - pytorch>=2.1.0 - rapids-build-backend>=0.3.0,<0.4.0.dev0 - rapids-dask-dependency==24.10.*,>=0.0.0a0 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index c60ffa7aaa5..6b9d82fe85e 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -74,7 +74,7 @@ dependencies: - pytest-xdist - pytest<8 - python-confluent-kafka>=1.9.0,<1.10.0a0 -- python>=3.9,<3.12 +- python>=3.10,<3.12 - pytorch>=2.1.0 - rapids-build-backend>=0.3.0,<0.4.0.dev0 - rapids-dask-dependency==24.10.*,>=0.0.0a0 diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 0afdc526981..e3e6a07661a 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -45,7 +45,7 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION) APPEND initial_code_block [=[ -find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) +find_package(Python 3.10 REQUIRED COMPONENTS Interpreter) execute_process( COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])" OUTPUT_VARIABLE CUDF_PYARROW_WHEEL_DIR diff --git a/dependencies.yaml b/dependencies.yaml index 150d03be021..672c1396f24 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -548,10 +548,6 @@ dependencies: specific: - output_types: conda matrices: - - matrix: - py: "3.9" - packages: - - python=3.9 - matrix: py: "3.10" packages: @@ -562,7 +558,7 @@ dependencies: - python=3.11 - matrix: packages: - - python>=3.9,<3.12 + - python>=3.10,<3.12 run_common: common: - output_types: [conda, requirements, pyproject] diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index e11d62b3bd5..c3e2c4086cf 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -31,7 +31,7 @@ option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OF mark_as_advanced(USE_LIBARROW_FROM_PYARROW) # Find Python early so that later commands can use it -find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) +find_package(Python 3.10 REQUIRED COMPONENTS Interpreter) # If the user requested it we attempt to find CUDF. if(FIND_CUDF_CPP) diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 9db52164eca..f5e007b9527 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -16,7 +16,7 @@ authors = [ { name = "NVIDIA Corporation" }, ] license = { text = "Apache 2.0" } -requires-python = ">=3.9" +requires-python = ">=3.10" dependencies = [ "cachetools", "cubinlinker", @@ -41,7 +41,6 @@ classifiers = [ "Topic :: Scientific/Engineering", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index 63c5b07c5f3..37fcb7082df 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -16,7 +16,7 @@ authors = [ { name = "NVIDIA Corporation" }, ] license = { text = "Apache 2.0" } -requires-python = ">=3.9" +requires-python = ">=3.10" dependencies = [ "cudf==24.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index c380853035d..5397d51ca56 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -17,7 +17,7 @@ authors = [ { name = "NVIDIA Corporation" }, ] license = { text = "Apache 2.0" } -requires-python = ">=3.9" +requires-python = ">=3.10" dependencies = [ "polars>=1.0,<1.3", "pylibcudf==24.10.*,>=0.0.0a0", @@ -28,7 +28,6 @@ classifiers = [ "Topic :: Scientific/Engineering", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] @@ -62,7 +61,7 @@ exclude_also = [ [tool.ruff] line-length = 88 indent-width = 4 -target-version = "py39" +target-version = "py310" fix = true [tool.ruff.lint] diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml index d6b88167262..be5331236a5 100644 --- a/python/custreamz/pyproject.toml +++ b/python/custreamz/pyproject.toml @@ -17,7 +17,7 @@ authors = [ { name = "NVIDIA Corporation" }, ] license = { text = "Apache 2.0" } -requires-python = ">=3.9" +requires-python = ">=3.10" dependencies = [ "confluent-kafka>=1.9.0,<1.10.0a0", "cudf==24.10.*,>=0.0.0a0", @@ -31,7 +31,6 @@ classifiers = [ "Topic :: Apache Kafka", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index 872ecd35c28..342d45dcbab 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -17,7 +17,7 @@ authors = [ { name = "NVIDIA Corporation" }, ] license = { text = "Apache 2.0" } -requires-python = ">=3.9" +requires-python = ">=3.10" dependencies = [ "cudf==24.10.*,>=0.0.0a0", "cupy-cuda11x>=12.0.0", @@ -32,7 +32,6 @@ classifiers = [ "Topic :: Scientific/Engineering", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] diff --git a/python/pylibcudf/CMakeLists.txt b/python/pylibcudf/CMakeLists.txt index 424d8372280..6046e86184a 100644 --- a/python/pylibcudf/CMakeLists.txt +++ b/python/pylibcudf/CMakeLists.txt @@ -31,7 +31,7 @@ option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OF mark_as_advanced(USE_LIBARROW_FROM_PYARROW) # Find Python early so that later commands can use it -find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) +find_package(Python 3.10 REQUIRED COMPONENTS Interpreter) # If the user requested it we attempt to find CUDF. if(FIND_CUDF_CPP) diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index b037508d03f..5e4ef8a0ff9 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -16,7 +16,7 @@ authors = [ { name = "NVIDIA Corporation" }, ] license = { text = "Apache 2.0" } -requires-python = ">=3.9" +requires-python = ">=3.10" dependencies = [ "cuda-python>=11.7.1,<12.0a0", "nvtx>=0.2.1", @@ -31,7 +31,6 @@ classifiers = [ "Topic :: Scientific/Engineering", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] From 8ef260eb2fe5c4b76fba0bfe8bc05a40b9aeb9af Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 22 Aug 2024 11:31:42 -0500 Subject: [PATCH 2/3] address new ruff errors --- .../cudf_polars/containers/dataframe.py | 11 +++++--- python/cudf_polars/cudf_polars/dsl/ir.py | 27 ++++++++++++------- .../cudf_polars/typing/__init__.py | 4 +-- .../cudf_polars/cudf_polars/utils/sorting.py | 2 +- python/cudf_polars/pyproject.toml | 7 +++++ 5 files changed, 35 insertions(+), 16 deletions(-) diff --git a/python/cudf_polars/cudf_polars/containers/dataframe.py b/python/cudf_polars/cudf_polars/containers/dataframe.py index 7c28e7b9a6c..586f5213625 100644 --- a/python/cudf_polars/cudf_polars/containers/dataframe.py +++ b/python/cudf_polars/cudf_polars/containers/dataframe.py @@ -105,7 +105,9 @@ def from_polars(cls, df: pl.DataFrame) -> Self: return cls( [ NamedColumn(column, h_col.name).copy_metadata(h_col) - for column, h_col in zip(d_table.columns(), df.iter_columns()) + for column, h_col in zip( + d_table.columns(), df.iter_columns(), strict=True + ) ] ) @@ -135,7 +137,10 @@ def from_table(cls, table: plc.Table, names: Sequence[str]) -> Self: raise ValueError("Mismatching name and table length.") return cls( # TODO: strict=True when we drop py39 - [NamedColumn(c, name) for c, name in zip(table.columns(), names)] + [ + NamedColumn(c, name) + for c, name in zip(table.columns(), names, strict=True) + ] ) def sorted_like( @@ -166,7 +171,7 @@ def sorted_like( self.columns = [ c.sorted_like(other) if c.name in subset else c # TODO: strict=True when we drop py39 - for c, other in zip(self.columns, like.columns) + for c, other in zip(self.columns, like.columns, strict=True) ] return self diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index 019f00f4fca..ebc7dee6bfb 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -310,7 +310,8 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: *( (piece.tbl, piece.column_names(include_children=False)) for piece in pieces - ) + ), + strict=True, ) df = DataFrame.from_table( plc.concatenate.concatenate(list(tables)), @@ -426,7 +427,8 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: pdf = pdf.select(self.projection) df = DataFrame.from_polars(pdf) assert all( - c.obj.type() == dtype for c, dtype in zip(df.columns, self.schema.values()) + c.obj.type() == dtype + for c, dtype in zip(df.columns, self.schema.values(), strict=True) ) if self.predicate is not None: (mask,) = broadcast(self.predicate.evaluate(df), target_length=df.num_rows) @@ -600,9 +602,10 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: for i, table in enumerate(raw_tables): (column,) = table.columns() raw_columns.append(NamedColumn(column, f"tmp{i}")) - mapping = dict(zip(replacements, raw_columns)) + mapping = dict(zip(replacements, raw_columns, strict=True)) result_keys = [ - NamedColumn(gk, k.name) for gk, k in zip(group_keys.columns(), keys) + NamedColumn(gk, k.name) + for gk, k in zip(group_keys.columns(), keys, strict=True) ] result_subs = DataFrame(raw_columns) results = [ @@ -752,7 +755,9 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: columns = plc.join.cross_join(left.table, right.table).columns() left_cols = [ NamedColumn(new, old.name).sorted_like(old) - for new, old in zip(columns[: left.num_columns], left.columns) + for new, old in zip( + columns[: left.num_columns], left.columns, strict=True + ) ] right_cols = [ NamedColumn( @@ -761,7 +766,9 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: if old.name not in left.column_names_set else f"{old.name}{suffix}", ) - for new, old in zip(columns[left.num_columns :], right.columns) + for new, old in zip( + columns[left.num_columns :], right.columns, strict=True + ) ] return DataFrame([*left_cols, *right_cols]) # TODO: Waiting on clarity based on https://github.com/pola-rs/polars/issues/17184 @@ -803,6 +810,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: for left_col, right_col in zip( left.select_columns(left_on.column_names_set), right.select_columns(right_on.column_names_set), + strict=True, ) ) ) @@ -909,7 +917,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: result = DataFrame( [ NamedColumn(c, old.name).sorted_like(old) - for c, old in zip(table.columns(), df.columns) + for c, old in zip(table.columns(), df.columns, strict=True) ] ) if keys_sorted or self.stable: @@ -974,7 +982,8 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: self.null_order, ) columns = [ - NamedColumn(c, old.name) for c, old in zip(table.columns(), df.columns) + NamedColumn(c, old.name) + for c, old in zip(table.columns(), df.columns, strict=True) ] # If a sort key is in the result table, set the sortedness property for k, i in enumerate(keys_in_result): @@ -1089,7 +1098,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: # final tag is "swapping" which is useful for the # optimiser (it blocks some pushdown operations) old, new, _ = self.options - return df.rename_columns(dict(zip(old, new))) + return df.rename_columns(dict(zip(old, new, strict=True))) elif self.name == "explode": df = self.df.evaluate(cache=cache) ((to_explode,),) = self.options diff --git a/python/cudf_polars/cudf_polars/typing/__init__.py b/python/cudf_polars/cudf_polars/typing/__init__.py index 02440e67fde..5276073e62a 100644 --- a/python/cudf_polars/cudf_polars/typing/__init__.py +++ b/python/cudf_polars/cudf_polars/typing/__init__.py @@ -13,9 +13,7 @@ from polars.polars import _expr_nodes as pl_expr, _ir_nodes as pl_ir if TYPE_CHECKING: - from typing import Callable - - from typing_extensions import TypeAlias + from typing import Callable, TypeAlias import polars as pl diff --git a/python/cudf_polars/cudf_polars/utils/sorting.py b/python/cudf_polars/cudf_polars/utils/sorting.py index 17ea44e5b1b..6ce216cbf8f 100644 --- a/python/cudf_polars/cudf_polars/utils/sorting.py +++ b/python/cudf_polars/cudf_polars/utils/sorting.py @@ -45,7 +45,7 @@ def sort_order( null_precedence = [] if len(descending) != len(nulls_last) or len(descending) != num_keys: raise ValueError("Mismatching length of arguments in sort_order") - for asc, null_last in zip(column_order, nulls_last): + for asc, null_last in zip(column_order, nulls_last, strict=True): if (asc == plc.types.Order.ASCENDING) ^ (not null_last): null_precedence.append(plc.types.NullOrder.AFTER) elif (asc == plc.types.Order.ASCENDING) ^ null_last: diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index 5397d51ca56..0382e3ce6a2 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -114,6 +114,9 @@ ignore = [ "TD003", # Missing issue link on the line following this TODO # tryceratops "TRY003", # Avoid specifying long messages outside the exception class + # pyupgrade + "UP035", # Import from `collections.abc` instead: `Callable` + "UP038", # Use `X | Y` in `isinstance` call instead of `(X, Y)` # Lints below are turned off because of conflicts with the ruff # formatter # See https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules @@ -136,6 +139,10 @@ fixable = ["ALL"] [tool.ruff.lint.per-file-ignores] "**/tests/**/*.py" = ["D"] +"**/cudf_polars/typing/__init__.py" = [ + # pyupgrade + "UP007", # Use `X | Y` for type annotations +] [tool.ruff.lint.flake8-pytest-style] # https://docs.astral.sh/ruff/settings/#lintflake8-pytest-style From ec74d27eab45704937af42c0428b0b7c4bd32dd7 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 22 Aug 2024 12:17:14 -0500 Subject: [PATCH 3/3] Apply suggestions from code review Co-authored-by: Bradley Dice --- python/cudf_polars/cudf_polars/containers/dataframe.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/cudf_polars/cudf_polars/containers/dataframe.py b/python/cudf_polars/cudf_polars/containers/dataframe.py index 586f5213625..a5c99e2bc11 100644 --- a/python/cudf_polars/cudf_polars/containers/dataframe.py +++ b/python/cudf_polars/cudf_polars/containers/dataframe.py @@ -136,7 +136,6 @@ def from_table(cls, table: plc.Table, names: Sequence[str]) -> Self: if table.num_columns() != len(names): raise ValueError("Mismatching name and table length.") return cls( - # TODO: strict=True when we drop py39 [ NamedColumn(c, name) for c, name in zip(table.columns(), names, strict=True) @@ -170,7 +169,6 @@ def sorted_like( subset = self.column_names_set if subset is None else subset self.columns = [ c.sorted_like(other) if c.name in subset else c - # TODO: strict=True when we drop py39 for c, other in zip(self.columns, like.columns, strict=True) ] return self