From 8fb0823f4b141cdee8675587810d0a70332f654f Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Wed, 21 Aug 2024 17:43:24 -0500
Subject: [PATCH 1/3] Drop Python 3.9 support

---
 README.md                                        | 2 +-
 conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +-
 conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +-
 cpp/cmake/thirdparty/get_arrow.cmake             | 2 +-
 dependencies.yaml                                | 6 +-----
 python/cudf/CMakeLists.txt                       | 2 +-
 python/cudf/pyproject.toml                       | 3 +--
 python/cudf_kafka/pyproject.toml                 | 2 +-
 python/cudf_polars/pyproject.toml                | 5 ++---
 python/custreamz/pyproject.toml                  | 3 +--
 python/dask_cudf/pyproject.toml                  | 3 +--
 python/pylibcudf/CMakeLists.txt                  | 2 +-
 python/pylibcudf/pyproject.toml                  | 3 +--
 13 files changed, 14 insertions(+), 23 deletions(-)

diff --git a/README.md b/README.md
index fd8b0365807..f1b010394d6 100644
--- a/README.md
+++ b/README.md
@@ -89,7 +89,7 @@ conda install -c rapidsai -c conda-forge -c nvidia \
 We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD
 of our latest development branch.
 
-Note: cuDF is supported only on Linux, and with Python versions 3.9 and later.
+Note: cuDF is supported only on Linux, and with Python versions 3.10 and later.
 
 See the [RAPIDS installation guide](https://docs.rapids.ai/install) for more OS and version info.
 
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 018162bd848..09dabda71ad 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -76,7 +76,7 @@ dependencies:
 - pytest-xdist
 - pytest<8
 - python-confluent-kafka>=1.9.0,<1.10.0a0
-- python>=3.9,<3.12
+- python>=3.10,<3.12
 - pytorch>=2.1.0
 - rapids-build-backend>=0.3.0,<0.4.0.dev0
 - rapids-dask-dependency==24.10.*,>=0.0.0a0
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index c60ffa7aaa5..6b9d82fe85e 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -74,7 +74,7 @@ dependencies:
 - pytest-xdist
 - pytest<8
 - python-confluent-kafka>=1.9.0,<1.10.0a0
-- python>=3.9,<3.12
+- python>=3.10,<3.12
 - pytorch>=2.1.0
 - rapids-build-backend>=0.3.0,<0.4.0.dev0
 - rapids-dask-dependency==24.10.*,>=0.0.0a0
diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake
index 0afdc526981..e3e6a07661a 100644
--- a/cpp/cmake/thirdparty/get_arrow.cmake
+++ b/cpp/cmake/thirdparty/get_arrow.cmake
@@ -45,7 +45,7 @@ function(find_libarrow_in_python_wheel PYARROW_VERSION)
     APPEND
     initial_code_block
     [=[
-find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)
+find_package(Python 3.10 REQUIRED COMPONENTS Interpreter)
 execute_process(
     COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])"
     OUTPUT_VARIABLE CUDF_PYARROW_WHEEL_DIR
diff --git a/dependencies.yaml b/dependencies.yaml
index 150d03be021..672c1396f24 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -548,10 +548,6 @@ dependencies:
     specific:
       - output_types: conda
         matrices:
-          - matrix:
-              py: "3.9"
-            packages:
-              - python=3.9
           - matrix:
               py: "3.10"
             packages:
@@ -562,7 +558,7 @@ dependencies:
               - python=3.11
           - matrix:
             packages:
-              - python>=3.9,<3.12
+              - python>=3.10,<3.12
   run_common:
     common:
       - output_types: [conda, requirements, pyproject]
diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt
index e11d62b3bd5..c3e2c4086cf 100644
--- a/python/cudf/CMakeLists.txt
+++ b/python/cudf/CMakeLists.txt
@@ -31,7 +31,7 @@ option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OF
 mark_as_advanced(USE_LIBARROW_FROM_PYARROW)
 
 # Find Python early so that later commands can use it
-find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)
+find_package(Python 3.10 REQUIRED COMPONENTS Interpreter)
 
 # If the user requested it we attempt to find CUDF.
 if(FIND_CUDF_CPP)
diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml
index 9db52164eca..f5e007b9527 100644
--- a/python/cudf/pyproject.toml
+++ b/python/cudf/pyproject.toml
@@ -16,7 +16,7 @@ authors = [
     { name = "NVIDIA Corporation" },
 ]
 license = { text = "Apache 2.0" }
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 dependencies = [
     "cachetools",
     "cubinlinker",
@@ -41,7 +41,6 @@ classifiers = [
     "Topic :: Scientific/Engineering",
     "License :: OSI Approved :: Apache Software License",
     "Programming Language :: Python",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
 ]
diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml
index 63c5b07c5f3..37fcb7082df 100644
--- a/python/cudf_kafka/pyproject.toml
+++ b/python/cudf_kafka/pyproject.toml
@@ -16,7 +16,7 @@ authors = [
     { name = "NVIDIA Corporation" },
 ]
 license = { text = "Apache 2.0" }
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 dependencies = [
     "cudf==24.10.*,>=0.0.0a0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml
index c380853035d..5397d51ca56 100644
--- a/python/cudf_polars/pyproject.toml
+++ b/python/cudf_polars/pyproject.toml
@@ -17,7 +17,7 @@ authors = [
     { name = "NVIDIA Corporation" },
 ]
 license = { text = "Apache 2.0" }
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 dependencies = [
     "polars>=1.0,<1.3",
     "pylibcudf==24.10.*,>=0.0.0a0",
@@ -28,7 +28,6 @@ classifiers = [
     "Topic :: Scientific/Engineering",
     "License :: OSI Approved :: Apache Software License",
     "Programming Language :: Python",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
 ]
@@ -62,7 +61,7 @@ exclude_also = [
 [tool.ruff]
 line-length = 88
 indent-width = 4
-target-version = "py39"
+target-version = "py310"
 fix = true
 
 [tool.ruff.lint]
diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml
index d6b88167262..be5331236a5 100644
--- a/python/custreamz/pyproject.toml
+++ b/python/custreamz/pyproject.toml
@@ -17,7 +17,7 @@ authors = [
     { name = "NVIDIA Corporation" },
 ]
 license = { text = "Apache 2.0" }
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 dependencies = [
     "confluent-kafka>=1.9.0,<1.10.0a0",
     "cudf==24.10.*,>=0.0.0a0",
@@ -31,7 +31,6 @@ classifiers = [
     "Topic :: Apache Kafka",
     "License :: OSI Approved :: Apache Software License",
     "Programming Language :: Python",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
 ]
diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml
index 872ecd35c28..342d45dcbab 100644
--- a/python/dask_cudf/pyproject.toml
+++ b/python/dask_cudf/pyproject.toml
@@ -17,7 +17,7 @@ authors = [
     { name = "NVIDIA Corporation" },
 ]
 license = { text = "Apache 2.0" }
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 dependencies = [
     "cudf==24.10.*,>=0.0.0a0",
     "cupy-cuda11x>=12.0.0",
@@ -32,7 +32,6 @@ classifiers = [
     "Topic :: Scientific/Engineering",
     "License :: OSI Approved :: Apache Software License",
     "Programming Language :: Python",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
 ]
diff --git a/python/pylibcudf/CMakeLists.txt b/python/pylibcudf/CMakeLists.txt
index 424d8372280..6046e86184a 100644
--- a/python/pylibcudf/CMakeLists.txt
+++ b/python/pylibcudf/CMakeLists.txt
@@ -31,7 +31,7 @@ option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OF
 mark_as_advanced(USE_LIBARROW_FROM_PYARROW)
 
 # Find Python early so that later commands can use it
-find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)
+find_package(Python 3.10 REQUIRED COMPONENTS Interpreter)
 
 # If the user requested it we attempt to find CUDF.
 if(FIND_CUDF_CPP)
diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml
index b037508d03f..5e4ef8a0ff9 100644
--- a/python/pylibcudf/pyproject.toml
+++ b/python/pylibcudf/pyproject.toml
@@ -16,7 +16,7 @@ authors = [
     { name = "NVIDIA Corporation" },
 ]
 license = { text = "Apache 2.0" }
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 dependencies = [
     "cuda-python>=11.7.1,<12.0a0",
     "nvtx>=0.2.1",
@@ -31,7 +31,6 @@ classifiers = [
     "Topic :: Scientific/Engineering",
     "License :: OSI Approved :: Apache Software License",
     "Programming Language :: Python",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
 ]

From 8ef260eb2fe5c4b76fba0bfe8bc05a40b9aeb9af Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Thu, 22 Aug 2024 11:31:42 -0500
Subject: [PATCH 2/3] Address new ruff errors

---
 .../cudf_polars/containers/dataframe.py       | 11 +++++---
 python/cudf_polars/cudf_polars/dsl/ir.py      | 27 ++++++++++++-------
 .../cudf_polars/typing/__init__.py            |  4 +--
 .../cudf_polars/cudf_polars/utils/sorting.py  |  2 +-
 python/cudf_polars/pyproject.toml             |  7 +++++
 5 files changed, 35 insertions(+), 16 deletions(-)

diff --git a/python/cudf_polars/cudf_polars/containers/dataframe.py b/python/cudf_polars/cudf_polars/containers/dataframe.py
index 7c28e7b9a6c..586f5213625 100644
--- a/python/cudf_polars/cudf_polars/containers/dataframe.py
+++ b/python/cudf_polars/cudf_polars/containers/dataframe.py
@@ -105,7 +105,9 @@ def from_polars(cls, df: pl.DataFrame) -> Self:
         return cls(
             [
                 NamedColumn(column, h_col.name).copy_metadata(h_col)
-                for column, h_col in zip(d_table.columns(), df.iter_columns())
+                for column, h_col in zip(
+                    d_table.columns(), df.iter_columns(), strict=True
+                )
             ]
         )
 
@@ -135,7 +137,10 @@ def from_table(cls, table: plc.Table, names: Sequence[str]) -> Self:
             raise ValueError("Mismatching name and table length.")
         return cls(
             # TODO: strict=True when we drop py39
-            [NamedColumn(c, name) for c, name in zip(table.columns(), names)]
+            [
+                NamedColumn(c, name)
+                for c, name in zip(table.columns(), names, strict=True)
+            ]
         )
 
     def sorted_like(
@@ -166,7 +171,7 @@ def sorted_like(
         self.columns = [
             c.sorted_like(other) if c.name in subset else c
             # TODO: strict=True when we drop py39
-            for c, other in zip(self.columns, like.columns)
+            for c, other in zip(self.columns, like.columns, strict=True)
         ]
         return self
 
diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py
index 019f00f4fca..ebc7dee6bfb 100644
--- a/python/cudf_polars/cudf_polars/dsl/ir.py
+++ b/python/cudf_polars/cudf_polars/dsl/ir.py
@@ -310,7 +310,8 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
                 *(
                     (piece.tbl, piece.column_names(include_children=False))
                     for piece in pieces
-                )
+                ),
+                strict=True,
             )
             df = DataFrame.from_table(
                 plc.concatenate.concatenate(list(tables)),
@@ -426,7 +427,8 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
             pdf = pdf.select(self.projection)
         df = DataFrame.from_polars(pdf)
         assert all(
-            c.obj.type() == dtype for c, dtype in zip(df.columns, self.schema.values())
+            c.obj.type() == dtype
+            for c, dtype in zip(df.columns, self.schema.values(), strict=True)
         )
         if self.predicate is not None:
             (mask,) = broadcast(self.predicate.evaluate(df), target_length=df.num_rows)
@@ -600,9 +602,10 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
         for i, table in enumerate(raw_tables):
             (column,) = table.columns()
             raw_columns.append(NamedColumn(column, f"tmp{i}"))
-        mapping = dict(zip(replacements, raw_columns))
+        mapping = dict(zip(replacements, raw_columns, strict=True))
         result_keys = [
-            NamedColumn(gk, k.name) for gk, k in zip(group_keys.columns(), keys)
+            NamedColumn(gk, k.name)
+            for gk, k in zip(group_keys.columns(), keys, strict=True)
         ]
         result_subs = DataFrame(raw_columns)
         results = [
@@ -752,7 +755,9 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
             columns = plc.join.cross_join(left.table, right.table).columns()
             left_cols = [
                 NamedColumn(new, old.name).sorted_like(old)
-                for new, old in zip(columns[: left.num_columns], left.columns)
+                for new, old in zip(
+                    columns[: left.num_columns], left.columns, strict=True
+                )
             ]
             right_cols = [
                 NamedColumn(
@@ -761,7 +766,9 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
                     if old.name not in left.column_names_set
                     else f"{old.name}{suffix}",
                 )
-                for new, old in zip(columns[left.num_columns :], right.columns)
+                for new, old in zip(
+                    columns[left.num_columns :], right.columns, strict=True
+                )
             ]
             return DataFrame([*left_cols, *right_cols])
         # TODO: Waiting on clarity based on https://github.com/pola-rs/polars/issues/17184
@@ -803,6 +810,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
                         for left_col, right_col in zip(
                             left.select_columns(left_on.column_names_set),
                             right.select_columns(right_on.column_names_set),
+                            strict=True,
                         )
                     )
                 )
@@ -909,7 +917,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
         result = DataFrame(
             [
                 NamedColumn(c, old.name).sorted_like(old)
-                for c, old in zip(table.columns(), df.columns)
+                for c, old in zip(table.columns(), df.columns, strict=True)
             ]
         )
         if keys_sorted or self.stable:
@@ -974,7 +982,8 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
             self.null_order,
         )
         columns = [
-            NamedColumn(c, old.name) for c, old in zip(table.columns(), df.columns)
+            NamedColumn(c, old.name)
+            for c, old in zip(table.columns(), df.columns, strict=True)
         ]
         # If a sort key is in the result table, set the sortedness property
         for k, i in enumerate(keys_in_result):
@@ -1089,7 +1098,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
             # final tag is "swapping" which is useful for the
             # optimiser (it blocks some pushdown operations)
             old, new, _ = self.options
-            return df.rename_columns(dict(zip(old, new)))
+            return df.rename_columns(dict(zip(old, new, strict=True)))
         elif self.name == "explode":
             df = self.df.evaluate(cache=cache)
             ((to_explode,),) = self.options
diff --git a/python/cudf_polars/cudf_polars/typing/__init__.py b/python/cudf_polars/cudf_polars/typing/__init__.py
index 02440e67fde..5276073e62a 100644
--- a/python/cudf_polars/cudf_polars/typing/__init__.py
+++ b/python/cudf_polars/cudf_polars/typing/__init__.py
@@ -13,9 +13,7 @@
 from polars.polars import _expr_nodes as pl_expr, _ir_nodes as pl_ir
 
 if TYPE_CHECKING:
-    from typing import Callable
-
-    from typing_extensions import TypeAlias
+    from typing import Callable, TypeAlias
 
     import polars as pl
 
diff --git a/python/cudf_polars/cudf_polars/utils/sorting.py b/python/cudf_polars/cudf_polars/utils/sorting.py
index 17ea44e5b1b..6ce216cbf8f 100644
--- a/python/cudf_polars/cudf_polars/utils/sorting.py
+++ b/python/cudf_polars/cudf_polars/utils/sorting.py
@@ -45,7 +45,7 @@ def sort_order(
     null_precedence = []
     if len(descending) != len(nulls_last) or len(descending) != num_keys:
         raise ValueError("Mismatching length of arguments in sort_order")
-    for asc, null_last in zip(column_order, nulls_last):
+    for asc, null_last in zip(column_order, nulls_last, strict=True):
         if (asc == plc.types.Order.ASCENDING) ^ (not null_last):
             null_precedence.append(plc.types.NullOrder.AFTER)
         elif (asc == plc.types.Order.ASCENDING) ^ null_last:
diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml
index 5397d51ca56..0382e3ce6a2 100644
--- a/python/cudf_polars/pyproject.toml
+++ b/python/cudf_polars/pyproject.toml
@@ -114,6 +114,9 @@ ignore = [
   "TD003", # Missing issue link on the line following this TODO
   # tryceratops
   "TRY003", # Avoid specifying long messages outside the exception class
+  # pyupgrade
+  "UP035",  # Import from `collections.abc` instead: `Callable`
+  "UP038",  # Use `X | Y` in `isinstance` call instead of `(X, Y)`
   # Lints below are turned off because of conflicts with the ruff
   # formatter
   # See https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules
@@ -136,6 +139,10 @@ fixable = ["ALL"]
 
 [tool.ruff.lint.per-file-ignores]
 "**/tests/**/*.py" = ["D"]
+"**/cudf_polars/typing/__init__.py" = [
+  # pyupgrade
+  "UP007", # Use `X | Y` for type annotations
+]
 
 [tool.ruff.lint.flake8-pytest-style]
 # https://docs.astral.sh/ruff/settings/#lintflake8-pytest-style

From ec74d27eab45704937af42c0428b0b7c4bd32dd7 Mon Sep 17 00:00:00 2001
From: James Lamb <jaylamb20@gmail.com>
Date: Thu, 22 Aug 2024 12:17:14 -0500
Subject: [PATCH 3/3] Apply suggestions from code review

Co-authored-by: Bradley Dice <bdice@bradleydice.com>
---
 python/cudf_polars/cudf_polars/containers/dataframe.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/python/cudf_polars/cudf_polars/containers/dataframe.py b/python/cudf_polars/cudf_polars/containers/dataframe.py
index 586f5213625..a5c99e2bc11 100644
--- a/python/cudf_polars/cudf_polars/containers/dataframe.py
+++ b/python/cudf_polars/cudf_polars/containers/dataframe.py
@@ -136,7 +136,6 @@ def from_table(cls, table: plc.Table, names: Sequence[str]) -> Self:
         if table.num_columns() != len(names):
             raise ValueError("Mismatching name and table length.")
         return cls(
-            # TODO: strict=True when we drop py39
             [
                 NamedColumn(c, name)
                 for c, name in zip(table.columns(), names, strict=True)
@@ -170,7 +169,6 @@ def sorted_like(
         subset = self.column_names_set if subset is None else subset
         self.columns = [
             c.sorted_like(other) if c.name in subset else c
-            # TODO: strict=True when we drop py39
             for c, other in zip(self.columns, like.columns, strict=True)
         ]
         return self