From f1d6570e565ad6b16f1620b2455fe302f15861db Mon Sep 17 00:00:00 2001 From: Sammy Sidhu Date: Tue, 7 May 2024 17:18:03 -0700 Subject: [PATCH] [CHORE] Drop Python 3.7 (#2250) * Drop support for Python 3.7 which has been EOL for about a year now. --- .github/workflows/notebook-checker.yml | 2 +- .github/workflows/property-based-tests.yml | 2 +- .github/workflows/python-package.yml | 10 +++++----- .ruff.toml | 4 ++-- Cargo.toml | 2 +- daft/api_annotations.py | 7 +------ daft/dataframe/dataframe.py | 5 ++++- daft/execution/execution_step.py | 8 +------- daft/expressions/expressions.py | 8 +------- daft/filesystem.py | 10 ++-------- daft/logical/schema.py | 7 ------- daft/runners/partitioning.py | 6 ------ daft/series.py | 8 +------- daft/udf_library/url_udfs.py | 7 +------ pyproject.toml | 5 ++--- tests/expressions/typing/conftest.py | 9 +-------- tests/integration/iceberg/conftest.py | 5 +++++ tests/io/iceberg/test_iceberg_writes.py | 4 ++++ tests/io/lancedb/test_lancedb_reads.py | 11 ++++------- 19 files changed, 37 insertions(+), 83 deletions(-) diff --git a/.github/workflows/notebook-checker.yml b/.github/workflows/notebook-checker.yml index 4c2624d103..852e3c7c89 100644 --- a/.github/workflows/notebook-checker.yml +++ b/.github/workflows/notebook-checker.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.7'] + python-version: ['3.8'] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/.github/workflows/property-based-tests.yml b/.github/workflows/property-based-tests.yml index 468d9f2a3b..ebbaecd04b 100644 --- a/.github/workflows/property-based-tests.yml +++ b/.github/workflows/property-based-tests.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.7'] + python-version: ['3.8'] daft_runner: [py] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 6c2244945d..dd7292abb6 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.7', '3.10'] + python-version: ['3.8', '3.10'] daft-runner: [py, ray] pyarrow-version: [7.0.0, 12.0] os: [ubuntu, windows] @@ -31,7 +31,7 @@ jobs: pyarrow-version: 7.0.0 os: ubuntu - os: windows - python-version: '3.7' + python-version: '3.8' - os: windows pyarrow-version: 7.0.0 steps: @@ -146,7 +146,7 @@ jobs: package-name: getdaft strategy: matrix: - python-version: ['3.7'] + python-version: ['3.8'] steps: - uses: actions/checkout@v4 with: @@ -184,7 +184,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.7'] + python-version: ['3.8'] daft-runner: [py, ray] steps: - uses: actions/checkout@v4 @@ -656,7 +656,7 @@ jobs: fail-fast: false matrix: os: [ubuntu, windows] - python-version: ['3.7'] + python-version: ['3.8'] steps: - uses: actions/checkout@v4 - uses: moonrepo/setup-rust@v1 diff --git a/.ruff.toml b/.ruff.toml index 8e37a84b9a..d0ea1cb625 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -1,7 +1,7 @@ fix = true indent-width = 4 line-length = 120 -target-version = "py37" +target-version = "py38" [format] # Like Black, indent with spaces, rather than tabs. @@ -14,7 +14,7 @@ quote-style = "double" skip-magic-trailing-comma = false [lint] -exclude = ["daft/pickle/cloudpickle.py", "daft/pickle/cloudpickle_fast.py"] +exclude = ["daft/pickle/cloudpickle.py", "daft/pickle/cloudpickle_fast.py", "daft/pickle/compat.py"] extend-select = [ "UP", # pyupgrade "LOG", # flake8-logging diff --git a/Cargo.toml b/Cargo.toml index 3b7a28da77..d9623c1db0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -147,7 +147,7 @@ package = "parquet2" rev = "d4c24086c4cc824fbabef093ab2fda95d3aacb78" [workspace.dependencies.pyo3] -features = ["extension-module", "multiple-pymethods", "abi3-py37"] +features = ["extension-module", "multiple-pymethods", "abi3-py38"] version = "0.19.2" [workspace.dependencies.pyo3-log] diff --git a/daft/api_annotations.py b/daft/api_annotations.py index 701486c668..351fcbd31d 100644 --- a/daft/api_annotations.py +++ b/daft/api_annotations.py @@ -3,12 +3,7 @@ import functools import inspect import sys -from typing import Any, Callable, ForwardRef, TypeVar, Union - -if sys.version_info < (3, 8): - from typing_extensions import get_args, get_origin -else: - from typing import get_args, get_origin +from typing import Any, Callable, ForwardRef, TypeVar, Union, get_args, get_origin if sys.version_info < (3, 10): from typing_extensions import ParamSpec diff --git a/daft/dataframe/dataframe.py b/daft/dataframe/dataframe.py index f2b13a7413..0e98f61a9e 100644 --- a/daft/dataframe/dataframe.py +++ b/daft/dataframe/dataframe.py @@ -461,12 +461,16 @@ def write_iceberg(self, table: "IcebergTable", mode: str = "append") -> "DataFra if len(table.spec().fields) > 0: raise ValueError("Cannot write to partitioned Iceberg tables") + import pyarrow as pa import pyiceberg from packaging.version import parse if parse(pyiceberg.__version__) < parse("0.6.0"): raise ValueError(f"Write Iceberg is only supported on pyiceberg>=0.6.0, found {pyiceberg.__version__}") + if parse(pa.__version__) < parse("8.0.0"): + raise ValueError(f"Write Iceberg is only supported on pyarrow>=8.0.0, found {pa.__version__}") + from pyiceberg.table import _MergingSnapshotProducer from pyiceberg.table.snapshots import Operation @@ -514,7 +518,6 @@ def write_iceberg(self, table: "IcebergTable", mode: str = "append") -> "DataFra size.append(data_file.file_size_in_bytes) merge.commit() - import pyarrow as pa from daft import from_pydict diff --git a/daft/execution/execution_step.py b/daft/execution/execution_step.py index 7d297ef80c..cbc7056f94 100644 --- a/daft/execution/execution_step.py +++ b/daft/execution/execution_step.py @@ -2,14 +2,8 @@ import itertools import pathlib -import sys from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Generic - -if sys.version_info < (3, 8): - from typing_extensions import Protocol -else: - from typing import Protocol +from typing import TYPE_CHECKING, Generic, Protocol from daft.daft import FileFormat, IOConfig, JoinType, ResourceRequest, ScanTask from daft.expressions import Expression, ExpressionsProjection, col diff --git a/daft/expressions/expressions.py b/daft/expressions/expressions.py index 765b95308e..54936e9785 100644 --- a/daft/expressions/expressions.py +++ b/daft/expressions/expressions.py @@ -2,10 +2,9 @@ import builtins import os -import sys from datetime import date, datetime, time from decimal import Decimal -from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, TypeVar, overload +from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Literal, TypeVar, overload import pyarrow as pa @@ -25,11 +24,6 @@ from daft.logical.schema import Field, Schema from daft.series import Series, item_to_series -if sys.version_info < (3, 8): - from typing_extensions import Literal -else: - from typing import Literal - if TYPE_CHECKING: from daft.io import IOConfig diff --git a/daft/filesystem.py b/daft/filesystem.py index 0c2ef86345..792f70e844 100644 --- a/daft/filesystem.py +++ b/daft/filesystem.py @@ -1,17 +1,11 @@ from __future__ import annotations import dataclasses +import logging import pathlib import sys import urllib.parse - -if sys.version_info < (3, 8): - from typing_extensions import Literal -else: - from typing import Literal - -import logging -from typing import Any +from typing import Any, Literal import fsspec from fsspec.registry import get_filesystem_class diff --git a/daft/logical/schema.py b/daft/logical/schema.py index b9334acce8..0009e51c85 100644 --- a/daft/logical/schema.py +++ b/daft/logical/schema.py @@ -1,6 +1,5 @@ from __future__ import annotations -import sys from typing import TYPE_CHECKING, Iterator from daft.daft import CsvParseOptions, JsonParseOptions @@ -14,12 +13,6 @@ if TYPE_CHECKING: import pyarrow as pa -if sys.version_info < (3, 8): - pass -else: - pass - -if TYPE_CHECKING: from daft.io import IOConfig diff --git a/daft/runners/partitioning.py b/daft/runners/partitioning.py index 986e562779..186ffdd5af 100644 --- a/daft/runners/partitioning.py +++ b/daft/runners/partitioning.py @@ -1,6 +1,5 @@ from __future__ import annotations -import sys import threading import weakref from abc import abstractmethod @@ -15,11 +14,6 @@ from daft.logical.schema import Schema from daft.table import MicroPartition -if sys.version_info < (3, 8): - pass -else: - pass - if TYPE_CHECKING: import pandas as pd diff --git a/daft/series.py b/daft/series.py index 596a0d3648..f7b2f07aa8 100644 --- a/daft/series.py +++ b/daft/series.py @@ -1,12 +1,6 @@ from __future__ import annotations -import sys -from typing import Any, TypeVar - -if sys.version_info < (3, 8): - from typing_extensions import Literal -else: - from typing import Literal +from typing import Any, Literal, TypeVar import pyarrow as pa diff --git a/daft/udf_library/url_udfs.py b/daft/udf_library/url_udfs.py index 568902650e..2a87cdbc59 100644 --- a/daft/udf_library/url_udfs.py +++ b/daft/udf_library/url_udfs.py @@ -1,20 +1,15 @@ from __future__ import annotations import logging -import sys import threading from concurrent.futures import ThreadPoolExecutor, as_completed +from typing import Literal from daft import filesystem from daft.datatype import DataType from daft.series import Series from daft.udf import udf -if sys.version_info < (3, 8): - from typing_extensions import Literal -else: - from typing import Literal - thread_local = threading.local() diff --git a/pyproject.toml b/pyproject.toml index 6fe2bd8cc7..78f921cb7a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,8 +8,7 @@ dependencies = [ "pyarrow >= 7.0.0", "fsspec", "tqdm", - "typing-extensions >= 4.0.0; python_version < '3.10'", - "pickle5 >= 0.0.12; python_version < '3.8'" + "typing-extensions >= 4.0.0; python_version < '3.10'" ] description = "Distributed Dataframes for Multimodal Data" dynamic = ["version"] @@ -20,7 +19,7 @@ maintainers = [ ] name = "getdaft" readme = "README.rst" -requires-python = ">=3.7" +requires-python = ">=3.8" [project.optional-dependencies] all = ["getdaft[aws, azure, gcp, ray, pandas, numpy, iceberg, deltalake, sql]"] diff --git a/tests/expressions/typing/conftest.py b/tests/expressions/typing/conftest.py index 3e36f68559..686a559da2 100644 --- a/tests/expressions/typing/conftest.py +++ b/tests/expressions/typing/conftest.py @@ -2,18 +2,11 @@ import datetime import itertools -import sys - -import pytz - -if sys.version_info < (3, 8): - pass -else: - pass from typing import Callable import pyarrow as pa import pytest +import pytz from daft.datatype import DataType from daft.expressions import Expression, ExpressionsProjection diff --git a/tests/integration/iceberg/conftest.py b/tests/integration/iceberg/conftest.py index c9a0cf00b9..690b9c199d 100644 --- a/tests/integration/iceberg/conftest.py +++ b/tests/integration/iceberg/conftest.py @@ -2,10 +2,15 @@ from typing import Generator, TypeVar +import pyarrow as pa import pytest pyiceberg = pytest.importorskip("pyiceberg") +PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (8, 0, 0) +pytestmark = pytest.mark.skipif(PYARROW_LE_8_0_0, reason="iceberg writes only supported if pyarrow >= 8.0.0") + + import tenacity from pyiceberg.catalog import Catalog, load_catalog from pyiceberg.table import Table diff --git a/tests/io/iceberg/test_iceberg_writes.py b/tests/io/iceberg/test_iceberg_writes.py index a731be4119..35c2eaffa2 100644 --- a/tests/io/iceberg/test_iceberg_writes.py +++ b/tests/io/iceberg/test_iceberg_writes.py @@ -5,6 +5,10 @@ pyiceberg = pytest.importorskip("pyiceberg") +PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (8, 0, 0) +pytestmark = pytest.mark.skipif(PYARROW_LE_8_0_0, reason="iceberg only supported if pyarrow >= 8.0.0") + + from pyiceberg.catalog.sql import SqlCatalog import daft diff --git a/tests/io/lancedb/test_lancedb_reads.py b/tests/io/lancedb/test_lancedb_reads.py index 6875b1f88b..ad3062ee19 100644 --- a/tests/io/lancedb/test_lancedb_reads.py +++ b/tests/io/lancedb/test_lancedb_reads.py @@ -1,15 +1,9 @@ -import sys - +import lance import pyarrow as pa import pytest import daft -if sys.version_info[:2] < (3, 8): - pytest.skip(allow_module_level=True, reason="LanceDB does not support Python 3.7 and below") -else: - import lance - TABLE_NAME = "my_table" data = { "vector": [[1.1, 1.2], [0.2, 1.8]], @@ -17,6 +11,9 @@ "long": [-122.7, -74.1], } +PYARROW_LE_8_0_0 = tuple(int(s) for s in pa.__version__.split(".") if s.isnumeric()) < (8, 0, 0) +pytestmark = pytest.mark.skipif(PYARROW_LE_8_0_0, reason="lance only supported if pyarrow >= 8.0.0") + @pytest.fixture(scope="function") def lance_dataset_path(tmp_path_factory):