From 3261b26d701cdd171031ec94617e738e4b1f0399 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Mon, 6 May 2024 18:03:59 +0200 Subject: [PATCH] chore: basic structure for implementation of tabular containers with polars (#731) Closes partially #712 ### Summary of Changes Add (mostly empty) classes for a new implementation of the tabular containers with `polars`. --------- Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Co-authored-by: Alexander <47296670+Marsmaennchen221@users.noreply.github.com> --- .coveragerc | 5 - .github/linters/.ruff.toml | 114 --- benchmarks/__init__.py | 0 benchmarks/table/__init__.py | 0 benchmarks/table/row_operations.py | 130 ++++ benchmarks/table/row_operations_polars.py | 130 ++++ benchmarks/table/utils/__init__.py | 7 + .../table/utils/create_synthetic_table.py | 36 + .../utils/create_synthetic_table_polars.py | 36 + poetry.lock | 246 ++++-- pyproject.toml | 129 +++- src/resources/to_excel_file.xlsx | Bin 4967 -> 4968 bytes src/safeds/_utils/__init__.py | 3 + src/safeds/_utils/_file_io.py | 51 ++ src/safeds/_utils/_hashing.py | 6 +- src/safeds/_utils/_random.py | 10 + .../data/tabular/containers/__init__.py | 12 + .../containers/_experimental_polars_cell.py | 10 + .../containers/_experimental_polars_column.py | 12 + .../containers/_experimental_polars_row.py | 7 + .../containers/_experimental_polars_table.py | 723 ++++++++++++++++++ src/safeds/exceptions/_data.py | 2 +- 22 files changed, 1461 insertions(+), 208 deletions(-) delete mode 100644 .coveragerc delete mode 100644 .github/linters/.ruff.toml create mode 100644 benchmarks/__init__.py create mode 100644 benchmarks/table/__init__.py create mode 100644 benchmarks/table/row_operations.py create mode 100644 benchmarks/table/row_operations_polars.py create mode 100644 benchmarks/table/utils/__init__.py create mode 100644 benchmarks/table/utils/create_synthetic_table.py create mode 100644 benchmarks/table/utils/create_synthetic_table_polars.py create 
mode 100644 src/safeds/_utils/_file_io.py create mode 100644 src/safeds/_utils/_random.py create mode 100644 src/safeds/data/tabular/containers/_experimental_polars_cell.py create mode 100644 src/safeds/data/tabular/containers/_experimental_polars_column.py create mode 100644 src/safeds/data/tabular/containers/_experimental_polars_row.py create mode 100644 src/safeds/data/tabular/containers/_experimental_polars_table.py diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index f4afc3ce5..000000000 --- a/.coveragerc +++ /dev/null @@ -1,5 +0,0 @@ -[report] -exclude_lines = - pragma: no cover - if\s+(typing\.)?TYPE_CHECKING: - \.\.\. diff --git a/.github/linters/.ruff.toml b/.github/linters/.ruff.toml deleted file mode 100644 index ab5f93f72..000000000 --- a/.github/linters/.ruff.toml +++ /dev/null @@ -1,114 +0,0 @@ -line-length = 120 -target-version = "py311" - -[lint] -ignore-init-module-imports = true - -select = [ - "F", - "E", - "W", - "I", - "N", - "D", - "UP", - "YTT", - "BLE", - "FBT", - "B", - "A", - "COM", - "C4", - "DTZ", - "T10", - "ISC", - "ICN", - "G", - "INP", - "PIE", - "T20", - "PYI", - "PT", - "Q", - "RSE", - "RET", - "SLF", - "SIM", - "TID", - "TCH", - "INT", - "ARG", - "PTH", - "PD", - "PGH", - "PL", - "TRY", - "NPY", - "RUF" -] -ignore = [ - # line-too-long (handled by black) - "E501", - # tab-indentation (handled by black) - "W191", - # trailing-whitespace (handled by black) - "W291", - # missing-newline-at-end-of-file (handled by black) - "W292", - # blank-line-with-witespace (handled by black) - "W293", - # boolean-positional-arg-in-function-definition (we leave it to the call-site) - "FBT001", - # boolean-default-value-in-function-definition (we leave it to the call-site) - "FBT002", - # builtin-attribute-shadowing (not an issue) - "A003", - # implicit-return (can add a return even though all cases are covered) - "RET503", - # superfluous-else-return (sometimes it's more readable) - "RET505", - # superfluous-else-raise (sometimes 
it's more readable) - "RET506", - # superfluous-else-continue (sometimes it's more readable) - "RET507", - # superfluous-else-break (sometimes it's more readable) - "RET508", - # private-member-access (we cannot always avoid it if we want a clean API) - "SLF001", - # if-else-block-instead-of-if-exp (an if-else block can be more readable) - "SIM108", - # compare-to-empty-string (sometimes it's better to be explicit) - "PLC1901", - # too-many-return-statements - "PLR0911", - # too-many-branches - "PLR0912", - # too-many-arguments - "PLR0913", - # too-many-statements - "PLR0915", - # magic-value-comparison - "PLR2004", - # raise-vanilla-args - "TRY003", -] - -[lint.per-file-ignores] -"*test*.py" = [ - # Undocumented declarations - "D100", - "D101", - "D102", - "D103", - "D104", - "D105", - "D106", - "D107", -] -"__init__.py" = [ - # runtime-import-in-type-checking-block: Does not work with apipkg. - "TCH004", -] - -[lint.pydocstyle] -convention = "numpy" diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/table/__init__.py b/benchmarks/table/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/table/row_operations.py b/benchmarks/table/row_operations.py new file mode 100644 index 000000000..d81c8fbe9 --- /dev/null +++ b/benchmarks/table/row_operations.py @@ -0,0 +1,130 @@ +from timeit import timeit + +from safeds.data.tabular.containers import Table + +from benchmarks.table.utils import create_synthetic_table + +REPETITIONS = 10 + + +def _run_add_rows() -> None: + table.add_rows(table) + + +def _run_get_row() -> None: + table.get_row(0) + + +def _run_group_rows() -> None: + table.group_rows(lambda row: row.get_value("column_0") % 2 == 0) + + +def _run_keep_only_rows() -> None: + table.keep_only_rows(lambda row: row.get_value("column_0") % 2 == 0) + + +def _run_remove_duplicate_rows() -> None: + table.remove_duplicate_rows() + + +def 
_run_remove_rows_with_missing_values() -> None: + table.remove_rows_with_missing_values() + + +def _run_remove_rows_with_outliers() -> None: + table.remove_rows_with_outliers() + + +def _run_remove_rows() -> None: + table.remove_rows(lambda row: row.get_value("column_0") % 2 == 0) + + +def _run_shuffle_rows() -> None: + table.shuffle_rows() + + +def _run_slice_rows() -> None: + table.slice_rows(end=table.number_of_rows // 2) + + +def _run_sort_rows() -> None: + table.sort_rows(lambda row1, row2: row1.get_value("column_0") - row2.get_value("column_0")) + + +def _run_split_rows() -> None: + table.split_rows(0.5) + + +def _run_to_rows() -> None: + table.to_rows() + + +if __name__ == "__main__": + # Create a synthetic Table + table = create_synthetic_table(1000, 50) + + # Run the benchmarks + timings: dict[str, float] = { + "add_rows": timeit( + _run_add_rows, + number=REPETITIONS, + ), + "get_row": timeit( + _run_get_row, + number=REPETITIONS, + ), + "group_rows": timeit( + _run_group_rows, + number=REPETITIONS, + ), + "keep_only_rows": timeit( + _run_keep_only_rows, + number=REPETITIONS, + ), + "remove_duplicate_rows": timeit( + _run_remove_duplicate_rows, + number=REPETITIONS, + ), + "remove_rows_with_missing_values": timeit( + _run_remove_rows_with_missing_values, + number=REPETITIONS, + ), + "remove_rows_with_outliers": timeit( + _run_remove_rows_with_outliers, + number=REPETITIONS, + ), + "remove_rows": timeit( + _run_remove_rows, + number=REPETITIONS, + ), + "shuffle_rows": timeit( + _run_shuffle_rows, + number=REPETITIONS, + ), + "slice_rows": timeit( + _run_slice_rows, + number=REPETITIONS, + ), + "sort_rows": timeit( + _run_sort_rows, + number=REPETITIONS, + ), + "split_rows": timeit( + _run_split_rows, + number=REPETITIONS, + ), + "to_rows": timeit( + _run_to_rows, + number=REPETITIONS, + ), + } + + # Print the timings + print( + Table( + { # noqa: T201 + "method": list(timings.keys()), + "timing": list(timings.values()), + } + ) + ) diff --git 
a/benchmarks/table/row_operations_polars.py b/benchmarks/table/row_operations_polars.py new file mode 100644 index 000000000..5c2e63cf1 --- /dev/null +++ b/benchmarks/table/row_operations_polars.py @@ -0,0 +1,130 @@ +from timeit import timeit + +from safeds.data.tabular.containers import Table + +from benchmarks.table.utils import create_synthetic_table_polars + +REPETITIONS = 10 + + +# def _run_add_rows() -> None: +# table.add_rows(table) +# +# +# def _run_get_row() -> None: +# table.get_row(0) +# +# +# def _run_group_rows() -> None: +# table.group_rows(lambda row: row.get_value("column_0") % 2 == 0) +# +# +# def _run_keep_only_rows() -> None: +# table.keep_only_rows(lambda row: row.get_value("column_0") % 2 == 0) + + +def _run_remove_duplicate_rows() -> None: + table.remove_duplicate_rows()._lazy_frame.collect() + + +def _run_remove_rows_with_missing_values() -> None: + table.remove_rows_with_missing_values()._lazy_frame.collect() + + +# def _run_remove_rows_with_outliers() -> None: +# table.remove_rows_with_outliers() +# +# +# def _run_remove_rows() -> None: +# table.remove_rows(lambda row: row.get_value("column_0") % 2 == 0) +# +# +# def _run_shuffle_rows() -> None: +# table.shuffle_rows() +# +# +# def _run_slice_rows() -> None: +# table.slice_rows(end=table.number_of_rows // 2) +# +# +# def _run_sort_rows() -> None: +# table.sort_rows(lambda row1, row2: row1.get_value("column_0") - row2.get_value("column_0")) +# +# +# def _run_split_rows() -> None: +# table.split_rows(0.5) +# +# +# def _run_to_rows() -> None: +# table.to_rows() + + +if __name__ == "__main__": + # Create a synthetic Table + table = create_synthetic_table_polars(1000, 50) + + # Run the benchmarks + timings: dict[str, float] = { + # "add_rows": timeit( + # _run_add_rows, + # number=REPETITIONS, + # ), + # "get_row": timeit( + # _run_get_row, + # number=REPETITIONS, + # ), + # "group_rows": timeit( + # _run_group_rows, + # number=REPETITIONS, + # ), + # "keep_only_rows": timeit( + # 
_run_keep_only_rows, + # number=REPETITIONS, + # ), + "remove_duplicate_rows": timeit( + _run_remove_duplicate_rows, + number=REPETITIONS, + ), + "remove_rows_with_missing_values": timeit( + _run_remove_rows_with_missing_values, + number=REPETITIONS, + ), + # "remove_rows_with_outliers": timeit( + # _run_remove_rows_with_outliers, + # number=REPETITIONS, + # ), + # "remove_rows": timeit( + # _run_remove_rows, + # number=REPETITIONS, + # ), + # "shuffle_rows": timeit( + # _run_shuffle_rows, + # number=REPETITIONS, + # ), + # "slice_rows": timeit( + # _run_slice_rows, + # number=REPETITIONS, + # ), + # "sort_rows": timeit( + # _run_sort_rows, + # number=REPETITIONS, + # ), + # "split_rows": timeit( + # _run_split_rows, + # number=REPETITIONS, + # ), + # "to_rows": timeit( + # _run_to_rows, + # number=REPETITIONS, + # ), + } + + # Print the timings + print( + Table( + { # noqa: T201 + "method": list(timings.keys()), + "timing": list(timings.values()), + } + ) + ) diff --git a/benchmarks/table/utils/__init__.py b/benchmarks/table/utils/__init__.py new file mode 100644 index 000000000..bb2b4ee56 --- /dev/null +++ b/benchmarks/table/utils/__init__.py @@ -0,0 +1,7 @@ +from .create_synthetic_table import create_synthetic_table +from .create_synthetic_table_polars import create_synthetic_table_polars + +__all__ = [ + "create_synthetic_table", + "create_synthetic_table_polars", +] diff --git a/benchmarks/table/utils/create_synthetic_table.py b/benchmarks/table/utils/create_synthetic_table.py new file mode 100644 index 000000000..d1ad47d6e --- /dev/null +++ b/benchmarks/table/utils/create_synthetic_table.py @@ -0,0 +1,36 @@ +from random import randrange + +from safeds.data.tabular.containers import Table + + +def create_synthetic_table( + number_of_rows: int, + number_of_columns: int, + *, + min_value: int = 0, + max_value: int = 1000, +) -> Table: + """Create a synthetic Table with random numerical data. 
+ + Parameters + ---------- + number_of_rows: + Number of rows in the Table. + number_of_columns: + Number of columns in the Table. + min_value: + Minimum value of the random data. + max_value: + Maximum value of the random data. + + Returns + ------- + Table + A Table with random numerical data. + """ + return Table( + { + f"column_{i}": [randrange(min_value, max_value) for _ in range(number_of_rows)] + for i in range(number_of_columns) + } + ) diff --git a/benchmarks/table/utils/create_synthetic_table_polars.py b/benchmarks/table/utils/create_synthetic_table_polars.py new file mode 100644 index 000000000..501079edc --- /dev/null +++ b/benchmarks/table/utils/create_synthetic_table_polars.py @@ -0,0 +1,36 @@ +from random import randrange + +from safeds.data.tabular.containers import ExperimentalPolarsTable + + +def create_synthetic_table_polars( + number_of_rows: int, + number_of_columns: int, + *, + min_value: int = 0, + max_value: int = 1000, +) -> ExperimentalPolarsTable: + """Create a synthetic Table with random numerical data. + + Parameters + ---------- + number_of_rows: + Number of rows in the Table. + number_of_columns: + Number of columns in the Table. + min_value: + Minimum value of the random data. + max_value: + Maximum value of the random data. + + Returns + ------- + Table + A Table with random numerical data. 
+ """ + return ExperimentalPolarsTable( + { + f"column_{i}": [randrange(min_value, max_value) for _ in range(number_of_rows)] + for i in range(number_of_columns) + } + ) diff --git a/poetry.lock b/poetry.lock index 2fced980b..fd787d4f3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -61,13 +61,13 @@ tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "p [[package]] name = "babel" -version = "2.14.0" +version = "2.15.0" description = "Internationalization utilities" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "Babel-2.14.0-py3-none-any.whl", hash = "sha256:efb1a25b7118e67ce3a259bed20545c29cb68be8ad2c784c83689981b7a57287"}, - {file = "Babel-2.14.0.tar.gz", hash = "sha256:6919867db036398ba21eb5c7a0f6b28ab8cbc3ae7a73a44ebe34ae74a4e7d363"}, + {file = "Babel-2.15.0-py3-none-any.whl", hash = "sha256:08706bdad8d0a3413266ab61bd6c34d0c28d6e1e7badf40a2cebe67644e2e1fb"}, + {file = "babel-2.15.0.tar.gz", hash = "sha256:8daf0e265d05768bc6c7a314cf1321e9a123afc328cc635c18622a2f30a04413"}, ] [package.extras] @@ -393,63 +393,63 @@ test-no-images = ["pytest", "pytest-cov", "pytest-xdist", "wurlitzer"] [[package]] name = "coverage" -version = "7.5.0" +version = "7.5.1" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.8" files = [ - {file = "coverage-7.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:432949a32c3e3f820af808db1833d6d1631664d53dd3ce487aa25d574e18ad1c"}, - {file = "coverage-7.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2bd7065249703cbeb6d4ce679c734bef0ee69baa7bff9724361ada04a15b7e3b"}, - {file = "coverage-7.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbfe6389c5522b99768a93d89aca52ef92310a96b99782973b9d11e80511f932"}, - {file = "coverage-7.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:39793731182c4be939b4be0cdecde074b833f6171313cf53481f869937129ed3"}, - {file = "coverage-7.5.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85a5dbe1ba1bf38d6c63b6d2c42132d45cbee6d9f0c51b52c59aa4afba057517"}, - {file = "coverage-7.5.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:357754dcdfd811462a725e7501a9b4556388e8ecf66e79df6f4b988fa3d0b39a"}, - {file = "coverage-7.5.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a81eb64feded34f40c8986869a2f764f0fe2db58c0530d3a4afbcde50f314880"}, - {file = "coverage-7.5.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:51431d0abbed3a868e967f8257c5faf283d41ec882f58413cf295a389bb22e58"}, - {file = "coverage-7.5.0-cp310-cp310-win32.whl", hash = "sha256:f609ebcb0242d84b7adeee2b06c11a2ddaec5464d21888b2c8255f5fd6a98ae4"}, - {file = "coverage-7.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:6782cd6216fab5a83216cc39f13ebe30adfac2fa72688c5a4d8d180cd52e8f6a"}, - {file = "coverage-7.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e768d870801f68c74c2b669fc909839660180c366501d4cc4b87efd6b0eee375"}, - {file = "coverage-7.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:84921b10aeb2dd453247fd10de22907984eaf80901b578a5cf0bb1e279a587cb"}, - {file = "coverage-7.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:710c62b6e35a9a766b99b15cdc56d5aeda0914edae8bb467e9c355f75d14ee95"}, - {file = "coverage-7.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c379cdd3efc0658e652a14112d51a7668f6bfca7445c5a10dee7eabecabba19d"}, - {file = "coverage-7.5.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fea9d3ca80bcf17edb2c08a4704259dadac196fe5e9274067e7a20511fad1743"}, - {file = "coverage-7.5.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = 
"sha256:41327143c5b1d715f5f98a397608f90ab9ebba606ae4e6f3389c2145410c52b1"}, - {file = "coverage-7.5.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:565b2e82d0968c977e0b0f7cbf25fd06d78d4856289abc79694c8edcce6eb2de"}, - {file = "coverage-7.5.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cf3539007202ebfe03923128fedfdd245db5860a36810136ad95a564a2fdffff"}, - {file = "coverage-7.5.0-cp311-cp311-win32.whl", hash = "sha256:bf0b4b8d9caa8d64df838e0f8dcf68fb570c5733b726d1494b87f3da85db3a2d"}, - {file = "coverage-7.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:9c6384cc90e37cfb60435bbbe0488444e54b98700f727f16f64d8bfda0b84656"}, - {file = "coverage-7.5.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fed7a72d54bd52f4aeb6c6e951f363903bd7d70bc1cad64dd1f087980d309ab9"}, - {file = "coverage-7.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cbe6581fcff7c8e262eb574244f81f5faaea539e712a058e6707a9d272fe5b64"}, - {file = "coverage-7.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad97ec0da94b378e593ef532b980c15e377df9b9608c7c6da3506953182398af"}, - {file = "coverage-7.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd4bacd62aa2f1a1627352fe68885d6ee694bdaebb16038b6e680f2924a9b2cc"}, - {file = "coverage-7.5.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:adf032b6c105881f9d77fa17d9eebe0ad1f9bfb2ad25777811f97c5362aa07f2"}, - {file = "coverage-7.5.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4ba01d9ba112b55bfa4b24808ec431197bb34f09f66f7cb4fd0258ff9d3711b1"}, - {file = "coverage-7.5.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f0bfe42523893c188e9616d853c47685e1c575fe25f737adf473d0405dcfa7eb"}, - {file = "coverage-7.5.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a9a7ef30a1b02547c1b23fa9a5564f03c9982fc71eb2ecb7f98c96d7a0db5cf2"}, - {file = 
"coverage-7.5.0-cp312-cp312-win32.whl", hash = "sha256:3c2b77f295edb9fcdb6a250f83e6481c679335ca7e6e4a955e4290350f2d22a4"}, - {file = "coverage-7.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:427e1e627b0963ac02d7c8730ca6d935df10280d230508c0ba059505e9233475"}, - {file = "coverage-7.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9dd88fce54abbdbf4c42fb1fea0e498973d07816f24c0e27a1ecaf91883ce69e"}, - {file = "coverage-7.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a898c11dca8f8c97b467138004a30133974aacd572818c383596f8d5b2eb04a9"}, - {file = "coverage-7.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07dfdd492d645eea1bd70fb1d6febdcf47db178b0d99161d8e4eed18e7f62fe7"}, - {file = "coverage-7.5.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d3d117890b6eee85887b1eed41eefe2e598ad6e40523d9f94c4c4b213258e4a4"}, - {file = "coverage-7.5.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6afd2e84e7da40fe23ca588379f815fb6dbbb1b757c883935ed11647205111cb"}, - {file = "coverage-7.5.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a9960dd1891b2ddf13a7fe45339cd59ecee3abb6b8326d8b932d0c5da208104f"}, - {file = "coverage-7.5.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ced268e82af993d7801a9db2dbc1d2322e786c5dc76295d8e89473d46c6b84d4"}, - {file = "coverage-7.5.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e7c211f25777746d468d76f11719e64acb40eed410d81c26cefac641975beb88"}, - {file = "coverage-7.5.0-cp38-cp38-win32.whl", hash = "sha256:262fffc1f6c1a26125d5d573e1ec379285a3723363f3bd9c83923c9593a2ac25"}, - {file = "coverage-7.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:eed462b4541c540d63ab57b3fc69e7d8c84d5957668854ee4e408b50e92ce26a"}, - {file = "coverage-7.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d0194d654e360b3e6cc9b774e83235bae6b9b2cac3be09040880bb0e8a88f4a1"}, - {file = 
"coverage-7.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:33c020d3322662e74bc507fb11488773a96894aa82a622c35a5a28673c0c26f5"}, - {file = "coverage-7.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbdf2cae14a06827bec50bd58e49249452d211d9caddd8bd80e35b53cb04631"}, - {file = "coverage-7.5.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3235d7c781232e525b0761730e052388a01548bd7f67d0067a253887c6e8df46"}, - {file = "coverage-7.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2de4e546f0ec4b2787d625e0b16b78e99c3e21bc1722b4977c0dddf11ca84e"}, - {file = "coverage-7.5.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4d0e206259b73af35c4ec1319fd04003776e11e859936658cb6ceffdeba0f5be"}, - {file = "coverage-7.5.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2055c4fb9a6ff624253d432aa471a37202cd8f458c033d6d989be4499aed037b"}, - {file = "coverage-7.5.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:075299460948cd12722a970c7eae43d25d37989da682997687b34ae6b87c0ef0"}, - {file = "coverage-7.5.0-cp39-cp39-win32.whl", hash = "sha256:280132aada3bc2f0fac939a5771db4fbb84f245cb35b94fae4994d4c1f80dae7"}, - {file = "coverage-7.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:c58536f6892559e030e6924896a44098bc1290663ea12532c78cef71d0df8493"}, - {file = "coverage-7.5.0-pp38.pp39.pp310-none-any.whl", hash = "sha256:2b57780b51084d5223eee7b59f0d4911c31c16ee5aa12737c7a02455829ff067"}, - {file = "coverage-7.5.0.tar.gz", hash = "sha256:cf62d17310f34084c59c01e027259076479128d11e4661bb6c9acb38c5e19bb8"}, + {file = "coverage-7.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0884920835a033b78d1c73b6d3bbcda8161a900f38a488829a83982925f6c2e"}, + {file = "coverage-7.5.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:39afcd3d4339329c5f58de48a52f6e4e50f6578dd6099961cf22228feb25f38f"}, + {file = 
"coverage-7.5.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a7b0ceee8147444347da6a66be737c9d78f3353b0681715b668b72e79203e4a"}, + {file = "coverage-7.5.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a9ca3f2fae0088c3c71d743d85404cec8df9be818a005ea065495bedc33da35"}, + {file = "coverage-7.5.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fd215c0c7d7aab005221608a3c2b46f58c0285a819565887ee0b718c052aa4e"}, + {file = "coverage-7.5.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4bf0655ab60d754491004a5efd7f9cccefcc1081a74c9ef2da4735d6ee4a6223"}, + {file = "coverage-7.5.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:61c4bf1ba021817de12b813338c9be9f0ad5b1e781b9b340a6d29fc13e7c1b5e"}, + {file = "coverage-7.5.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:db66fc317a046556a96b453a58eced5024af4582a8dbdc0c23ca4dbc0d5b3146"}, + {file = "coverage-7.5.1-cp310-cp310-win32.whl", hash = "sha256:b016ea6b959d3b9556cb401c55a37547135a587db0115635a443b2ce8f1c7228"}, + {file = "coverage-7.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:df4e745a81c110e7446b1cc8131bf986157770fa405fe90e15e850aaf7619bc8"}, + {file = "coverage-7.5.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:796a79f63eca8814ca3317a1ea443645c9ff0d18b188de470ed7ccd45ae79428"}, + {file = "coverage-7.5.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4fc84a37bfd98db31beae3c2748811a3fa72bf2007ff7902f68746d9757f3746"}, + {file = "coverage-7.5.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6175d1a0559986c6ee3f7fccfc4a90ecd12ba0a383dcc2da30c2b9918d67d8a3"}, + {file = "coverage-7.5.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fc81d5878cd6274ce971e0a3a18a8803c3fe25457165314271cf78e3aae3aa2"}, + {file = 
"coverage-7.5.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:556cf1a7cbc8028cb60e1ff0be806be2eded2daf8129b8811c63e2b9a6c43bca"}, + {file = "coverage-7.5.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:9981706d300c18d8b220995ad22627647be11a4276721c10911e0e9fa44c83e8"}, + {file = "coverage-7.5.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:d7fed867ee50edf1a0b4a11e8e5d0895150e572af1cd6d315d557758bfa9c057"}, + {file = "coverage-7.5.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ef48e2707fb320c8f139424a596f5b69955a85b178f15af261bab871873bb987"}, + {file = "coverage-7.5.1-cp311-cp311-win32.whl", hash = "sha256:9314d5678dcc665330df5b69c1e726a0e49b27df0461c08ca12674bcc19ef136"}, + {file = "coverage-7.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:5fa567e99765fe98f4e7d7394ce623e794d7cabb170f2ca2ac5a4174437e90dd"}, + {file = "coverage-7.5.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b6cf3764c030e5338e7f61f95bd21147963cf6aa16e09d2f74f1fa52013c1206"}, + {file = "coverage-7.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2ec92012fefebee89a6b9c79bc39051a6cb3891d562b9270ab10ecfdadbc0c34"}, + {file = "coverage-7.5.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16db7f26000a07efcf6aea00316f6ac57e7d9a96501e990a36f40c965ec7a95d"}, + {file = "coverage-7.5.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:beccf7b8a10b09c4ae543582c1319c6df47d78fd732f854ac68d518ee1fb97fa"}, + {file = "coverage-7.5.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8748731ad392d736cc9ccac03c9845b13bb07d020a33423fa5b3a36521ac6e4e"}, + {file = "coverage-7.5.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7352b9161b33fd0b643ccd1f21f3a3908daaddf414f1c6cb9d3a2fd618bf2572"}, + {file = "coverage-7.5.1-cp312-cp312-musllinux_1_1_i686.whl", 
hash = "sha256:7a588d39e0925f6a2bff87154752481273cdb1736270642aeb3635cb9b4cad07"}, + {file = "coverage-7.5.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:68f962d9b72ce69ea8621f57551b2fa9c70509af757ee3b8105d4f51b92b41a7"}, + {file = "coverage-7.5.1-cp312-cp312-win32.whl", hash = "sha256:f152cbf5b88aaeb836127d920dd0f5e7edff5a66f10c079157306c4343d86c19"}, + {file = "coverage-7.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:5a5740d1fb60ddf268a3811bcd353de34eb56dc24e8f52a7f05ee513b2d4f596"}, + {file = "coverage-7.5.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e2213def81a50519d7cc56ed643c9e93e0247f5bbe0d1247d15fa520814a7cd7"}, + {file = "coverage-7.5.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5037f8fcc2a95b1f0e80585bd9d1ec31068a9bcb157d9750a172836e98bc7a90"}, + {file = "coverage-7.5.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c3721c2c9e4c4953a41a26c14f4cef64330392a6d2d675c8b1db3b645e31f0e"}, + {file = "coverage-7.5.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca498687ca46a62ae590253fba634a1fe9836bc56f626852fb2720f334c9e4e5"}, + {file = "coverage-7.5.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0cdcbc320b14c3e5877ee79e649677cb7d89ef588852e9583e6b24c2e5072661"}, + {file = "coverage-7.5.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:57e0204b5b745594e5bc14b9b50006da722827f0b8c776949f1135677e88d0b8"}, + {file = "coverage-7.5.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:8fe7502616b67b234482c3ce276ff26f39ffe88adca2acf0261df4b8454668b4"}, + {file = "coverage-7.5.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:9e78295f4144f9dacfed4f92935fbe1780021247c2fabf73a819b17f0ccfff8d"}, + {file = "coverage-7.5.1-cp38-cp38-win32.whl", hash = "sha256:1434e088b41594baa71188a17533083eabf5609e8e72f16ce8c186001e6b8c41"}, + {file = "coverage-7.5.1-cp38-cp38-win_amd64.whl", hash = 
"sha256:0646599e9b139988b63704d704af8e8df7fa4cbc4a1f33df69d97f36cb0a38de"}, + {file = "coverage-7.5.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4cc37def103a2725bc672f84bd939a6fe4522310503207aae4d56351644682f1"}, + {file = "coverage-7.5.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fc0b4d8bfeabd25ea75e94632f5b6e047eef8adaed0c2161ada1e922e7f7cece"}, + {file = "coverage-7.5.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d0a0f5e06881ecedfe6f3dd2f56dcb057b6dbeb3327fd32d4b12854df36bf26"}, + {file = "coverage-7.5.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9735317685ba6ec7e3754798c8871c2f49aa5e687cc794a0b1d284b2389d1bd5"}, + {file = "coverage-7.5.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d21918e9ef11edf36764b93101e2ae8cc82aa5efdc7c5a4e9c6c35a48496d601"}, + {file = "coverage-7.5.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c3e757949f268364b96ca894b4c342b41dc6f8f8b66c37878aacef5930db61be"}, + {file = "coverage-7.5.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:79afb6197e2f7f60c4824dd4b2d4c2ec5801ceb6ba9ce5d2c3080e5660d51a4f"}, + {file = "coverage-7.5.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d1d0d98d95dd18fe29dc66808e1accf59f037d5716f86a501fc0256455219668"}, + {file = "coverage-7.5.1-cp39-cp39-win32.whl", hash = "sha256:1cc0fe9b0b3a8364093c53b0b4c0c2dd4bb23acbec4c9240b5f284095ccf7981"}, + {file = "coverage-7.5.1-cp39-cp39-win_amd64.whl", hash = "sha256:dde0070c40ea8bb3641e811c1cfbf18e265d024deff6de52c5950677a8fb1e0f"}, + {file = "coverage-7.5.1-pp38.pp39.pp310-none-any.whl", hash = "sha256:6537e7c10cc47c595828b8a8be04c72144725c383c4702703ff4e42e44577312"}, + {file = "coverage-7.5.1.tar.gz", hash = "sha256:54de9ef3a9da981f7af93eafde4ede199e0846cd819eb27c88e2b712aae9708c"}, ] [package.extras] @@ -836,13 +836,13 @@ testing = ["Django", "attrs", "colorama", "docopt", 
"pytest (<7.0.0)"] [[package]] name = "jinja2" -version = "3.1.3" +version = "3.1.4" description = "A very fast and expressive template engine." optional = false python-versions = ">=3.7" files = [ - {file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"}, - {file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"}, + {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"}, + {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"}, ] [package.dependencies] @@ -952,13 +952,13 @@ files = [ [[package]] name = "jupytext" -version = "1.16.1" +version = "1.16.2" description = "Jupyter notebooks as Markdown documents, Julia, Python or R scripts" optional = false python-versions = ">=3.8" files = [ - {file = "jupytext-1.16.1-py3-none-any.whl", hash = "sha256:796ec4f68ada663569e5d38d4ef03738a01284bfe21c943c485bc36433898bd0"}, - {file = "jupytext-1.16.1.tar.gz", hash = "sha256:68c7b68685e870e80e60fda8286fbd6269e9c74dc1df4316df6fe46eabc94c99"}, + {file = "jupytext-1.16.2-py3-none-any.whl", hash = "sha256:197a43fef31dca612b68b311e01b8abd54441c7e637810b16b6cb8f2ab66065e"}, + {file = "jupytext-1.16.2.tar.gz", hash = "sha256:8627dd9becbbebd79cc4a4ed4727d89d78e606b4b464eab72357b3b029023a14"}, ] [package.dependencies] @@ -967,16 +967,15 @@ mdit-py-plugins = "*" nbformat = "*" packaging = "*" pyyaml = "*" -toml = "*" [package.extras] -dev = ["jupytext[test-cov,test-external]"] +dev = ["autopep8", "black", "flake8", "gitpython", "ipykernel", "isort", "jupyter-fs (<0.4.0)", "jupyter-server (!=2.11)", "nbconvert", "pre-commit", "pytest", "pytest-cov (>=2.6.1)", "pytest-randomly", "pytest-xdist", "sphinx-gallery (<0.8)"] docs = ["myst-parser", "sphinx", "sphinx-copybutton", "sphinx-rtd-theme"] test = ["pytest", "pytest-randomly", "pytest-xdist"] 
-test-cov = ["jupytext[test-integration]", "pytest-cov (>=2.6.1)"] -test-external = ["autopep8", "black", "flake8", "gitpython", "isort", "jupyter-fs (<0.4.0)", "jupytext[test-integration]", "pre-commit", "sphinx-gallery (<0.8)"] -test-functional = ["jupytext[test]"] -test-integration = ["ipykernel", "jupyter-server (!=2.11)", "jupytext[test-functional]", "nbconvert"] +test-cov = ["ipykernel", "jupyter-server (!=2.11)", "nbconvert", "pytest", "pytest-cov (>=2.6.1)", "pytest-randomly", "pytest-xdist"] +test-external = ["autopep8", "black", "flake8", "gitpython", "ipykernel", "isort", "jupyter-fs (<0.4.0)", "jupyter-server (!=2.11)", "nbconvert", "pre-commit", "pytest", "pytest-randomly", "pytest-xdist", "sphinx-gallery (<0.8)"] +test-functional = ["pytest", "pytest-randomly", "pytest-xdist"] +test-integration = ["ipykernel", "jupyter-server (!=2.11)", "nbconvert", "pytest", "pytest-randomly", "pytest-xdist"] test-ui = ["calysto-bash"] [[package]] @@ -1604,13 +1603,13 @@ mkdocs = ">=1.2" [[package]] name = "mkdocstrings" -version = "0.25.0" +version = "0.25.1" description = "Automatic documentation from sources, for MkDocs." 
optional = false python-versions = ">=3.8" files = [ - {file = "mkdocstrings-0.25.0-py3-none-any.whl", hash = "sha256:df1b63f26675fcde8c1b77e7ea996cd2f93220b148e06455428f676f5dc838f1"}, - {file = "mkdocstrings-0.25.0.tar.gz", hash = "sha256:066986b3fb5b9ef2d37c4417255a808f7e63b40ff8f67f6cab8054d903fbc91d"}, + {file = "mkdocstrings-0.25.1-py3-none-any.whl", hash = "sha256:da01fcc2670ad61888e8fe5b60afe9fee5781017d67431996832d63e887c2e51"}, + {file = "mkdocstrings-0.25.1.tar.gz", hash = "sha256:c3a2515f31577f311a9ee58d089e4c51fc6046dbd9e9b4c3de4c3194667fe9bf"}, ] [package.dependencies] @@ -2267,6 +2266,48 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "polars" +version = "0.20.23" +description = "Blazingly fast DataFrame library" +optional = false +python-versions = ">=3.8" +files = [ + {file = "polars-0.20.23-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:9b1001a29e785126f0e189786223c45bf9c7696ed3d221a61dd629ff5e8229d3"}, + {file = "polars-0.20.23-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:7de724f9b7f94c76008023b1ef9319e7dccada97e98d48d548be487be8dc2ea6"}, + {file = "polars-0.20.23-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f109512a456f9f8bdc20e5b19e5794471d4a1a507f99daf1afe1b41eb3227c41"}, + {file = "polars-0.20.23-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:bfd10ffabafecba3bb836f9e267405abc8259da5fb8d5e74096d46eea802a295"}, + {file = "polars-0.20.23-cp38-abi3-win_amd64.whl", hash = "sha256:de69adcfe4a92821f28c0223b801e56a36682a7aac32df8e860e6df7678f4c8a"}, + {file = "polars-0.20.23.tar.gz", hash = "sha256:4503c446c7771d5b52d5bff4f2dbf2e999a87a1cc3c89931db255cff43218436"}, +] + +[package.dependencies] +pyarrow = {version = ">=7.0.0", optional = true, markers = "extra == \"pyarrow\""} + +[package.extras] +adbc = ["adbc-driver-manager", "adbc-driver-sqlite"] +all = 
["polars[adbc,async,cloudpickle,connectorx,deltalake,fastexcel,fsspec,gevent,numpy,pandas,plot,pyarrow,pydantic,pyiceberg,sqlalchemy,timezone,xlsx2csv,xlsxwriter]"] +async = ["nest-asyncio"] +cloudpickle = ["cloudpickle"] +connectorx = ["connectorx (>=0.3.2)"] +deltalake = ["deltalake (>=0.15.0)"] +fastexcel = ["fastexcel (>=0.9)"] +fsspec = ["fsspec"] +gevent = ["gevent"] +matplotlib = ["matplotlib"] +numpy = ["numpy (>=1.16.0)"] +openpyxl = ["openpyxl (>=3.0.0)"] +pandas = ["pandas", "pyarrow (>=7.0.0)"] +plot = ["hvplot (>=0.9.1)"] +pyarrow = ["pyarrow (>=7.0.0)"] +pydantic = ["pydantic"] +pyiceberg = ["pyiceberg (>=0.5.0)"] +pyxlsb = ["pyxlsb (>=1.0)"] +sqlalchemy = ["pandas", "sqlalchemy"] +timezone = ["backports-zoneinfo", "tzdata"] +xlsx2csv = ["xlsx2csv (>=0.8.0)"] +xlsxwriter = ["xlsxwriter"] + [[package]] name = "prompt-toolkit" version = "3.0.43" @@ -2334,6 +2375,54 @@ files = [ [package.extras] tests = ["pytest"] +[[package]] +name = "pyarrow" +version = "16.0.0" +description = "Python library for Apache Arrow" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyarrow-16.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:22a1fdb1254e5095d629e29cd1ea98ed04b4bbfd8e42cc670a6b639ccc208b60"}, + {file = "pyarrow-16.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:574a00260a4ed9d118a14770edbd440b848fcae5a3024128be9d0274dbcaf858"}, + {file = "pyarrow-16.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c0815d0ddb733b8c1b53a05827a91f1b8bde6240f3b20bf9ba5d650eb9b89cdf"}, + {file = "pyarrow-16.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df0080339387b5d30de31e0a149c0c11a827a10c82f0c67d9afae3981d1aabb7"}, + {file = "pyarrow-16.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:edf38cce0bf0dcf726e074159c60516447e4474904c0033f018c1f33d7dac6c5"}, + {file = "pyarrow-16.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = 
"sha256:91d28f9a40f1264eab2af7905a4d95320ac2f287891e9c8b0035f264fe3c3a4b"}, + {file = "pyarrow-16.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:99af421ee451a78884d7faea23816c429e263bd3618b22d38e7992c9ce2a7ad9"}, + {file = "pyarrow-16.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:d22d0941e6c7bafddf5f4c0662e46f2075850f1c044bf1a03150dd9e189427ce"}, + {file = "pyarrow-16.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:266ddb7e823f03733c15adc8b5078db2df6980f9aa93d6bb57ece615df4e0ba7"}, + {file = "pyarrow-16.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cc23090224b6594f5a92d26ad47465af47c1d9c079dd4a0061ae39551889efe"}, + {file = "pyarrow-16.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56850a0afe9ef37249d5387355449c0f94d12ff7994af88f16803a26d38f2016"}, + {file = "pyarrow-16.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:705db70d3e2293c2f6f8e84874b5b775f690465798f66e94bb2c07bab0a6bb55"}, + {file = "pyarrow-16.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:5448564754c154997bc09e95a44b81b9e31ae918a86c0fcb35c4aa4922756f55"}, + {file = "pyarrow-16.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:729f7b262aa620c9df8b9967db96c1575e4cfc8c25d078a06968e527b8d6ec05"}, + {file = "pyarrow-16.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:fb8065dbc0d051bf2ae2453af0484d99a43135cadabacf0af588a3be81fbbb9b"}, + {file = "pyarrow-16.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:20ce707d9aa390593ea93218b19d0eadab56390311cb87aad32c9a869b0e958c"}, + {file = "pyarrow-16.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5823275c8addbbb50cd4e6a6839952682a33255b447277e37a6f518d6972f4e1"}, + {file = "pyarrow-16.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ab8b9050752b16a8b53fcd9853bf07d8daf19093533e990085168f40c64d978"}, + {file = "pyarrow-16.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = 
"sha256:42e56557bc7c5c10d3e42c3b32f6cff649a29d637e8f4e8b311d334cc4326730"}, + {file = "pyarrow-16.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:2a7abdee4a4a7cfa239e2e8d721224c4b34ffe69a0ca7981354fe03c1328789b"}, + {file = "pyarrow-16.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:ef2f309b68396bcc5a354106741d333494d6a0d3e1951271849787109f0229a6"}, + {file = "pyarrow-16.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:ed66e5217b4526fa3585b5e39b0b82f501b88a10d36bd0d2a4d8aa7b5a48e2df"}, + {file = "pyarrow-16.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cc8814310486f2a73c661ba8354540f17eef51e1b6dd090b93e3419d3a097b3a"}, + {file = "pyarrow-16.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c2f5e239db7ed43e0ad2baf46a6465f89c824cc703f38ef0fde927d8e0955f7"}, + {file = "pyarrow-16.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f293e92d1db251447cb028ae12f7bc47526e4649c3a9924c8376cab4ad6b98bd"}, + {file = "pyarrow-16.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:dd9334a07b6dc21afe0857aa31842365a62eca664e415a3f9536e3a8bb832c07"}, + {file = "pyarrow-16.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:d91073d1e2fef2c121154680e2ba7e35ecf8d4969cc0af1fa6f14a8675858159"}, + {file = "pyarrow-16.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:71d52561cd7aefd22cf52538f262850b0cc9e4ec50af2aaa601da3a16ef48877"}, + {file = "pyarrow-16.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:b93c9a50b965ee0bf4fef65e53b758a7e8dcc0c2d86cebcc037aaaf1b306ecc0"}, + {file = "pyarrow-16.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d831690844706e374c455fba2fb8cfcb7b797bfe53ceda4b54334316e1ac4fa4"}, + {file = "pyarrow-16.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35692ce8ad0b8c666aa60f83950957096d92f2a9d8d7deda93fb835e6053307e"}, + {file = "pyarrow-16.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:9dd3151d098e56f16a8389c1247137f9e4c22720b01c6f3aa6dec29a99b74d80"}, + {file = "pyarrow-16.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:bd40467bdb3cbaf2044ed7a6f7f251c8f941c8b31275aaaf88e746c4f3ca4a7a"}, + {file = "pyarrow-16.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:00a1dcb22ad4ceb8af87f7bd30cc3354788776c417f493089e0a0af981bc8d80"}, + {file = "pyarrow-16.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:fda9a7cebd1b1d46c97b511f60f73a5b766a6de4c5236f144f41a5d5afec1f35"}, + {file = "pyarrow-16.0.0.tar.gz", hash = "sha256:59bb1f1edbbf4114c72415f039f1359f1a57d166a331c3229788ccbfbb31689a"}, +] + +[package.dependencies] +numpy = ">=1.16.6" + [[package]] name = "pycparser" version = "2.22" @@ -3246,17 +3335,6 @@ webencodings = ">=0.4" doc = ["sphinx", "sphinx_rtd_theme"] test = ["pytest", "ruff"] -[[package]] -name = "toml" -version = "0.10.2" -description = "Python Library for Tom's Obvious, Minimal Language" -optional = false -python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" -files = [ - {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, - {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, -] - [[package]] name = "torch" version = "2.3.0" @@ -3709,4 +3787,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.11,<3.13" -content-hash = "8ec5a113155aa0f1f8cd24130f0c312e03356673a2230b74f318a23c86b27a1c" +content-hash = "549f6d3cbd6e0393d4848063f4023ad2f20a5953a59e2ade16315b008e2e9dfa" diff --git a/pyproject.toml b/pyproject.toml index 8d4c8f99e..223b5ef8b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ matplotlib = "^3.6.3" openpyxl = "^3.1.2" pandas = "^2.0.0" pillow = ">=9.5,<11.0" +polars = {extras = ["pyarrow"], version = "^0.20.23"} scikit-learn = "^1.2.0" seaborn = "^0.13.0" statsmodels = "^0.14.1" @@ -64,6 +65,18 @@ priority = "explicit" requires = 
["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" +[tool.black] +line-length = 120 + +[tool.coverage.report] +exclude_also = [ + "if\\s+(typing\\.)?TYPE_CHECKING:", + "\\.\\.\\.", +] +omit = [ + "*experimental*.py" +] + [tool.pytest.ini_options] addopts = "--snapshot-warn-unused" filterwarnings = [ @@ -71,5 +84,119 @@ filterwarnings = [ "ignore:Jupyter is migrating its paths to use standard platformdirs" ] -[tool.black] +[tool.ruff] +extend-exclude = ["benchmarks/**/*.py"] line-length = 120 +target-version = "py311" + +[tool.ruff.lint] +ignore-init-module-imports = true + +select = [ + "F", + "E", + "W", + "I", + "N", + "D", + "UP", + "YTT", + "BLE", + "FBT", + "B", + "A", + "COM", + "C4", + "DTZ", + "T10", + "ISC", + "ICN", + "G", + "INP", + "PIE", + "T20", + "PYI", + "PT", + "Q", + "RSE", + "RET", + "SLF", + "SIM", + "TID", + "TCH", + "INT", + "ARG", + "PTH", + "PD", + "PGH", + "PL", + "TRY", + "NPY", + "RUF" +] +ignore = [ + # line-too-long (handled by black) + "E501", + # tab-indentation (handled by black) + "W191", + # trailing-whitespace (handled by black) + "W291", + # missing-newline-at-end-of-file (handled by black) + "W292", + # blank-line-with-whitespace (handled by black) + "W293", + # boolean-positional-arg-in-function-definition (we leave it to the call-site) + "FBT001", + # boolean-default-value-in-function-definition (we leave it to the call-site) + "FBT002", + # builtin-attribute-shadowing (not an issue) + "A003", + # implicit-return (can add a return even though all cases are covered) + "RET503", + # superfluous-else-return (sometimes it's more readable) + "RET505", + # superfluous-else-raise (sometimes it's more readable) + "RET506", + # superfluous-else-continue (sometimes it's more readable) + "RET507", + # superfluous-else-break (sometimes it's more readable) + "RET508", + # private-member-access (we cannot always avoid it if we want a clean API) + "SLF001", + # if-else-block-instead-of-if-exp (an if-else block can be more
readable) + "SIM108", + # compare-to-empty-string (sometimes it's better to be explicit) + "PLC1901", + # too-many-return-statements + "PLR0911", + # too-many-branches + "PLR0912", + # too-many-arguments + "PLR0913", + # too-many-statements + "PLR0915", + # magic-value-comparison + "PLR2004", + # raise-vanilla-args + "TRY003", +] + +[tool.ruff.lint.per-file-ignores] +"*test*.py" = [ + # Undocumented declarations + "D100", + "D101", + "D102", + "D103", + "D104", + "D105", + "D106", + "D107", +] +"__init__.py" = [ + # runtime-import-in-type-checking-block: Does not work with apipkg. + "TCH004", +] + +[tool.ruff.lint.pydocstyle] +convention = "numpy" diff --git a/src/resources/to_excel_file.xlsx b/src/resources/to_excel_file.xlsx index 13038e60140709fd8fa81a7eb52587ce1e5a3230..1e9177e04bd8277f073699129859030e073a90f6 100644 GIT binary patch delta 600 zcmaE^_Ck#>z?+#xgn@y9gF(Aw6Rf0LKV_Gxd}2~qr7R`++2w}FyI6xsmyBf&c-NOs zVsDxBSyV!0rK+by>&*GRZx&t23j5W6!Ix<*gSAP9rqe|6f@IF-&U!PSlRkVK=dRrM z{`0ZoC~ z`8uO58w)6uHcN2qVT7=5aGqlVvnH?Nc?uDl%o_=jocw|BAw=jZzZX#hSp^!r#CoAO+XJz|g?>m~ZkwKAFh^!aNXt3c^y1Cnj49tFeiI(h36r&QRZz delta 628 zcmaE%_FRoGz?+#xgn@y9gJEU!^{|P2YwAJt#&>5rUI9fJui+V0k2Fc`xkZlE9=Z@7{WzzSj8Q znBhPB`fXF$_8zgj5d6ZHHBWP$+wGki7xOnUZ=TK|$e&WGBlIw6$Lr>+bLJ)T$33p! zeNVV!Q%tk!HMh-eo-=+QoiTR`oA}cI9CvjLIIc`oJubLvQ4{xRk(@u96aB=Bdfqzm z9uDC7dS+>_Rz+=1#k}d~qs1ewm-v{nsj^+V!)_Y;q(Axkf$d4fv;KxEZ1xgXk+;p= zRQxIE{=MAy#;RU{&!1X}f8Dlg(j&L9>-m#kyUYJ~&8WWo?pD3Td8Vre*4GEmM-8IQ zHyCZ%SU`caS(0N9BZPI6^BfbHHF-79Q;5(M-bje#xaHaGsnl zXa>=|P*95Ts$ObiUU z+zbr7Fa6alPIBb=Lu)QZb-*plV4O3# zPDl#kqB%lRjIEQm3&|;fY%_lSc-LB>!rwqwNx?OMY Path: # pragma: no cover + """ + Check if the provided path is a valid file path and normalize it. + + Parameters + ---------- + path: + Path to check and normalize. + canonical_file_extension: + If the path has no extension, this extension will be added. Should include the leading dot. + valid_file_extensions: + If the path has an extension, it must be in this set. 
Should include the leading dots. + check_if_file_exists: + Whether to also check if the path points to an existing file. + + Returns + ------- + normalized_path: + The normalized path. + + Raises + ------ + ValueError + If the path has an extension that is not in the `valid_file_extensions` list. + FileNotFoundError + If `check_if_file_exists` is True and the file does not exist. + """ + path = Path(path) + + # Normalize and check file extension + if not path.suffix: + path = path.with_suffix(canonical_file_extension) + elif path.suffix not in valid_file_extensions: + raise WrongFileExtensionError(path, valid_file_extensions) + + # Check if file exists + if check_if_file_exists and not path.is_file(): + raise FileNotFoundError(f"File not found: {path}") + + return path diff --git a/src/safeds/_utils/_hashing.py b/src/safeds/_utils/_hashing.py index 9518e15ae..546ee1021 100644 --- a/src/safeds/_utils/_hashing.py +++ b/src/safeds/_utils/_hashing.py @@ -4,13 +4,13 @@ from typing import Any -def _structural_hash(*value: Any) -> int: +def _structural_hash(*values: Any) -> int: """ Calculate a deterministic hash value, based on the provided values. Parameters ---------- - value: + values: Variable amount of values to hash Returns @@ -20,7 +20,7 @@ def _structural_hash(*value: Any) -> int: """ import xxhash - return xxhash.xxh3_64(_value_to_bytes(value)).intdigest() + return xxhash.xxh3_64(_value_to_bytes(values)).intdigest() def _value_to_bytes(value: Any) -> bytes: diff --git a/src/safeds/_utils/_random.py b/src/safeds/_utils/_random.py new file mode 100644 index 000000000..8ecda3d53 --- /dev/null +++ b/src/safeds/_utils/_random.py @@ -0,0 +1,10 @@ +def _get_random_seed() -> int: # pragma: no cover + """ + Get the random seed for deterministic randomness. + + Returns + ------- + seed: + Random seed. 
+ """ + return 42 diff --git a/src/safeds/data/tabular/containers/__init__.py b/src/safeds/data/tabular/containers/__init__.py index 6eb38e8a5..4d2a37901 100644 --- a/src/safeds/data/tabular/containers/__init__.py +++ b/src/safeds/data/tabular/containers/__init__.py @@ -6,6 +6,10 @@ if TYPE_CHECKING: from ._column import Column + from ._experimental_polars_cell import ExperimentalPolarsCell + from ._experimental_polars_column import ExperimentalPolarsColumn + from ._experimental_polars_row import ExperimentalPolarsRow + from ._experimental_polars_table import ExperimentalPolarsTable from ._row import Row from ._table import Table from ._time_series import TimeSeries @@ -14,6 +18,10 @@ __name__, { "Column": "._column:Column", + "ExperimentalPolarsCell": "._experimental_polars_cell:ExperimentalPolarsCell", + "ExperimentalPolarsColumn": "._experimental_polars_column:ExperimentalPolarsColumn", + "ExperimentalPolarsRow": "._experimental_polars_row:ExperimentalPolarsRow", + "ExperimentalPolarsTable": "._experimental_polars_table:ExperimentalPolarsTable", "Row": "._row:Row", "Table": "._table:Table", "TimeSeries": "._time_series:TimeSeries", @@ -22,6 +30,10 @@ __all__ = [ "Column", + "ExperimentalPolarsCell", + "ExperimentalPolarsColumn", + "ExperimentalPolarsRow", + "ExperimentalPolarsTable", "Row", "Table", "TimeSeries", diff --git a/src/safeds/data/tabular/containers/_experimental_polars_cell.py b/src/safeds/data/tabular/containers/_experimental_polars_cell.py new file mode 100644 index 000000000..4b67734a3 --- /dev/null +++ b/src/safeds/data/tabular/containers/_experimental_polars_cell.py @@ -0,0 +1,10 @@ +from __future__ import annotations + +from abc import ABC +from typing import Generic, TypeVar + +T = TypeVar("T") + + +class ExperimentalPolarsCell(ABC, Generic[T]): + pass diff --git a/src/safeds/data/tabular/containers/_experimental_polars_column.py b/src/safeds/data/tabular/containers/_experimental_polars_column.py new file mode 100644 index 000000000..db34d863b 
--- /dev/null +++ b/src/safeds/data/tabular/containers/_experimental_polars_column.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from abc import ABC +from collections.abc import Sequence +from typing import TypeVar + +T = TypeVar("T") + + +# TODO: should not be abstract +class ExperimentalPolarsColumn(ABC, Sequence[T]): + pass diff --git a/src/safeds/data/tabular/containers/_experimental_polars_row.py b/src/safeds/data/tabular/containers/_experimental_polars_row.py new file mode 100644 index 000000000..c6b43b06c --- /dev/null +++ b/src/safeds/data/tabular/containers/_experimental_polars_row.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from abc import ABC + + +class ExperimentalPolarsRow(ABC): # noqa: B024 + pass diff --git a/src/safeds/data/tabular/containers/_experimental_polars_table.py b/src/safeds/data/tabular/containers/_experimental_polars_table.py new file mode 100644 index 000000000..cfff17824 --- /dev/null +++ b/src/safeds/data/tabular/containers/_experimental_polars_table.py @@ -0,0 +1,723 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Literal + +from safeds._utils import _check_and_normalize_file_path +from safeds.data.tabular.containers import Table +from safeds.exceptions import ColumnLengthMismatchError + +if TYPE_CHECKING: + from collections.abc import Callable, Mapping, Sequence + from pathlib import Path + + from polars import DataFrame, LazyFrame + + from safeds.data.image.containers import Image + from safeds.data.labeled.containers import TabularDataset + from safeds.data.tabular.transformation import InvertibleTableTransformer, TableTransformer + from safeds.data.tabular.typing import ColumnType, Schema + + from ._experimental_polars_cell import ExperimentalPolarsCell + from ._experimental_polars_column import ExperimentalPolarsColumn + from ._experimental_polars_row import ExperimentalPolarsRow + + +class ExperimentalPolarsTable: + """ + A table is a two-dimensional collection of 
data. It can either be seen as a list of rows or as a list of columns. + + To create a `Table` call the constructor or use one of the following static methods: + + | Method | Description | + | ------------------------------------------------------------------------------------------------------------------ | -------------------------------------- | + | [from_csv_file][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_csv_file] | Create a table from a CSV file. | + | [from_json_file][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_json_file] | Create a table from a JSON file. | + | [from_dict][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_dict] | Create a table from a dictionary. | + | [from_columns][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_columns] | Create a table from a list of columns. | + + Parameters + ---------- + data: + The data. If None, an empty table is created. + + Raises + ------ + ColumnLengthMismatchError + If columns have different lengths. + + Examples + -------- + >>> from safeds.data.tabular.containers import ExperimentalPolarsTable + >>> table = ExperimentalPolarsTable({"a": [1, 2, 3], "b": [4, 5, 6]}) + """ + + # ------------------------------------------------------------------------------------------------------------------ + # Import + # ------------------------------------------------------------------------------------------------------------------ + + @staticmethod + def from_columns(columns: list[ExperimentalPolarsColumn]) -> ExperimentalPolarsTable: + raise NotImplementedError + + @staticmethod + def from_csv_file(path: str | Path) -> ExperimentalPolarsTable: + """ + Create a table from a CSV file. + + Parameters + ---------- + path: + The path to the CSV file. If the file extension is omitted, it is assumed to be ".csv". + + Returns + ------- + table: + The created table. 
+ + Raises + ------ + FileNotFoundError + If no file exists at the given path. + ValueError + If the path has an extension that is not ".csv". + + Examples + -------- + >>> from safeds.data.tabular.containers import ExperimentalPolarsTable + >>> ExperimentalPolarsTable.from_csv_file("./src/resources/from_csv_file.csv") + shape: (2, 3) + ┌─────┬─────┬─────┐ + │ a ┆ b ┆ c │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 │ + ╞═════╪═════╪═════╡ + │ 1 ┆ 2 ┆ 1 │ + │ 0 ┆ 0 ┆ 7 │ + └─────┴─────┴─────┘ + """ + import polars as pl + + path = _check_and_normalize_file_path(path, ".csv", [".csv"], check_if_file_exists=True) + return ExperimentalPolarsTable._from_polars_lazy_frame(pl.scan_csv(path)) + + @staticmethod + def from_dict(data: dict[str, list[Any]]) -> ExperimentalPolarsTable: + """ + Create a table from a dictionary that maps column names to column values. + + Parameters + ---------- + data: + The data. + + Returns + ------- + table: + The generated table. + + Raises + ------ + ColumnLengthMismatchError + If columns have different lengths. 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import ExperimentalPolarsTable + >>> data = {'a': [1, 2, 3], 'b': [4, 5, 6]} + >>> ExperimentalPolarsTable.from_dict(data) + shape: (3, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 1 ┆ 4 │ + │ 2 ┆ 5 │ + │ 3 ┆ 6 │ + └─────┴─────┘ + """ + return ExperimentalPolarsTable(data) + + @staticmethod + def from_json_file(path: str | Path) -> ExperimentalPolarsTable: + raise NotImplementedError + + @staticmethod + def from_parquet_file(path: str | Path) -> ExperimentalPolarsTable: + raise NotImplementedError + + @staticmethod + def _from_polars_dataframe(data: DataFrame) -> ExperimentalPolarsTable: + result = object.__new__(ExperimentalPolarsTable) + result._lazy_frame = data.lazy() + result._data_frame = data + return result + + @staticmethod + def _from_polars_lazy_frame(data: LazyFrame) -> ExperimentalPolarsTable: + result = object.__new__(ExperimentalPolarsTable) + result._lazy_frame = data + result._data_frame = None + return result + + # ------------------------------------------------------------------------------------------------------------------ + # Dunder methods + # ------------------------------------------------------------------------------------------------------------------ + + def __init__(self, data: Mapping[str, Sequence[Any]] | None = None) -> None: + import polars as pl + + if data is None: + data = {} + + # Validation + expected_length: int | None = None + for column_values in data.values(): + if expected_length is None: + expected_length = len(column_values) + elif len(column_values) != expected_length: + raise ColumnLengthMismatchError( + "\n".join(f"{column_name}: {len(column_values)}" for column_name, column_values in data.items()), + ) + + # Implementation + self._lazy_frame: pl.LazyFrame = pl.LazyFrame(data) + self._data_frame: pl.DataFrame | None = None + + def __eq__(self, other: object) -> bool: + raise NotImplementedError + + def 
__hash__(self) -> int: + raise NotImplementedError + + def __repr__(self) -> str: + if self._data_frame is None: + self._data_frame = self._lazy_frame.collect() + + return self._data_frame.__repr__() + + def __sizeof__(self) -> int: + raise NotImplementedError + + def __str__(self) -> str: + if self._data_frame is None: + self._data_frame = self._lazy_frame.collect() + + return self._data_frame.__str__() + + # ------------------------------------------------------------------------------------------------------------------ + # Properties + # ------------------------------------------------------------------------------------------------------------------ + + @property + def column_names(self) -> list[str]: + """ + Names of the columns in the table. + + Examples + -------- + >>> from safeds.data.tabular.containers import ExperimentalPolarsTable + >>> table = ExperimentalPolarsTable({"a": [1, 2, 3], "b": [4, 5, 6]}) + >>> table.column_names + ['a', 'b'] + """ + return self._lazy_frame.columns + + @property + def number_of_columns(self) -> int: + """ + The number of columns in the table. + + Examples + -------- + >>> from safeds.data.tabular.containers import ExperimentalPolarsTable + >>> table = ExperimentalPolarsTable({"a": [1, 2, 3], "b": [4, 5, 6]}) + >>> table.number_of_columns + 2 + """ + return self._lazy_frame.width + + @property + def number_of_rows(self) -> int: + """ + The number of rows in the table. + + Note that this operation must fully load the data into memory, which can be expensive. 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import ExperimentalPolarsTable + >>> table = ExperimentalPolarsTable({"a": [1, 2, 3], "b": [4, 5, 6]}) + >>> table.number_of_rows + 3 + """ + if self._data_frame is None: + self._data_frame = self._lazy_frame.collect() + + return self._data_frame.height + + @property + def schema(self) -> Schema: # TODO: rethink return type + raise NotImplementedError + + # ------------------------------------------------------------------------------------------------------------------ + # Column operations + # ------------------------------------------------------------------------------------------------------------------ + + def add_columns( + self, + columns: ExperimentalPolarsColumn | list[ExperimentalPolarsColumn], + ) -> ExperimentalPolarsTable: + raise NotImplementedError + + def get_column(self, name: str) -> ExperimentalPolarsColumn: + raise NotImplementedError + + def get_column_type(self, name: str) -> ColumnType: # TODO rethink return type + raise NotImplementedError + + def has_column(self, name: str) -> bool: + raise NotImplementedError + + def remove_columns(self, names: list[str]) -> ExperimentalPolarsTable: + raise NotImplementedError + + def remove_columns_except(self, names: list[str]) -> ExperimentalPolarsTable: + raise NotImplementedError + + def remove_columns_with_missing_values(self) -> ExperimentalPolarsTable: + raise NotImplementedError + + def remove_columns_with_non_numerical_values(self) -> ExperimentalPolarsTable: + raise NotImplementedError + + def rename_column(self, old_name: str, new_name: str) -> ExperimentalPolarsTable: + """ + Return a new table with a column renamed. + + Note that the original table is not modified. + + Parameters + ---------- + old_name: + The name of the column to rename. + new_name: + The new name of the column. + + Returns + ------- + new_table: + The table with the column renamed. 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import ExperimentalPolarsTable + >>> table = ExperimentalPolarsTable({"a": [1, 2, 3], "b": [4, 5, 6]}) + >>> table.rename_column("a", "A") + shape: (3, 2) + ┌─────┬─────┐ + │ A ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 1 ┆ 4 │ + │ 2 ┆ 5 │ + │ 3 ┆ 6 │ + └─────┴─────┘ + """ + # TODO: raises? + return ExperimentalPolarsTable._from_polars_lazy_frame( + self._lazy_frame.rename({old_name: new_name}), + ) + + def replace_column( + self, + old_name: str, + new_columns: ExperimentalPolarsColumn | list[ExperimentalPolarsColumn], + ) -> ExperimentalPolarsTable: + raise NotImplementedError + + def transform_column( + self, + name: str, + transformer: Callable[[ExperimentalPolarsRow], ExperimentalPolarsCell], + ) -> ExperimentalPolarsTable: + raise NotImplementedError + + # ------------------------------------------------------------------------------------------------------------------ + # Row operations + # ------------------------------------------------------------------------------------------------------------------ + + # TODO: Rethink group_rows/group_rows_by_column. They should not return a dict. + + def remove_duplicate_rows(self) -> ExperimentalPolarsTable: + """ + Remove duplicate rows from the table. 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import ExperimentalPolarsTable + >>> table = ExperimentalPolarsTable({"a": [1, 2, 2], "b": [4, 5, 5]}) + >>> table.remove_duplicate_rows() + shape: (2, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 1 ┆ 4 │ + │ 2 ┆ 5 │ + └─────┴─────┘ + """ + return ExperimentalPolarsTable._from_polars_lazy_frame( + self._lazy_frame.unique(maintain_order=True), + ) + + def remove_rows( + self, + query: Callable[[ExperimentalPolarsRow], ExperimentalPolarsCell[bool]], + ) -> ExperimentalPolarsTable: + raise NotImplementedError + + def remove_rows_by_column( + self, + name: str, + query: Callable[[ExperimentalPolarsCell], ExperimentalPolarsCell[bool]], + ) -> ExperimentalPolarsTable: + raise NotImplementedError + + def remove_rows_with_missing_values( + self, + column_names: list[str] | None = None, + ) -> ExperimentalPolarsTable: + """ + Remove rows with missing values from the table. + + Note that the original table is not modified. + + Parameters + ---------- + column_names: + Names of the columns to consider. If None, all columns are considered. + + Returns + ------- + filtered_table: + The table without rows containing missing values in the specified columns. 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import ExperimentalPolarsTable + >>> table = ExperimentalPolarsTable({"a": [1, None, 3], "b": [4, 5, None]}) + >>> table.remove_rows_with_missing_values() + shape: (1, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 1 ┆ 4 │ + └─────┴─────┘ + """ + return ExperimentalPolarsTable._from_polars_lazy_frame( + self._lazy_frame.drop_nulls(subset=column_names), + ) + + def remove_rows_with_outliers( + self, + column_names: list[str] | None = None, + ) -> ExperimentalPolarsTable: + raise NotImplementedError + + def shuffle_rows(self) -> ExperimentalPolarsTable: + raise NotImplementedError + + def slice_rows(self, start: int = 0, end: int | None = None) -> ExperimentalPolarsTable: + raise NotImplementedError + + def sort_rows( + self, + key_selector: Callable[[ExperimentalPolarsRow], ExperimentalPolarsCell], + *, + descending: bool = False, + ) -> ExperimentalPolarsTable: + raise NotImplementedError + + def sort_rows_by_column( + self, + name: str, + *, + descending: bool = False, + ) -> ExperimentalPolarsTable: + raise NotImplementedError + + def split_rows( + self, + percentage_in_first: float, + *, + shuffle: bool = True, + ) -> tuple[ExperimentalPolarsTable, ExperimentalPolarsTable]: + raise NotImplementedError + + # ------------------------------------------------------------------------------------------------------------------ + # Table operations + # ------------------------------------------------------------------------------------------------------------------ + + def add_table_as_columns(self, other: ExperimentalPolarsTable) -> ExperimentalPolarsTable: + raise NotImplementedError + + def add_table_as_rows(self, other: ExperimentalPolarsTable) -> ExperimentalPolarsTable: + raise NotImplementedError + + def inverse_transform_table(self, fitted_transformer: InvertibleTableTransformer) -> ExperimentalPolarsTable: + raise NotImplementedError + + def 
transform_table(self, fitted_transformer: TableTransformer) -> ExperimentalPolarsTable: + raise NotImplementedError + + # ------------------------------------------------------------------------------------------------------------------ + # Statistics + # ------------------------------------------------------------------------------------------------------------------ + + def summarize_statistics(self) -> ExperimentalPolarsTable: + raise NotImplementedError + + # ------------------------------------------------------------------------------------------------------------------ + # Visualization + # ------------------------------------------------------------------------------------------------------------------ + + def plot_boxplots(self) -> Image: + raise NotImplementedError + + def plot_correlation_heatmap(self) -> Image: + raise NotImplementedError + + def plot_histograms(self, *, number_of_bins: int = 10) -> Image: + raise NotImplementedError + + def plot_lineplot(self, x_name: str, y_name: str) -> Image: + raise NotImplementedError + + def plot_scatterplot(self, x_name: str, y_name: str) -> Image: + raise NotImplementedError + + # ------------------------------------------------------------------------------------------------------------------ + # Export + # ------------------------------------------------------------------------------------------------------------------ + + def to_columns(self) -> list[ExperimentalPolarsColumn]: + raise NotImplementedError + + def to_csv_file(self, path: str | Path) -> None: + """ + Write the table to a CSV file. + + If the file and/or the parent directories do not exist, they will be created. If the file exists already, it + will be overwritten. + + Parameters + ---------- + path: + The path to the CSV file. If the file extension is omitted, it is assumed to be ".csv". + + Raises + ------ + ValueError + If the path has an extension that is not ".csv". 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import ExperimentalPolarsTable + >>> table = ExperimentalPolarsTable({"a": [1, 2, 3], "b": [4, 5, 6]}) + >>> table.to_csv_file("./src/resources/to_csv_file.csv") + """ + path = _check_and_normalize_file_path(path, ".csv", [".csv"]) + path.parent.mkdir(parents=True, exist_ok=True) + + self._lazy_frame.sink_csv(path) + + def to_dict(self) -> dict[str, list[Any]]: + """ + Return a dictionary that maps column names to column values. + + Returns + ------- + dict_: + Dictionary representation of the table. + + Examples + -------- + >>> from safeds.data.tabular.containers import ExperimentalPolarsTable + >>> table = ExperimentalPolarsTable({"a": [1, 2, 3], "b": [4, 5, 6]}) + >>> table.to_dict() + {'a': [1, 2, 3], 'b': [4, 5, 6]} + """ + if self._data_frame is None: + self._data_frame = self._lazy_frame.collect() + + return self._data_frame.to_dict(as_series=False) + + def to_json_file( + self, + path: str | Path, + *, + orientation: Literal["column", "row"] = "column", + ) -> None: + """ + Write the table to a JSON file. + + If the file and/or the parent directories do not exist, they will be created. If the file exists already, it + will be overwritten. + + Note that this operation must fully load the data into memory, which can be expensive. + + Parameters + ---------- + path: + The path to the JSON file. If the file extension is omitted, it is assumed to be ".json". + orientation: + The orientation of the JSON file. If "column", the JSON file will be structured as a list of columns. If + "row", the JSON file will be structured as a list of rows. Row orientation is more human-readable, but + slower and less memory-efficient. + + Raises + ------ + ValueError + If the path has an extension that is not ".json". 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import ExperimentalPolarsTable + >>> table = ExperimentalPolarsTable({"a": [1, 2, 3], "b": [4, 5, 6]}) + >>> table.to_json_file("./src/resources/to_json_file.json") + """ + path = _check_and_normalize_file_path(path, ".json", [".json"]) + path.parent.mkdir(parents=True, exist_ok=True) + + # Write JSON to file + if self._data_frame is None: + self._data_frame = self._lazy_frame.collect() + + self._data_frame.write_json(path, row_oriented=(orientation == "row")) + + def to_parquet_file(self, path: str | Path) -> None: + raise NotImplementedError + + def to_tabular_dataset(self, target_name: str, extra_names: list[str] | None = None) -> TabularDataset: + """ + Return a new `TabularDataset` with columns marked as a target, feature, or extra. + + * The target column is the column that a model should predict. + * Feature columns are columns that a model should use to make predictions. + * Extra columns are columns that are neither feature nor target. They can be used to provide additional context, + like an ID or name column. + + Feature columns are implicitly defined as all columns except the target and extra columns. If no extra columns + are specified, all columns except the target column are used as features. + + Parameters + ---------- + target_name: + Name of the target column. + extra_names: + Names of the columns that are neither feature nor target. If None, no extra columns are used, i.e. all but + the target column are used as features. + + Returns + ------- + dataset: + A new tabular dataset with the given target and extra columns. + + Raises + ------ + ValueError + If the target column is also an extra column. + ValueError + If no feature columns remain. 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import ExperimentalPolarsTable + >>> table = ExperimentalPolarsTable({"item": ["apple", "milk", "beer"], "price": [1.10, 1.19, 1.79], "amount_bought": [74, 72, 51]}) + >>> dataset = table.to_tabular_dataset(target_name="amount_bought", extra_names=["item"]) + """ + from safeds.data.labeled.containers import TabularDataset + + # TODO: more efficient implementation + return TabularDataset(self.temporary_to_old_table(), target_name, extra_names) + + def temporary_to_old_table(self) -> Table: + """ + Convert the table to the old table format. This method is temporary and will be removed in a later version. + + Returns + ------- + old_table: + The table in the old format. + + Examples + -------- + >>> from safeds.data.tabular.containers import ExperimentalPolarsTable + >>> table = ExperimentalPolarsTable({"a": [1, 2, 3], "b": [4, 5, 6]}) + >>> old_table = table.temporary_to_old_table() + """ + if self._data_frame is None: + self._data_frame = self._lazy_frame.collect() + + return Table._from_pandas_dataframe(self._data_frame.to_pandas()) + + # ------------------------------------------------------------------------------------------------------------------ + # Dataframe interchange protocol + # ------------------------------------------------------------------------------------------------------------------ + + def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True): # type: ignore[no-untyped-def] + """ + Return a dataframe object that conforms to the dataframe interchange protocol. + + Generally, there is no reason to call this method directly. The dataframe interchange protocol is designed to + allow libraries to consume tabular data from different sources, such as `pandas` or `polars`. If you still + decide to call this method, you should not rely on any capabilities of the returned object beyond the dataframe + interchange protocol. 
+ + The specification of the dataframe interchange protocol can be found + [here](https://data-apis.org/dataframe-protocol/latest/index.html). + + Note that this operation must fully load the data into memory, which can be expensive. + + Parameters + ---------- + nan_as_null: + This parameter is deprecated and will be removed in a later revision of the dataframe interchange protocol. + Setting it has no effect. + allow_copy: + Whether memory may be copied to create the dataframe object. + + Returns + ------- + dataframe: + A dataframe object that conforms to the dataframe interchange protocol. + """ + if self._data_frame is None: + self._data_frame = self._lazy_frame.collect() + + return self._data_frame.__dataframe__(allow_copy=allow_copy) + + # ------------------------------------------------------------------------------------------------------------------ + # IPython integration + # ------------------------------------------------------------------------------------------------------------------ + + def _repr_html_(self) -> str: + """ + Return a compact HTML representation of the table for IPython. + + Note that this operation must fully load the data into memory, which can be expensive. + + Returns + ------- + html: + The generated HTML. + """ + if self._data_frame is None: + self._data_frame = self._lazy_frame.collect() + + return self._data_frame._repr_html_() diff --git a/src/safeds/exceptions/_data.py b/src/safeds/exceptions/_data.py index 61d89038e..43f2fabd5 100644 --- a/src/safeds/exceptions/_data.py +++ b/src/safeds/exceptions/_data.py @@ -146,7 +146,7 @@ def __init__(self, values: list[tuple[str, str]]) -> None: ) -class WrongFileExtensionError(Exception): +class WrongFileExtensionError(ValueError): """Exception raised when the file has the wrong file extension.""" def __init__(self, file: str | Path, file_extension: str | list[str]) -> None: