Skip to content

Commit

Permalink
Rename LegacyTable -> Table
Browse files Browse the repository at this point in the history
  • Loading branch information
Jay Chia committed Dec 1, 2023
1 parent 7c27f78 commit 43ed1d7
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 17 deletions.
2 changes: 1 addition & 1 deletion benchmarking/parquet/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def fn(files: list[str]) -> list[pa.Table]:


def daft_bulk_read(paths: list[str], columns: list[str] | None = None) -> list[pa.Table]:
tables = daft.table.LegacyTable.read_parquet_bulk(paths, columns=columns)
tables = daft.table.Table.read_parquet_bulk(paths, columns=columns)
return [t.to_arrow() for t in tables]


Expand Down
8 changes: 3 additions & 5 deletions daft/table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,17 @@

import os

from .table import Table as _Table
from .table import read_parquet_into_pyarrow, read_parquet_into_pyarrow_bulk
from .table import Table, read_parquet_into_pyarrow, read_parquet_into_pyarrow_bulk

# Need to import after `.table` due to circular dep issue otherwise
from .micropartition import MicroPartition as _MicroPartition # isort:skip


LegacyTable = _Table
MicroPartition = _MicroPartition

# Use $DAFT_MICROPARTITIONS envvar as a feature flag to turn off MicroPartitions
if os.getenv("DAFT_MICROPARTITIONS", "1") != "1":
MicroPartition = LegacyTable # type: ignore
MicroPartition = Table # type: ignore


__all__ = ["MicroPartition", "LegacyTable", "read_parquet_into_pyarrow", "read_parquet_into_pyarrow_bulk"]
__all__ = ["MicroPartition", "Table", "read_parquet_into_pyarrow", "read_parquet_into_pyarrow_bulk"]
6 changes: 3 additions & 3 deletions tests/integration/io/parquet/test_reads_public_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import daft
from daft.filesystem import get_filesystem, get_protocol_from_path
from daft.table import LegacyTable, MicroPartition
from daft.table import MicroPartition, Table


def get_filesystem_from_path(path: str, **kwargs) -> fsspec.AbstractFileSystem:
Expand Down Expand Up @@ -253,7 +253,7 @@ def test_parquet_read_table_bulk(parquet_file, public_storage_io_config, multith
pa_read = MicroPartition.from_arrow(read_parquet_with_pyarrow(url))

# Legacy Table returns a list[Table]
if MicroPartition == LegacyTable:
if MicroPartition == Table:
for daft_native_read in daft_native_reads:
assert daft_native_read.schema() == pa_read.schema()
pd.testing.assert_frame_equal(daft_native_read.to_pandas(), pa_read.to_pandas())
Expand Down Expand Up @@ -337,7 +337,7 @@ def test_row_groups_selection_bulk(public_storage_io_config, multithreaded_io):
url = ["s3://daft-public-data/test_fixtures/parquet-dev/mvp.parquet"] * 11
row_groups = [list(range(10))] + [[i] for i in range(10)]

if MicroPartition == LegacyTable:
if MicroPartition == Table:
first, *rest = MicroPartition.read_parquet_bulk(
url, io_config=public_storage_io_config, multithreaded_io=multithreaded_io, row_groups_per_path=row_groups
)
Expand Down
16 changes: 8 additions & 8 deletions tests/table/test_partitioning.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from daft.datatype import DataType
from daft.expressions import col
from daft.logical.schema import Schema
from daft.table import LegacyTable, MicroPartition
from daft.table import MicroPartition, Table

daft_int_types = [
DataType.int8(),
Expand Down Expand Up @@ -67,7 +67,7 @@ def test_partitioning_micropartitions_hash(mp) -> None:
],
)
def test_partitioning_micropartitions_range_empty(mp) -> None:
boundaries = LegacyTable.from_pydict({"a": np.linspace(0, 10, 3)[1:]}).eval_expression_list(
boundaries = Table.from_pydict({"a": np.linspace(0, 10, 3)[1:]}).eval_expression_list(
[col("a").cast(DataType.int64())]
)
split_tables = mp.partition_by_range([col("a")], boundaries, [True])
Expand All @@ -85,7 +85,7 @@ def test_partitioning_micropartitions_range_empty(mp) -> None:
],
)
def test_partitioning_micropartitions_range_boundaries_empty(mp) -> None:
boundaries = LegacyTable.from_pydict({"a": [], "b": []}).eval_expression_list([col("a").cast(DataType.int64())])
boundaries = Table.from_pydict({"a": [], "b": []}).eval_expression_list([col("a").cast(DataType.int64())])
split_tables = mp.partition_by_range([col("a"), col("b")], boundaries, [False, False])
assert len(split_tables) == 1
assert split_tables[0].to_pydict() == {"a": [], "b": []}
Expand All @@ -105,7 +105,7 @@ def test_partitioning_micropartitions_range_boundaries_empty(mp) -> None:
],
)
def test_partitioning_micropartitions_range(mp) -> None:
boundaries = LegacyTable.from_pydict({"a": np.linspace(0, 5, 3)[1:]}).eval_expression_list(
boundaries = Table.from_pydict({"a": np.linspace(0, 5, 3)[1:]}).eval_expression_list(
[col("a").cast(DataType.int64())]
)
split_tables = mp.partition_by_range([col("a")], boundaries, [True])
Expand Down Expand Up @@ -251,7 +251,7 @@ def test_table_partition_by_range_single_column(size, k, desc) -> None:
if desc:
input_boundaries = input_boundaries[::-1]

boundaries = LegacyTable.from_pydict({"x": input_boundaries}).eval_expression_list(
boundaries = Table.from_pydict({"x": input_boundaries}).eval_expression_list(
[col("x").cast(table.get_column("x").datatype())]
)

Expand Down Expand Up @@ -286,7 +286,7 @@ def test_table_partition_by_range_multi_column(size, k, desc) -> None:
if desc:
input_boundaries = input_boundaries[::-1]

boundaries = LegacyTable.from_pydict({"x": np.ones(k - 1), "y": input_boundaries}).eval_expression_list(
boundaries = Table.from_pydict({"x": np.ones(k - 1), "y": input_boundaries}).eval_expression_list(
[col("x").cast(table.get_column("x").datatype()), col("y").cast(table.get_column("y").datatype())]
)

Expand All @@ -310,7 +310,7 @@ def test_table_partition_by_range_multi_column(size, k, desc) -> None:

def test_table_partition_by_range_multi_column_string() -> None:
table = MicroPartition.from_pydict({"x": ["a", "c", "a", "c"], "y": ["1", "2", "3", "4"]})
boundaries = LegacyTable.from_pydict({"x": ["b"], "y": ["1"]})
boundaries = Table.from_pydict({"x": ["b"], "y": ["1"]})
split_tables = table.partition_by_range([col("x"), col("y")], boundaries, [False, False])
assert len(split_tables) == 2

Expand All @@ -326,7 +326,7 @@ def test_table_partition_by_range_multi_column_string() -> None:
def test_table_partition_by_range_input() -> None:
data = {"x": [1, 2, 3], "b": [0, 1, 2]}
table_cls = MicroPartition.from_pydict(data)
boundaries = LegacyTable.from_pydict(data)
boundaries = Table.from_pydict(data)

with pytest.raises(ValueError, match="Schema Mismatch"):
table_cls.partition_by_range([col("x")], boundaries, [False])
Expand Down

0 comments on commit 43ed1d7

Please sign in to comment.