Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow setting non-string typed values in set_properties #504

Merged
merged 7 commits into from
Mar 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions pyiceberg/catalog/rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,12 @@ class CreateTableRequest(IcebergBaseModel):
partition_spec: Optional[PartitionSpec] = Field(alias="partition-spec")
write_order: Optional[SortOrder] = Field(alias="write-order")
stage_create: bool = Field(alias="stage-create", default=False)
properties: Properties = Field(default_factory=dict)
properties: Dict[str, str] = Field(default_factory=dict)

# validators
transform_properties_dict_value_to_str = field_validator('properties', mode='before')(transform_dict_value_to_str)
@field_validator('properties', mode='before')
def transform_properties_dict_value_to_str(cls, properties: Properties) -> Dict[str, str]:
return transform_dict_value_to_str(properties)


class RegisterTableRequest(IcebergBaseModel):
Expand Down
9 changes: 7 additions & 2 deletions pyiceberg/table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
Union,
)

from pydantic import Field, SerializeAsAny
from pydantic import Field, SerializeAsAny, field_validator
from sortedcontainers import SortedList
from typing_extensions import Annotated

Expand Down Expand Up @@ -124,6 +124,7 @@
NestedField,
PrimitiveType,
StructType,
transform_dict_value_to_str,
)
from pyiceberg.utils.concurrent import ExecutorFactory
from pyiceberg.utils.datetime import datetime_to_millis
Expand Down Expand Up @@ -293,7 +294,7 @@ def upgrade_table_version(self, format_version: Literal[1, 2]) -> Transaction:

return self

def set_properties(self, properties: Properties = EMPTY_DICT, **kwargs: str) -> Transaction:
def set_properties(self, properties: Properties = EMPTY_DICT, **kwargs: Any) -> Transaction:
HonahX marked this conversation as resolved.
Show resolved Hide resolved
"""Set properties.

When a property is already set, it will be overwritten.
Expand Down Expand Up @@ -474,6 +475,10 @@ class SetPropertiesUpdate(TableUpdate):
action: TableUpdateAction = TableUpdateAction.set_properties
updates: Dict[str, str]

@field_validator('updates', mode='before')
def transform_properties_dict_value_to_str(cls, properties: Properties) -> Dict[str, str]:
return transform_dict_value_to_str(properties)


class RemovePropertiesUpdate(TableUpdate):
action: TableUpdateAction = TableUpdateAction.remove_properties
Expand Down
4 changes: 3 additions & 1 deletion pyiceberg/table/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,9 @@ class TableMetadataCommonFields(IcebergBaseModel):
current-snapshot-id even if the refs map is null."""

# validators
transform_properties_dict_value_to_str = field_validator('properties', mode='before')(transform_dict_value_to_str)
@field_validator('properties', mode='before')
def transform_properties_dict_value_to_str(cls, properties: Properties) -> Dict[str, str]:
return transform_dict_value_to_str(properties)

def snapshot_by_id(self, snapshot_id: int) -> Optional[Snapshot]:
"""Get the snapshot by snapshot_id."""
Expand Down
17 changes: 17 additions & 0 deletions tests/integration/test_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import pytest
from hive_metastore.ttypes import LockRequest, LockResponse, LockState, UnlockRequest
from pyarrow.fs import S3FileSystem
from pydantic_core import ValidationError

from pyiceberg.catalog import Catalog, load_catalog
from pyiceberg.catalog.hive import HiveCatalog, _HiveClient
Expand Down Expand Up @@ -119,6 +120,14 @@ def test_table_properties(catalog: Catalog) -> None:
table = table.transaction().remove_properties("abc").commit_transaction()
assert table.properties == DEFAULT_PROPERTIES

table = table.transaction().set_properties(abc=123).commit_transaction()
# properties are stored as strings in the iceberg spec
assert table.properties == dict(abc="123", **DEFAULT_PROPERTIES)

with pytest.raises(ValidationError) as exc_info:
table.transaction().set_properties(property_name=None).commit_transaction()
assert "None type is not a supported value in properties: property_name" in str(exc_info.value)


@pytest.mark.integration
@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')])
Expand All @@ -141,6 +150,14 @@ def test_table_properties_dict(catalog: Catalog) -> None:
table = table.transaction().remove_properties("abc").commit_transaction()
assert table.properties == DEFAULT_PROPERTIES

table = table.transaction().set_properties({"abc": 123}).commit_transaction()
# properties are stored as strings in the iceberg spec
assert table.properties == dict({"abc": "123"}, **DEFAULT_PROPERTIES)

with pytest.raises(ValidationError) as exc_info:
table.transaction().set_properties({"property_name": None}).commit_transaction()
assert "None type is not a supported value in properties: property_name" in str(exc_info.value)


@pytest.mark.integration
@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')])
Expand Down