Skip to content

Commit

Permalink
Add a specific error for missing columns during materialization (#1619)
Browse files Browse the repository at this point in the history
* Add a specific error for missing columns during materialization

Signed-off-by: Achal Shah <[email protected]>

* make format

Signed-off-by: Achal Shah <[email protected]>

* Be more specific about dataframe

Signed-off-by: Achal Shah <[email protected]>

* Update error message

Signed-off-by: Achal Shah <[email protected]>

* format

Signed-off-by: Achal Shah <[email protected]>
  • Loading branch information
achals authored Jun 11, 2021
1 parent 0d7e858 commit 1fd030c
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 3 deletions.
12 changes: 12 additions & 0 deletions sdk/python/feast/errors.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Set

from colorama import Fore, Style


Expand Down Expand Up @@ -75,3 +77,13 @@ def __init__(self, expected, missing):
f"The entity dataframe you have provided must contain columns {expected}, "
f"but {missing} were missing."
)


class FeastJoinKeysDuringMaterialization(Exception):
    """Raised when a source being materialized lacks required join key columns.

    Args:
        source: Identifier of the data source being materialized; it is
            interpolated verbatim into the error message.
        join_key_columns: Column names that must be present in the source.
        source_columns: Column names actually found in the source.

    Attributes:
        source: The ``source`` value passed to the constructor.
        missing_columns: Join key columns that are absent from
            ``source_columns``.
    """

    def __init__(
        self, source: str, join_key_columns: Set[str], source_columns: Set[str]
    ):
        # Coerce both arguments to sets: with a list (or any other non-set
        # iterable) the ``-`` operator would raise TypeError from inside this
        # constructor and mask the error we actually want to report.
        expected = set(join_key_columns)
        missing = expected - set(source_columns)

        # Expose the details so handlers do not have to parse the message.
        self.source = source
        self.missing_columns = missing

        super().__init__(
            f"The DataFrame from {source} being materialized must have at least {expected} columns present, "
            f"but these were missing: {missing}"
        )
6 changes: 3 additions & 3 deletions sdk/python/feast/infra/offline_stores/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import pytz

from feast.data_source import DataSource, FileSource
from feast.errors import FeastJoinKeysDuringMaterialization
from feast.feature_view import FeatureView
from feast.infra.offline_stores.offline_store import OfflineStore, RetrievalJob
from feast.infra.provider import (
Expand Down Expand Up @@ -218,9 +219,8 @@ def pull_latest_from_table_or_query(

source_columns = set(source_df.columns)
if not set(join_key_columns).issubset(source_columns):
raise ValueError(
f"The DataFrame must have at least {set(join_key_columns)} columns present, "
f"but these were missing: {set(join_key_columns)- source_columns} "
raise FeastJoinKeysDuringMaterialization(
data_source.path, set(join_key_columns), source_columns
)

ts_columns = (
Expand Down
34 changes: 34 additions & 0 deletions sdk/python/tests/example_feature_repo_with_entity_join_key.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from google.protobuf.duration_pb2 import Duration

from feast import Entity, Feature, FeatureView, ValueType
from feast.data_source import FileSource

# File-based source feeding the feature view declared in this module.
driver_hourly_stats = FileSource(
    created_timestamp_column="created",
    event_timestamp_column="datetime",
    path="%PARQUET_PATH%",  # placeholder to be replaced by the test
)


# Failure-path fixture: this entity's join key is intentionally NOT the column
# name used in the parquet file, so the join key lookup is expected to fail.
driver = Entity(
    name="driver_id",
    join_key="driver",
    value_type=ValueType.INT64,
    description="driver id",
)


# Feature view over the file source above, keyed by the "driver_id" entity.
driver_hourly_stats_view = FeatureView(
    name="driver_hourly_stats",
    input=driver_hourly_stats,
    entities=["driver_id"],
    features=[
        Feature(name="conv_rate", dtype=ValueType.FLOAT),
        Feature(name="acc_rate", dtype=ValueType.FLOAT),
        Feature(name="avg_daily_trips", dtype=ValueType.INT64),
    ],
    ttl=Duration(seconds=24 * 60 * 60),  # one day
    online=True,
    tags={},
)
23 changes: 23 additions & 0 deletions sdk/python/tests/test_e2e_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,26 @@ def test_e2e_local() -> None:
assert r.returncode == 0

_assert_online_features(store, driver_df, end_date)

# Test a failure case when the parquet file doesn't include a join key
with runner.local_repo(
get_example_repo("example_feature_repo_with_entity_join_key.py").replace(
"%PARQUET_PATH%", driver_stats_path
),
"file",
) as store:

assert store.repo_path is not None

# feast materialize
returncode, output = runner.run_with_output(
[
"materialize",
start_date.isoformat(),
(end_date - timedelta(days=7)).isoformat(),
],
cwd=Path(store.repo_path),
)

assert returncode != 0
assert "feast.errors.FeastJoinKeysDuringMaterialization" in str(output)
Empty file.

0 comments on commit 1fd030c

Please sign in to comment.