Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Switch from join_key to join_keys in tests and docs #2580

Merged
merged 3 commits into from
Apr 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/getting-started/concepts/entity.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
An entity is a collection of semantically related features. Users define entities to map to the domain of their use case. For example, a ride-hailing service could have customers and drivers as their entities, which group related features that correspond to these customers and drivers.

```python
driver = Entity(name='driver', value_type=ValueType.STRING, join_key='driver_id')
driver = Entity(name='driver', value_type=ValueType.STRING, join_keys=['driver_id'])
```

Entities are typically defined as part of feature views. The entity name is used to reference the entity from a feature view definition, and the join key is used to identify the physical primary key on which feature values should be stored and retrieved. These keys are used during the lookup of feature values from the online store and the join process in point-in-time joins. It is possible to define composite entities (more than one entity object) in a feature view. It is also possible for feature views to have zero entities. See [feature view](feature-view.md) for more details.
Expand Down
2 changes: 1 addition & 1 deletion docs/getting-started/concepts/feature-view.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ It is suggested that you dynamically specify the new FeatureView name using `.wi
from feast import BigQuerySource, Entity, FeatureView, Field, ValueType
from feast.types import Int32

location = Entity(name="location", join_key="location_id", value_type=ValueType.INT64)
location = Entity(name="location", join_keys=["location_id"], value_type=ValueType.INT64)

location_stats_fv= FeatureView(
name="location_stats",
Expand Down
4 changes: 2 additions & 2 deletions docs/getting-started/quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ driver_hourly_stats = FileSource(
# fetch features.
# An entity has a name used for later reference (e.g., in a feature view)
# and join_keys to identify the physical field name used in storage
driver = Entity(name="driver", value_type=ValueType.INT64, join_key="driver_id", description="driver id",)
driver = Entity(name="driver", value_type=ValueType.INT64, join_keys=["driver_id"], description="driver id",)

# Our parquet files contain sample data that includes a driver_id column, timestamps and
# three feature columns. Here we define a Feature View that will allow us to serve this
Expand Down Expand Up @@ -168,7 +168,7 @@ driver_hourly_stats = FileSource(
# fetch features.
# An entity has a name used for later reference (e.g., in a feature view)
# and join_keys to identify the physical field name used in storage
driver = Entity(name="driver", value_type=ValueType.INT64, join_key="driver_id", description="driver id",)
driver = Entity(name="driver", value_type=ValueType.INT64, join_keys=["driver_id"], description="driver id",)

# Our parquet files contain sample data that includes a driver_id column, timestamps and
# three feature columns. Here we define a Feature View that will allow us to serve this
Expand Down
2 changes: 1 addition & 1 deletion docs/tutorials/validating-historical-features.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ batch_source = FileSource(


```python
taxi_entity = Entity(name='taxi', join_key='taxi_id')
taxi_entity = Entity(name='taxi', join_keys=['taxi_id'])
```


Expand Down
2 changes: 1 addition & 1 deletion sdk/python/feast/feature_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
DUMMY_ENTITY_NAME = "__dummy"
DUMMY_ENTITY_VAL = ""
DUMMY_ENTITY = Entity(
name=DUMMY_ENTITY_NAME, join_key=DUMMY_ENTITY_ID, value_type=ValueType.STRING,
name=DUMMY_ENTITY_NAME, join_keys=[DUMMY_ENTITY_ID], value_type=ValueType.STRING,
)


Expand Down
4 changes: 3 additions & 1 deletion sdk/python/feast/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ def update_entities_with_inferred_types_from_feature_views(
if not (incomplete_entities_keys & set(view.entities)):
continue # skip if view doesn't contain any entities that need inference

col_names_and_types = view.batch_source.get_table_column_names_and_types(config)
col_names_and_types = list(
view.batch_source.get_table_column_names_and_types(config)
)
for entity_name in view.entities:
if entity_name in incomplete_entities:
entity = incomplete_entities[entity_name]
Expand Down
6 changes: 3 additions & 3 deletions sdk/python/feast/templates/aws/driver_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
driver = Entity(
# Name of the entity. Must be unique within a project
name="driver",
# The join key of an entity describes the storage level field/column on which
# features can be looked up. The join key is also used to join feature
# The join keys of an entity describe the storage level field/column on which
# features can be looked up. The join keys are also used to join feature
# tables/views when building feature vectors
join_key="driver_id",
join_keys=["driver_id"],
# The storage level type for an entity
value_type=ValueType.INT64,
)
Expand Down
6 changes: 3 additions & 3 deletions sdk/python/feast/templates/gcp/driver_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
driver = Entity(
# Name of the entity. Must be unique within a project
name="driver",
# The join key of an entity describes the storage level field/column on which
# features can be looked up. The join key is also used to join feature
# The join keys of an entity describe the storage level field/column on which
# features can be looked up. The join keys are also used to join feature
# tables/views when building feature vectors
join_key="driver_id",
join_keys=["driver_id"],
# The storage level type for an entity
value_type=ValueType.INT64,
)
Expand Down
2 changes: 1 addition & 1 deletion sdk/python/feast/templates/local/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

# Define an entity for the driver. You can think of an entity as a primary key used to
# fetch features.
driver = Entity(name="driver", join_key="driver_id", value_type=ValueType.INT64,)
driver = Entity(name="driver", join_keys=["driver_id"], value_type=ValueType.INT64,)

# Our parquet files contain sample data that includes a driver_id column, timestamps and
# three feature columns. Here we define a Feature View that will allow us to serve this
Expand Down
6 changes: 3 additions & 3 deletions sdk/python/feast/templates/snowflake/driver_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@
driver = Entity(
# Name of the entity. Must be unique within a project
name="driver",
# The join key of an entity describes the storage level field/column on which
# features can be looked up. The join key is also used to join feature
# The join keys of an entity describe the storage level field/column on which
# features can be looked up. The join keys are also used to join feature
# tables/views when building feature vectors
join_key="driver_id",
join_keys=["driver_id"],
)

# Indicates a data source from which feature values can be retrieved. Sources are queried when building training
Expand Down
4 changes: 2 additions & 2 deletions sdk/python/tests/example_repos/example_feature_repo_1.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,14 @@

driver = Entity(
name="driver", # The name is derived from this argument, not object name.
join_key="driver_id",
join_keys=["driver_id"],
value_type=ValueType.INT64,
description="driver id",
)

customer = Entity(
name="customer", # The name is derived from this argument, not object name.
join_key="customer_id",
join_keys=["customer_id"],
value_type=ValueType.STRING,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
name="driver_id",
value_type=ValueType.INT64,
description="driver id",
join_key="driver",
join_keys=["driver"],
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ def driver(value_type: ValueType = ValueType.INT64):
name="driver", # The name is derived from this argument, not object name.
value_type=value_type,
description="driver id",
join_key="driver_id",
join_keys=["driver_id"],
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -689,7 +689,7 @@ def test_historical_features_from_bigquery_sources_containing_backfills(environm
created_timestamp_column="created",
)

driver = Entity(name="driver", join_key="driver_id", value_type=ValueType.INT64)
driver = Entity(name="driver", join_keys=["driver_id"], value_type=ValueType.INT64)
driver_fv = FeatureView(
name="driver_stats",
entities=["driver"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,9 @@ def test_feature_view_inference_success(test_feature_store, dataframe_source):
with prep_file_source(
df=dataframe_source, event_timestamp_column="ts_1"
) as file_source:
entity = Entity(name="id", join_key="id_join_key", value_type=ValueType.INT64)
entity = Entity(
name="id", join_keys=["id_join_key"], value_type=ValueType.INT64
)

fv1 = FeatureView(
name="fv1",
Expand Down Expand Up @@ -436,7 +438,7 @@ def test_reapply_feature_view_success(test_feature_store, dataframe_source):
df=dataframe_source, event_timestamp_column="ts_1"
) as file_source:

e = Entity(name="id", join_key="id_join_key", value_type=ValueType.STRING)
e = Entity(name="id", join_keys=["id_join_key"], value_type=ValueType.STRING)

# Create Feature View
fv1 = FeatureView(
Expand Down
14 changes: 7 additions & 7 deletions sdk/python/tests/integration/registration/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ def test_update_entities_with_inferred_types_from_feature_views(
name="fv2", entities=["id"], batch_source=file_source_2, ttl=None,
)

actual_1 = Entity(name="id", join_key="id_join_key")
actual_2 = Entity(name="id", join_key="id_join_key")
actual_1 = Entity(name="id", join_keys=["id_join_key"])
actual_2 = Entity(name="id", join_keys=["id_join_key"])

update_entities_with_inferred_types_from_feature_views(
[actual_1], [fv1], RepoConfig(provider="local", project="test")
Expand All @@ -64,16 +64,16 @@ def test_update_entities_with_inferred_types_from_feature_views(
[actual_2], [fv2], RepoConfig(provider="local", project="test")
)
assert actual_1 == Entity(
name="id", join_key="id_join_key", value_type=ValueType.INT64
name="id", join_keys=["id_join_key"], value_type=ValueType.INT64
)
assert actual_2 == Entity(
name="id", join_key="id_join_key", value_type=ValueType.STRING
name="id", join_keys=["id_join_key"], value_type=ValueType.STRING
)

with pytest.raises(RegistryInferenceFailure):
# two viable data types
update_entities_with_inferred_types_from_feature_views(
[Entity(name="id", join_key="id_join_key")],
[Entity(name="id", join_keys=["id_join_key"])],
[fv1, fv2],
RepoConfig(provider="local", project="test"),
)
Expand Down Expand Up @@ -289,8 +289,8 @@ def test_view_with_missing_feature(features_df: pd.DataFrame) -> pd.DataFrame:

def test_update_feature_views_with_inferred_features():
file_source = FileSource(name="test", path="test path")
entity1 = Entity(name="test1", join_key="test_column_1")
entity2 = Entity(name="test2", join_key="test_column_2")
entity1 = Entity(name="test1", join_keys=["test_column_1"])
entity2 = Entity(name="test2", join_keys=["test_column_2"])
feature_view_1 = FeatureView(
name="test1",
entities=[entity1],
Expand Down