From c65865e65ce60dbcb1489ff5a27da20f5af88a09 Mon Sep 17 00:00:00 2001 From: Oleksii Moskalenko Date: Sat, 26 Mar 2022 02:31:52 +0200 Subject: [PATCH] chore: Updating documentation for entity's join key (#2451) Signed-off-by: pyalex --- .github/workflows/master_only.yml | 2 +- docs/getting-started/concepts/entity.md | 2 +- .../concepts/feature-retrieval.md | 5 ++++- docs/getting-started/quickstart.md | 18 ++++++++++++++---- .../read-features-from-the-online-store.md | 1 + docs/tutorials/driver-stats-on-snowflake.md | 7 ++++++- 6 files changed, 27 insertions(+), 8 deletions(-) diff --git a/.github/workflows/master_only.yml b/.github/workflows/master_only.yml index 01e477d031..4c7faad298 100644 --- a/.github/workflows/master_only.yml +++ b/.github/workflows/master_only.yml @@ -205,4 +205,4 @@ jobs: make push-${{ matrix.component }}-docker REGISTRY=${REGISTRY} VERSION=${GITHUB_SHA} docker tag ${REGISTRY}/${{ matrix.component }}:${GITHUB_SHA} ${REGISTRY}/${{ matrix.component }}:develop - docker push ${REGISTRY}/${{ matrix.component }}:develop + docker push ${REGISTRY}/${{ matrix.component }}:develop \ No newline at end of file diff --git a/docs/getting-started/concepts/entity.md b/docs/getting-started/concepts/entity.md index a4db98cfd1..bc8aa2ac99 100644 --- a/docs/getting-started/concepts/entity.md +++ b/docs/getting-started/concepts/entity.md @@ -6,7 +6,7 @@ An entity is a collection of semantically related features. Users define entitie driver = Entity(name='driver', value_type=ValueType.STRING, join_key='driver_id') ``` -Entities are typically defined as part of feature views. Entities are used to identify the primary key on which feature values should be stored and retrieved. These keys are used during the lookup of feature values from the online store and the join process in point-in-time joins. It is possible to define composite entities \(more than one entity object\) in a feature view. It is also possible for feature views to have zero entities. 
See [feature view](feature-view.md) for more details. +Entities are typically defined as part of feature views. The entity name is used to reference the entity from a feature view definition, and the join key is used to identify the physical primary key on which feature values should be stored and retrieved. These keys are used during the lookup of feature values from the online store and the join process in point-in-time joins. It is possible to define composite entities \(more than one entity object\) in a feature view. It is also possible for feature views to have zero entities. See [feature view](feature-view.md) for more details. Entities should be reused across feature views. diff --git a/docs/getting-started/concepts/feature-retrieval.md b/docs/getting-started/concepts/feature-retrieval.md index dfbbd99073..bece0f5527 100644 --- a/docs/getting-started/concepts/feature-retrieval.md +++ b/docs/getting-started/concepts/feature-retrieval.md @@ -20,7 +20,10 @@ online_features = fs.get_online_features( 'driver_locations:lon', 'drivers_activity:trips_today' ], - entity_rows=[{'driver': 'driver_1001'}] + entity_rows=[ + # {join_key: entity_value} + {'driver': 'driver_1001'} + ] ) ``` diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index c067513d31..ed70f75712 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -95,14 +95,16 @@ driver_hourly_stats = FileSource( # Define an entity for the driver. You can think of entity as a primary key used to # fetch features. -driver = Entity(name="driver_id", value_type=ValueType.INT64, description="driver id",) +# An entity has a name used for later reference (e.g., in a feature view) +# and a join_key that identifies the physical field name used in storage +driver = Entity(name="driver", value_type=ValueType.INT64, join_key="driver_id", description="driver id",) # Our parquet files contain sample data that includes a driver_id column, timestamps and # three feature column. 
Here we define a Feature View that will allow us to serve this # data to our model online. driver_hourly_stats_view = FeatureView( name="driver_hourly_stats", - entities=["driver_id"], + entities=["driver"], # reference entity by name ttl=Duration(seconds=86400 * 1), features=[ Feature(name="conv_rate", dtype=ValueType.FLOAT), @@ -162,14 +164,16 @@ driver_hourly_stats = FileSource( # Define an entity for the driver. You can think of entity as a primary key used to # fetch features. -driver = Entity(name="driver_id", value_type=ValueType.INT64, description="driver id",) +# An entity has a name used for later reference (e.g., in a feature view) +# and a join_key that identifies the physical field name used in storage +driver = Entity(name="driver", value_type=ValueType.INT64, join_key="driver_id", description="driver id",) # Our parquet files contain sample data that includes a driver_id column, timestamps and # three feature column. Here we define a Feature View that will allow us to serve this # data to our model online. 
driver_hourly_stats_view = FeatureView( name="driver_hourly_stats", - entities=["driver_id"], + entities=["driver"], # reference entity by name ttl=Duration(seconds=86400 * 1), features=[ Feature(name="conv_rate", dtype=ValueType.FLOAT), @@ -213,8 +217,13 @@ from feast import FeatureStore # The entity dataframe is the dataframe we want to enrich with feature values entity_df = pd.DataFrame.from_dict( { + # entity's join key -> entity values "driver_id": [1001, 1002, 1003], + + # label name -> label values "label_driver_reported_satisfaction": [1, 5, 3], + + # "event_timestamp" (reserved key) -> timestamps "event_timestamp": [ datetime.now() - timedelta(minutes=11), datetime.now() - timedelta(minutes=36), @@ -320,6 +329,7 @@ feature_vector = store.get_online_features( "driver_hourly_stats:avg_daily_trips", ], entity_rows=[ + # {join_key: entity_value} {"driver_id": 1004}, {"driver_id": 1005}, ], diff --git a/docs/how-to-guides/feast-snowflake-gcp-aws/read-features-from-the-online-store.md b/docs/how-to-guides/feast-snowflake-gcp-aws/read-features-from-the-online-store.md index 16568fd62a..7b0a46239b 100644 --- a/docs/how-to-guides/feast-snowflake-gcp-aws/read-features-from-the-online-store.md +++ b/docs/how-to-guides/feast-snowflake-gcp-aws/read-features-from-the-online-store.md @@ -34,6 +34,7 @@ fs = FeatureStore(repo_path="path/to/feature/repo") online_features = fs.get_online_features( features=features, entity_rows=[ + # {join_key: entity_value, ...} {"driver_id": 1001}, {"driver_id": 1002}] ).to_dict() diff --git a/docs/tutorials/driver-stats-on-snowflake.md b/docs/tutorials/driver-stats-on-snowflake.md index 94ac109c94..01b158cb1a 100644 --- a/docs/tutorials/driver-stats-on-snowflake.md +++ b/docs/tutorials/driver-stats-on-snowflake.md @@ -124,7 +124,12 @@ fs.materialize_incremental(end_date=datetime.now()) {% code title="test.py" %} ```python online_features = fs.get_online_features( - features=features, entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}], 
+ features=features, + entity_rows=[ + # {join_key: entity_value} + {"driver_id": 1001}, + {"driver_id": 1002} + ], ).to_dict() ``` {% endcode %}