From 39b6c009d76c9b3e1a4fd1911f06e6ca312e34fa Mon Sep 17 00:00:00 2001 From: Miles Adkins Date: Tue, 23 Aug 2022 23:00:52 -0500 Subject: [PATCH] fix: Updated snowflake template Signed-off-by: Miles Adkins --- .../feast/templates/snowflake/README.md | 8 +-- .../feast/templates/snowflake/bootstrap.py | 5 ++ .../feast/templates/snowflake/driver_repo.py | 58 ------------------ .../snowflake/feature_repo/__init__.py | 0 .../{example_repo.py => driver_repo.py} | 0 .../snowflake/feature_repo/feature_store.yaml | 59 ++++++++----------- .../templates/snowflake/feature_store.yaml | 28 --------- .../{feature_repo => }/test_workflow.py | 8 ++- 8 files changed, 38 insertions(+), 128 deletions(-) delete mode 100644 sdk/python/feast/templates/snowflake/driver_repo.py delete mode 100644 sdk/python/feast/templates/snowflake/feature_repo/__init__.py rename sdk/python/feast/templates/snowflake/feature_repo/{example_repo.py => driver_repo.py} (100%) delete mode 100644 sdk/python/feast/templates/snowflake/feature_store.yaml rename sdk/python/feast/templates/snowflake/{feature_repo => }/test_workflow.py (94%) diff --git a/sdk/python/feast/templates/snowflake/README.md b/sdk/python/feast/templates/snowflake/README.md index c21c6c1eac..0c950de435 100644 --- a/sdk/python/feast/templates/snowflake/README.md +++ b/sdk/python/feast/templates/snowflake/README.md @@ -2,7 +2,7 @@ A quick view of what's in this repository: * `data/` contains raw demo parquet data -* `example_repo.py` contains demo feature definitions +* `driver_repo.py` contains demo feature definitions * `feature_store.yaml` contains a demo setup configuring where data sources are * `test_workflow.py` showcases how to run all key Feast commands, including defining, retrieving, and pushing features. @@ -14,6 +14,6 @@ You can run the overall workflow with `python test_workflow.py`. 2. Setup CI/CD + dev vs staging vs prod environments to automatically update the registry as you change Feast feature definitions. See [docs](https://docs.feast.dev/how-to-guides/running-feast-in-production#1.-automatically-deploying-changes-to-your-feature-definitions). 3. (optional) Regularly scheduled materialization to power low latency feature retrieval (e.g. via Airflow). See [Batch data ingestion](https://docs.feast.dev/getting-started/concepts/data-ingestion#batch-data-ingestion) for more details. -4. (optional) Deploy feature server instances with `feast serve` to expose endpoints to retrieve online features. - - See [Python feature server](https://docs.feast.dev/reference/feature-servers/python-feature-server) for details. - - Use cases can also directly call the Feast client to fetch features as per [Feature retrieval](https://docs.feast.dev/getting-started/concepts/feature-retrieval) \ No newline at end of file +4. (optional) Deploy feature server instances with `feast serve` to expose endpoints to retrieve online features. + - See [Python feature server](https://docs.feast.dev/reference/feature-servers/python-feature-server) for details. + - Use cases can also directly call the Feast client to fetch features as per [Feature retrieval](https://docs.feast.dev/getting-started/concepts/feature-retrieval) diff --git a/sdk/python/feast/templates/snowflake/bootstrap.py b/sdk/python/feast/templates/snowflake/bootstrap.py index b478996fc9..01f4045fe7 100644 --- a/sdk/python/feast/templates/snowflake/bootstrap.py +++ b/sdk/python/feast/templates/snowflake/bootstrap.py @@ -75,6 +75,11 @@ def bootstrap(): execute_snowflake_statement( conn, f'DROP TABLE IF EXISTS "{project_name}_feast_driver_hourly_stats"' ) + execute_snowflake_statement( + conn, + f'ALTER WAREHOUSE IF EXISTS "{snowflake_warehouse}" RESUME IF SUSPENDED', + ) + write_pandas( conn, driver_df, diff --git a/sdk/python/feast/templates/snowflake/driver_repo.py b/sdk/python/feast/templates/snowflake/driver_repo.py deleted file mode 100644 index 5453e44795..0000000000 --- a/sdk/python/feast/templates/snowflake/driver_repo.py +++ /dev/null @@ -1,58 +0,0 @@ -from datetime import timedelta - -import yaml - -from feast import BatchFeatureView, Entity, FeatureService, SnowflakeSource - -# Define an entity for the driver. Entities can be thought of as primary keys used to -# retrieve features. Entities are also used to join multiple tables/views during the -# construction of feature vectors -driver = Entity( - # Name of the entity. Must be unique within a project - name="driver", - # The join keys of an entity describe the storage level field/column on which - # features can be looked up. The join keys are also used to join feature - # tables/views when building feature vectors - join_keys=["driver_id"], -) - -# Indicates a data source from which feature values can be retrieved. Sources are queried when building training -# datasets or materializing features into an online store. -project_name = yaml.safe_load(open("feature_store.yaml"))["project"] - -driver_stats_source = SnowflakeSource( - # The Snowflake table where features can be found - database=yaml.safe_load(open("feature_store.yaml"))["offline_store"]["database"], - table=f"{project_name}_feast_driver_hourly_stats", - # The event timestamp is used for point-in-time joins and for ensuring only - # features within the TTL are returned - timestamp_field="event_timestamp", - # The (optional) created timestamp is used to ensure there are no duplicate - # feature rows in the offline store or when building training datasets - created_timestamp_column="created", -) - -# Feature views are a grouping based on how features are stored in either the -# online or offline store. -driver_stats_fv = BatchFeatureView( - # The unique name of this feature view. Two feature views in a single - # project cannot have the same name - name="driver_hourly_stats", - # The list of entities specifies the keys required for joining or looking - # up features from this feature view. The reference provided in this field - # correspond to the name of a defined entity (or entities) - entities=[driver], - # The timedelta is the maximum age that each feature value may have - # relative to its lookup time. For historical features (used in training), - # TTL is relative to each timestamp provided in the entity dataframe. - # TTL also allows for eviction of keys from online stores and limits the - # amount of historical scanning required for historical feature values - # during retrieval - ttl=timedelta(weeks=52), - # Batch sources are used to find feature values. In the case of this feature - # view we will query a source table on Snowflake for driver statistics - # features - source=driver_stats_source, -) - -driver_stats_fs = FeatureService(name="driver_activity", features=[driver_stats_fv]) diff --git a/sdk/python/feast/templates/snowflake/feature_repo/__init__.py b/sdk/python/feast/templates/snowflake/feature_repo/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/sdk/python/feast/templates/snowflake/feature_repo/example_repo.py b/sdk/python/feast/templates/snowflake/feature_repo/driver_repo.py similarity index 100% rename from sdk/python/feast/templates/snowflake/feature_repo/example_repo.py rename to sdk/python/feast/templates/snowflake/feature_repo/driver_repo.py diff --git a/sdk/python/feast/templates/snowflake/feature_repo/feature_store.yaml b/sdk/python/feast/templates/snowflake/feature_repo/feature_store.yaml index d146216fca..104e6394c6 100644 --- a/sdk/python/feast/templates/snowflake/feature_repo/feature_store.yaml +++ b/sdk/python/feast/templates/snowflake/feature_repo/feature_store.yaml @@ -1,39 +1,28 @@ project: my_project -# By default, the registry is a file (but can be turned into a more scalable SQL-backed registry) -# On GCP/AWS, minimally you should create a GCS/S3 bucket for a remote file registry -# See https://docs.feast.dev/getting-started/concepts/registry for details -registry: data/registry.db -provider: gcp +registry: registry.db +provider: local offline_store: - type: snowflake.offline - account: SNOWFLAKE_DEPLOYMENT_URL - user: SNOWFLAKE_USER - password: SNOWFLAKE_PASSWORD - role: SNOWFLAKE_ROLE - warehouse: SNOWFLAKE_WAREHOUSE - database: SNOWFLAKE_DATABASE + type: snowflake.offline + account: SNOWFLAKE_DEPLOYMENT_URL + user: SNOWFLAKE_USER + password: SNOWFLAKE_PASSWORD + role: SNOWFLAKE_ROLE + warehouse: SNOWFLAKE_WAREHOUSE + database: SNOWFLAKE_DATABASE +batch_engine: + type: snowflake.engine + account: SNOWFLAKE_DEPLOYMENT_URL + user: SNOWFLAKE_USER + password: SNOWFLAKE_PASSWORD + role: SNOWFLAKE_ROLE + warehouse: SNOWFLAKE_WAREHOUSE + database: SNOWFLAKE_DATABASE online_store: - type: snowflake.online - account: SNOWFLAKE_DEPLOYMENT_URL - user: SNOWFLAKE_USER - password: SNOWFLAKE_PASSWORD - role: SNOWFLAKE_ROLE - warehouse: SNOWFLAKE_WAREHOUSE - database: SNOWFLAKE_DATABASE -# Use GCP Datastore -# See https://docs.feast.dev/reference/online-stores/datastore -#online_store: -# type: datastore -# project_id: my_gcp_project -# namespace: my_datastore_namespace -# Use AWS DynamoDB -# See https://docs.feast.dev/reference/online-stores/dynamodb -#online_store: -# type: dynamodb -# region: %AWS_REGION% -# Use Redis -# See https://docs.feast.dev/reference/online-stores/redis -#online_store: -# type: redis -# connection_string: "localhost:6379" + type: snowflake.online + account: SNOWFLAKE_DEPLOYMENT_URL + user: SNOWFLAKE_USER + password: SNOWFLAKE_PASSWORD + role: SNOWFLAKE_ROLE + warehouse: SNOWFLAKE_WAREHOUSE + database: SNOWFLAKE_DATABASE entity_key_serialization_version: 2 diff --git a/sdk/python/feast/templates/snowflake/feature_store.yaml b/sdk/python/feast/templates/snowflake/feature_store.yaml deleted file mode 100644 index 104e6394c6..0000000000 --- a/sdk/python/feast/templates/snowflake/feature_store.yaml +++ /dev/null @@ -1,28 +0,0 @@ -project: my_project -registry: registry.db -provider: local -offline_store: - type: snowflake.offline - account: SNOWFLAKE_DEPLOYMENT_URL - user: SNOWFLAKE_USER - password: SNOWFLAKE_PASSWORD - role: SNOWFLAKE_ROLE - warehouse: SNOWFLAKE_WAREHOUSE - database: SNOWFLAKE_DATABASE -batch_engine: - type: snowflake.engine - account: SNOWFLAKE_DEPLOYMENT_URL - user: SNOWFLAKE_USER - password: SNOWFLAKE_PASSWORD - role: SNOWFLAKE_ROLE - warehouse: SNOWFLAKE_WAREHOUSE - database: SNOWFLAKE_DATABASE -online_store: - type: snowflake.online - account: SNOWFLAKE_DEPLOYMENT_URL - user: SNOWFLAKE_USER - password: SNOWFLAKE_PASSWORD - role: SNOWFLAKE_ROLE - warehouse: SNOWFLAKE_WAREHOUSE - database: SNOWFLAKE_DATABASE -entity_key_serialization_version: 2 diff --git a/sdk/python/feast/templates/snowflake/feature_repo/test_workflow.py b/sdk/python/feast/templates/snowflake/test_workflow.py similarity index 94% rename from sdk/python/feast/templates/snowflake/feature_repo/test_workflow.py rename to sdk/python/feast/templates/snowflake/test_workflow.py index 02a746bdb2..6f5e33622a 100644 --- a/sdk/python/feast/templates/snowflake/feature_repo/test_workflow.py +++ b/sdk/python/feast/templates/snowflake/test_workflow.py @@ -8,9 +8,10 @@ def run_demo(): - store = FeatureStore(repo_path=".") + store = FeatureStore(repo_path="./feature_repo") print("\n--- Run feast apply to setup feature store on Snowflake ---") - subprocess.run(["feast", "apply"]) + command = "cd feature_repo; feast apply" + subprocess.run(command, shell=True) print("\n--- Historical features for training ---") fetch_historical_features_entity_df(store, for_batch_scoring=False) @@ -49,7 +50,8 @@ def run_demo(): fetch_online_features(store, use_feature_service=True) print("\n--- Run feast teardown ---") - subprocess.run(["feast", "teardown"]) + command = "cd feature_repo; feast teardown" + subprocess.run(command, shell=True) def fetch_historical_features_entity_df(store: FeatureStore, for_batch_scoring: bool):