From ae19b2fe3fc0482de0404f6f5b947666dcd68c6c Mon Sep 17 00:00:00 2001 From: David Y Liu <7172604+mavysavydav@users.noreply.github.com> Date: Fri, 25 Mar 2022 17:23:00 -0700 Subject: [PATCH 1/5] Preserve ordering of features in _get_column_names --- sdk/python/feast/infra/provider.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sdk/python/feast/infra/provider.py b/sdk/python/feast/infra/provider.py index 4441b77c64..a820085bce 100644 --- a/sdk/python/feast/infra/provider.py +++ b/sdk/python/feast/infra/provider.py @@ -290,9 +290,10 @@ def _get_column_names( # We need to exclude join keys and timestamp columns from the list of features, after they are mapped to # their final column names via the `field_mapping` field of the source. - _feature_names = set(feature_names) - set(join_keys) - _feature_names = _feature_names - {event_timestamp_column, created_timestamp_column} - feature_names = list(_feature_names) + feature_names = [name for name in feature_names + if name not in join_keys + and not event_timestamp_column + and not created_timestamp_column] return ( join_keys, feature_names, From 2e179d0ec23c019891b1218371b5ffb3f7cbd561 Mon Sep 17 00:00:00 2001 From: David Y Liu Date: Wed, 30 Mar 2022 20:55:38 -0700 Subject: [PATCH 2/5] Correction + unit test Signed-off-by: David Y Liu --- sdk/python/feast/infra/provider.py | 4 +- .../test_dynamodb_online_store.py | 57 ------------------- 2 files changed, 2 insertions(+), 59 deletions(-) delete mode 100644 sdk/python/tests/unit/online_store/test_dynamodb_online_store.py diff --git a/sdk/python/feast/infra/provider.py b/sdk/python/feast/infra/provider.py index a820085bce..f4bb3f6b28 100644 --- a/sdk/python/feast/infra/provider.py +++ b/sdk/python/feast/infra/provider.py @@ -292,8 +292,8 @@ def _get_column_names( # their final column names via the `field_mapping` field of the source. feature_names = [name for name in feature_names if name not in join_keys - and not event_timestamp_column - and not created_timestamp_column] + and name != event_timestamp_column + and name != created_timestamp_column] return ( join_keys, feature_names, diff --git a/sdk/python/tests/unit/online_store/test_dynamodb_online_store.py b/sdk/python/tests/unit/online_store/test_dynamodb_online_store.py deleted file mode 100644 index 0f42230ef5..0000000000 --- a/sdk/python/tests/unit/online_store/test_dynamodb_online_store.py +++ /dev/null @@ -1,57 +0,0 @@ -from dataclasses import dataclass - -import pytest -from moto import mock_dynamodb2 - -from feast.infra.offline_stores.file import FileOfflineStoreConfig -from feast.infra.online_stores.dynamodb import ( - DynamoDBOnlineStore, - DynamoDBOnlineStoreConfig, -) -from feast.repo_config import RepoConfig -from tests.utils.online_store_utils import ( - _create_n_customer_test_samples, - _create_test_table, - _insert_data_test_table, -) - -REGISTRY = "s3://test_registry/registry.db" -PROJECT = "test_aws" -PROVIDER = "aws" -TABLE_NAME = "dynamodb_online_store" -REGION = "us-west-2" - - -@dataclass -class MockFeatureView: - name: str - - -@pytest.fixture -def repo_config(): - return RepoConfig( - registry=REGISTRY, - project=PROJECT, - provider=PROVIDER, - online_store=DynamoDBOnlineStoreConfig(region=REGION), - offline_store=FileOfflineStoreConfig(), - ) - - -@mock_dynamodb2 -@pytest.mark.parametrize("n_samples", [5, 50, 100]) -def test_online_read(repo_config, n_samples): - """Test DynamoDBOnlineStore online_read method.""" - _create_test_table(PROJECT, f"{TABLE_NAME}_{n_samples}", REGION) - data = _create_n_customer_test_samples(n=n_samples) - _insert_data_test_table(data, PROJECT, f"{TABLE_NAME}_{n_samples}", REGION) - - entity_keys, features = zip(*data) - dynamodb_store = DynamoDBOnlineStore() - returned_items = dynamodb_store.online_read( - config=repo_config, - table=MockFeatureView(name=f"{TABLE_NAME}_{n_samples}"), - entity_keys=entity_keys, - ) - assert len(returned_items) == len(data) - assert [item[1] for item in returned_items] == list(features) From d50151aeb6e4d455b59ae59f71444666e0920501 Mon Sep 17 00:00:00 2001 From: David Y Liu Date: Wed, 30 Mar 2022 21:28:51 -0700 Subject: [PATCH 3/5] lint corrections Signed-off-by: David Y Liu --- sdk/python/feast/go_server.py | 2 +- sdk/python/feast/infra/provider.py | 11 +++++++---- sdk/python/feast/transformation_server.py | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/sdk/python/feast/go_server.py b/sdk/python/feast/go_server.py index 1fcbab61f0..38a350fd6d 100644 --- a/sdk/python/feast/go_server.py +++ b/sdk/python/feast/go_server.py @@ -25,10 +25,10 @@ from subprocess import Popen from typing import Any, Dict, List, Optional, Union -import grpc from tenacity import retry, stop_after_attempt, stop_after_delay, wait_exponential import feast +import grpc from feast.errors import FeatureNameCollisionError, InvalidFeaturesParameterType from feast.feature_service import FeatureService from feast.flags_helper import is_test diff --git a/sdk/python/feast/infra/provider.py b/sdk/python/feast/infra/provider.py index f4bb3f6b28..8e0f8c7803 100644 --- a/sdk/python/feast/infra/provider.py +++ b/sdk/python/feast/infra/provider.py @@ -290,10 +290,13 @@ def _get_column_names( # We need to exclude join keys and timestamp columns from the list of features, after they are mapped to # their final column names via the `field_mapping` field of the source. - feature_names = [name for name in feature_names - if name not in join_keys - and name != event_timestamp_column - and name != created_timestamp_column] + feature_names = [ + name + for name in feature_names + if name not in join_keys + and name != event_timestamp_column + and name != created_timestamp_column + ] return ( join_keys, feature_names, diff --git a/sdk/python/feast/transformation_server.py b/sdk/python/feast/transformation_server.py index 83f4af749e..8e0efd7313 100644 --- a/sdk/python/feast/transformation_server.py +++ b/sdk/python/feast/transformation_server.py @@ -2,10 +2,10 @@ import sys from concurrent import futures -import grpc import pyarrow as pa from grpc_reflection.v1alpha import reflection +import grpc from feast.errors import OnDemandFeatureViewNotFoundException from feast.feature_store import FeatureStore from feast.protos.feast.serving.TransformationService_pb2 import ( From e49b3246e4ec473ede2266f47fbe833d6ab25f0a Mon Sep 17 00:00:00 2001 From: David Y Liu Date: Wed, 30 Mar 2022 21:34:20 -0700 Subject: [PATCH 4/5] correction Signed-off-by: David Y Liu --- .../test_dynamodb_online_store.py | 57 +++++++++++++++++++ sdk/python/tests/unit/infra/test_provider.py | 47 +++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py create mode 100644 sdk/python/tests/unit/infra/test_provider.py diff --git a/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py b/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py new file mode 100644 index 0000000000..0f42230ef5 --- /dev/null +++ b/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py @@ -0,0 +1,57 @@ +from dataclasses import dataclass + +import pytest +from moto import mock_dynamodb2 + +from feast.infra.offline_stores.file import FileOfflineStoreConfig +from feast.infra.online_stores.dynamodb import ( + DynamoDBOnlineStore, + DynamoDBOnlineStoreConfig, +) +from feast.repo_config import RepoConfig +from tests.utils.online_store_utils import ( + _create_n_customer_test_samples, + _create_test_table, + _insert_data_test_table, +) + +REGISTRY = "s3://test_registry/registry.db" +PROJECT = "test_aws" +PROVIDER = "aws" +TABLE_NAME = "dynamodb_online_store" +REGION = "us-west-2" + + +@dataclass +class MockFeatureView: + name: str + + +@pytest.fixture +def repo_config(): + return RepoConfig( + registry=REGISTRY, + project=PROJECT, + provider=PROVIDER, + online_store=DynamoDBOnlineStoreConfig(region=REGION), + offline_store=FileOfflineStoreConfig(), + ) + + +@mock_dynamodb2 +@pytest.mark.parametrize("n_samples", [5, 50, 100]) +def test_online_read(repo_config, n_samples): + """Test DynamoDBOnlineStore online_read method.""" + _create_test_table(PROJECT, f"{TABLE_NAME}_{n_samples}", REGION) + data = _create_n_customer_test_samples(n=n_samples) + _insert_data_test_table(data, PROJECT, f"{TABLE_NAME}_{n_samples}", REGION) + + entity_keys, features = zip(*data) + dynamodb_store = DynamoDBOnlineStore() + returned_items = dynamodb_store.online_read( + config=repo_config, + table=MockFeatureView(name=f"{TABLE_NAME}_{n_samples}"), + entity_keys=entity_keys, + ) + assert len(returned_items) == len(data) + assert [item[1] for item in returned_items] == list(features) diff --git a/sdk/python/tests/unit/infra/test_provider.py b/sdk/python/tests/unit/infra/test_provider.py new file mode 100644 index 0000000000..08337ea74e --- /dev/null +++ b/sdk/python/tests/unit/infra/test_provider.py @@ -0,0 +1,47 @@ +# Copyright 2020 The Feast Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from datetime import timedelta + +from feast import BigQuerySource +from feast.entity import Entity +from feast.feature import Feature +from feast.feature_view import FeatureView +from feast.infra.provider import _get_column_names +from feast.value_type import ValueType + + +def test_get_column_names_preserves_feature_ordering(): + entity = Entity("my-entity", description="My entity", value_type=ValueType.STRING) + fv = FeatureView( + name="my-fv", + entities=["my-entity"], + ttl=timedelta(days=1), + batch_source=BigQuerySource(table="non-existent-mock"), + features=[ + Feature(name="a", dtype=ValueType.STRING), + Feature(name="b", dtype=ValueType.STRING), + Feature(name="c", dtype=ValueType.STRING), + Feature(name="d", dtype=ValueType.STRING), + Feature(name="e", dtype=ValueType.STRING), + Feature(name="f", dtype=ValueType.STRING), + Feature(name="g", dtype=ValueType.STRING), + Feature(name="h", dtype=ValueType.STRING), + Feature(name="i", dtype=ValueType.STRING), + Feature(name="j", dtype=ValueType.STRING), + ], + ) + + _, feature_list, _, _ = _get_column_names(fv, [entity]) + assert feature_list == ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"] From 363113d006f83953a0b8a22d7e458699998311e3 Mon Sep 17 00:00:00 2001 From: David Y Liu Date: Wed, 30 Mar 2022 21:49:30 -0700 Subject: [PATCH 5/5] my 'make lint' seems to work differently than the ones in the PR tests. Reverting lint changes Signed-off-by: David Y Liu --- sdk/python/feast/go_server.py | 2 +- sdk/python/feast/transformation_server.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/python/feast/go_server.py b/sdk/python/feast/go_server.py index 38a350fd6d..1fcbab61f0 100644 --- a/sdk/python/feast/go_server.py +++ b/sdk/python/feast/go_server.py @@ -25,10 +25,10 @@ from subprocess import Popen from typing import Any, Dict, List, Optional, Union +import grpc from tenacity import retry, stop_after_attempt, stop_after_delay, wait_exponential import feast -import grpc from feast.errors import FeatureNameCollisionError, InvalidFeaturesParameterType from feast.feature_service import FeatureService from feast.flags_helper import is_test diff --git a/sdk/python/feast/transformation_server.py b/sdk/python/feast/transformation_server.py index 8e0efd7313..83f4af749e 100644 --- a/sdk/python/feast/transformation_server.py +++ b/sdk/python/feast/transformation_server.py @@ -2,10 +2,10 @@ import sys from concurrent import futures +import grpc import pyarrow as pa from grpc_reflection.v1alpha import reflection -import grpc from feast.errors import OnDemandFeatureViewNotFoundException from feast.feature_store import FeatureStore from feast.protos.feast.serving.TransformationService_pb2 import (