Skip to content

Commit

Permalink
fix: Forcing ODFV udfs to be __main__ module and fixing false positiv…
Browse files Browse the repository at this point in the history
…e duplicate data source warning (#2677)

* fix: Forcing ODFV udfs to be __main__ module so clients don't need the udf's module. Fixing duplicate data source conflict between push source and batch source

Signed-off-by: Danny Chiao <[email protected]>

* fix

Signed-off-by: Danny Chiao <[email protected]>

* fix

Signed-off-by: Danny Chiao <[email protected]>
  • Loading branch information
adchia authored May 12, 2022
1 parent 01f9f0a commit 2ce33cd
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 1 deletion.
7 changes: 7 additions & 0 deletions sdk/python/feast/on_demand_feature_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,14 @@ def on_demand_feature_view(
if not _sources:
raise ValueError("The `sources` parameter must be specified.")

def mainify(obj):
# Needed to allow dill to properly serialize the udf. Otherwise, clients will need to have a file with the same
# name as the original file defining the ODFV.
if obj.__module__ != "__main__":
obj.__module__ = "__main__"

def decorator(user_function):
mainify(user_function)
on_demand_feature_view_obj = OnDemandFeatureView(
name=user_function.__name__,
sources=_sources,
Expand Down
7 changes: 6 additions & 1 deletion sdk/python/feast/repo_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ def parse_repo(repo_root: Path) -> RepoContents:
request_feature_views=[],
)

data_sources_set = set()
for repo_file in get_repo_files(repo_root):
module_path = py_path_to_module(repo_file)
module = importlib.import_module(module_path)
Expand All @@ -119,14 +120,18 @@ def parse_repo(repo_root: Path) -> RepoContents:
(obj is ds) for ds in res.data_sources
):
res.data_sources.append(obj)
data_sources_set.add(obj)
if isinstance(obj, FeatureView) and not any(
(obj is fv) for fv in res.feature_views
):
res.feature_views.append(obj)
if isinstance(obj.stream_source, PushSource) and not any(
(obj is ds) for ds in res.data_sources
):
res.data_sources.append(obj.stream_source.batch_source)
push_source_dep = obj.stream_source.batch_source
# Don't add if the push source's batch source is a duplicate of an existing batch source
if push_source_dep not in data_sources_set:
res.data_sources.append(push_source_dep)
elif isinstance(obj, Entity) and not any(
(obj is entity) for entity in res.entities
):
Expand Down

0 comments on commit 2ce33cd

Please sign in to comment.