Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimizing group selection in server get_view() #2055

Merged
merged 1 commit into from
Sep 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions fiftyone/core/view.py
Original file line number Diff line number Diff line change
Expand Up @@ -1377,7 +1377,9 @@ def _get_group_media_types(self):
return self._dataset.group_media_types


def make_optimized_select_view(sample_collection, sample_ids, ordered=False):
def make_optimized_select_view(
sample_collection, sample_ids, ordered=False, groups=False
):
"""Returns a view that selects the provided sample IDs that is optimized
to reduce the document list as early as possible in the pipeline.

Expand All @@ -1393,6 +1395,7 @@ def make_optimized_select_view(sample_collection, sample_ids, ordered=False):
sample_ids: a sample ID or iterable of sample IDs to select
ordered (False): whether to sort the samples in the returned view to
match the order of the provided IDs
groups (False): whether the IDs are group IDs, not sample IDs

Returns:
a :class:`DatasetView`
Expand All @@ -1405,6 +1408,9 @@ def make_optimized_select_view(sample_collection, sample_ids, ordered=False):
# run the entire view's aggregation first and then select the samples
# of interest at the end
#
if groups:
return view.select_groups(sample_ids, ordered=ordered)

return view.select(sample_ids, ordered=ordered)

#
Expand All @@ -1421,7 +1427,13 @@ def make_optimized_select_view(sample_collection, sample_ids, ordered=False):
# that could affect our ability to select the samples of interest first,
# we'll need to account for that here...
#
optimized_view = view._dataset.select(sample_ids, ordered=ordered)
if groups:
optimized_view = view._dataset.select_groups(
sample_ids, ordered=ordered
)
else:
optimized_view = view._dataset.select(sample_ids, ordered=ordered)

for stage in view._stages:
if type(stage) not in fost._STAGES_THAT_SELECT_OR_REORDER:
optimized_view._stages.append(stage)
Expand Down
13 changes: 3 additions & 10 deletions fiftyone/server/view.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
| `voxel51.com <https://voxel51.com/>`_
|
"""
from bson import ObjectId

import fiftyone.core.dataset as fod
from fiftyone.core.expressions import ViewField as F, VALUE
import fiftyone.core.fields as fof
Expand All @@ -22,13 +20,6 @@
_LABEL_TAGS = "_label_tags"


def get_group(sample_collection, group_id):
id_field = sample_collection.group_field + "._id"
return sample_collection.mongo(
[{"$match": {"$expr": {"$eq": ["$" + id_field, ObjectId(group_id)]}}}]
)


def get_view(
dataset_name,
stages=None,
Expand Down Expand Up @@ -69,7 +60,9 @@ def get_view(
view.group_slice = view.default_group_slice

if sample_filter.group.id:
view = get_group(view, sample_filter.group.id)
view = fov.make_optimized_select_view(
view, sample_filter.group.id, groups=True
)

elif sample_filter.id:
view = fov.make_optimized_select_view(view, sample_filter.id)
Expand Down