From 25cce676056dede4f7a5918c704bc608918991be Mon Sep 17 00:00:00 2001 From: brimoor Date: Sat, 8 Apr 2023 22:37:13 -0400 Subject: [PATCH 1/3] revalidating view stages upon reload --- fiftyone/core/clips.py | 7 +++++-- fiftyone/core/patches.py | 7 +++++-- fiftyone/core/video.py | 7 +++++-- fiftyone/core/view.py | 6 +++++- 4 files changed, 20 insertions(+), 7 deletions(-) diff --git a/fiftyone/core/clips.py b/fiftyone/core/clips.py index 29e3d7691e..859be8d537 100644 --- a/fiftyone/core/clips.py +++ b/fiftyone/core/clips.py @@ -284,7 +284,7 @@ def keep_fields(self): super().keep_fields() def reload(self): - """Reloads this view from the source collection in the database. + """Reloads the view. Note that :class:`ClipView` instances are not singletons, so any in-memory clips extracted from this view will not be updated by calling @@ -298,11 +298,14 @@ def reload(self): # This assumes that calling `load_view()` when the current clips # dataset has been deleted will cause a new one to be generated # - self._clips_dataset.delete() _view = self._clips_stage.load_view(self._source_collection) self._clips_dataset = _view._clips_dataset + view = self._base_view + for stage in self._stages: + view = view.add_stage(stage) + def _sync_source_sample(self, sample): if not self._classification_field: return diff --git a/fiftyone/core/patches.py b/fiftyone/core/patches.py index ccb510d7c6..db5ab40996 100644 --- a/fiftyone/core/patches.py +++ b/fiftyone/core/patches.py @@ -297,7 +297,7 @@ def keep_fields(self): super().keep_fields() def reload(self): - """Reloads this view from the source collection in the database. + """Reloads the view. Note that :class:`PatchView` instances are not singletons, so any in-memory patches extracted from this view will not be updated by @@ -311,11 +311,14 @@ def reload(self): # This assumes that calling `load_view()` when the current patches # dataset has been deleted will cause a new one to be generated # - self._patches_dataset.delete() _view = self._patches_stage.load_view(self._source_collection) self._patches_dataset = _view._patches_dataset + view = self._base_view + for stage in self._stages: + view = view.add_stage(stage) + def _sync_source_sample(self, sample): for field in self._label_fields: self._sync_source_sample_field(sample, field) diff --git a/fiftyone/core/video.py b/fiftyone/core/video.py index 142d2b2b30..d57fb56e35 100644 --- a/fiftyone/core/video.py +++ b/fiftyone/core/video.py @@ -258,7 +258,7 @@ def keep_fields(self): super().keep_fields() def reload(self): - """Reloads this view from the source collection in the database. + """Reloads the view. Note that :class:`FrameView` instances are not singletons, so any in-memory frames extracted from this view will not be updated by @@ -272,11 +272,14 @@ def reload(self): # This assumes that calling `load_view()` when the current patches # dataset has been deleted will cause a new one to be generated # - self._frames_dataset.delete() _view = self._frames_stage.load_view(self._source_collection) self._frames_dataset = _view._frames_dataset + view = self._base_view + for stage in self._stages: + view = view.add_stage(stage) + def _set_labels(self, field_name, sample_ids, label_docs): super()._set_labels(field_name, sample_ids, label_docs) diff --git a/fiftyone/core/view.py b/fiftyone/core/view.py index 7ff6ccbd7f..918b8591f7 100644 --- a/fiftyone/core/view.py +++ b/fiftyone/core/view.py @@ -1100,7 +1100,7 @@ def clone(self, name=None, persistent=False): ) def reload(self): - """Reloads the underlying dataset from the database. + """Reloads the view. Note that :class:`fiftyone.core.sample.SampleView` instances are not singletons, so any in-memory samples extracted from this view will not @@ -1108,6 +1108,10 @@ def reload(self): """ self._dataset.reload() + view = self._base_view + for stage in self._stages: + view = view.add_stage(stage) + def to_dict( self, rel_dir=None, From 0764c54f5ac4a28743f1b065ec3c21e0005fbf1e Mon Sep 17 00:00:00 2001 From: brimoor Date: Sat, 8 Apr 2023 22:37:38 -0400 Subject: [PATCH 2/3] adding more reload() unit tests --- tests/unittests/similarity_tests.py | 46 ++++++++++++++++++++++------- tests/unittests/view_tests.py | 29 ++++++++++++++++++ 2 files changed, 65 insertions(+), 10 deletions(-) diff --git a/tests/unittests/similarity_tests.py b/tests/unittests/similarity_tests.py index 458355e1a9..d43ece9758 100644 --- a/tests/unittests/similarity_tests.py +++ b/tests/unittests/similarity_tests.py @@ -99,19 +99,32 @@ def test_image_similarity(self): query_id = dataset.first().id view1 = dataset.sort_by_similarity(query_id) + values1 = view1.values("id") + view2 = dataset.sort_by_similarity(query_id, reverse=True) + values2 = view2.values("id") - self.assertEqual( - view1.values("id"), list(reversed(view2.values("id"))) - ) + self.assertListEqual(values2, values1[::-1]) view3 = dataset.sort_by_similarity(query_id, k=4) + values3 = view3.values("id") - self.assertEqual(len(view3), 4) + self.assertListEqual(values3, values1[:4]) view4 = dataset.sort_by_similarity(query_id, brain_key="img_sim") + values4 = view4.values("id") + + self.assertListEqual(values4, values1) + + view5 = view4.limit(2) + values5 = view5.values("id") + + self.assertListEqual(values5, values1[:2]) + + view5.reload() + values5 = view5.values("id") - self.assertEqual(view1.values("id"), view4.values("id")) + self.assertListEqual(values5, values1[:2]) @drop_datasets def test_object_similarity(self): @@ -163,19 +176,32 @@ def test_object_similarity(self): patches = dataset.to_patches("ground_truth") view1 = patches.sort_by_similarity(query_id) + values1 = view1.values("id") + view2 = patches.sort_by_similarity(query_id, reverse=True) + values2 = view2.values("id") - self.assertEqual( - view1.values("id"), list(reversed(view2.values("id"))) - ) + self.assertListEqual(values2, values1[::-1]) view3 = patches.sort_by_similarity(query_id, k=4) + values3 = view3.values("id") - self.assertEqual(len(view3), 4) + self.assertListEqual(values3, values1[:4]) view4 = patches.sort_by_similarity(query_id, brain_key="obj_sim") + values4 = view4.values("id") + + self.assertEqual(values4, values1) + + view5 = view4.limit(2) + values5 = view5.values("id") + + self.assertListEqual(values5, values1[:2]) + + view5.reload() + values5 = view5.values("id") - self.assertEqual(view1.values("id"), view4.values("id")) + self.assertListEqual(values5, values1[:2]) if __name__ == "__main__": diff --git a/tests/unittests/view_tests.py b/tests/unittests/view_tests.py index 6cdf59f874..62b73ece51 100644 --- a/tests/unittests/view_tests.py +++ b/tests/unittests/view_tests.py @@ -281,6 +281,35 @@ def test_view_name_readonly(self): with self.assertRaises(AttributeError): view.name = "new_name" + @drop_datasets + def test_reload(self): + dataset = fo.Dataset() + dataset.add_samples( + [ + fo.Sample(filepath="image1.jpg", foo="bar"), + fo.Sample(filepath="image2.jpg", spam="eggs"), + fo.Sample(filepath="image3.jpg"), + fo.Sample(filepath="image4.jpg"), + fo.Sample(filepath="image5.jpg"), + ] + ) + + view = dataset.take(3).sort_by("filepath").select_fields("foo") + sample_ids = view.values("id") + + # Reloading should not cause dataset-independent view stage parameters + # like Take's internal random seed to be changed + view.reload() + same_sample_ids = view.values("id") + + self.assertListEqual(sample_ids, same_sample_ids) + + dataset.delete_sample_field("foo") + + # Field `foo` no longer exists, so validation should fail on reload + with self.assertRaises(ValueError): + view.reload() + class ViewFieldTests(unittest.TestCase): @skip_windows # TODO: don't skip on Windows From 80785a4f1879c1200f3e9c74b02d0e56e6f0074c Mon Sep 17 00:00:00 2001 From: brimoor Date: Sat, 8 Apr 2023 22:46:03 -0400 Subject: [PATCH 3/3] linting --- fiftyone/core/clips.py | 4 ++-- fiftyone/core/patches.py | 4 ++-- fiftyone/core/video.py | 4 ++-- fiftyone/core/view.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fiftyone/core/clips.py b/fiftyone/core/clips.py index 859be8d537..eb8a89d6df 100644 --- a/fiftyone/core/clips.py +++ b/fiftyone/core/clips.py @@ -302,9 +302,9 @@ def reload(self): _view = self._clips_stage.load_view(self._source_collection) self._clips_dataset = _view._clips_dataset - view = self._base_view + _view = self._base_view for stage in self._stages: - view = view.add_stage(stage) + _view = _view.add_stage(stage) def _sync_source_sample(self, sample): if not self._classification_field: diff --git a/fiftyone/core/patches.py b/fiftyone/core/patches.py index db5ab40996..8736a23646 100644 --- a/fiftyone/core/patches.py +++ b/fiftyone/core/patches.py @@ -315,9 +315,9 @@ def reload(self): _view = self._patches_stage.load_view(self._source_collection) self._patches_dataset = _view._patches_dataset - view = self._base_view + _view = self._base_view for stage in self._stages: - view = view.add_stage(stage) + _view = _view.add_stage(stage) def _sync_source_sample(self, sample): for field in self._label_fields: diff --git a/fiftyone/core/video.py b/fiftyone/core/video.py index d57fb56e35..29d199fa35 100644 --- a/fiftyone/core/video.py +++ b/fiftyone/core/video.py @@ -276,9 +276,9 @@ def reload(self): _view = self._frames_stage.load_view(self._source_collection) self._frames_dataset = _view._frames_dataset - view = self._base_view + _view = self._base_view for stage in self._stages: - view = view.add_stage(stage) + _view = _view.add_stage(stage) def _set_labels(self, field_name, sample_ids, label_docs): super()._set_labels(field_name, sample_ids, label_docs) diff --git a/fiftyone/core/view.py b/fiftyone/core/view.py index 918b8591f7..969d41c543 100644 --- a/fiftyone/core/view.py +++ b/fiftyone/core/view.py @@ -1108,9 +1108,9 @@ def reload(self): """ self._dataset.reload() - view = self._base_view + _view = self._base_view for stage in self._stages: - view = view.add_stage(stage) + _view = _view.add_stage(stage) def to_dict( self,