From 743a84060b4ef5063fbbac7f4605f8f2b12a95e0 Mon Sep 17 00:00:00 2001 From: quaxsze Date: Wed, 15 Nov 2023 19:01:38 +0100 Subject: [PATCH 1/7] Access rights and index dataset without file --- udata/core/dataset/models.py | 3 +++ udata/core/dataset/search.py | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/udata/core/dataset/models.py b/udata/core/dataset/models.py index 990dfd005..1d59650c9 100644 --- a/udata/core/dataset/models.py +++ b/udata/core/dataset/models.py @@ -106,6 +106,7 @@ PIVOTAL_DATA = 'pivotal-data' CLOSED_FORMATS = ('pdf', 'doc', 'docx', 'word', 'xls', 'excel', 'xlsx') +ACCESS_RIGHTS = ('public', 'openable', 'restricted') # Maximum acceptable Damerau-Levenshtein distance # used to guess license @@ -484,6 +485,8 @@ class Dataset(WithMetrics, BadgeMixin, db.Owned, db.Document): deleted = db.DateTimeField() archived = db.DateTimeField() + access_rights = db.StringField(choices=list(ACCESS_RIGHTS), default='public', required=True) + def __str__(self): return self.title or '' diff --git a/udata/core/dataset/search.py b/udata/core/dataset/search.py index d0c9c309f..0461c6e93 100644 --- a/udata/core/dataset/search.py +++ b/udata/core/dataset/search.py @@ -46,9 +46,11 @@ class DatasetSearch(ModelSearchAdapter): @classmethod def is_indexable(cls, dataset): + valid_access_rights = ((dataset.access_rights == 'public' and len(dataset.resources) > 0) + or dataset.access_rights == 'openable' + or dataset.access_rights == 'restricted') return (dataset.deleted is None and dataset.archived is None and - len(dataset.resources) > 0 and - not dataset.private) + not dataset.private and valid_access_rights) @classmethod def mongo_search(cls, args): From 40a21f6486d641ffe7bf3651a56a6442bcd954b4 Mon Sep 17 00:00:00 2001 From: quaxsze Date: Thu, 16 Nov 2023 19:38:55 +0100 Subject: [PATCH 2/7] add harvest --- udata/core/dataset/api_fields.py | 6 +++++- udata/core/dataset/models.py | 3 ++- udata/core/dataset/rdf.py | 2 ++ 3 files changed, 9 
insertions(+), 2 deletions(-) diff --git a/udata/core/dataset/api_fields.py b/udata/core/dataset/api_fields.py index c26dce5c7..be2474dc8 100644 --- a/udata/core/dataset/api_fields.py +++ b/udata/core/dataset/api_fields.py @@ -7,7 +7,8 @@ from .models import ( UPDATE_FREQUENCIES, RESOURCE_FILETYPES, DEFAULT_FREQUENCY, - CHECKSUM_TYPES, DEFAULT_CHECKSUM_TYPE, DEFAULT_LICENSE, RESOURCE_TYPES + CHECKSUM_TYPES, DEFAULT_CHECKSUM_TYPE, DEFAULT_LICENSE, + RESOURCE_TYPES, ACCESS_RIGHTS, DEFAULT_ACCESS_RIGHTS ) @@ -244,6 +245,9 @@ 'quality': fields.Raw(description='The dataset quality', readonly=True), 'last_update': fields.ISODateTime( description='The resources last modification date', required=True), + 'access_rights': fields.String( + description='The dataset access right', required=True, + enum=list(ACCESS_RIGHTS), default=DEFAULT_ACCESS_RIGHTS), 'internal': fields.Nested( dataset_internal_fields, readonly=True, description='Site internal and specific object\'s data'), }, mask=DEFAULT_MASK) diff --git a/udata/core/dataset/models.py b/udata/core/dataset/models.py index 1d59650c9..ae00edf49 100644 --- a/udata/core/dataset/models.py +++ b/udata/core/dataset/models.py @@ -107,6 +107,7 @@ PIVOTAL_DATA = 'pivotal-data' CLOSED_FORMATS = ('pdf', 'doc', 'docx', 'word', 'xls', 'excel', 'xlsx') ACCESS_RIGHTS = ('public', 'openable', 'restricted') +DEFAULT_ACCESS_RIGHTS = 'public' # Maximum acceptable Damerau-Levenshtein distance # used to guess license @@ -485,7 +486,7 @@ class Dataset(WithMetrics, BadgeMixin, db.Owned, db.Document): deleted = db.DateTimeField() archived = db.DateTimeField() - access_rights = db.StringField(choices=list(ACCESS_RIGHTS), default='public', required=True) + access_rights = db.StringField(choices=list(ACCESS_RIGHTS), default=DEFAULT_ACCESS_RIGHTS, required=True) def __str__(self): return self.title or '' diff --git a/udata/core/dataset/rdf.py b/udata/core/dataset/rdf.py index 87016bfc8..a5eb1ddf4 100644 --- a/udata/core/dataset/rdf.py +++
b/udata/core/dataset/rdf.py @@ -194,6 +194,7 @@ def dataset_to_rdf(dataset, graph=None): d.set(DCT.description, Literal(dataset.description)) d.set(DCT.issued, Literal(dataset.created_at)) d.set(DCT.modified, Literal(dataset.last_modified)) + d.set(DCT.accessRights, Literal(dataset.access_rights)) if dataset.acronym: d.set(SKOS.altLabel, Literal(dataset.acronym)) @@ -488,6 +489,7 @@ def dataset_from_rdf(graph, dataset=None, node=None): default_license = dataset.license or License.default() dataset_license = rdf_value(d, DCT.license) dataset.license = License.guess(dataset_license, *licenses, default=default_license) + dataset.access_rights = rdf_value(d, DCT.accessRights) identifier = rdf_value(d, DCT.identifier) uri = d.identifier.toPython() if isinstance(d.identifier, URIRef) else None From 442b72a2c0055e6214a446ae9092e03866d31269 Mon Sep 17 00:00:00 2001 From: quaxsze Date: Thu, 16 Nov 2023 23:08:40 +0100 Subject: [PATCH 3/7] add test --- udata/tests/search/test_adapter.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/udata/tests/search/test_adapter.py b/udata/tests/search/test_adapter.py index 8111201fe..c1b31c4e7 100644 --- a/udata/tests/search/test_adapter.py +++ b/udata/tests/search/test_adapter.py @@ -181,3 +181,14 @@ def test_index_model_from_datetime(self, mock_req): } url = f"{current_app.config['SEARCH_SERVICE_API_URL']}/datasets/index" mock_req.assert_called_with(url, json=expected_value) + + def test_dataset_adapter(self): + public_visible_dataset = VisibleDatasetFactory(id='61fd30cb29ea95c7bc0e1211', access_rights='public') + public_non_visible_dataset = DatasetFactory(id='61fd30cb29ea95c7bc0e1211', access_rights='public') + openable_dataset = DatasetFactory(id='61fd30cb29ea95c7bc0e1211', access_rights='openable') + restricted_dataset = DatasetFactory(id='61fd30cb29ea95c7bc0e1211', access_rights='restricted') + + assert DatasetSearch.is_indexable(public_visible_dataset) + assert not 
DatasetSearch.is_indexable(public_non_visible_dataset) + assert DatasetSearch.is_indexable(openable_dataset) + assert DatasetSearch.is_indexable(restricted_dataset) \ No newline at end of file From 2d507d7262a9b8d33cdf6d2ca3d8ce06b39af177 Mon Sep 17 00:00:00 2001 From: quaxsze Date: Thu, 16 Nov 2023 23:09:08 +0100 Subject: [PATCH 4/7] add test --- udata/tests/search/test_adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/udata/tests/search/test_adapter.py b/udata/tests/search/test_adapter.py index c1b31c4e7..a46198970 100644 --- a/udata/tests/search/test_adapter.py +++ b/udata/tests/search/test_adapter.py @@ -191,4 +191,4 @@ def test_dataset_adapter(self): assert DatasetSearch.is_indexable(public_visible_dataset) assert not DatasetSearch.is_indexable(public_non_visible_dataset) assert DatasetSearch.is_indexable(openable_dataset) - assert DatasetSearch.is_indexable(restricted_dataset) \ No newline at end of file + assert DatasetSearch.is_indexable(restricted_dataset) From c615d4f3425f9560f094e71ee76aefe2b9b57fed Mon Sep 17 00:00:00 2001 From: quaxsze Date: Tue, 5 Dec 2023 09:41:20 -0300 Subject: [PATCH 5/7] change wording --- udata/core/dataset/models.py | 2 +- udata/core/dataset/search.py | 2 +- udata/tests/search/test_adapter.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/udata/core/dataset/models.py b/udata/core/dataset/models.py index 4e1e4613e..d27ee4b67 100644 --- a/udata/core/dataset/models.py +++ b/udata/core/dataset/models.py @@ -106,7 +106,7 @@ PIVOTAL_DATA = 'pivotal-data' CLOSED_FORMATS = ('pdf', 'doc', 'docx', 'word', 'xls', 'excel', 'xlsx') -ACCESS_RIGHTS = ('public', 'openable', 'restricted') +ACCESS_RIGHTS = ('public', 'non-public', 'restricted') DEFAULT_ACCESS_RIGHTS = 'public' # Maximum acceptable Damerau-Levenshtein distance diff --git a/udata/core/dataset/search.py b/udata/core/dataset/search.py index 81348c868..860ea315f 100644 --- a/udata/core/dataset/search.py +++ 
b/udata/core/dataset/search.py @@ -47,7 +47,7 @@ class DatasetSearch(ModelSearchAdapter): @classmethod def is_indexable(cls, dataset): valid_access_rights = ((dataset.access_rights == 'public' and len(dataset.resources) > 0) - or dataset.access_rights == 'openable' + or dataset.access_rights == 'non-public' or dataset.access_rights == 'restricted') return (dataset.deleted is None and dataset.archived is None and not dataset.private and valid_access_rights) diff --git a/udata/tests/search/test_adapter.py b/udata/tests/search/test_adapter.py index a46198970..ade67cecf 100644 --- a/udata/tests/search/test_adapter.py +++ b/udata/tests/search/test_adapter.py @@ -185,10 +185,10 @@ def test_index_model_from_datetime(self, mock_req): def test_dataset_adapter(self): public_visible_dataset = VisibleDatasetFactory(id='61fd30cb29ea95c7bc0e1211', access_rights='public') public_non_visible_dataset = DatasetFactory(id='61fd30cb29ea95c7bc0e1211', access_rights='public') - openable_dataset = DatasetFactory(id='61fd30cb29ea95c7bc0e1211', access_rights='openable') + non_public_dataset = DatasetFactory(id='61fd30cb29ea95c7bc0e1211', access_rights='non-public') restricted_dataset = DatasetFactory(id='61fd30cb29ea95c7bc0e1211', access_rights='restricted') assert DatasetSearch.is_indexable(public_visible_dataset) assert not DatasetSearch.is_indexable(public_non_visible_dataset) - assert DatasetSearch.is_indexable(openable_dataset) + assert DatasetSearch.is_indexable(non_public_dataset) assert DatasetSearch.is_indexable(restricted_dataset) From 2cbcdc5a93f94d98624aca56f7f6a1da21cd0ebf Mon Sep 17 00:00:00 2001 From: quaxsze Date: Wed, 3 Apr 2024 14:51:29 -0300 Subject: [PATCH 6/7] fix test --- udata/tests/search/test_adapter.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/udata/tests/search/test_adapter.py b/udata/tests/search/test_adapter.py index 283274ece..a69c8f9bd 100644 --- a/udata/tests/search/test_adapter.py +++ b/udata/tests/search/test_adapter.py @@ 
-185,12 +185,10 @@ def test_index_model_from_datetime(self, mock_req): mock_req.assert_called_with(url, json=expected_value) def test_dataset_adapter(self): - public_visible_dataset = VisibleDatasetFactory(id='61fd30cb29ea95c7bc0e1211', access_rights='public') - public_non_visible_dataset = DatasetFactory(id='61fd30cb29ea95c7bc0e1211', access_rights='public') + public_dataset = DatasetFactory(id='61fd30cb29ea95c7bc0e1211', access_rights='public') non_public_dataset = DatasetFactory(id='61fd30cb29ea95c7bc0e1211', access_rights='non-public') restricted_dataset = DatasetFactory(id='61fd30cb29ea95c7bc0e1211', access_rights='restricted') - assert DatasetSearch.is_indexable(public_visible_dataset) - assert not DatasetSearch.is_indexable(public_non_visible_dataset) + assert DatasetSearch.is_indexable(public_dataset) assert DatasetSearch.is_indexable(non_public_dataset) assert DatasetSearch.is_indexable(restricted_dataset) From 41d2e8b81aef1920b9459bb1630f14357b526db4 Mon Sep 17 00:00:00 2001 From: quaxsze Date: Tue, 9 Apr 2024 11:30:36 -0300 Subject: [PATCH 7/7] fix import --- udata/core/dataset/constants.py | 2 ++ udata/core/dataset/models.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/udata/core/dataset/constants.py b/udata/core/dataset/constants.py index 2796e9e00..7a9265bba 100644 --- a/udata/core/dataset/constants.py +++ b/udata/core/dataset/constants.py @@ -70,6 +70,8 @@ PIVOTAL_DATA = 'pivotal-data' CLOSED_FORMATS = ('pdf', 'doc', 'docx', 'word', 'xls', 'excel', 'xlsx') +ACCESS_RIGHTS = ('public', 'non-public', 'restricted') +DEFAULT_ACCESS_RIGHTS = 'public' # Maximum acceptable Damerau-Levenshtein distance # used to guess license diff --git a/udata/core/dataset/models.py b/udata/core/dataset/models.py index 5949a6c6d..d652e3b0d 100644 --- a/udata/core/dataset/models.py +++ b/udata/core/dataset/models.py @@ -23,7 +23,7 @@ from udata.utils import get_by, hash_url, to_naive_datetime from udata.uris import ValidationError, endpoint_for from 
udata.uris import validate as validate_url -from .constants import CHECKSUM_TYPES, CLOSED_FORMATS, DEFAULT_LICENSE, LEGACY_FREQUENCIES, MAX_DISTANCE, PIVOTAL_DATA, RESOURCE_FILETYPES, RESOURCE_TYPES, SCHEMA_CACHE_DURATION, UPDATE_FREQUENCIES +from .constants import CHECKSUM_TYPES, CLOSED_FORMATS, DEFAULT_LICENSE, LEGACY_FREQUENCIES, MAX_DISTANCE, PIVOTAL_DATA, RESOURCE_FILETYPES, RESOURCE_TYPES, SCHEMA_CACHE_DURATION, UPDATE_FREQUENCIES, ACCESS_RIGHTS, DEFAULT_ACCESS_RIGHTS from .preview import get_preview_url from .exceptions import (