Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Access rights and index dataset without file #2925

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
6 changes: 5 additions & 1 deletion udata/core/dataset/api_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

from .constants import (
UPDATE_FREQUENCIES, RESOURCE_FILETYPES, DEFAULT_FREQUENCY,
CHECKSUM_TYPES, DEFAULT_CHECKSUM_TYPE, DEFAULT_LICENSE, RESOURCE_TYPES
CHECKSUM_TYPES, DEFAULT_CHECKSUM_TYPE, DEFAULT_LICENSE,
RESOURCE_TYPES, ACCESS_RIGHTS, DEFAULT_ACCESS_RIGHTS
)


Expand Down Expand Up @@ -250,6 +251,9 @@
'quality': fields.Raw(description='The dataset quality', readonly=True),
'last_update': fields.ISODateTime(
description='The resources last modification date', required=True),
'access_right': fields.String(
description='The dataset access right', required=True,
enum=list(ACCESS_RIGHTS), default=DEFAULT_ACCESS_RIGHTS),
'schema': fields.Nested(
schema_fields, allow_null=True, description='Reference to the associated schema'),
'internal': fields.Nested(
Expand Down
2 changes: 2 additions & 0 deletions udata/core/dataset/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@

PIVOTAL_DATA = 'pivotal-data'
CLOSED_FORMATS = ('pdf', 'doc', 'docx', 'word', 'xls', 'excel', 'xlsx')
# Values accepted by the dataset `access_rights` field
ACCESS_RIGHTS = ('public', 'non-public', 'restricted')
# Access rights used when a dataset does not specify any
DEFAULT_ACCESS_RIGHTS = 'public'

# Maximum acceptable Damerau-Levenshtein distance
# used to guess license
Expand Down
5 changes: 4 additions & 1 deletion udata/core/dataset/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from udata.utils import get_by, hash_url, to_naive_datetime
from udata.uris import ValidationError, endpoint_for
from udata.uris import validate as validate_url
from .constants import CHECKSUM_TYPES, CLOSED_FORMATS, DEFAULT_LICENSE, LEGACY_FREQUENCIES, MAX_DISTANCE, PIVOTAL_DATA, RESOURCE_FILETYPES, RESOURCE_TYPES, SCHEMA_CACHE_DURATION, UPDATE_FREQUENCIES
from .constants import CHECKSUM_TYPES, CLOSED_FORMATS, DEFAULT_LICENSE, LEGACY_FREQUENCIES, MAX_DISTANCE, PIVOTAL_DATA, RESOURCE_FILETYPES, RESOURCE_TYPES, SCHEMA_CACHE_DURATION, UPDATE_FREQUENCIES, ACCESS_RIGHTS, DEFAULT_ACCESS_RIGHTS

from .preview import get_preview_url
from .exceptions import (
Expand All @@ -34,6 +34,7 @@

NON_ASSIGNABLE_SCHEMA_TYPES = ['datapackage']


log = logging.getLogger(__name__)


Expand Down Expand Up @@ -491,6 +492,8 @@ class Dataset(WithMetrics, BadgeMixin, db.Owned, db.Document):
deleted = db.DateTimeField()
archived = db.DateTimeField()

access_rights = db.StringField(choices=list(ACCESS_RIGHTS), default=DEFAULT_ACCESS_RIGHTS, required=True)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`access_rights` is defined at the Dataset level; might that not match actual usage and DCAT?


def __str__(self):
return self.title or ''

Expand Down
2 changes: 2 additions & 0 deletions udata/core/dataset/rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ def dataset_to_rdf(dataset, graph=None):
d.set(DCT.description, Literal(dataset.description))
d.set(DCT.issued, Literal(dataset.created_at))
d.set(DCT.modified, Literal(dataset.last_modified))
d.set(DCT.accessRights, Literal(dataset.access_rights))

if dataset.acronym:
d.set(SKOS.altLabel, Literal(dataset.acronym))
Expand Down Expand Up @@ -610,6 +611,7 @@ def dataset_from_rdf(graph: Graph, dataset=None, node=None):
default_license = dataset.license or License.default()
dataset_license = rdf_value(d, DCT.license)
dataset.license = License.guess(dataset_license, *licenses, default=default_license)
dataset.access_rights = rdf_value(d, DCT.accessRights)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should map to and from the European vocabulary: http://publications.europa.eu/resource/authority/access-right.


identifier = rdf_value(d, DCT.identifier)
uri = d.identifier.toPython() if isinstance(d.identifier, URIRef) else None
Expand Down
5 changes: 4 additions & 1 deletion udata/core/dataset/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,11 @@ class DatasetSearch(ModelSearchAdapter):

@classmethod
def is_indexable(cls, dataset):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't need to modify this anymore, see #2997

valid_access_rights = (dataset.access_rights == 'public'
or dataset.access_rights == 'non-public'
or dataset.access_rights == 'restricted')
return (dataset.deleted is None and dataset.archived is None and
not dataset.private)
not dataset.private and valid_access_rights)

@classmethod
def mongo_search(cls, args):
Expand Down
9 changes: 9 additions & 0 deletions udata/tests/search/test_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,3 +183,12 @@ def test_index_model_from_datetime(self, mock_req):
}
url = f"{current_app.config['SEARCH_SERVICE_API_URL']}/datasets/index"
mock_req.assert_called_with(url, json=expected_value)

def test_dataset_adapter(self):
    '''A dataset is indexable whichever supported access rights it has.'''
    # No explicit id is passed: the previous version gave the same
    # hard-coded id to all three factories, so the datasets were not
    # distinct documents. `is_indexable` does not depend on the id.
    for access_rights in ('public', 'non-public', 'restricted'):
        dataset = DatasetFactory(access_rights=access_rights)
        # The message identifies the failing value when the loop breaks.
        assert DatasetSearch.is_indexable(dataset), access_rights