Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

294 define codelist in qubeconfig #460

Merged
merged 81 commits into from
May 27, 2022
Merged
Show file tree
Hide file tree
Changes from 79 commits
Commits
Show all changes
81 commits
Select commit Hold shift + click to select a range
49eaca8
WIP json schema for codelists
Apr 26, 2022
56d51b0
Updates to the json schema
Apr 26, 2022
52d31fc
Updates to the code list schema
Apr 26, 2022
51efae4
WIP json schema for the code list
Apr 26, 2022
4f5fee2
Added themes and keywords
Apr 26, 2022
67f8a28
Developed the code list schema file
Apr 27, 2022
83336ad
Minor updates
Apr 27, 2022
ff22025
Updates to the date formats
Apr 27, 2022
6288b48
Removed duplication from the json schema
Apr 27, 2022
22ea209
Schema description changes
Apr 27, 2022
880a35e
Force updating json schema
Apr 27, 2022
ba24a20
Reuploading the full schema
Apr 27, 2022
9aa8d9f
~data set~
Apr 28, 2022
de8bee2
Only the schema is required
Apr 28, 2022
2bbcaad
Json schema sort validations
Apr 28, 2022
7f1cf61
Updates to the schema based on PR feedback
Apr 28, 2022
8c72412
Minor updates
Apr 28, 2022
d579290
Updates to code list urls
Apr 28, 2022
d16507f
Mapping codelist config dict to dataclass
May 3, 2022
6a442f8
Completed altering code for code list config
May 4, 2022
01f751d
Updates to v1.1
May 4, 2022
1aefb0d
Removed changes to v1.0
May 4, 2022
8e6aba0
Update columnschema.py
May 4, 2022
de4f562
WIP comments
May 4, 2022
b95fc23
Moved templates to the root folder
May 7, 2022
42c71a3
Updates to code list config
May 7, 2022
89223c5
Removed sort order per concept
May 7, 2022
cbd7ae3
Updates to code list config
May 7, 2022
47f8c49
Further updates to the functionality
May 9, 2022
9470476
Merge branch 'main' into 294-define-codelist-in-qubeconfig
May 9, 2022
ee8d089
Merged master
May 9, 2022
747bb65
Added unit tests
May 9, 2022
f98346a
Removed failing behave test
May 9, 2022
e6e0ab8
Updates to same as
May 9, 2022
329cf6e
WIP behave test
May 9, 2022
cd586d2
WIP behave test
May 9, 2022
c1a9802
WIP schema versioning
May 10, 2022
7564efd
Updates to schema version detection
May 10, 2022
f80bf7e
Further updates
May 10, 2022
093727e
Minor updates
May 10, 2022
d3669f7
Changed templates folder
May 10, 2022
30428dc
WIP updates to unit tests
May 10, 2022
2595aa3
Fixed all breaking tests
May 10, 2022
7fbf771
Removed redundant code
May 10, 2022
2a13442
Implemented behave test
May 10, 2022
3787b54
Code cleaning prior to review
May 10, 2022
24045f1
WIP rel paths
May 10, 2022
6644e93
Fixed failing tests
May 10, 2022
89711d5
Pyright fixes
May 10, 2022
453a9c5
Added schema urls
May 11, 2022
3f8efc2
Merge branch 'main' into 294-define-codelist-in-qubeconfig
May 11, 2022
e7f7d32
Updates to the schema urls
May 11, 2022
c67ef0c
Updates to schema version paths
May 11, 2022
f5ada50
Update cubes.py
May 11, 2022
3e169cf
PR feedback updates
May 17, 2022
a9355d9
Update catalog_metadata_reader.py
May 17, 2022
ae57488
Further PR feedback updates
May 17, 2022
5b3e0a3
Further updates based on PR feedback
May 18, 2022
dfc3a5f
Updates to enums on versions
May 18, 2022
0da5e4e
WIP sorting concepts
May 24, 2022
1cba3bc
Completed sorting for code list config
May 25, 2022
77627c9
Completed sorting feature
May 25, 2022
56ee9c0
Code commenting
May 25, 2022
17283de
Method name changes for clarity
May 25, 2022
335343c
Updated schemas to include inline code lists
May 25, 2022
f686de8
Updates
May 25, 2022
10654b7
Ability to define inline code list
May 25, 2022
730120b
Completed inline code list schema define and behave tests
May 25, 2022
fc90b58
Fixed all the broken unit tests
May 25, 2022
e3bf08e
Passing of validation errors to the build command and pyright fixes
May 25, 2022
8e2ad4f
Updates to unit tests
May 25, 2022
3c39cad
Minor updates
May 25, 2022
71ee141
Code structure updates
May 25, 2022
75014c2
Applied black formatter to code_list_config.py and improved performan…
May 26, 2022
b73c980
PR feedback updates
May 26, 2022
4100ec6
Supporting abs paths for code list config
May 26, 2022
3599a0d
WIP sort change
May 26, 2022
4c36ef6
Completed sorting
May 26, 2022
1739184
Code comment updates
May 26, 2022
d93f28f
Changed list to set for storing sort orders defined by the users
May 27, 2022
38a276c
Added unit tests for sorting
May 27, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions csvcubed/csvcubed/cli/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,13 @@
from csvcubed.cli.error_mapping import friendly_error_mapping
from csvcubed.models.cube import QbCube
from csvcubed.models.errorurl import HasErrorUrl
from csvcubed.models.validationerror import SpecificValidationError, ValidationError
from csvcubed.models.validationerror import ValidationError
from csvcubed.readers.cubeconfig.schema_versions import (
QubeConfigDeserialiser,
get_deserialiser_for_schema,
)
from csvcubed.readers.cubeconfig.utils import load_resource
from csvcubed.utils.json import serialize_sets
from csvcubed.utils.log import log_exception
from csvcubed.utils.qb.validation.cube import validate_qb_component_constraints
from csvcubed.writers.qbwriter import QbWriter

Expand Down
196 changes: 196 additions & 0 deletions csvcubed/csvcubed/models/codelistconfig/code_list_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
"""
Code List Config
----------------

Models for representing code list config.
"""

from typing import Optional, List, Tuple, Dict
GDonRanasinghe marked this conversation as resolved.
Show resolved Hide resolved
from dataclasses import dataclass, field
from pathlib import Path

from csvcubedmodels.dataclassbase import DataClassBase

from csvcubed.models.cube.qb.catalog import CatalogMetadata
from csvcubed.utils.json import load_json_document
from csvcubed.models.cube.qb.components import NewQbConcept
from csvcubed.readers.catalogmetadata.v1.catalog_metadata_reader import (
metadata_from_dict,
)

CODE_LIST_CONFIG_DEFAULT_URL = "https://purl.org/csv-cubed/code-list-config/v1"


@dataclass
class CodeListConfigSort(DataClassBase):
    """
    The `sort` object of a code list config.

    It describes how concepts that carry no explicit sort order should be
    arranged before sort orders are assigned to them.
    """

    # Concept attribute to order by; `CodeListConfig` accepts "label" or "notation".
    by: str
    # Direction of the ordering; the supported options are "ascending" and "descending".
    method: str


@dataclass
class CodeListConfigConcept(DataClassBase):
    """
    A single concept entry of a code list config.

    Only `label` and `notation` are mandatory. `children` holds nested
    concepts of the same type, which is how a hierarchy is expressed.
    """

    label: str
    notation: str
    description: Optional[str] = None
    # Explicit position among sibling concepts; None means "not set by the user".
    sort_order: Optional[int] = None
    same_as: Optional[str] = None
    # A mutable default must come from a factory so instances never share one list.
    children: List["CodeListConfigConcept"] = field(default_factory=list)


@dataclass
class CodeListConfig(DataClassBase):
    """
    Model for representing a code list config.

    When an instance is initialised, the top-level concepts and then the
    children of every concept are ordered by `sort_order`. Concepts without a
    user-defined sort order are (optionally) ordered by the `sort` object and
    then given the lowest non-negative integers not already used by their
    siblings.
    """

    sort: Optional[CodeListConfigSort] = field(default=None)
    concepts: List[CodeListConfigConcept] = field(default_factory=list)
    schema: str = field(default=CODE_LIST_CONFIG_DEFAULT_URL)
    # A default_factory is used so that a config can be constructed without
    # supplying the metadata up front; `from_dict` replaces this placeholder
    # with the metadata read from the config dictionary.
    metadata: CatalogMetadata = field(
        default_factory=lambda: CatalogMetadata("Metadata")
    )

    def __post_init__(self):
        # Top-level concepts are sorted first, then each parent's children.
        self.concepts = self._sort_concepts(self.concepts)
        self._apply_sort_to_child_concepts(self.concepts)

    def _apply_sort_to_child_concepts(self, concepts: List[CodeListConfigConcept]):
        """
        Sort the children of each given concept separately, recursing through
        every level of the hierarchy.
        """
        for concept in concepts:
            if concept.children:
                concept.children = self._sort_concepts(concept.children)
                self._apply_sort_to_child_concepts(concept.children)

    def _assign_sort_order_to_concepts(
        self,
        concepts_without_sort_order: List[CodeListConfigConcept],
        user_defined_sort_orders: List[int],
    ) -> List[CodeListConfigConcept]:
        """
        Assign a sort order to each concept lacking one, skipping any value
        already used by the user.

        The concepts are updated in place, in the order given, and returned as
        a new list.
        """
        # Build the lookup once so each membership test is O(1).
        taken_sort_orders = set(user_defined_sort_orders)

        next_sort_order = 0
        for concept in concepts_without_sort_order:
            while next_sort_order in taken_sort_orders:
                next_sort_order += 1
            concept.sort_order = next_sort_order
            next_sort_order += 1

        return list(concepts_without_sort_order)

    def _sort_concepts(
        self, concepts: List[CodeListConfigConcept]
    ) -> List[CodeListConfigConcept]:
        """
        Sort one level of concepts using the sort object and the sort orders
        defined in the code list json.

        Raises an `Exception` when the sort object names an unsupported `by`
        or `method` value.
        """
        # Step 1: Identify sort orders defined by the user in code list config json.
        user_defined_sort_orders: List[int] = [
            concept.sort_order for concept in concepts if concept.sort_order is not None
        ]

        # Step 2: Identify the concepts with and without sort order.
        concepts_with_sort_order: List[CodeListConfigConcept] = [
            concept for concept in concepts if concept.sort_order is not None
        ]
        concepts_without_sort_order: List[CodeListConfigConcept] = [
            concept for concept in concepts if concept.sort_order is None
        ]

        # Step 3: If the sort object is defined, concepts without a sort order
        # are ordered by it before sort orders are assigned to them.
        if self.sort is not None:
            if self.sort.by not in ("label", "notation"):
                raise Exception(
                    f"Unsupported sort by {self.sort.by}. The supported options are 'label' and 'notation'."
                )
            # The method (not `by`) must be checked against both options;
            # otherwise a valid "descending" method is rejected.
            if self.sort.method not in ("ascending", "descending"):
                raise Exception(
                    f"Unsupported sort method {self.sort.method}. The supported options are 'ascending' and 'descending'."
                )

            sort_by_label = self.sort.by == "label"
            concepts_without_sort_order.sort(
                key=lambda concept: concept.label
                if sort_by_label
                else concept.notation,
                reverse=self.sort.method == "descending",
            )

        # Step 4: Finally, all the concepts are sorted by the sort order.
        all_concepts = concepts_with_sort_order + self._assign_sort_order_to_concepts(
            concepts_without_sort_order, user_defined_sort_orders
        )

        # Every concept has a sort order by now; the `is not None` guard only
        # keeps the key's type non-optional. The sort is stable, so concepts
        # sharing a sort order keep their relative position.
        return sorted(
            all_concepts,
            key=lambda concept: concept.sort_order is not None and concept.sort_order,
        )

    @classmethod
    def from_json_file(cls, file_path: Path) -> Tuple["CodeListConfig", Dict]:
        """
        Converts code list config json to `CodeListConfig`.

        Returns the config together with the raw dictionary loaded from the file.
        """
        code_list_dict = load_json_document(file_path)
        # `from_dict` already applies the schema and the catalog metadata.
        code_list_config = cls.from_dict(code_list_dict)

        return (code_list_config, code_list_dict)

    @classmethod
    def from_dict(cls, code_list_dict: Dict) -> "CodeListConfig":
        """
        Converts code list config dict to `CodeListConfig`.

        The schema is taken from the `$schema` key, falling back to
        `CODE_LIST_CONFIG_DEFAULT_URL`, and the metadata is read from the same
        dictionary.
        """
        schema = code_list_dict.get("$schema", CODE_LIST_CONFIG_DEFAULT_URL)

        code_list_config = super().from_dict(code_list_dict)
        code_list_config.schema = schema
        code_list_config.metadata = metadata_from_dict(code_list_dict)

        return code_list_config

    @property
    def new_qb_concepts(self) -> list[NewQbConcept]:
        """
        Converts concepts of type CodeListConfigConcept to concepts of type
        NewQbConcept whilst maintaining the hierarchy.

        Concepts are emitted breadth-first: all top-level concepts, then their
        children, and so on. Each child records its parent's notation as
        `parent_code`.
        """
        new_qb_concepts: list[NewQbConcept] = []

        concepts_with_maybe_parent: list[
            Tuple[CodeListConfigConcept, Optional[CodeListConfigConcept]]
        ] = [(c, None) for c in self.concepts]

        # The list is deliberately extended while it is being iterated: children
        # appended here are visited later in this same loop.
        for (concept, maybe_parent_concept) in concepts_with_maybe_parent:
            new_qb_concepts.append(
                NewQbConcept(
                    label=concept.label,
                    code=concept.notation,
                    parent_code=maybe_parent_concept.notation
                    if maybe_parent_concept
                    else None,
                    sort_order=concept.sort_order,
                    description=concept.description,
                )
            )
            if concept.children:
                concepts_with_maybe_parent += [
                    (child, concept) for child in concept.children
                ]

        return new_qb_concepts
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""
Catalog Metadata Reader
-----------------------

Functionalities necessary for reading catalog metadata.
"""
from csvcubed.models.cube.qb.catalog import CatalogMetadata

from csvcubed.utils.datetime import parse_iso_8601_date_time
from csvcubed.utils.dict import get_with_func_or_none
from csvcubed.utils.uri import uri_safe


def metadata_from_dict(config: dict) -> CatalogMetadata:
    """
    Build a `CatalogMetadata` from a config dictionary.

    `title` is read with `config["title"]`, so a `KeyError` is raised when it
    is missing; every other key is optional. `themes` and `keywords` may each
    be given as a single string, which is wrapped in a one-item list.
    """

    def _wrap_single_string(value):
        # A lone non-empty string becomes a one-item list; anything else is kept as is.
        return [value] if value and isinstance(value, str) else value

    theme_uris = _wrap_single_string(config.get("themes", []))
    keyword_values = _wrap_single_string(config.get("keywords", []))

    catalog_metadata = CatalogMetadata(
        identifier=get_with_func_or_none(config, "id", uri_safe),
        title=config["title"],
        description=config.get("description", ""),
        summary=config.get("summary", ""),
        creator_uri=config.get("creator"),
        publisher_uri=config.get("publisher"),
        public_contact_point_uri=config.get("public_contact_point"),
        dataset_issued=get_with_func_or_none(
            config, "dataset_issued", parse_iso_8601_date_time
        ),
        dataset_modified=get_with_func_or_none(
            config, "dataset_modified", parse_iso_8601_date_time
        ),
        license_uri=config.get("license"),
        theme_uris=theme_uris,
        keywords=keyword_values,
        # Spatial and temporal bounds are not read from the config here:
        # spatial_bound_uri=uri_safe(config['spatial_bound'])
        #     if config.get('spatial_bound') else None,
        # temporal_bound_uri=uri_safe(config['temporal_bound'])
        #     if config.get('temporal_bound') else None,
    )
    return catalog_metadata
60 changes: 45 additions & 15 deletions csvcubed/csvcubed/readers/cubeconfig/schema_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

Contains an enum listing the qube-config.json schema versions recognised by csvcubed.
"""

import logging
from enum import Enum
from pathlib import Path
from typing import Optional, Callable, Tuple, List
Expand All @@ -12,21 +14,35 @@

from csvcubed.models.cube import QbCube
from csvcubed.models.validationerror import ValidationError
from csvcubed.readers.cubeconfig import v1_0
from csvcubed.readers.cubeconfig.v1 import configdeserialiser as v1_configdeserialiser

_logger = logging.getLogger(__name__)

QubeConfigDeserialiser = Callable[
[Path, Optional[Path]], Tuple[QbCube, List[JsonSchemaValidationError], List[ValidationError]]
[Path, Optional[Path]],
Tuple[QbCube, List[JsonSchemaValidationError], List[ValidationError]],
]

_V1_SCHEMA_URL = "https://purl.org/csv-cubed/qube-config/v1.0"
_v1_0_SCHEMA_URL = "https://purl.org/csv-cubed/qube-config/v1.0"
_v1_1_SCHEMA_URL = "https://purl.org/csv-cubed/qube-config/v1.1"
_v1_SCHEMA_URL = "https://purl.org/csv-cubed/qube-config/v1" # v1 defaults to the latest minor version of v1.*.


class QubeConfigJsonSchemaVersion(Enum):
class QubeConfigJsonSchemaMajorVersion(Enum):
"""
Known versions of the QubeConfig JSON Schema and the directory/module name they are contained within.
Major versions of the cube config schema.
"""

V1_0 = "v1_0"
v1 = 1


class QubeConfigJsonSchemaMinorVersion(Enum):
GDonRanasinghe marked this conversation as resolved.
Show resolved Hide resolved
"""
Minor versions of the cube config schema.
"""

v0 = 0
v1 = 1


def get_deserialiser_for_schema(
Expand All @@ -36,21 +52,35 @@ def get_deserialiser_for_schema(
Provides a versioned deserialiser function appropriate to the referenced schema.
"""
# Default to the latest version of the schema.
schema_path = _V1_SCHEMA_URL if maybe_schema_path is None else maybe_schema_path
schema_path = _v1_1_SCHEMA_URL if maybe_schema_path is None else maybe_schema_path

schema_version = _get_schema_version(schema_path)
schema_version_major, schema_version_minor = _get_schema_version(schema_path)
_logger.info(
f"Using schema version {schema_version_major.value}.{schema_version_minor.value}"
)

if schema_version == QubeConfigJsonSchemaVersion.V1_0:
return v1_0.configdeserialiser.get_deserialiser(
schema_path, schema_version.value
if schema_version_major == QubeConfigJsonSchemaMajorVersion.v1:
return v1_configdeserialiser.get_deserialiser(
schema_path, schema_version_minor.value
)
else:
raise ValueError(f"Unhandled schema version {schema_version}")
raise ValueError(f"Unhandled major schema version {schema_version_major}")


def _get_schema_version(schema_path: str) -> QubeConfigJsonSchemaVersion:
if schema_path == _V1_SCHEMA_URL:
return QubeConfigJsonSchemaVersion.V1_0
def _get_schema_version(
schema_path: str,
) -> Tuple[QubeConfigJsonSchemaMajorVersion, QubeConfigJsonSchemaMinorVersion]:
if schema_path == _v1_0_SCHEMA_URL:
return (
QubeConfigJsonSchemaMajorVersion.v1,
QubeConfigJsonSchemaMinorVersion.v0,
)
# The second condition below makes v1 default to the latest minor version of v1.*.
elif schema_path == _v1_1_SCHEMA_URL or schema_path == _v1_SCHEMA_URL:
return (
QubeConfigJsonSchemaMajorVersion.v1,
QubeConfigJsonSchemaMinorVersion.v1,
)
else:
raise ValueError(
f"The $schema '{schema_path}' referenced in the cube config file is not recognised."
Expand Down
Loading