From 7225e12c1a6006e18ff6b1d6751e1e2d14322829 Mon Sep 17 00:00:00 2001 From: Rob Barry Date: Mon, 2 Aug 2021 17:43:16 +0100 Subject: [PATCH 1/7] Issue #57 - Introducing pydantic model validation. --- csvqb/Pipfile | 1 + csvqb/Pipfile.lock | 15 ++- csvqb/csvqb/configloaders/infojson.py | 13 ++- csvqb/csvqb/models/cube/catalog.py | 29 +++--- csvqb/csvqb/models/cube/columns.py | 45 +++------ csvqb/csvqb/models/cube/csvqb/catalog.py | 62 ++++++------- csvqb/csvqb/models/cube/csvqb/columns.py | 38 ++------ .../models/cube/csvqb/components/attribute.py | 70 +++++--------- .../models/cube/csvqb/components/codelist.py | 71 ++++++-------- .../components/datastructuredefinition.py | 33 ++----- .../models/cube/csvqb/components/dimension.py | 82 +++++++--------- .../models/cube/csvqb/components/measure.py | 71 ++++---------- .../cube/csvqb/components/observedvalue.py | 82 +++++++--------- .../models/cube/csvqb/components/unit.py | 93 ++++++------------- csvqb/csvqb/models/cube/cube.py | 7 +- csvqb/csvqb/models/pydanticmodel.py | 83 +++++++++++++++++ csvqb/csvqb/models/uriidentifiable.py | 35 +++++++ .../cube/qb/test_cubeqb_errorvalidation.py | 2 +- csvqb/csvqb/tests/unit/pydantic/__init__.py | 0 csvqb/csvqb/tests/unit/pydantic/test_cube.py | 11 +++ .../tests/unit/pydantic/test_cubemetadata.py | 16 ++++ csvqb/csvqb/writers/qbwriter.py | 13 ++- 22 files changed, 420 insertions(+), 452 deletions(-) create mode 100644 csvqb/csvqb/models/pydanticmodel.py create mode 100644 csvqb/csvqb/models/uriidentifiable.py create mode 100644 csvqb/csvqb/tests/unit/pydantic/__init__.py create mode 100644 csvqb/csvqb/tests/unit/pydantic/test_cube.py create mode 100644 csvqb/csvqb/tests/unit/pydantic/test_cubemetadata.py diff --git a/csvqb/Pipfile b/csvqb/Pipfile index 120a07519..6f7984e32 100644 --- a/csvqb/Pipfile +++ b/csvqb/Pipfile @@ -13,6 +13,7 @@ pandas = "*" unidecode = "*" csvwlib-models = {editable = true,path = "./../sharedmodels"} rdflib-jsonld = "*" +pydantic = {editable = true,git = "https://github.com/robons/pydantic.git"} [requires] python_version = "3.9" diff --git a/csvqb/Pipfile.lock b/csvqb/Pipfile.lock index 12605f38d..0db86332e 100644 --- a/csvqb/Pipfile.lock +++ b/csvqb/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "1c0fe18ad491693dd768f3187189e266cbc526c5490a03755d05a121b6f983e4" + "sha256": "f6eb3499ca20ed4219f6a4bb3ef6b3977ac6a0c8f1f1cbab0ed95eeb696a87d5" }, "pipfile-spec": 6, "requires": { @@ -88,6 +88,11 @@ "index": "pypi", "version": "==1.2.5" }, + "pydantic": { + "editable": true, + "git": "https://github.com/robons/pydantic.git", + "ref": "8db068b1d22a91365bd1a3c6b6636ac8711199cd" + }, "pyparsing": { "hashes": [ "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1", @@ -130,6 +135,14 @@ ], "version": "==1.16.0" }, + "typing-extensions": { + "hashes": [ + "sha256:0ac0f89795dd19de6b97debb0c6af1c70987fd80a2d62d1958f7e56fcc31b497", + "sha256:50b6f157849174217d0656f99dc82fe932884fb250826c18350e159ec6cdf342", + "sha256:779383f6086d90c99ae41cf0ff39aac8a7937a9283ce0a414e5dd782f4c94a84" + ], + "version": "==3.10.0.0" + }, "unidecode": { "hashes": [ "sha256:12435ef2fc4cdfd9cf1035a1db7e98b6b047fe591892e81f34e94959591fad00", diff --git a/csvqb/csvqb/configloaders/infojson.py b/csvqb/csvqb/configloaders/infojson.py index 7d65bd417..8a2db189f 100644 --- a/csvqb/csvqb/configloaders/infojson.py +++ b/csvqb/csvqb/configloaders/infojson.py @@ -7,7 +7,7 @@ N.B. this should **not** be used by external users and should be moved into the gss-utils package in Issue #101: https://github.com/GSS-Cogs/csvwlib/issues/101 """ - +import datetime from typing import Dict, List, Any, Optional, Union from pathlib import Path import json @@ -103,19 +103,20 @@ def _metadata_from_dict(config: dict) -> "CatalogMetadata": config, "publisher", lambda p: str(GOV[uri_safe(p)]) ) theme_uris = [str(GDP.term(t)) for t in config.get("families", [])] + dt_issued = get_with_func_or_none(config, "published", parser.parse) or datetime.datetime.now() return CatalogMetadata( - get_from_dict_ensure_exists(config, "title"), - uri_safe_identifier=get_from_dict_ensure_exists(config, "id"), + title=get_from_dict_ensure_exists(config, "title"), summary=config.get("summary"), description=config.get("description"), creator_uri=publisher, publisher_uri=publisher, - issued=get_with_func_or_none(config, "published", parser.parse), + issued=dt_issued, theme_uris=theme_uris, keywords=config.get("keywords", []), landing_page_uri=config.get("landingPage"), license_uri=config.get("license"), public_contact_point_uri=config.get("contactUri"), + uri_safe_identifier_override=get_from_dict_ensure_exists(config, "id"), ) @@ -215,7 +216,9 @@ def _get_column_for_metadata_config( measure_component = ExistingQbMeasure(maybe_measure_uri) unit_component = ExistingQbUnit(maybe_unit_uri) observation_value = QbSingleMeasureObservationValue( - measure_component, unit_component, maybe_data_type + measure=measure_component, + unit=unit_component, + data_type=maybe_data_type or "decimal" ) return QbColumn(column_name, observation_value) elif maybe_data_type is not None: diff --git a/csvqb/csvqb/models/cube/catalog.py b/csvqb/csvqb/models/cube/catalog.py index 67242c493..319a798e8 100644 --- a/csvqb/csvqb/models/cube/catalog.py +++ b/csvqb/csvqb/models/cube/catalog.py @@ -2,23 +2,22 @@ Catalog Metadata (base) ----------------------- """ +from dataclasses import dataclass from datetime import datetime -from typing import Optional, List -from abc import ABC +from typing import Optional +from abc import ABC, abstractmethod -from csvqb.models.validationerror import ValidationError +from csvqb.models.pydanticmodel import PydanticModel -class CatalogMetadataBase(ABC): - def __init__( - self, - title: str, - description: Optional[str] = None, - issued: Optional[datetime] = None, - ): - self.title: str = title - self.description: Optional[str] = description - self.issued: Optional[datetime] = issued +@dataclass +class CatalogMetadataBase(PydanticModel, ABC): + title: str - def validate(self) -> List[ValidationError]: - return [] # TODO: implement this + @abstractmethod + def get_description(self) -> Optional[str]: + pass + + @abstractmethod + def get_issued(self) -> datetime: + pass diff --git a/csvqb/csvqb/models/cube/columns.py b/csvqb/csvqb/models/cube/columns.py index 7e19179d5..c28dcad84 100644 --- a/csvqb/csvqb/models/cube/columns.py +++ b/csvqb/csvqb/models/cube/columns.py @@ -3,50 +3,33 @@ ---------------------- """ from abc import ABC, abstractmethod -import pandas as pd +from dataclasses import dataclass, field from typing import Optional, List - -from csvqb.utils.uri import uri_safe +from csvqb.inputs import PandasDataTypes +from csvqb.models.pydanticmodel import PydanticModel +from csvqb.models.uriidentifiable import UriIdentifiable from csvqb.models.validationerror import ValidationError -class CsvColumn(ABC): - def __init__( - self, csv_column_title: str, uri_safe_identifier: Optional[str] = None - ): - self.csv_column_title: str = csv_column_title - self.uri_safe_identifier: str = ( - uri_safe(csv_column_title) - if uri_safe_identifier is None - else uri_safe_identifier - ) +@dataclass +class CsvColumn(PydanticModel, UriIdentifiable, ABC): + csv_column_title: str - @abstractmethod - def __str__(self) -> str: - pass + def get_identifier(self) -> str: + return self.csv_column_title @abstractmethod - def validate( - self, column_data: Optional[pd.Series] = None - ) -> List[ValidationError]: + def validate_data(self, data: PandasDataTypes) -> List[ValidationError]: pass +@dataclass class SuppressedCsvColumn(CsvColumn): """ A column which is only defined in the CSV and should not be propagated. """ + uri_safe_identifier_override: Optional[str] = field(default=None, repr=False) - def __init__( - self, csv_column_title: str, uri_safe_identifier: Optional[str] = None - ): - CsvColumn.__init__(self, csv_column_title, uri_safe_identifier) - - def __str__(self) -> str: - return f"SuppressedCsvColumn('{self.csv_column_title}')" - - def validate( - self, column_data: Optional[pd.Series] = None - ) -> List[ValidationError]: - return [] # TODO: implement this + def validate_data(self, data: PandasDataTypes) -> List[ValidationError]: + return [] diff --git a/csvqb/csvqb/models/cube/csvqb/catalog.py b/csvqb/csvqb/models/cube/csvqb/catalog.py index 9f6e51c44..1762e52f2 100644 --- a/csvqb/csvqb/models/cube/csvqb/catalog.py +++ b/csvqb/csvqb/models/cube/csvqb/catalog.py @@ -2,46 +2,40 @@ Catalog Metadata (DCAT) ----------------------- """ +from dataclasses import dataclass, field from datetime import datetime -from typing import Optional, List +from typing import Optional from sharedmodels.rdf import dcat -from csvqb.models.validationerror import ValidationError -from csvqb.utils.uri import uri_safe from csvqb.models.cube.catalog import CatalogMetadataBase +from csvqb.models.uriidentifiable import UriIdentifiable -class CatalogMetadata(CatalogMetadataBase): - def __init__( - self, - title: str, - uri_safe_identifier: Optional[str] = None, - summary: Optional[str] = None, - description: Optional[str] = None, - creator_uri: Optional[str] = None, - publisher_uri: Optional[str] = None, - issued: Optional[datetime] = None, - theme_uris: List[str] = [], - keywords: List[str] = [], - landing_page_uri: Optional[str] = None, - license_uri: Optional[str] = None, - public_contact_point_uri: Optional[str] = None, - ): - CatalogMetadataBase.__init__( - self, title, description=description, issued=issued - ) - self.uri_safe_identifier: str = uri_safe_identifier or uri_safe(title) - self.summary: Optional[str] = summary - self.creator_uri: Optional[str] = creator_uri - self.publisher_uri: Optional[str] = publisher_uri - self.theme_uris: List[str] = theme_uris - self.keywords: List[str] = keywords - self.landing_page_uri: Optional[str] = landing_page_uri - self.license_uri: Optional[str] = license_uri - self.public_contact_point_uri: Optional[str] = public_contact_point_uri - - def validate(self) -> List[ValidationError]: - return CatalogMetadataBase.validate(self) + [] # TODO: augment this +@dataclass +class CatalogMetadata(CatalogMetadataBase, UriIdentifiable): + theme_uris: list[str] = field(default_factory=list, repr=False) + keywords: list[str] = field(default_factory=list, repr=False) + issued: datetime = field(default_factory=lambda: datetime.now(), repr=False) + summary: Optional[str] = field(default=None, repr=False) + description: Optional[str] = field(default=None, repr=False) + creator_uri: Optional[str] = field(default=None, repr=False) + publisher_uri: Optional[str] = field(default=None, repr=False) + landing_page_uri: Optional[str] = field(default=None, repr=False) + license_uri: Optional[str] = field(default=None, repr=False) + public_contact_point_uri: Optional[str] = field(default=None, repr=False) + uri_safe_identifier_override: Optional[str] = field(default=None, repr=False) + + def __post_init__(self): + print("Hello.") + + def get_issued(self) -> datetime: + return self.issued + + def get_description(self) -> Optional[str]: + return self.description + + def get_identifier(self) -> str: + return self.title def configure_dcat_dataset(self, dataset: dcat.Dataset) -> None: dt_now = datetime.now() diff --git a/csvqb/csvqb/models/cube/csvqb/columns.py b/csvqb/csvqb/models/cube/csvqb/columns.py index 355b93c1c..0428a5b07 100644 --- a/csvqb/csvqb/models/cube/csvqb/columns.py +++ b/csvqb/csvqb/models/cube/csvqb/columns.py @@ -2,46 +2,28 @@ Columns with qb Components -------------------------- """ +from dataclasses import field, dataclass from typing import Optional, TypeVar, Generic, List -import pandas as pd - +from csvqb.inputs import PandasDataTypes, pandas_input_to_columnar from .components.datastructuredefinition import ColumnarQbDataStructureDefinition -from csvqb.models.validationerror import ValidationError from csvqb.models.cube.columns import CsvColumn - +from ...validationerror import ValidationError QbColumnarDsdType = TypeVar( "QbColumnarDsdType", bound=ColumnarQbDataStructureDefinition, covariant=True ) +@dataclass class QbColumn(CsvColumn, Generic[QbColumnarDsdType]): """ A CSV column and the qb components it relates to. """ + csv_column_title: str + component: QbColumnarDsdType + output_uri_template: Optional[str] = field(default=None, repr=False) + uri_safe_identifier_override: Optional[str] = field(default=None, repr=False) - def __init__( - self, - csv_column_title: str, - component: QbColumnarDsdType, - output_uri_template: Optional[str] = None, - uri_safe_identifier: Optional[str] = None, - ): - CsvColumn.__init__(self, csv_column_title, uri_safe_identifier) - if not isinstance(component, ColumnarQbDataStructureDefinition): - raise Exception( - f"{component} of type {type(component)} is not a valid columnar component." - ) - self.component: QbColumnarDsdType = component - self.output_uri_template: Optional[str] = output_uri_template - - def __str__(self) -> str: - return f"QbColumn('{self.csv_column_title}', {self.component})" - - def validate(self, column_data: Optional[pd.Series]) -> List[ValidationError]: - errors = self.component.validate() - if column_data is not None: - errors += self.component.validate_data(column_data) - - return errors + def validate_data(self, data: PandasDataTypes) -> List[ValidationError]: + return self.component.validate_data(data) diff --git a/csvqb/csvqb/models/cube/csvqb/components/attribute.py b/csvqb/csvqb/models/cube/csvqb/components/attribute.py index bc5e1ab60..9ea8db3ae 100644 --- a/csvqb/csvqb/models/cube/csvqb/components/attribute.py +++ b/csvqb/csvqb/models/cube/csvqb/components/attribute.py @@ -2,12 +2,11 @@ Attributes ---------- """ +from dataclasses import dataclass, field from typing import Optional, List -from abc import ABC +from abc import ABC, abstractmethod -import pandas as pd - -from csvqb.utils.uri import uri_safe +from csvqb.models.uriidentifiable import UriIdentifiable from .datastructuredefinition import ColumnarQbDataStructureDefinition from .codelist import QbCodeList, NewQbCodeList from csvqb.models.validationerror import ValidationError @@ -15,73 +14,54 @@ from csvqb.models.cube.csvqb.catalog import CatalogMetadata +@dataclass class QbAttribute(ColumnarQbDataStructureDefinition, ABC): - def __init__(self, is_required: bool): - self.is_required: bool = is_required + @abstractmethod + def is_required(self) -> bool: + pass +@dataclass class ExistingQbAttribute(QbAttribute): - def __init__(self, uri: str, is_required: bool = False): - QbAttribute.__init__(self, is_required) - self.attribute_uri: str = uri - - def __str__(self) -> str: - return f"ExistingQbAttribute('{self.attribute_uri}')" + attribute_uri: str + is_required: bool = field(default=False, repr=False) - def validate(self) -> List[ValidationError]: + def validate_data(self, data: PandasDataTypes) -> List[ValidationError]: return [] # TODO: implement this - def validate_data(self, data: pd.Series) -> List[ValidationError]: - return [] # TODO: implement this +@dataclass +class NewQbAttribute(QbAttribute, UriIdentifiable): + label: str + description: Optional[str] = field(default=None, repr=False) + code_list: Optional[QbCodeList] = field(default=None, repr=False) + parent_attribute_uri: Optional[str] = field(default=None, repr=False) + source_uri: Optional[str] = field(default=None, repr=False) + is_required: bool = field(default=False, repr=False) + uri_safe_identifier_override: Optional[str] = field(default=None, repr=False) -class NewQbAttribute(QbAttribute): - def __init__( - self, - label: str, - uri_safe_identifier: Optional[str] = None, - description: Optional[str] = None, - code_list: Optional[QbCodeList] = None, - parent_attribute_uri: Optional[str] = None, - source_uri: Optional[str] = None, - is_required: bool = False, - ): - QbAttribute.__init__(self, is_required) - self.label: str = label - self.uri_safe_identifier: str = ( - uri_safe_identifier if uri_safe_identifier is not None else uri_safe(label) - ) - self.description: Optional[str] = description - self.code_list: Optional[QbCodeList] = code_list - self.parent_attribute_uri: Optional[str] = parent_attribute_uri - self.source_uri: Optional[str] = source_uri + def get_identifier(self) -> str: + return self.label @staticmethod def from_data( label: str, data: PandasDataTypes, - uri_safe_identifier: Optional[str] = None, description: Optional[str] = None, parent_attribute_uri: Optional[str] = None, source_uri: Optional[str] = None, is_required: bool = False, + uri_safe_identifier_override: Optional[str] = None, ): - return NewQbAttribute( label, - uri_safe_identifier=uri_safe_identifier, description=description, code_list=NewQbCodeList.from_data(CatalogMetadata(label), data), parent_attribute_uri=parent_attribute_uri, source_uri=source_uri, is_required=is_required, + uri_safe_identifier_override=uri_safe_identifier_override, ) - def __str__(self) -> str: - return f"NewQbAttribute('{self.label}')" - - def validate(self) -> List[ValidationError]: - return [] # TODO: implement this - - def validate_data(self, data: pd.Series) -> List[ValidationError]: + def validate_data(self, data: PandasDataTypes) -> List[ValidationError]: return [] # TODO: implement this diff --git a/csvqb/csvqb/models/cube/csvqb/components/codelist.py b/csvqb/csvqb/models/cube/csvqb/components/codelist.py index ee7416823..aac94a88f 100644 --- a/csvqb/csvqb/models/cube/csvqb/components/codelist.py +++ b/csvqb/csvqb/models/cube/csvqb/components/codelist.py @@ -2,11 +2,11 @@ Code Lists ---------- """ +from dataclasses import dataclass, field from typing import Optional, List from abc import ABC -import pandas as pd - +from csvqb.models.uriidentifiable import UriIdentifiable from .datastructuredefinition import QbDataStructureDefinition from csvqb.models.cube.csvqb.catalog import CatalogMetadata from csvqb.models.validationerror import ValidationError @@ -14,67 +14,53 @@ from csvqb.inputs import PandasDataTypes, pandas_input_to_columnar_str +@dataclass class QbCodeList(QbDataStructureDefinition, ABC): pass +@dataclass class ExistingQbCodeList(QbCodeList): """ Contains metadata necessary to link a dimension to an existing skos:ConceptScheme. """ + concept_scheme_uri: str - def __init__(self, concept_scheme_uri: str): - self.concept_scheme_uri: str = concept_scheme_uri + def validate_data(self, data: PandasDataTypes) -> List[ValidationError]: + return [] # TODO: implement this. - def __str__(self) -> str: - return f"ExistingQbCodeList('{self.concept_scheme_uri}')" - def validate(self) -> List[ValidationError]: - return [] # TODO: implement this. +@dataclass(eq=False, unsafe_hash=False) +class NewQbConcept(UriIdentifiable): + label: str + code: str = field(default="") + parent_code: Optional[str] = field(default=None, repr=False) + sort_order: Optional[int] = field(default=None, repr=False) + description: Optional[str] = field(default=None, repr=False) + uri_safe_identifier_override: Optional[str] = field(default=None, repr=False) - def validate_data(self, data: pd.Series) -> List[ValidationError]: - return [] # TODO: implement this. + def get_identifier(self) -> str: + return self.code + def __post_init__(self): + if self.code.strip() == "": + self.code = uri_safe(self.label) -class NewQbConcept: - def __init__( - self, - label: str, - code: Optional[str] = None, - parent_code: Optional[str] = None, - sort_order: Optional[int] = None, - description: Optional[str] = None, - ): - self.label: str = label - self.code: str = code or uri_safe(label) - self.parent_code: Optional[str] = parent_code - self.sort_order: Optional[int] = sort_order - self.description: Optional[str] = description - - def __str__(self) -> str: - return f"NewQbConcept('{self.code}', '{self.label}')" + def __eq__(self, other): + return isinstance(other, NewQbConcept) and self.code == other.code def __hash__(self): return self.code.__hash__() +@dataclass class NewQbCodeList(QbCodeList): """ Contains the metadata necessary to create a new skos:ConceptScheme which is local to a dataset. """ - - def __init__( - self, - metadata: CatalogMetadata, - concepts: List[NewQbConcept], - variant_of_uris: List[str] = [], - ): - self.metadata: CatalogMetadata = metadata - self.concepts: List[NewQbConcept] = concepts - self.variant_of_uris: List[str] = variant_of_uris # For xkos:variant usage. - - def __str__(self) -> str: - return f"NewQbCodeList('{self.metadata.title}')" + metadata: CatalogMetadata + concepts: List[NewQbConcept] + variant_of_uris: List[str] = field(default_factory=list) @staticmethod def from_data( @@ -86,8 +72,5 @@ def from_data( concepts = [NewQbConcept(c) for c in sorted(set(columnar_data))] return NewQbCodeList(metadata, concepts, variant_of_uris=variant_of_uris) - def validate(self) -> List[ValidationError]: - return self.metadata.validate() + [] # TODO: augment this. - - def validate_data(self, data: pd.Series) -> List[ValidationError]: + def validate_data(self, data: PandasDataTypes) -> List[ValidationError]: return [] # TODO: implement this. diff --git a/csvqb/csvqb/models/cube/csvqb/components/datastructuredefinition.py b/csvqb/csvqb/models/cube/csvqb/components/datastructuredefinition.py index 410643a67..c3782814f 100644 --- a/csvqb/csvqb/models/cube/csvqb/components/datastructuredefinition.py +++ b/csvqb/csvqb/models/cube/csvqb/components/datastructuredefinition.py @@ -2,41 +2,29 @@ Data Structure Definitions -------------------------- """ +from dataclasses import dataclass from abc import ABC, abstractmethod -import pandas as pd from typing import List +from csvqb.inputs import PandasDataTypes +from csvqb.models.pydanticmodel import PydanticModel from csvqb.models.validationerror import ValidationError -class QbDataStructureDefinition(ABC): +@dataclass +class QbDataStructureDefinition(PydanticModel, ABC): """ Base class for entities holding information necessary to generate one or many qb DataStructureDefinition (DSD) components. """ @abstractmethod - def validate(self) -> List[ValidationError]: - """ - Validate this component's metadata. - """ - pass - - @abstractmethod - def validate_data(self, data: pd.Series) -> List[ValidationError]: - """ - Validate some data against this component's definition. - """ - pass - - @abstractmethod - def __str__(self) -> str: - """ - Ensure that descendents implement the to string method to help users debug their data. - """ + def validate_data(self, data: PandasDataTypes) -> List[ValidationError]: + """Validate some data against this component's definition.""" pass +@dataclass class ColumnarQbDataStructureDefinition(QbDataStructureDefinition, ABC): """ Base class representing Qb Data Structure Definitions which can be directly attached to a `pd.DataFrame` column. @@ -45,11 +33,8 @@ class ColumnarQbDataStructureDefinition(QbDataStructureDefinition, ABC): pass +@dataclass class MultiQbDataStructureDefinition(ColumnarQbDataStructureDefinition, ABC): """ Base class representing an entity which defines a group of `QbDataStructureDefinition` s. """ - - @abstractmethod - def get_qb_components(self) -> List[QbDataStructureDefinition]: - pass diff --git a/csvqb/csvqb/models/cube/csvqb/components/dimension.py b/csvqb/csvqb/models/cube/csvqb/components/dimension.py index 29017b405..6c55bcdcf 100644 --- a/csvqb/csvqb/models/cube/csvqb/components/dimension.py +++ b/csvqb/csvqb/models/cube/csvqb/components/dimension.py @@ -2,12 +2,11 @@ Dimensions ---------- """ +from dataclasses import dataclass, field from typing import Optional, List -from abc import ABC +from abc import ABC, abstractmethod -import pandas as pd - -from csvqb.utils.uri import uri_safe +from csvqb.models.uriidentifiable import UriIdentifiable from .datastructuredefinition import ColumnarQbDataStructureDefinition from .codelist import QbCodeList, NewQbCodeList from csvqb.models.validationerror import ValidationError @@ -15,61 +14,51 @@ from ..catalog import CatalogMetadata +@dataclass class QbDimension(ColumnarQbDataStructureDefinition, ABC): - def __init__(self, range_uri: Optional[str]): - self.range_uri: Optional[str] = range_uri - + @property + @abstractmethod + def range_uri(self) -> Optional[str]: + pass -class ExistingQbDimension(QbDimension): - def __init__(self, dimension_uri: str, range_uri: Optional[str] = None): - QbDimension.__init__(self, range_uri) - self.dimension_uri: str = dimension_uri - self.range_uri: Optional[str] = range_uri + @range_uri.setter + @abstractmethod + def range_uri(self, value: Optional[str]): + pass - def __str__(self) -> str: - return f"ExistingQbDimension('{self.dimension_uri}')" - def validate(self) -> List[ValidationError]: - return [] # TODO: add more validation checks +@dataclass +class ExistingQbDimension(QbDimension): + dimension_uri: str + range_uri: Optional[str] = field(default=None, repr=False) - def validate_data(self, data: pd.Series) -> List[ValidationError]: + def validate_data(self, data: PandasDataTypes) -> List[ValidationError]: return [] # TODO: add more validation checks -class NewQbDimension(QbDimension): - def __init__( - self, - label: str, - description: Optional[str] = None, - uri_safe_identifier: Optional[str] = None, - # todo: Ensure we link the code-list to the qb column component somehow - code_list: Optional[QbCodeList] = None, - parent_dimension_uri: Optional[str] = None, - source_uri: Optional[str] = None, - range_uri: Optional[str] = None, - ): - QbDimension.__init__(self, range_uri) - self.label: str = label - self.description: Optional[str] = description - self.uri_safe_identifier: str = ( - uri_safe_identifier if uri_safe_identifier is not None else uri_safe(label) - ) - self.code_list: Optional[QbCodeList] = code_list - self.parent_dimension_uri: Optional[str] = parent_dimension_uri - self.source_uri: Optional[str] = source_uri +@dataclass +class NewQbDimension(QbDimension, UriIdentifiable): + label: str + description: Optional[str] = field(default=None, repr=False) + # todo: Ensure we link the code-list to the qb column component somehow + code_list: Optional[QbCodeList] = field(default=None, repr=False) + parent_dimension_uri: Optional[str] = field(default=None, repr=False) + source_uri: Optional[str] = field(default=None, repr=False) + range_uri: Optional[str] = field(default=None, repr=False) + uri_safe_identifier_override: Optional[str] = field(default=None, repr=False) - def __str__(self) -> str: - return f"NewQbDimension('{self.label}')" + def get_identifier(self) -> str: + return self.label @staticmethod def from_data( label: str, data: PandasDataTypes, description: Optional[str] = None, - uri_safe_identifier: Optional[str] = None, parent_dimension_uri: Optional[str] = None, source_uri: Optional[str] = None, range_uri: Optional[str] = None, + uri_safe_identifier_override: Optional[str] = None, ) -> "NewQbDimension": """ Creates a new dimension and code list from the columnar data provided. @@ -77,21 +66,14 @@ def from_data( return NewQbDimension( label, description=description, - uri_safe_identifier=uri_safe_identifier, code_list=NewQbCodeList.from_data(CatalogMetadata(label), data), parent_dimension_uri=parent_dimension_uri, source_uri=source_uri, range_uri=range_uri, + uri_safe_identifier_override=uri_safe_identifier_override, ) - def validate(self) -> List[ValidationError]: - # todo: Add more validation checks - if self.code_list is not None: - return self.code_list.validate() - - return [] - - def validate_data(self, data: pd.Series) -> List[ValidationError]: + def validate_data(self, data: PandasDataTypes) -> List[ValidationError]: # todo: Add more validation checks if self.code_list is not None: return self.code_list.validate_data(data) diff --git a/csvqb/csvqb/models/cube/csvqb/components/measure.py b/csvqb/csvqb/models/cube/csvqb/components/measure.py index 06765d911..e5b8f6e1f 100644 --- a/csvqb/csvqb/models/cube/csvqb/components/measure.py +++ b/csvqb/csvqb/models/cube/csvqb/components/measure.py @@ -2,12 +2,11 @@ Measures -------- """ +from dataclasses import dataclass, field from typing import Optional, List from abc import ABC -import pandas as pd - -from csvqb.utils.uri import uri_safe +from csvqb.models.uriidentifiable import UriIdentifiable from .datastructuredefinition import ( MultiQbDataStructureDefinition, QbDataStructureDefinition, @@ -17,64 +16,40 @@ from csvqb.inputs import PandasDataTypes, pandas_input_to_columnar_str +@dataclass class QbMeasure(QbDataStructureDefinition, ABC): pass +@dataclass class ExistingQbMeasure(QbMeasure): - def __init__(self, measure_uri: str): - QbMeasure.__init__(self) - self.measure_uri: str = measure_uri - - def __str__(self) -> str: - return f"ExistingQbMeasure('{self.measure_uri}')" - - def validate(self) -> List[ValidationError]: - return [] # TODO: implement this + measure_uri: str - def validate_data(self, data: pd.Series) -> List[ValidationError]: + def validate_data(self, data: PandasDataTypes) -> List[ValidationError]: return [] # TODO: implement this -class NewQbMeasure(QbMeasure): - def __init__( - self, - label: str, - description: Optional[str] = None, - uri_safe_identifier: Optional[str] = None, - parent_measure_uri: Optional[str] = None, - source_uri: Optional[str] = None, - ): - QbMeasure.__init__(self) - self.label: str = label - self.description: Optional[str] = description - self.uri_safe_identifier: str = ( - uri_safe_identifier if uri_safe_identifier is not None else uri_safe(label) - ) - self.parent_measure_uri: Optional[str] = parent_measure_uri - self.source_uri: Optional[str] = source_uri - - def __str__(self) -> str: - return f"NewQbMeasure('{self.label}')" +@dataclass +class NewQbMeasure(QbMeasure, UriIdentifiable): + label: str + description: Optional[str] = field(default=None, repr=False) + parent_measure_uri: Optional[str] = field(default=None, repr=False) + source_uri: Optional[str] = field(default=None, repr=False) + uri_safe_identifier_override: Optional[str] = field(default=None, repr=False) - def validate(self) -> List[ValidationError]: - return [] # TODO: implement this + def get_identifier(self) -> str: + return self.label - def validate_data(self, data: pd.Series) -> List[ValidationError]: + def validate_data(self, data: PandasDataTypes) -> List[ValidationError]: return [] # TODO: implement this +@dataclass class QbMultiMeasureDimension(MultiQbDataStructureDefinition): """ Represents the measure types permitted in a multi-measure cube. """ - - def __init__(self, measures: List[QbMeasure]): - self.measures: List[QbMeasure] = measures - - def __str__(self) -> str: - measures_str = ", ".join([str(m) for m in self.measures]) - return f"QbMultiMeasureDimension({measures_str})" + measures: List[QbMeasure] @staticmethod def new_measures_from_data(data: PandasDataTypes) -> "QbMultiMeasureDimension": @@ -83,17 +58,9 @@ def new_measures_from_data(data: PandasDataTypes) -> "QbMultiMeasureDimension": [NewQbMeasure(m) for m in sorted(set(columnar_data))] ) - def validate(self) -> List[ValidationError]: + def validate_data(self, data: PandasDataTypes) -> List[ValidationError]: return [] # TODO: implement this - def validate_data(self, data: pd.Series) -> List[ValidationError]: - return [] # TODO: implement this - - def get_qb_components(self) -> List[QbDataStructureDefinition]: - components: List[QbDataStructureDefinition] = [QbMeasureTypeDimension] - components += self.measures - return components - QbMeasureTypeDimension = ExistingQbDimension( "http://purl.org/linked-data/cube#measureType", diff --git a/csvqb/csvqb/models/cube/csvqb/components/observedvalue.py b/csvqb/csvqb/models/cube/csvqb/components/observedvalue.py index 8244fe4fd..9330a6296 100644 --- a/csvqb/csvqb/models/cube/csvqb/components/observedvalue.py +++ b/csvqb/csvqb/models/cube/csvqb/components/observedvalue.py @@ -13,71 +13,59 @@ ) from .measure import QbMeasure, QbMeasureTypeDimension from .unit import QbUnit, QbUnitAttribute +from dataclasses import dataclass, field +from typing import Optional, List +from abc import ABC, abstractmethod + +from csvqb.inputs import PandasDataTypes +from .datastructuredefinition import MultiQbDataStructureDefinition +from .measure import QbMeasure +from .unit import QbUnit from csvqb.models.validationerror import ValidationError +@dataclass class QbObservationValue(MultiQbDataStructureDefinition, ABC): - def __init__(self, data_type: Optional[str], unit: Optional[QbUnit]): - self.data_type: str = data_type if data_type is not None else "decimal" - self.unit: Optional[QbUnit] = unit + @property + @abstractmethod + def data_type(self) -> str: + pass + @data_type.setter + @abstractmethod + def data_type(self, value: str): + pass -class QbMultiMeasureObservationValue(QbObservationValue): - def __init__(self, data_type: Optional[str] = None, unit: Optional[QbUnit] = None): - """ + @property + @abstractmethod + def unit(self) -> Optional[QbUnit]: + pass - :param data_type: Data type of the observed value. - :param unit: Optional. Only defined where all measures in the cube have the same unit. - """ - QbObservationValue.__init__(self, data_type, unit) + @unit.setter + @abstractmethod + def unit(self, value: Optional[QbUnit]): + pass - def __str__(self) -> str: - units_str = "" if self.unit is None else f", {self.unit}" - return f"QbMultiMeasureObservationValue('{self.data_type}'{units_str})" - def validate(self) -> List[ValidationError]: - return [] # TODO: implement this +@dataclass +class QbMultiMeasureObservationValue(QbObservationValue): + data_type: str = field(default="decimal", repr=False) + unit: Optional[QbUnit] = None - def validate_data(self, data: pd.Series) -> List[ValidationError]: + def validate_data(self, data: PandasDataTypes) -> List[ValidationError]: return [] # TODO: implement this - def get_qb_components(self) -> List[QbDataStructureDefinition]: - return [] - +@dataclass class QbSingleMeasureObservationValue(QbObservationValue): """ Represents the unit/measure/datatype components necessary to define a simple qb:Observation. N.B. Requires `virt_unit` and `virt_measure` columns to be added to CSV-W metadata """ + measure: QbMeasure + unit: Optional[QbUnit] = None + data_type: str = field(default="decimal", repr=False) - def __init__( - self, - measure: QbMeasure, - unit: Optional[QbUnit] = None, - data_type: Optional[str] = None, - ): - QbObservationValue.__init__(self, data_type, unit) - self.measure: QbMeasure = measure - - def __str__(self) -> str: - units_str = "" if self.unit is None else f", {self.unit}" - return f"QbMultiMeasureObservationValue({self.measure}{units_str})" - - def validate(self) -> List[ValidationError]: - errors: List[ValidationError] = [] - if self.measure is None: - errors.append(ValidationError(f"{self} - no measure has been defined.")) - - return errors - - def validate_data(self, data: pd.Series) -> List[ValidationError]: + def validate_data(self, data: PandasDataTypes) -> List[ValidationError]: return [] # TODO: implement this - - def get_qb_components(self) -> List[QbDataStructureDefinition]: - components = [self.measure, QbMeasureTypeDimension] - if self.unit is not None: - components += [self.unit, QbUnitAttribute] - - return components diff --git a/csvqb/csvqb/models/cube/csvqb/components/unit.py b/csvqb/csvqb/models/cube/csvqb/components/unit.py index 4ac9874e3..752d54253 100644 --- a/csvqb/csvqb/models/cube/csvqb/components/unit.py +++ b/csvqb/csvqb/models/cube/csvqb/components/unit.py @@ -2,12 +2,12 @@ Units ----- """ +from dataclasses import dataclass, field from typing import Optional, List -from abc import ABC +from abc import ABC, abstractmethod import pandas as pd - -from csvqb.utils.uri import uri_safe +from csvqb.models.uriidentifiable import UriIdentifiable from csvqb.models.validationerror import ValidationError from .attribute import ExistingQbAttribute from .datastructuredefinition import ( @@ -17,73 +17,44 @@ from csvqb.inputs import pandas_input_to_columnar_str, PandasDataTypes +@dataclass class QbUnit(QbDataStructureDefinition, ABC): - def __init__(self, unit_multiplier: Optional[int]): - self.unit_multiplier: Optional[int] = unit_multiplier + @abstractmethod + def unit_multiplier(self) -> Optional[int]: + pass +@dataclass class ExistingQbUnit(QbUnit): - def __init__(self, unit_uri: str, unit_multiplier: Optional[int] = None): - QbUnit.__init__(self, unit_multiplier) - self.unit_uri: str = unit_uri + unit_uri: str + unit_multiplier: Optional[int] = field(default=None, repr=False) - def __str__(self) -> str: - unit_multiplier_str = ( - "" if self.unit_multiplier is None else f", 10^{self.unit_multiplier}" - ) - return f"ExistingQbUnit('{self.unit_uri}'{unit_multiplier_str})" - - def validate(self) -> List[ValidationError]: - return super(ExistingQbAttribute).validate() # todo: Add more validation here. - - def validate_data(self, data: pd.Series) -> List[ValidationError]: - return super(ExistingQbAttribute).validate_data( - data - ) # todo: Add more validation here. - - -class NewQbUnit(QbUnit): - def __init__( - self, - label: str, - uri_safe_identifier: Optional[str] = None, - unit_multiplier: Optional[int] = None, - description: Optional[str] = None, - parent_unit_uri: Optional[str] = None, - source_uri: Optional[str] = None, - ): - QbUnit.__init__(self, unit_multiplier) - self.label: str = label - self.uri_safe_identifier: str = ( - uri_safe_identifier if uri_safe_identifier is not None else uri_safe(label) - ) - self.description: Optional[str] = description - self.parent_unit_uri: Optional[str] = parent_unit_uri - self.source_uri: Optional[str] = source_uri + def validate_data(self, data: PandasDataTypes) -> List[ValidationError]: + return [] # todo: Add more validation here. - def __str__(self) -> str: - return f"NewQbUnit('{self.label}')" - def validate(self) -> List[ValidationError]: - return super(ExistingQbAttribute).validate() # todo: Add more validation here. +@dataclass +class NewQbUnit(QbUnit, UriIdentifiable): + label: str + description: Optional[str] = field(default=None, repr=False) + unit_multiplier: Optional[int] = field(default=None, repr=False) + parent_unit_uri: Optional[str] = field(default=None, repr=False) + source_uri: Optional[str] = field(default=None, repr=False) + uri_safe_identifier_override: Optional[str] = field(default=None, repr=False) - def validate_data(self, data: pd.Series) -> List[ValidationError]: - return super(ExistingQbAttribute).validate_data( - data - ) # todo: Add more validation here. + def get_identifier(self) -> str: + return self.label + def validate_data(self, data: PandasDataTypes) -> List[ValidationError]: + return [] # todo: Add more validation here. + +@dataclass class QbMultiUnits(MultiQbDataStructureDefinition): """ Represents multiple units used/defined in a cube, typically used in multi-measure cubes. """ - - def __init__(self, units: List[QbUnit]): - self.units: List[QbUnit] = units - - def __str__(self) -> str: - units_str = ",".join([str(u) for u in self.units]) - return f"QbMultiUnits({units_str})" + units: List[QbUnit] @staticmethod def new_units_from_data(data: PandasDataTypes) -> "QbMultiUnits": @@ -94,17 +65,9 @@ def new_units_from_data(data: PandasDataTypes) -> "QbMultiUnits": [NewQbUnit(u) for u in set(pandas_input_to_columnar_str(data))] ) - def validate(self) -> List[ValidationError]: - return [] # TODO: implement this - - def validate_data(self, data: pd.Series) -> List[ValidationError]: + def validate_data(self, data: PandasDataTypes) -> List[ValidationError]: return [] # TODO: implement this - def get_qb_components(self) -> List[QbDataStructureDefinition]: - components: List[QbDataStructureDefinition] = [QbUnitAttribute] - components += self.units - return components - QbUnitAttribute = ExistingQbAttribute( "http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure" diff --git a/csvqb/csvqb/models/cube/cube.py b/csvqb/csvqb/models/cube/cube.py index 49c275526..380bbad94 100644 --- a/csvqb/csvqb/models/cube/cube.py +++ b/csvqb/csvqb/models/cube/cube.py @@ -9,11 +9,12 @@ from .columns import CsvColumn from csvqb.models.cube.catalog import CatalogMetadataBase from csvqb.inputs import pandas_input_to_columnar +from ..pydanticmodel import PydanticModel TMetadata = TypeVar("TMetadata", bound=CatalogMetadataBase, covariant=True) -class Cube(Generic[TMetadata]): +class Cube(Generic[TMetadata], PydanticModel): def __init__( self, metadata: TMetadata, @@ -25,7 +26,7 @@ def __init__( self.columns: List[CsvColumn] = columns def validate(self) -> List[ValidationError]: - errors = self.metadata.validate() + errors = self.pydantic_validation() errors += self._validate_columns() return errors @@ -51,7 +52,7 @@ def _validate_columns(self) -> List[ValidationError]: ) ) - errors += col.validate(pandas_input_to_columnar(maybe_column_data)) + errors += col.validate_data(maybe_column_data) if self.data is not None: defined_column_titles = [c.csv_column_title for c in self.columns] diff --git a/csvqb/csvqb/models/pydanticmodel.py b/csvqb/csvqb/models/pydanticmodel.py new file mode 100644 index 000000000..de4d6df7a --- /dev/null +++ b/csvqb/csvqb/models/pydanticmodel.py @@ -0,0 +1,83 @@ +from dataclasses import dataclass, asdict, fields, is_dataclass +import pydantic +import pydantic.dataclasses +from pydantic import BaseConfig +from typing import ClassVar, Dict, Type, List, Iterable +from abc import ABC + + +from .validationerror import ValidationError + + +@dataclass +class PydanticModel(ABC): + """ + ValidatedModel - an abstract base class to be inherited by models which want a `validate` method which verifies + that the model's attributes agree with the corresponding type annotations. + Uses pydantic under the hood, but rather than using pydantic's constructor validation approach, we delay + validation until the `validate` method is called. + """ + + _map_class_to_pydantic_constructor: ClassVar[Dict[Type, Type]] = dict() + """_map_class_to_pydantic_constructor - Cache of pydantic constructor corresponding to a given class.""" + + class Config(BaseConfig): + """pydantic Configuration - see https://pydantic-docs.helpmanual.io/usage/model_config/""" + + extra = "forbid" + arbitrary_types_allowed = True + + @classmethod + def _get_pydantic_constructor(cls) -> Type: + if cls not in PydanticModel._map_class_to_pydantic_constructor: + PydanticModel._map_class_to_pydantic_constructor[ + cls + ] = pydantic.dataclasses.dataclass(cls, config=PydanticModel.Config) + return PydanticModel._map_class_to_pydantic_constructor[cls] + + def as_dict(self) -> dict: + """Use python dataclasses method to return this model as a dictionary.""" + return asdict(self) + + def as_shallow_dict(self) -> dict: + return dict([(f.name, getattr(self, f.name)) for f in fields(self)]) + + def pydantic_validation(self) -> List[ValidationError]: + """ + Validate this model using pydantic. + Checks that all model attributes match the expected annotated data type. **Coerces values** where possible. + """ + + pydantic_class_constructor = self.__class__._get_pydantic_constructor() + try: + validated_model = pydantic_class_constructor(**self.as_shallow_dict()) + except pydantic.ValidationError as error: + return [ + ValidationError(f"{self} - {e['loc']} - {e['msg']}") + for e in error.errors() + ] + + # Update this model's values with pydantic's coerced values + for field in fields(self): + field_value = getattr(validated_model, field.name) + if value_does_not_contain_pydantic_dataclasses(field_value): + setattr(self, field.name, field_value) + + # there are no validation errors + return [] + + +def value_does_not_contain_pydantic_dataclasses(value) -> bool: + value_is_iterable = isinstance(value, Iterable) and not isinstance(value, str) + if value_is_iterable: + # Only copy iterables if all of their items can be copied. + return all([value_does_not_contain_pydantic_dataclasses(v) for v in value]) + elif isinstance(value, object): + # Don't copy object which have been cast to pydantic dataclasses + cls = value.__class__ + return (not is_dataclass(cls)) or pydantic.dataclasses.is_builtin_dataclass( + value.__class__ + ) + + # Anything else should be fine. + return True diff --git a/csvqb/csvqb/models/uriidentifiable.py b/csvqb/csvqb/models/uriidentifiable.py new file mode 100644 index 000000000..661c9c890 --- /dev/null +++ b/csvqb/csvqb/models/uriidentifiable.py @@ -0,0 +1,35 @@ +import dataclasses +from typing import Optional +from abc import ABC, abstractmethod + +from csvqb.utils.uri import uri_safe + + +@dataclasses.dataclass +class UriIdentifiable(ABC): + """Requires that implementing classes provide callers with a way of overriding the uri_safe_identifier property.""" + + @abstractmethod + def get_identifier(self) -> str: + """ + get_identifier - returns the property which is turned into a URI-safe identifier is no override is present. + """ + pass + + @property + @abstractmethod + def uri_safe_identifier_override(self) -> Optional[str]: + pass + + @uri_safe_identifier_override.setter + @abstractmethod + def uri_safe_identifier_override(self, value: Optional[str]) -> None: + pass + + @property + def uri_safe_identifier(self) -> str: + return self.uri_safe_identifier_override or uri_safe(self.get_identifier()) + + @uri_safe_identifier.setter + def uri_safe_identifier(self, uri_safe_identifier: str) -> None: + self.uri_safe_identifier_override = uri_safe_identifier diff --git a/csvqb/csvqb/tests/unit/cube/qb/test_cubeqb_errorvalidation.py b/csvqb/csvqb/tests/unit/cube/qb/test_cubeqb_errorvalidation.py index 9bb0d76a2..f78f40ebd 100644 --- a/csvqb/csvqb/tests/unit/cube/qb/test_cubeqb_errorvalidation.py +++ b/csvqb/csvqb/tests/unit/cube/qb/test_cubeqb_errorvalidation.py @@ -66,7 +66,7 @@ def test_multi_measure_qb_definition(): ExistingQbDimension("https://example.org/dimensions/existing_dimension"), output_uri_template="https://example.org/concept-scheme/existing_scheme/{+existing_dimension}", ), - QbColumn("Value", QbMultiMeasureObservationValue("number")), + QbColumn("Value", QbMultiMeasureObservationValue(data_type="number")), QbColumn( "Measure", QbMultiMeasureDimension.new_measures_from_data(data["Measure"]), diff --git a/csvqb/csvqb/tests/unit/pydantic/__init__.py b/csvqb/csvqb/tests/unit/pydantic/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/csvqb/csvqb/tests/unit/pydantic/test_cube.py b/csvqb/csvqb/tests/unit/pydantic/test_cube.py new file mode 100644 index 000000000..f79dc3754 --- /dev/null +++ b/csvqb/csvqb/tests/unit/pydantic/test_cube.py @@ -0,0 +1,11 @@ +import pytest + +from csvqb.models.cube import * + + +def test_basic_cube_validation(): + assert True + + +if __name__ == "__main__": + pytest.main() diff --git a/csvqb/csvqb/tests/unit/pydantic/test_cubemetadata.py b/csvqb/csvqb/tests/unit/pydantic/test_cubemetadata.py new file mode 100644 index 000000000..b3ccdc70c --- /dev/null +++ b/csvqb/csvqb/tests/unit/pydantic/test_cubemetadata.py @@ -0,0 +1,16 @@ +import pytest + +from csvqb.models.cube import * + + +def test_basic_cube_metadata_validation(): + invalid_catalog_metadata = CatalogMetadata(title=None) + errors = invalid_catalog_metadata.pydantic_validation() + assert len(errors) == 1, print(", ".join([e.message for e in errors])) + error = errors[0] + assert "none is not an allowed value" in error.message + assert "title" in error.message + + +if __name__ == "__main__": + pytest.main() diff --git a/csvqb/csvqb/writers/qbwriter.py b/csvqb/csvqb/writers/qbwriter.py index 7eaaadef7..b8d69876f 100644 --- a/csvqb/csvqb/writers/qbwriter.py +++ b/csvqb/csvqb/writers/qbwriter.py @@ -149,13 +149,14 @@ def _generate_virtual_columns_for_obs_val( self, obs_val: QbObservationValue ) -> List[Dict[str, Any]]: virtual_columns: List[dict] = [] - if obs_val.unit is not None: + unit = obs_val.unit + if unit is not None: virtual_columns.append( { "name": VIRT_UNIT_COLUMN_NAME, "virtual": True, "propertyUrl": "http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure", - "valueUrl": self._get_unit_uri(obs_val.unit), + "valueUrl": self._get_unit_uri(unit), } ) # todo: We can't do the same thing with unti multipler unfortunately. Perhaps we should attach the unit @@ -247,11 +248,9 @@ def _get_qb_obs_val_specifications( self, observation_value: QbObservationValue ) -> List[qb.ComponentSpecification]: specs: List[qb.ComponentSpecification] = [] - - if observation_value.unit is not None: - unit_uri_safe_identifier = self._get_unit_uri_safe_identifier( - observation_value.unit - ) + unit = observation_value.unit + if unit is not None: + unit_uri_safe_identifier = self._get_unit_uri_safe_identifier(unit) specs.append( self._get_qb_units_column_specification(unit_uri_safe_identifier) ) From 700a8e009f2cac0718655db8b4bb7ce38c91de8d Mon Sep 17 00:00:00 2001 From: Rob Barry Date: Tue, 3 Aug 2021 11:39:40 +0100 Subject: [PATCH 2/7] Issue #57 - Some more bug fixes included. --- csvqb/Pipfile.lock | 2 +- csvqb/csvqb/models/cube/csvqb/catalog.py | 3 -- csvqb/csvqb/models/cube/cube.py | 14 +++--- csvqb/csvqb/models/pydanticmodel.py | 36 +++++++++++---- .../unit/configloaders/test_infojsontests.py | 6 +-- .../cube/qb/test_cubeqb_errorvalidation.py | 4 +- csvqb/csvqb/tests/unit/pydantic/test_cube.py | 44 ++++++++++++++++++- .../tests/unit/pydantic/test_cubemetadata.py | 4 +- csvqb/csvqb/tests/unit/test_baseunit.py | 9 ++++ 9 files changed, 92 insertions(+), 30 deletions(-) diff --git a/csvqb/Pipfile.lock b/csvqb/Pipfile.lock index 0db86332e..837c834eb 100644 --- a/csvqb/Pipfile.lock +++ b/csvqb/Pipfile.lock @@ -91,7 +91,7 @@ "pydantic": { "editable": true, "git": "https://github.com/robons/pydantic.git", - "ref": "8db068b1d22a91365bd1a3c6b6636ac8711199cd" + "ref": "9d8be23e7b43bcf87f10475c50278bf429e1c214" }, "pyparsing": { "hashes": [ diff --git a/csvqb/csvqb/models/cube/csvqb/catalog.py b/csvqb/csvqb/models/cube/csvqb/catalog.py index 1762e52f2..ab2c88be1 100644 --- a/csvqb/csvqb/models/cube/csvqb/catalog.py +++ b/csvqb/csvqb/models/cube/csvqb/catalog.py @@ -25,9 +25,6 @@ class CatalogMetadata(CatalogMetadataBase, UriIdentifiable): public_contact_point_uri: Optional[str] = field(default=None, repr=False) uri_safe_identifier_override: Optional[str] = field(default=None, repr=False) - def __post_init__(self): - print("Hello.") - def get_issued(self) -> datetime: return self.issued diff --git a/csvqb/csvqb/models/cube/cube.py b/csvqb/csvqb/models/cube/cube.py index 380bbad94..999b0277e 100644 --- a/csvqb/csvqb/models/cube/cube.py +++ b/csvqb/csvqb/models/cube/cube.py @@ -2,6 +2,7 @@ Cube ---- """ +from dataclasses import dataclass, field from typing import List, Optional, Set, TypeVar, Generic import pandas as pd @@ -14,16 +15,11 @@ TMetadata = TypeVar("TMetadata", bound=CatalogMetadataBase, covariant=True) +@dataclass class Cube(Generic[TMetadata], PydanticModel): - def __init__( - self, - metadata: TMetadata, - data: Optional[pd.DataFrame] = None, - columns: List[CsvColumn] = [], - ): - self.metadata: TMetadata = metadata - self.data: Optional[pd.DataFrame] = data - self.columns: List[CsvColumn] = columns + metadata: TMetadata + data: Optional[pd.DataFrame] = field(default=None, repr=False) + columns: List[CsvColumn] = field(default_factory=lambda: [], repr=False) def validate(self) -> List[ValidationError]: errors = self.pydantic_validation() diff --git a/csvqb/csvqb/models/pydanticmodel.py b/csvqb/csvqb/models/pydanticmodel.py index de4d6df7a..c62bb81e6 100644 --- a/csvqb/csvqb/models/pydanticmodel.py +++ b/csvqb/csvqb/models/pydanticmodel.py @@ -1,8 +1,9 @@ +import datetime from dataclasses import dataclass, asdict, fields, is_dataclass import pydantic import pydantic.dataclasses from pydantic import BaseConfig -from typing import ClassVar, Dict, Type, List, Iterable +from typing import ClassVar, Dict, Type, List, Iterable, Optional, Tuple from abc import ABC @@ -26,13 +27,27 @@ class Config(BaseConfig): extra = "forbid" arbitrary_types_allowed = True + validate_all = True @classmethod def _get_pydantic_constructor(cls) -> Type: if cls not in PydanticModel._map_class_to_pydantic_constructor: + new_cls = type( + f"{cls.__name__}_pydanticmodel_{datetime.datetime.now().timestamp()}", + (object,), + dict([(f.name, f) for f in fields(cls)]), + ) + + # Annotations need to be built up from all base classes, but overridden as per inheritence. + annotations = {} + for c in reversed(cls.mro()): + annotations_to_add = getattr(c, "__annotations__", {}) + annotations = dict(annotations, **annotations_to_add) + + setattr(new_cls, "__annotations__", annotations) PydanticModel._map_class_to_pydantic_constructor[ cls - ] = pydantic.dataclasses.dataclass(cls, config=PydanticModel.Config) + ] = pydantic.dataclasses.dataclass(new_cls, config=PydanticModel.Config) return PydanticModel._map_class_to_pydantic_constructor[cls] def as_dict(self) -> dict: @@ -49,21 +64,24 @@ def pydantic_validation(self) -> List[ValidationError]: """ pydantic_class_constructor = self.__class__._get_pydantic_constructor() + try: - validated_model = pydantic_class_constructor(**self.as_shallow_dict()) + thingy = self.as_shallow_dict() + validated_model = pydantic_class_constructor(**thingy) except pydantic.ValidationError as error: return [ ValidationError(f"{self} - {e['loc']} - {e['msg']}") for e in error.errors() ] - # Update this model's values with pydantic's coerced values - for field in fields(self): - field_value = getattr(validated_model, field.name) - if value_does_not_contain_pydantic_dataclasses(field_value): - setattr(self, field.name, field_value) + if validated_model is not None: + # Update this model's values with pydantic's coerced values + for field in fields(self): + field_value = getattr(validated_model, field.name) + + if value_does_not_contain_pydantic_dataclasses(field_value): + setattr(self, field.name, field_value) - # there are no validation errors return [] diff --git a/csvqb/csvqb/tests/unit/configloaders/test_infojsontests.py b/csvqb/csvqb/tests/unit/configloaders/test_infojsontests.py index 9a358c42f..e4fbfb0fe 100644 --- a/csvqb/csvqb/tests/unit/configloaders/test_infojsontests.py +++ b/csvqb/csvqb/tests/unit/configloaders/test_infojsontests.py @@ -52,7 +52,7 @@ def test_csv_cols_assumed_dimensions(): errors = cube.validate() errors += validate_qb_component_constraints(cube) - assert len(errors) == 0 + assert_num_validation_errors(errors, 0) def test_multiple_measures_and_units_loaded_in_uri_template(): @@ -114,7 +114,7 @@ def test_multiple_measures_and_units_loaded_in_uri_template(): errors = cube.validate() errors += validate_qb_component_constraints(cube) - assert len(errors) == 0 + assert_num_validation_errors(errors, 0) def test_cube_metadata_extracted_from_info_json(): @@ -222,7 +222,7 @@ def test_cube_metadata_extracted_from_info_json(): errors = cube.validate() errors += validate_qb_component_constraints(cube) - assert len(errors) == 0 + assert_num_validation_errors(errors, 0) if __name__ == "__main__": diff --git a/csvqb/csvqb/tests/unit/cube/qb/test_cubeqb_errorvalidation.py b/csvqb/csvqb/tests/unit/cube/qb/test_cubeqb_errorvalidation.py index f78f40ebd..a8e0685b6 100644 --- a/csvqb/csvqb/tests/unit/cube/qb/test_cubeqb_errorvalidation.py +++ b/csvqb/csvqb/tests/unit/cube/qb/test_cubeqb_errorvalidation.py @@ -99,8 +99,8 @@ def test_existing_dimension_output_uri_template(): QbColumn( "Value", QbSingleMeasureObservationValue( - ExistingQbUnit("http://some/unit"), ExistingQbMeasure("http://some/measure"), + ExistingQbUnit("http://some/unit"), ), ), ], @@ -109,7 +109,7 @@ def test_existing_dimension_output_uri_template(): errors = cube.validate() errors += validate_qb_component_constraints(cube) - assert len(errors) == 1 + assert_num_validation_errors(errors, 1) validation_errors = errors[0] assert ( "'Existing Dimension' - an ExistingQbDimension must have an output_uri_template defined." diff --git a/csvqb/csvqb/tests/unit/pydantic/test_cube.py b/csvqb/csvqb/tests/unit/pydantic/test_cube.py index f79dc3754..1ddfb431e 100644 --- a/csvqb/csvqb/tests/unit/pydantic/test_cube.py +++ b/csvqb/csvqb/tests/unit/pydantic/test_cube.py @@ -1,10 +1,50 @@ +import pandas as pd import pytest from csvqb.models.cube import * +from csvqb.tests.unit.test_baseunit import assert_num_validation_errors -def test_basic_cube_validation(): - assert True +def test_attribute_property_validation(): + """Testing that the pydantic validation does deep validation of a model.""" + metadata = CatalogMetadata("Some Qube") + data = pd.DataFrame({"A": ["a", "b", "c"], "Value": [1, 2, 3]}) + columns = [ + QbColumn( + "A", + NewQbDimension( + "Some New Dimension", + code_list=NewQbCodeList( + CatalogMetadata("Some Code List"), + # N.B. The Concepts shouldn't be strings, this should cause a validation error + concepts=["Hello", "World"], + ), + ), + ), + QbColumn( + "Value", + QbSingleMeasureObservationValue( + NewQbMeasure("Some Measure"), NewQbUnit("Some Unit") + ), + ), + ] + + cube = Cube(metadata, data, columns) + errors = cube.validate() + assert_num_validation_errors(errors, 2) + + # Ensure that the errors are related to the erroneous definition of the concepts as `str`s + # rather than `NewQbConcept`s. + error_1 = errors[0] + assert ( + "('columns', 0, 'component', 'code_list', 'concepts', 0) - instance of NewQbConcept, tuple or dict expected" + in error_1.message + ) + error_2 = errors[1] + assert ( + "('columns', 0, 'component', 'code_list', 'concepts', 1) - instance of NewQbConcept, tuple or dict expected" + in error_2.message + ) if __name__ == "__main__": diff --git a/csvqb/csvqb/tests/unit/pydantic/test_cubemetadata.py b/csvqb/csvqb/tests/unit/pydantic/test_cubemetadata.py index b3ccdc70c..90c0d8186 100644 --- a/csvqb/csvqb/tests/unit/pydantic/test_cubemetadata.py +++ b/csvqb/csvqb/tests/unit/pydantic/test_cubemetadata.py @@ -1,12 +1,14 @@ import pytest from csvqb.models.cube import * +from csvqb.tests.unit.test_baseunit import assert_num_validation_errors def test_basic_cube_metadata_validation(): + """Test that pydantic correctly marks a model as invalid when the wrong datatype is passed.""" invalid_catalog_metadata = CatalogMetadata(title=None) errors = invalid_catalog_metadata.pydantic_validation() - assert len(errors) == 1, print(", ".join([e.message for e in errors])) + assert_num_validation_errors(errors, 1) error = errors[0] assert "none is not an allowed value" in error.message assert "title" in error.message diff --git a/csvqb/csvqb/tests/unit/test_baseunit.py b/csvqb/csvqb/tests/unit/test_baseunit.py index 89964b2f9..ee713afc2 100644 --- a/csvqb/csvqb/tests/unit/test_baseunit.py +++ b/csvqb/csvqb/tests/unit/test_baseunit.py @@ -1,4 +1,7 @@ from pathlib import Path +from typing import List + +from csvqb.models.validationerror import ValidationError def get_test_base_dir() -> Path: @@ -10,3 +13,9 @@ def get_test_base_dir() -> Path: def get_test_cases_dir() -> Path: return get_test_base_dir() / "test-cases" + + +def assert_num_validation_errors( + errors: List[ValidationError], num_errors_expected: int +): + assert num_errors_expected == len(errors), ", ".join([e.message for e in errors]) From 586f59e7538554f5a6b3f6522d3e516cbb88d694 Mon Sep 17 00:00:00 2001 From: Rob Barry Date: Tue, 3 Aug 2021 15:43:59 +0100 Subject: [PATCH 3/7] Issue #57 - Fixed bug where field with a `default_factory` could not be placed after fields with `default` values. --- csvqb/Pipfile.lock | 4 +- csvqb/csvqb/models/cube/csvqb/catalog.py | 6 +-- csvqb/csvqb/models/pydanticmodel.py | 62 +++++++++++------------- 3 files changed, 33 insertions(+), 39 deletions(-) diff --git a/csvqb/Pipfile.lock b/csvqb/Pipfile.lock index 837c834eb..7f52dfa2e 100644 --- a/csvqb/Pipfile.lock +++ b/csvqb/Pipfile.lock @@ -91,7 +91,7 @@ "pydantic": { "editable": true, "git": "https://github.com/robons/pydantic.git", - "ref": "9d8be23e7b43bcf87f10475c50278bf429e1c214" + "ref": "f0339d2178634bee95d9cc6d4e925415881f9f27" }, "pyparsing": { "hashes": [ @@ -172,7 +172,7 @@ "sha256:93aa393e9d6c54c5cd570ccadd8edad61ea0c4b9ea7a01409020c9aa019eb442", "sha256:dd83cd4b5b460958838f6eb3000c660b1f9caf2a5b1de4264e941512f603258a" ], - "markers": "platform_system == 'Darwin'", + "markers": "sys_platform == 'darwin'", "version": "==0.1.2" }, "attrs": { diff --git a/csvqb/csvqb/models/cube/csvqb/catalog.py b/csvqb/csvqb/models/cube/csvqb/catalog.py index ab2c88be1..1774bd674 100644 --- a/csvqb/csvqb/models/cube/csvqb/catalog.py +++ b/csvqb/csvqb/models/cube/csvqb/catalog.py @@ -13,14 +13,14 @@ @dataclass class CatalogMetadata(CatalogMetadataBase, UriIdentifiable): - theme_uris: list[str] = field(default_factory=list, repr=False) - keywords: list[str] = field(default_factory=list, repr=False) - issued: datetime = field(default_factory=lambda: datetime.now(), repr=False) summary: Optional[str] = field(default=None, repr=False) description: Optional[str] = field(default=None, repr=False) creator_uri: Optional[str] = field(default=None, repr=False) publisher_uri: Optional[str] = field(default=None, repr=False) landing_page_uri: Optional[str] = field(default=None, repr=False) + theme_uris: list[str] = field(default_factory=list, repr=False) + keywords: list[str] = field(default_factory=list, repr=False) + issued: datetime = field(default_factory=lambda: datetime.now(), repr=False) license_uri: Optional[str] = field(default=None, repr=False) public_contact_point_uri: Optional[str] = field(default=None, repr=False) uri_safe_identifier_override: Optional[str] = field(default=None, repr=False) diff --git a/csvqb/csvqb/models/pydanticmodel.py b/csvqb/csvqb/models/pydanticmodel.py index c62bb81e6..be13d6c00 100644 --- a/csvqb/csvqb/models/pydanticmodel.py +++ b/csvqb/csvqb/models/pydanticmodel.py @@ -1,14 +1,17 @@ -import datetime +import dataclasses from dataclasses import dataclass, asdict, fields, is_dataclass import pydantic import pydantic.dataclasses from pydantic import BaseConfig -from typing import ClassVar, Dict, Type, List, Iterable, Optional, Tuple +from typing import ClassVar, Dict, Type, List, Iterable, Union from abc import ABC from .validationerror import ValidationError +_map_class_to_pydantic_constructor: ClassVar[Dict[Type, Type]] = dict() +"""_map_class_to_pydantic_constructor - Cache of pydantic constructor corresponding to a given class.""" + @dataclass class PydanticModel(ABC): @@ -19,9 +22,6 @@ class PydanticModel(ABC): validation until the `validate` method is called. """ - _map_class_to_pydantic_constructor: ClassVar[Dict[Type, Type]] = dict() - """_map_class_to_pydantic_constructor - Cache of pydantic constructor corresponding to a given class.""" - class Config(BaseConfig): """pydantic Configuration - see https://pydantic-docs.helpmanual.io/usage/model_config/""" @@ -31,58 +31,52 @@ class Config(BaseConfig): @classmethod def _get_pydantic_constructor(cls) -> Type: - if cls not in PydanticModel._map_class_to_pydantic_constructor: - new_cls = type( - f"{cls.__name__}_pydanticmodel_{datetime.datetime.now().timestamp()}", - (object,), - dict([(f.name, f) for f in fields(cls)]), + if cls not in _map_class_to_pydantic_constructor: + _map_class_to_pydantic_constructor[cls] = pydantic.dataclasses.dataclass( + cls, + config=PydanticModel.Config, ) - - # Annotations need to be built up from all base classes, but overridden as per inheritence. - annotations = {} - for c in reversed(cls.mro()): - annotations_to_add = getattr(c, "__annotations__", {}) - annotations = dict(annotations, **annotations_to_add) - - setattr(new_cls, "__annotations__", annotations) - PydanticModel._map_class_to_pydantic_constructor[ - cls - ] = pydantic.dataclasses.dataclass(new_cls, config=PydanticModel.Config) - return PydanticModel._map_class_to_pydantic_constructor[cls] + return _map_class_to_pydantic_constructor[cls] def as_dict(self) -> dict: """Use python dataclasses method to return this model as a dictionary.""" return asdict(self) - def as_shallow_dict(self) -> dict: + def _as_shallow_dict(self) -> dict: return dict([(f.name, getattr(self, f.name)) for f in fields(self)]) - def pydantic_validation(self) -> List[ValidationError]: - """ - Validate this model using pydantic. - Checks that all model attributes match the expected annotated data type. **Coerces values** where possible. - """ - + def _to_pydantic_dataclass_or_validation_errors( + self, + ) -> Union[object, List[ValidationError]]: pydantic_class_constructor = self.__class__._get_pydantic_constructor() - try: - thingy = self.as_shallow_dict() - validated_model = pydantic_class_constructor(**thingy) + validated_model = pydantic_class_constructor(**self._as_shallow_dict()) except pydantic.ValidationError as error: return [ ValidationError(f"{self} - {e['loc']} - {e['msg']}") for e in error.errors() ] - if validated_model is not None: + return validated_model + + def pydantic_validation(self) -> List[ValidationError]: + """ + Validate this model using pydantic. + Checks that all model attributes match the expected annotated data type. **Coerces values** where possible. + """ + validated_model_or_errors = self._to_pydantic_dataclass_or_validation_errors() + if dataclasses.is_dataclass(validated_model_or_errors): + validated_model = validated_model_or_errors # Update this model's values with pydantic's coerced values for field in fields(self): field_value = getattr(validated_model, field.name) if value_does_not_contain_pydantic_dataclasses(field_value): setattr(self, field.name, field_value) + return [] - return [] + # Else we have validation errors + return validated_model_or_errors def value_does_not_contain_pydantic_dataclasses(value) -> bool: From 4179d380e41f9dc983906281ca515a72cd2cf65c Mon Sep 17 00:00:00 2001 From: Rob Barry Date: Tue, 3 Aug 2021 16:10:47 +0100 Subject: [PATCH 4/7] Small fixes. --- csvqb/csvqb/models/cube/cube.py | 6 +++++- csvqb/csvqb/models/pydanticmodel.py | 7 ++++--- csvqb/csvqb/models/validationerror.py | 3 +++ 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/csvqb/csvqb/models/cube/cube.py b/csvqb/csvqb/models/cube/cube.py index 999b0277e..f03a610be 100644 --- a/csvqb/csvqb/models/cube/cube.py +++ b/csvqb/csvqb/models/cube/cube.py @@ -23,7 +23,11 @@ class Cube(Generic[TMetadata], PydanticModel): def validate(self) -> List[ValidationError]: errors = self.pydantic_validation() - errors += self._validate_columns() + try: + errors += self._validate_columns() + except Exception as e: + errors.append(ValidationError(str(e))) + return errors def _validate_columns(self) -> List[ValidationError]: diff --git a/csvqb/csvqb/models/pydanticmodel.py b/csvqb/csvqb/models/pydanticmodel.py index be13d6c00..958a27091 100644 --- a/csvqb/csvqb/models/pydanticmodel.py +++ b/csvqb/csvqb/models/pydanticmodel.py @@ -74,9 +74,10 @@ def pydantic_validation(self) -> List[ValidationError]: if value_does_not_contain_pydantic_dataclasses(field_value): setattr(self, field.name, field_value) return [] - - # Else we have validation errors - return validated_model_or_errors + else: + assert isinstance(validated_model_or_errors, list) + # Else we have validation errors + return validated_model_or_errors def value_does_not_contain_pydantic_dataclasses(value) -> bool: diff --git a/csvqb/csvqb/models/validationerror.py b/csvqb/csvqb/models/validationerror.py index 5b4f67fc5..dea606176 100644 --- a/csvqb/csvqb/models/validationerror.py +++ b/csvqb/csvqb/models/validationerror.py @@ -7,3 +7,6 @@ class ValidationError: def __init__(self, message: str): self.message: str = message + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({self.message})" From d861bc6617da26960a3d3eb474b1500035776421 Mon Sep 17 00:00:00 2001 From: Rob Barry Date: Tue, 3 Aug 2021 16:40:37 +0100 Subject: [PATCH 5/7] Improving some doc string comments. --- csvqb/csvqb/models/pydanticmodel.py | 28 ++++++++++++++++++++------- csvqb/csvqb/models/uriidentifiable.py | 10 +++++++++- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/csvqb/csvqb/models/pydanticmodel.py b/csvqb/csvqb/models/pydanticmodel.py index 958a27091..41b1a9570 100644 --- a/csvqb/csvqb/models/pydanticmodel.py +++ b/csvqb/csvqb/models/pydanticmodel.py @@ -3,7 +3,7 @@ import pydantic import pydantic.dataclasses from pydantic import BaseConfig -from typing import ClassVar, Dict, Type, List, Iterable, Union +from typing import ClassVar, Dict, Type, List, Iterable, Union, Any from abc import ABC @@ -31,6 +31,7 @@ class Config(BaseConfig): @classmethod def _get_pydantic_constructor(cls) -> Type: + """Returns a constructor for creating an instance of this model which is a *pydantic* dataclass.""" if cls not in _map_class_to_pydantic_constructor: _map_class_to_pydantic_constructor[cls] = pydantic.dataclasses.dataclass( cls, @@ -43,11 +44,17 @@ def as_dict(self) -> dict: return asdict(self) def _as_shallow_dict(self) -> dict: + """Returns a dictionary which is essentially a shallow copy of this dataclass.""" return dict([(f.name, getattr(self, f.name)) for f in fields(self)]) def _to_pydantic_dataclass_or_validation_errors( self, ) -> Union[object, List[ValidationError]]: + """ + Converts this model to a pydantic dataclass. Captures any validation errors in the process. + + Returns: Either a pydantic dataclass is validation was successful **OTHERWISE** it returns a list of errors. + """ pydantic_class_constructor = self.__class__._get_pydantic_constructor() try: validated_model = pydantic_class_constructor(**self._as_shallow_dict()) @@ -62,6 +69,7 @@ def _to_pydantic_dataclass_or_validation_errors( def pydantic_validation(self) -> List[ValidationError]: """ Validate this model using pydantic. + Checks that all model attributes match the expected annotated data type. **Coerces values** where possible. """ validated_model_or_errors = self._to_pydantic_dataclass_or_validation_errors() @@ -71,7 +79,9 @@ def pydantic_validation(self) -> List[ValidationError]: for field in fields(self): field_value = getattr(validated_model, field.name) - if value_does_not_contain_pydantic_dataclasses(field_value): + if not value_is_list_of_or_single_pydantic_dataclass(field_value): + # Don't copy objects which have been cast to pydantic dataclasses + # They will bring their validation functionality with them. setattr(self, field.name, field_value) return [] else: @@ -80,16 +90,20 @@ def pydantic_validation(self) -> List[ValidationError]: return validated_model_or_errors -def value_does_not_contain_pydantic_dataclasses(value) -> bool: +def value_is_list_of_or_single_pydantic_dataclass(value: Any) -> bool: + """ + Informs the caller whether the given `value` is a pydantic dataclass, or if it is a list of pydantic dataclasses. + """ value_is_iterable = isinstance(value, Iterable) and not isinstance(value, str) if value_is_iterable: # Only copy iterables if all of their items can be copied. - return all([value_does_not_contain_pydantic_dataclasses(v) for v in value]) + return not any( + [value_is_list_of_or_single_pydantic_dataclass(v) for v in value] + ) elif isinstance(value, object): - # Don't copy object which have been cast to pydantic dataclasses cls = value.__class__ - return (not is_dataclass(cls)) or pydantic.dataclasses.is_builtin_dataclass( - value.__class__ + return is_dataclass(cls) and ( + not pydantic.dataclasses.is_builtin_dataclass(cls) ) # Anything else should be fine. diff --git a/csvqb/csvqb/models/uriidentifiable.py b/csvqb/csvqb/models/uriidentifiable.py index 661c9c890..aaf1028f7 100644 --- a/csvqb/csvqb/models/uriidentifiable.py +++ b/csvqb/csvqb/models/uriidentifiable.py @@ -7,7 +7,13 @@ @dataclasses.dataclass class UriIdentifiable(ABC): - """Requires that implementing classes provide callers with a way of overriding the uri_safe_identifier property.""" + """ + Mixin which allows a class to represent something which is URI addressable. + + It allows the user to provide a `uri_safe_identifier_override` or neglect to provide one. If an override is not + provided, then the string identifier returned by the abstract method `get_identifier` is turned into a URI-safe + token which is returned by the `uri_safe_identifier` property. + """ @abstractmethod def get_identifier(self) -> str: @@ -19,6 +25,7 @@ def get_identifier(self) -> str: @property @abstractmethod def uri_safe_identifier_override(self) -> Optional[str]: + """An override for the URI-safe string which should be used to identify this object.""" pass @uri_safe_identifier_override.setter @@ -28,6 +35,7 @@ def uri_safe_identifier_override(self, value: Optional[str]) -> None: @property def uri_safe_identifier(self) -> str: + """A URI-safe string which should be used to identify this object.""" return self.uri_safe_identifier_override or uri_safe(self.get_identifier()) @uri_safe_identifier.setter From 27bbfa90421da9bc0bcc61bf4dec1526588914b4 Mon Sep 17 00:00:00 2001 From: Rob Barry Date: Tue, 3 Aug 2021 17:17:08 +0100 Subject: [PATCH 6/7] Oops. --- csvqb/csvqb/models/cube/csvqb/components/dimension.py | 1 - csvqb/csvqb/models/pydanticmodel.py | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/csvqb/csvqb/models/cube/csvqb/components/dimension.py b/csvqb/csvqb/models/cube/csvqb/components/dimension.py index 6c55bcdcf..d875697bc 100644 --- a/csvqb/csvqb/models/cube/csvqb/components/dimension.py +++ b/csvqb/csvqb/models/cube/csvqb/components/dimension.py @@ -40,7 +40,6 @@ def validate_data(self, data: PandasDataTypes) -> List[ValidationError]: class NewQbDimension(QbDimension, UriIdentifiable): label: str description: Optional[str] = field(default=None, repr=False) - # todo: Ensure we link the code-list to the qb column component somehow code_list: Optional[QbCodeList] = field(default=None, repr=False) parent_dimension_uri: Optional[str] = field(default=None, repr=False) source_uri: Optional[str] = field(default=None, repr=False) diff --git a/csvqb/csvqb/models/pydanticmodel.py b/csvqb/csvqb/models/pydanticmodel.py index 41b1a9570..2b0ad3ce2 100644 --- a/csvqb/csvqb/models/pydanticmodel.py +++ b/csvqb/csvqb/models/pydanticmodel.py @@ -97,9 +97,7 @@ def value_is_list_of_or_single_pydantic_dataclass(value: Any) -> bool: value_is_iterable = isinstance(value, Iterable) and not isinstance(value, str) if value_is_iterable: # Only copy iterables if all of their items can be copied. - return not any( - [value_is_list_of_or_single_pydantic_dataclass(v) for v in value] - ) + return any([value_is_list_of_or_single_pydantic_dataclass(v) for v in value]) elif isinstance(value, object): cls = value.__class__ return is_dataclass(cls) and ( From c3ef54d6b0a7c93effa30eeeba725fa25c0c374b Mon Sep 17 00:00:00 2001 From: Rob Barry Date: Tue, 3 Aug 2021 17:17:32 +0100 Subject: [PATCH 7/7] Oops. --- csvqb/csvqb/models/pydanticmodel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csvqb/csvqb/models/pydanticmodel.py b/csvqb/csvqb/models/pydanticmodel.py index 2b0ad3ce2..779e5c464 100644 --- a/csvqb/csvqb/models/pydanticmodel.py +++ b/csvqb/csvqb/models/pydanticmodel.py @@ -105,4 +105,4 @@ def value_is_list_of_or_single_pydantic_dataclass(value: Any) -> bool: ) # Anything else should be fine. - return True + return False