Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue #57 - Introducing DataClasses and Pydantic (Static Type) Validation #99

Merged
merged 7 commits into from
Aug 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions csvqb/Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ pandas = "*"
unidecode = "*"
csvwlib-models = {editable = true,path = "./../sharedmodels"}
rdflib-jsonld = "*"
pydantic = {editable = true,git = "https://github.com/robons/pydantic.git"}

[requires]
python_version = "3.9"
Expand Down
17 changes: 15 additions & 2 deletions csvqb/Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 8 additions & 5 deletions csvqb/csvqb/configloaders/infojson.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
N.B. this should **not** be used by external users and should be moved into the gss-utils package in Issue #101:
https://github.com/GSS-Cogs/csvwlib/issues/101
"""

import datetime
from typing import Dict, List, Any, Optional, Union
from pathlib import Path
import json
Expand Down Expand Up @@ -103,19 +103,20 @@ def _metadata_from_dict(config: dict) -> "CatalogMetadata":
config, "publisher", lambda p: str(GOV[uri_safe(p)])
)
theme_uris = [str(GDP.term(t)) for t in config.get("families", [])]
dt_issued = get_with_func_or_none(config, "published", parser.parse) or datetime.datetime.now()
return CatalogMetadata(
get_from_dict_ensure_exists(config, "title"),
uri_safe_identifier=get_from_dict_ensure_exists(config, "id"),
title=get_from_dict_ensure_exists(config, "title"),
summary=config.get("summary"),
description=config.get("description"),
creator_uri=publisher,
publisher_uri=publisher,
issued=get_with_func_or_none(config, "published", parser.parse),
issued=dt_issued,
theme_uris=theme_uris,
keywords=config.get("keywords", []),
landing_page_uri=config.get("landingPage"),
license_uri=config.get("license"),
public_contact_point_uri=config.get("contactUri"),
uri_safe_identifier_override=get_from_dict_ensure_exists(config, "id"),
)


Expand Down Expand Up @@ -215,7 +216,9 @@ def _get_column_for_metadata_config(
measure_component = ExistingQbMeasure(maybe_measure_uri)
unit_component = ExistingQbUnit(maybe_unit_uri)
observation_value = QbSingleMeasureObservationValue(
measure_component, unit_component, maybe_data_type
measure=measure_component,
unit=unit_component,
data_type=maybe_data_type or "decimal"
)
return QbColumn(column_name, observation_value)
elif maybe_data_type is not None:
Expand Down
29 changes: 14 additions & 15 deletions csvqb/csvqb/models/cube/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,22 @@
Catalog Metadata (base)
-----------------------
"""
from dataclasses import dataclass
from datetime import datetime
from typing import Optional, List
from abc import ABC
from typing import Optional
from abc import ABC, abstractmethod

from csvqb.models.validationerror import ValidationError
from csvqb.models.pydanticmodel import PydanticModel


class CatalogMetadataBase(ABC):
def __init__(
self,
title: str,
description: Optional[str] = None,
issued: Optional[datetime] = None,
):
self.title: str = title
self.description: Optional[str] = description
self.issued: Optional[datetime] = issued
@dataclass
class CatalogMetadataBase(PydanticModel, ABC):
title: str
canwaf marked this conversation as resolved.
Show resolved Hide resolved

def validate(self) -> List[ValidationError]:
return [] # TODO: implement this
@abstractmethod
def get_description(self) -> Optional[str]:
pass

@abstractmethod
def get_issued(self) -> datetime:
pass
45 changes: 14 additions & 31 deletions csvqb/csvqb/models/cube/columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,50 +3,33 @@
----------------------
"""
from abc import ABC, abstractmethod
import pandas as pd
from dataclasses import dataclass, field
from typing import Optional, List


from csvqb.utils.uri import uri_safe
from csvqb.inputs import PandasDataTypes
from csvqb.models.pydanticmodel import PydanticModel
from csvqb.models.uriidentifiable import UriIdentifiable
from csvqb.models.validationerror import ValidationError


class CsvColumn(ABC):
def __init__(
self, csv_column_title: str, uri_safe_identifier: Optional[str] = None
):
self.csv_column_title: str = csv_column_title
self.uri_safe_identifier: str = (
uri_safe(csv_column_title)
if uri_safe_identifier is None
else uri_safe_identifier
)
@dataclass
class CsvColumn(PydanticModel, UriIdentifiable, ABC):
csv_column_title: str

@abstractmethod
def __str__(self) -> str:
pass
def get_identifier(self) -> str:
return self.csv_column_title

@abstractmethod
def validate(
self, column_data: Optional[pd.Series] = None
) -> List[ValidationError]:
def validate_data(self, data: PandasDataTypes) -> List[ValidationError]:
pass


@dataclass
class SuppressedCsvColumn(CsvColumn):
"""
A column which is only defined in the CSV and should not be propagated.
"""
uri_safe_identifier_override: Optional[str] = field(default=None, repr=False)

def __init__(
self, csv_column_title: str, uri_safe_identifier: Optional[str] = None
):
CsvColumn.__init__(self, csv_column_title, uri_safe_identifier)

def __str__(self) -> str:
return f"SuppressedCsvColumn('{self.csv_column_title}')"

def validate(
self, column_data: Optional[pd.Series] = None
) -> List[ValidationError]:
return [] # TODO: implement this
def validate_data(self, data: PandasDataTypes) -> List[ValidationError]:
return []
57 changes: 24 additions & 33 deletions csvqb/csvqb/models/cube/csvqb/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,46 +2,37 @@
Catalog Metadata (DCAT)
-----------------------
"""
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional, List
from typing import Optional
from sharedmodels.rdf import dcat

from csvqb.models.validationerror import ValidationError
from csvqb.utils.uri import uri_safe
from csvqb.models.cube.catalog import CatalogMetadataBase
from csvqb.models.uriidentifiable import UriIdentifiable


class CatalogMetadata(CatalogMetadataBase):
def __init__(
self,
title: str,
uri_safe_identifier: Optional[str] = None,
summary: Optional[str] = None,
description: Optional[str] = None,
creator_uri: Optional[str] = None,
publisher_uri: Optional[str] = None,
issued: Optional[datetime] = None,
theme_uris: List[str] = [],
keywords: List[str] = [],
landing_page_uri: Optional[str] = None,
license_uri: Optional[str] = None,
public_contact_point_uri: Optional[str] = None,
):
CatalogMetadataBase.__init__(
self, title, description=description, issued=issued
)
self.uri_safe_identifier: str = uri_safe_identifier or uri_safe(title)
self.summary: Optional[str] = summary
self.creator_uri: Optional[str] = creator_uri
self.publisher_uri: Optional[str] = publisher_uri
self.theme_uris: List[str] = theme_uris
self.keywords: List[str] = keywords
self.landing_page_uri: Optional[str] = landing_page_uri
self.license_uri: Optional[str] = license_uri
self.public_contact_point_uri: Optional[str] = public_contact_point_uri
@dataclass
class CatalogMetadata(CatalogMetadataBase, UriIdentifiable):
summary: Optional[str] = field(default=None, repr=False)
description: Optional[str] = field(default=None, repr=False)
creator_uri: Optional[str] = field(default=None, repr=False)
publisher_uri: Optional[str] = field(default=None, repr=False)
landing_page_uri: Optional[str] = field(default=None, repr=False)
theme_uris: list[str] = field(default_factory=list, repr=False)
keywords: list[str] = field(default_factory=list, repr=False)
issued: datetime = field(default_factory=lambda: datetime.now(), repr=False)
license_uri: Optional[str] = field(default=None, repr=False)
public_contact_point_uri: Optional[str] = field(default=None, repr=False)
uri_safe_identifier_override: Optional[str] = field(default=None, repr=False)

def validate(self) -> List[ValidationError]:
return CatalogMetadataBase.validate(self) + [] # TODO: augment this
def get_issued(self) -> datetime:
return self.issued

def get_description(self) -> Optional[str]:
return self.description

def get_identifier(self) -> str:
return self.title

def configure_dcat_dataset(self, dataset: dcat.Dataset) -> None:
dt_now = datetime.now()
Expand Down
38 changes: 10 additions & 28 deletions csvqb/csvqb/models/cube/csvqb/columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,46 +2,28 @@
Columns with qb Components
--------------------------
"""
from dataclasses import field, dataclass
from typing import Optional, TypeVar, Generic, List
import pandas as pd


from csvqb.inputs import PandasDataTypes, pandas_input_to_columnar
from .components.datastructuredefinition import ColumnarQbDataStructureDefinition
from csvqb.models.validationerror import ValidationError
from csvqb.models.cube.columns import CsvColumn

from ...validationerror import ValidationError

QbColumnarDsdType = TypeVar(
"QbColumnarDsdType", bound=ColumnarQbDataStructureDefinition, covariant=True
)


@dataclass
class QbColumn(CsvColumn, Generic[QbColumnarDsdType]):
robons marked this conversation as resolved.
Show resolved Hide resolved
"""
A CSV column and the qb components it relates to.
"""
csv_column_title: str
component: QbColumnarDsdType
output_uri_template: Optional[str] = field(default=None, repr=False)
uri_safe_identifier_override: Optional[str] = field(default=None, repr=False)

def __init__(
self,
csv_column_title: str,
component: QbColumnarDsdType,
output_uri_template: Optional[str] = None,
uri_safe_identifier: Optional[str] = None,
):
CsvColumn.__init__(self, csv_column_title, uri_safe_identifier)
if not isinstance(component, ColumnarQbDataStructureDefinition):
raise Exception(
f"{component} of type {type(component)} is not a valid columnar component."
)
self.component: QbColumnarDsdType = component
self.output_uri_template: Optional[str] = output_uri_template

def __str__(self) -> str:
return f"QbColumn('{self.csv_column_title}', {self.component})"

def validate(self, column_data: Optional[pd.Series]) -> List[ValidationError]:
errors = self.component.validate()
if column_data is not None:
errors += self.component.validate_data(column_data)

return errors
def validate_data(self, data: PandasDataTypes) -> List[ValidationError]:
return self.component.validate_data(data)
Loading