From 8136a578814568fac0f9e48063512730f7b13c1d Mon Sep 17 00:00:00 2001 From: Rob Barry Date: Fri, 23 Jul 2021 14:20:25 +0100 Subject: [PATCH] Just doing whatever the `black` formatter tells me to do. --- csvqb/csvqb/models/cube/catalog.py | 11 +- csvqb/csvqb/models/cube/columns.py | 25 +- csvqb/csvqb/models/cube/csvqb/catalog.py | 36 +- csvqb/csvqb/models/cube/csvqb/columns.py | 22 +- .../models/cube/csvqb/components/attribute.py | 58 ++- .../models/cube/csvqb/components/codelist.py | 40 +- .../components/datastructuredefinition.py | 9 +- .../models/cube/csvqb/components/dimension.py | 62 +-- .../models/cube/csvqb/components/measure.py | 36 +- .../cube/csvqb/components/observedvalue.py | 20 +- .../models/cube/csvqb/components/unit.py | 49 +- csvqb/csvqb/models/cube/cube.py | 26 +- .../models/rdf/conceptschemeincatalog.py | 4 +- .../tests/behaviour/steps/skoscodelists.py | 19 +- .../tests/unit/configloaders/infojsontests.py | 89 +++- .../csvqb/tests/unit/cube/errorvalidation.py | 15 +- .../tests/unit/cube/qb/errorvalidation.py | 107 +++-- .../csvqb/tests/unit/writers/qbwritertests.py | 447 ++++++++++++------ .../unit/writers/skoscodelistwritertests.py | 24 +- csvqb/csvqb/utils/dict.py | 6 +- csvqb/csvqb/utils/uri.py | 12 +- csvqb/csvqb/writers/qbwriter.py | 299 ++++++++---- csvqb/csvqb/writers/skoscodelistwriter.py | 49 +- devtools/devtools/behave/csv2rdf.py | 14 +- devtools/devtools/behave/csvlint.py | 13 +- devtools/devtools/behave/rdf.py | 19 +- devtools/devtools/behave/sparqltests.py | 12 +- devtools/devtools/helpers/csvwhelpers.py | 1 - devtools/devtools/helpers/tar.py | 3 +- 29 files changed, 990 insertions(+), 537 deletions(-) diff --git a/csvqb/csvqb/models/cube/catalog.py b/csvqb/csvqb/models/cube/catalog.py index 52127500f..d3dc08b22 100644 --- a/csvqb/csvqb/models/cube/catalog.py +++ b/csvqb/csvqb/models/cube/catalog.py @@ -6,11 +6,12 @@ class CatalogMetadataBase(ABC): - - def __init__(self, - title: str, - description: Optional[str] = None, - issued: Optional[datetime] = None): + def __init__( + self, + title: str, + description: Optional[str] = None, + issued: Optional[datetime] = None, + ): self.title: str = title self.description: Optional[str] = description self.issued: Optional[datetime] = issued diff --git a/csvqb/csvqb/models/cube/columns.py b/csvqb/csvqb/models/cube/columns.py index dc83bbed1..3f6a36ef2 100644 --- a/csvqb/csvqb/models/cube/columns.py +++ b/csvqb/csvqb/models/cube/columns.py @@ -8,30 +8,41 @@ class CsvColumn(ABC): - - def __init__(self, csv_column_title: str, uri_safe_identifier: Optional[str] = None): + def __init__( + self, csv_column_title: str, uri_safe_identifier: Optional[str] = None + ): self.csv_column_title: str = csv_column_title - self.uri_safe_identifier: str = uri_safe(csv_column_title) if uri_safe_identifier is None else uri_safe_identifier + self.uri_safe_identifier: str = ( + uri_safe(csv_column_title) + if uri_safe_identifier is None + else uri_safe_identifier + ) @abstractmethod def __str__(self) -> str: pass @abstractmethod - def validate(self, column_data: Optional[pd.Series] = None) -> List[ValidationError]: + def validate( + self, column_data: Optional[pd.Series] = None + ) -> List[ValidationError]: pass class SuppressedCsvColumn(CsvColumn): """ - A column which is only defined in the CSV and should not be propagated. + A column which is only defined in the CSV and should not be propagated. """ - def __init__(self, csv_column_title: str, uri_safe_identifier: Optional[str] = None): + def __init__( + self, csv_column_title: str, uri_safe_identifier: Optional[str] = None + ): CsvColumn.__init__(self, csv_column_title, uri_safe_identifier) def __str__(self) -> str: return f"SuppressedCsvColumn('{self.csv_column_title}')" - def validate(self, column_data: Optional[pd.Series] = None) -> List[ValidationError]: + def validate( + self, column_data: Optional[pd.Series] = None + ) -> List[ValidationError]: return [] # TODO: implement this diff --git a/csvqb/csvqb/models/cube/csvqb/catalog.py b/csvqb/csvqb/models/cube/csvqb/catalog.py index 33265917a..e0b6996cb 100644 --- a/csvqb/csvqb/models/cube/csvqb/catalog.py +++ b/csvqb/csvqb/models/cube/csvqb/catalog.py @@ -7,21 +7,24 @@ class CatalogMetadata(CatalogMetadataBase): - - def __init__(self, - title: str, - uri_safe_identifier: Optional[str] = None, - summary: Optional[str] = None, - description: Optional[str] = None, - creator_uri: Optional[str] = None, - publisher_uri: Optional[str] = None, - issued: Optional[datetime] = None, - theme_uris: List[str] = [], - keywords: List[str] = [], - landing_page_uri: Optional[str] = None, - license_uri: Optional[str] = None, - public_contact_point_uri: Optional[str] = None): - CatalogMetadataBase.__init__(self, title, description=description, issued=issued) + def __init__( + self, + title: str, + uri_safe_identifier: Optional[str] = None, + summary: Optional[str] = None, + description: Optional[str] = None, + creator_uri: Optional[str] = None, + publisher_uri: Optional[str] = None, + issued: Optional[datetime] = None, + theme_uris: List[str] = [], + keywords: List[str] = [], + landing_page_uri: Optional[str] = None, + license_uri: Optional[str] = None, + public_contact_point_uri: Optional[str] = None, + ): + CatalogMetadataBase.__init__( + self, title, description=description, issued=issued + ) self.uri_safe_identifier: str = uri_safe_identifier or uri_safe(title) self.summary: Optional[str] = summary self.creator_uri: Optional[str] = creator_uri @@ -33,5 +36,4 @@ def __init__(self, self.public_contact_point_uri: Optional[str] = public_contact_point_uri def validate(self) -> List[ValidationError]: - return CatalogMetadataBase.validate(self) \ - + [] # TODO: augment this + return CatalogMetadataBase.validate(self) + [] # TODO: augment this diff --git a/csvqb/csvqb/models/cube/csvqb/columns.py b/csvqb/csvqb/models/cube/csvqb/columns.py index 46959f035..647412f3d 100644 --- a/csvqb/csvqb/models/cube/csvqb/columns.py +++ b/csvqb/csvqb/models/cube/csvqb/columns.py @@ -7,22 +7,28 @@ from csvqb.models.cube.columns import CsvColumn -QbColumnarDsdType = TypeVar("QbColumnarDsdType", bound=ColumnarQbDataStructureDefinition, covariant=True) +QbColumnarDsdType = TypeVar( + "QbColumnarDsdType", bound=ColumnarQbDataStructureDefinition, covariant=True +) class QbColumn(CsvColumn, Generic[QbColumnarDsdType]): """ - A CSV column and the qb components it relates to. + A CSV column and the qb components it relates to. """ - def __init__(self, - csv_column_title: str, - component: QbColumnarDsdType, - output_uri_template: Optional[str] = None, - uri_safe_identifier: Optional[str] = None): + def __init__( + self, + csv_column_title: str, + component: QbColumnarDsdType, + output_uri_template: Optional[str] = None, + uri_safe_identifier: Optional[str] = None, + ): CsvColumn.__init__(self, csv_column_title, uri_safe_identifier) if not isinstance(component, ColumnarQbDataStructureDefinition): - raise Exception(f"{component} of type {type(component)} is not a valid columnar component.") + raise Exception( + f"{component} of type {type(component)} is not a valid columnar component." + ) self.component: QbColumnarDsdType = component self.output_uri_template: Optional[str] = output_uri_template diff --git a/csvqb/csvqb/models/cube/csvqb/components/attribute.py b/csvqb/csvqb/models/cube/csvqb/components/attribute.py index c79fba6a5..7333ad5b2 100644 --- a/csvqb/csvqb/models/cube/csvqb/components/attribute.py +++ b/csvqb/csvqb/models/cube/csvqb/components/attribute.py @@ -17,7 +17,6 @@ def __init__(self, is_required: bool): class ExistingQbAttribute(QbAttribute): - def __init__(self, uri: str, is_required: bool = False): QbAttribute.__init__(self, is_required) self.attribute_uri: str = uri @@ -33,39 +32,46 @@ def validate_data(self, data: pd.Series) -> List[ValidationError]: class NewQbAttribute(QbAttribute): - - def __init__(self, - label: str, - uri_safe_identifier: Optional[str] = None, - description: Optional[str] = None, - code_list: Optional[QbCodeList] = None, - parent_attribute_uri: Optional[str] = None, - source_uri: Optional[str] = None, - is_required: bool = False): + def __init__( + self, + label: str, + uri_safe_identifier: Optional[str] = None, + description: Optional[str] = None, + code_list: Optional[QbCodeList] = None, + parent_attribute_uri: Optional[str] = None, + source_uri: Optional[str] = None, + is_required: bool = False, + ): QbAttribute.__init__(self, is_required) self.label: str = label - self.uri_safe_identifier: str = uri_safe_identifier if uri_safe_identifier is not None else uri_safe(label) + self.uri_safe_identifier: str = ( + uri_safe_identifier if uri_safe_identifier is not None else uri_safe(label) + ) self.description: Optional[str] = description self.code_list: Optional[QbCodeList] = code_list self.parent_attribute_uri: Optional[str] = parent_attribute_uri self.source_uri: Optional[str] = source_uri @staticmethod - def from_data(label: str, - data: PandasDataTypes, - uri_safe_identifier: Optional[str] = None, - description: Optional[str] = None, - parent_attribute_uri: Optional[str] = None, - source_uri: Optional[str] = None, - is_required: bool = False): - - return NewQbAttribute(label, - uri_safe_identifier=uri_safe_identifier, - description=description, - code_list=NewQbCodeList.from_data(CatalogMetadata(label), data), - parent_attribute_uri=parent_attribute_uri, - source_uri=source_uri, - is_required=is_required) + def from_data( + label: str, + data: PandasDataTypes, + uri_safe_identifier: Optional[str] = None, + description: Optional[str] = None, + parent_attribute_uri: Optional[str] = None, + source_uri: Optional[str] = None, + is_required: bool = False, + ): + + return NewQbAttribute( + label, + uri_safe_identifier=uri_safe_identifier, + description=description, + code_list=NewQbCodeList.from_data(CatalogMetadata(label), data), + parent_attribute_uri=parent_attribute_uri, + source_uri=source_uri, + is_required=is_required, + ) def __str__(self) -> str: return f"NewQbAttribute('{self.label}')" diff --git a/csvqb/csvqb/models/cube/csvqb/components/codelist.py b/csvqb/csvqb/models/cube/csvqb/components/codelist.py index e082acdeb..0f1e69317 100644 --- a/csvqb/csvqb/models/cube/csvqb/components/codelist.py +++ b/csvqb/csvqb/models/cube/csvqb/components/codelist.py @@ -33,13 +33,14 @@ def validate_data(self, data: pd.Series) -> List[ValidationError]: class NewQbConcept: - - def __init__(self, - label: str, - code: Optional[str] = None, - parent_code: Optional[str] = None, - sort_order: Optional[int] = None, - description: Optional[str] = None): + def __init__( + self, + label: str, + code: Optional[str] = None, + parent_code: Optional[str] = None, + sort_order: Optional[int] = None, + description: Optional[str] = None, + ): self.label: str = label self.code: str = code or uri_safe(label) self.parent_code: Optional[str] = parent_code @@ -58,10 +59,12 @@ class NewQbCodeList(QbCodeList): Contains the metadata necessary to create a new skos:ConceptScheme which is local to a dataset. """ - def __init__(self, - metadata: CatalogMetadata, - concepts: List[NewQbConcept], - variant_of_uris: List[str] = []): + def __init__( + self, + metadata: CatalogMetadata, + concepts: List[NewQbConcept], + variant_of_uris: List[str] = [], + ): self.metadata: CatalogMetadata = metadata self.concepts: List[NewQbConcept] = concepts self.variant_of_uris: List[str] = variant_of_uris # For xkos:variant usage. @@ -70,18 +73,17 @@ def __str__(self) -> str: return f"NewQbCodeList('{self.metadata.title}')" @staticmethod - def from_data(metadata: CatalogMetadata, - data: PandasDataTypes, - variant_of_uris: List[str] = []) -> "NewQbCodeList": + def from_data( + metadata: CatalogMetadata, + data: PandasDataTypes, + variant_of_uris: List[str] = [], + ) -> "NewQbCodeList": columnar_data = pandas_input_to_columnar_str(data) concepts = [NewQbConcept(c) for c in sorted(set(columnar_data))] - return NewQbCodeList(metadata, - concepts, - variant_of_uris=variant_of_uris) + return NewQbCodeList(metadata, concepts, variant_of_uris=variant_of_uris) def validate(self) -> List[ValidationError]: - return self.metadata.validate() \ - + [] # TODO: augment this. + return self.metadata.validate() + [] # TODO: augment this. def validate_data(self, data: pd.Series) -> List[ValidationError]: return [] # TODO: implement this. diff --git a/csvqb/csvqb/models/cube/csvqb/components/datastructuredefinition.py b/csvqb/csvqb/models/cube/csvqb/components/datastructuredefinition.py index 447664369..7a589700b 100644 --- a/csvqb/csvqb/models/cube/csvqb/components/datastructuredefinition.py +++ b/csvqb/csvqb/models/cube/csvqb/components/datastructuredefinition.py @@ -8,8 +8,8 @@ class QbDataStructureDefinition(ABC): """ - Base class for entities holding information necessary to generate one or many qb DataStructureDefinition (DSD) - components. + Base class for entities holding information necessary to generate one or many qb DataStructureDefinition (DSD) + components. """ @abstractmethod @@ -30,14 +30,15 @@ def __str__(self) -> str: class ColumnarQbDataStructureDefinition(QbDataStructureDefinition, ABC): """ - Base class representing Qb Data Structure Definitions which can be directly attached to a pd.DataFrame column. + Base class representing Qb Data Structure Definitions which can be directly attached to a pd.DataFrame column. """ + pass class MultiQbDataStructureDefinition(ColumnarQbDataStructureDefinition, ABC): """ - Base class representing an entity which defines a group of `QbDataStructureDefinition`s + Base class representing an entity which defines a group of `QbDataStructureDefinition`s """ @abstractmethod diff --git a/csvqb/csvqb/models/cube/csvqb/components/dimension.py b/csvqb/csvqb/models/cube/csvqb/components/dimension.py index 6213cd62b..8f7d148b0 100644 --- a/csvqb/csvqb/models/cube/csvqb/components/dimension.py +++ b/csvqb/csvqb/models/cube/csvqb/components/dimension.py @@ -17,10 +17,7 @@ def __init__(self, range_uri: Optional[str]): class ExistingQbDimension(QbDimension): - - def __init__(self, - dimension_uri: str, - range_uri: Optional[str] = None): + def __init__(self, dimension_uri: str, range_uri: Optional[str] = None): QbDimension.__init__(self, range_uri) self.dimension_uri: str = dimension_uri self.range_uri: Optional[str] = range_uri @@ -36,20 +33,23 @@ def validate_data(self, data: pd.Series) -> List[ValidationError]: class NewQbDimension(QbDimension): - - def __init__(self, - label: str, - description: Optional[str] = None, - uri_safe_identifier: Optional[str] = None, - # todo: Ensure we link the code-list to the qb column component somehow - code_list: Optional[QbCodeList] = None, - parent_dimension_uri: Optional[str] = None, - source_uri: Optional[str] = None, - range_uri: Optional[str] = None): + def __init__( + self, + label: str, + description: Optional[str] = None, + uri_safe_identifier: Optional[str] = None, + # todo: Ensure we link the code-list to the qb column component somehow + code_list: Optional[QbCodeList] = None, + parent_dimension_uri: Optional[str] = None, + source_uri: Optional[str] = None, + range_uri: Optional[str] = None, + ): QbDimension.__init__(self, range_uri) self.label: str = label self.description: Optional[str] = description - self.uri_safe_identifier: str = uri_safe_identifier if uri_safe_identifier is not None else uri_safe(label) + self.uri_safe_identifier: str = ( + uri_safe_identifier if uri_safe_identifier is not None else uri_safe(label) + ) self.code_list: Optional[QbCodeList] = code_list self.parent_dimension_uri: Optional[str] = parent_dimension_uri self.source_uri: Optional[str] = source_uri @@ -58,13 +58,15 @@ def __str__(self) -> str: return f"NewQbDimension('{self.label}')" @staticmethod - def from_data(label: str, - data: PandasDataTypes, - description: Optional[str] = None, - uri_safe_identifier: Optional[str] = None, - parent_dimension_uri: Optional[str] = None, - source_uri: Optional[str] = None, - range_uri: Optional[str] = None) -> "NewQbDimension": + def from_data( + label: str, + data: PandasDataTypes, + description: Optional[str] = None, + uri_safe_identifier: Optional[str] = None, + parent_dimension_uri: Optional[str] = None, + source_uri: Optional[str] = None, + range_uri: Optional[str] = None, + ) -> "NewQbDimension": """ Creates a new dimension and code list from the columnar data provided. :param label: @@ -76,13 +78,15 @@ def from_data(label: str, :param range_uri: :return: NewQbDimension """ - return NewQbDimension(label, - description=description, - uri_safe_identifier=uri_safe_identifier, - code_list=NewQbCodeList.from_data(CatalogMetadata(label), data), - parent_dimension_uri=parent_dimension_uri, - source_uri=source_uri, - range_uri=range_uri) + return NewQbDimension( + label, + description=description, + uri_safe_identifier=uri_safe_identifier, + code_list=NewQbCodeList.from_data(CatalogMetadata(label), data), + parent_dimension_uri=parent_dimension_uri, + source_uri=source_uri, + range_uri=range_uri, + ) def validate(self) -> List[ValidationError]: # todo: Add more validation checks diff --git a/csvqb/csvqb/models/cube/csvqb/components/measure.py b/csvqb/csvqb/models/cube/csvqb/components/measure.py index b0d59f9d3..da3da4bef 100644 --- a/csvqb/csvqb/models/cube/csvqb/components/measure.py +++ b/csvqb/csvqb/models/cube/csvqb/components/measure.py @@ -4,7 +4,10 @@ from csvqb.utils.uri import uri_safe -from .datastructuredefinition import MultiQbDataStructureDefinition, QbDataStructureDefinition +from .datastructuredefinition import ( + MultiQbDataStructureDefinition, + QbDataStructureDefinition, +) from .dimension import ExistingQbDimension from csvqb.models.validationerror import ValidationError from csvqb.inputs import PandasDataTypes, pandas_input_to_columnar_str @@ -30,16 +33,20 @@ def validate_data(self, data: pd.Series) -> List[ValidationError]: class NewQbMeasure(QbMeasure): - def __init__(self, - label: str, - description: Optional[str] = None, - uri_safe_identifier: Optional[str] = None, - parent_measure_uri: Optional[str] = None, - source_uri: Optional[str] = None): + def __init__( + self, + label: str, + description: Optional[str] = None, + uri_safe_identifier: Optional[str] = None, + parent_measure_uri: Optional[str] = None, + source_uri: Optional[str] = None, + ): QbMeasure.__init__(self) self.label: str = label self.description: Optional[str] = description - self.uri_safe_identifier: str = uri_safe_identifier if uri_safe_identifier is not None else uri_safe(label) + self.uri_safe_identifier: str = ( + uri_safe_identifier if uri_safe_identifier is not None else uri_safe(label) + ) self.parent_measure_uri: Optional[str] = parent_measure_uri self.source_uri: Optional[str] = source_uri @@ -55,8 +62,9 @@ def validate_data(self, data: pd.Series) -> List[ValidationError]: class QbMultiMeasureDimension(MultiQbDataStructureDefinition): """ - Represents the measure types permitted in a multi-measure cube. + Represents the measure types permitted in a multi-measure cube. """ + def __init__(self, measures: List[QbMeasure]): self.measures: List[QbMeasure] = measures @@ -67,7 +75,9 @@ def __str__(self) -> str: @staticmethod def new_measures_from_data(data: PandasDataTypes) -> "QbMultiMeasureDimension": columnar_data = pandas_input_to_columnar_str(data) - return QbMultiMeasureDimension([NewQbMeasure(m) for m in sorted(set(columnar_data))]) + return QbMultiMeasureDimension( + [NewQbMeasure(m) for m in sorted(set(columnar_data))] + ) def validate(self) -> List[ValidationError]: return [] # TODO: implement this @@ -81,5 +91,7 @@ def get_qb_components(self) -> List[QbDataStructureDefinition]: return components -QbMeasureTypeDimension = ExistingQbDimension("http://purl.org/linked-data/cube#measureType", - range_uri="http://purl.org/linked-data/cube#MeasureProperty") +QbMeasureTypeDimension = ExistingQbDimension( + "http://purl.org/linked-data/cube#measureType", + range_uri="http://purl.org/linked-data/cube#MeasureProperty", +) diff --git a/csvqb/csvqb/models/cube/csvqb/components/observedvalue.py b/csvqb/csvqb/models/cube/csvqb/components/observedvalue.py index 1df77bf2c..bec53ee9d 100644 --- a/csvqb/csvqb/models/cube/csvqb/components/observedvalue.py +++ b/csvqb/csvqb/models/cube/csvqb/components/observedvalue.py @@ -3,7 +3,10 @@ from abc import ABC -from .datastructuredefinition import MultiQbDataStructureDefinition, QbDataStructureDefinition +from .datastructuredefinition import ( + MultiQbDataStructureDefinition, + QbDataStructureDefinition, +) from .measure import QbMeasure, QbMeasureTypeDimension from .unit import QbUnit, QbUnitAttribute from csvqb.models.validationerror import ValidationError @@ -40,14 +43,17 @@ def get_qb_components(self) -> List[QbDataStructureDefinition]: class QbSingleMeasureObservationValue(QbObservationValue): """ - Represents the unit/measure/datatype components necessary to define a simple qb:Observation. + Represents the unit/measure/datatype components necessary to define a simple qb:Observation. - N.B. Requires `virt_unit` and `virt_measure` columns to be added to CSV-W metadata + N.B. Requires `virt_unit` and `virt_measure` columns to be added to CSV-W metadata """ - def __init__(self, - measure: QbMeasure, - unit: Optional[QbUnit] = None, - data_type: Optional[str] = None): + + def __init__( + self, + measure: QbMeasure, + unit: Optional[QbUnit] = None, + data_type: Optional[str] = None, + ): QbObservationValue.__init__(self, data_type, unit) self.measure: QbMeasure = measure diff --git a/csvqb/csvqb/models/cube/csvqb/components/unit.py b/csvqb/csvqb/models/cube/csvqb/components/unit.py index a2fc2f711..a40b98af7 100644 --- a/csvqb/csvqb/models/cube/csvqb/components/unit.py +++ b/csvqb/csvqb/models/cube/csvqb/components/unit.py @@ -6,7 +6,10 @@ from csvqb.utils.uri import uri_safe from csvqb.models.validationerror import ValidationError from .attribute import ExistingQbAttribute -from .datastructuredefinition import QbDataStructureDefinition, MultiQbDataStructureDefinition +from .datastructuredefinition import ( + QbDataStructureDefinition, + MultiQbDataStructureDefinition, +) from csvqb.inputs import pandas_input_to_columnar_str, PandasDataTypes @@ -16,34 +19,40 @@ def __init__(self, unit_multiplier: Optional[int]): class ExistingQbUnit(QbUnit): - def __init__(self, unit_uri: str, unit_multiplier: Optional[int] = None): QbUnit.__init__(self, unit_multiplier) self.unit_uri: str = unit_uri def __str__(self) -> str: - unit_multiplier_str = "" if self.unit_multiplier is None else f", 10^{self.unit_multiplier}" + unit_multiplier_str = ( + "" if self.unit_multiplier is None else f", 10^{self.unit_multiplier}" + ) return f"ExistingQbUnit('{self.unit_uri}'{unit_multiplier_str})" def validate(self) -> List[ValidationError]: return super(ExistingQbAttribute).validate() # todo: Add more validation here. def validate_data(self, data: pd.Series) -> List[ValidationError]: - return super(ExistingQbAttribute).validate_data(data) # todo: Add more validation here. + return super(ExistingQbAttribute).validate_data( + data + ) # todo: Add more validation here. class NewQbUnit(QbUnit): - - def __init__(self, - label: str, - uri_safe_identifier: Optional[str] = None, - unit_multiplier: Optional[int] = None, - description: Optional[str] = None, - parent_unit_uri: Optional[str] = None, - source_uri: Optional[str] = None): + def __init__( + self, + label: str, + uri_safe_identifier: Optional[str] = None, + unit_multiplier: Optional[int] = None, + description: Optional[str] = None, + parent_unit_uri: Optional[str] = None, + source_uri: Optional[str] = None, + ): QbUnit.__init__(self, unit_multiplier) self.label: str = label - self.uri_safe_identifier: str = uri_safe_identifier if uri_safe_identifier is not None else uri_safe(label) + self.uri_safe_identifier: str = ( + uri_safe_identifier if uri_safe_identifier is not None else uri_safe(label) + ) self.description: Optional[str] = description self.parent_unit_uri: Optional[str] = parent_unit_uri self.source_uri: Optional[str] = source_uri @@ -55,12 +64,14 @@ def validate(self) -> List[ValidationError]: return super(ExistingQbAttribute).validate() # todo: Add more validation here. def validate_data(self, data: pd.Series) -> List[ValidationError]: - return super(ExistingQbAttribute).validate_data(data) # todo: Add more validation here. + return super(ExistingQbAttribute).validate_data( + data + ) # todo: Add more validation here. class QbMultiUnits(MultiQbDataStructureDefinition): """ - Represents multiple units used/defined in a cube, typically used in multi-measure cubes. + Represents multiple units used/defined in a cube, typically used in multi-measure cubes. """ def __init__(self, units: List[QbUnit]): @@ -77,7 +88,9 @@ def new_units_from_data(data: PandasDataTypes) -> "QbMultiUnits": :param data: The data column defining the full list of available units. :return: QbMultiUnits """ - return QbMultiUnits([NewQbUnit(u) for u in set(pandas_input_to_columnar_str(data))]) + return QbMultiUnits( + [NewQbUnit(u) for u in set(pandas_input_to_columnar_str(data))] + ) def validate(self) -> List[ValidationError]: return [] # TODO: implement this @@ -91,4 +104,6 @@ def get_qb_components(self) -> List[QbDataStructureDefinition]: return components -QbUnitAttribute = ExistingQbAttribute("http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure") +QbUnitAttribute = ExistingQbAttribute( + "http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure" +) diff --git a/csvqb/csvqb/models/cube/cube.py b/csvqb/csvqb/models/cube/cube.py index 8f1182bdf..3fcfa242e 100644 --- a/csvqb/csvqb/models/cube/cube.py +++ b/csvqb/csvqb/models/cube/cube.py @@ -10,10 +10,12 @@ class Cube(Generic[TMetadata]): - def __init__(self, - metadata: TMetadata, - data: Optional[pd.DataFrame] = None, - columns: List[CsvColumn] = []): + def __init__( + self, + metadata: TMetadata, + data: Optional[pd.DataFrame] = None, + columns: List[CsvColumn] = [], + ): self.metadata: TMetadata = metadata self.data: Optional[pd.DataFrame] = data self.columns: List[CsvColumn] = columns @@ -28,14 +30,20 @@ def _validate_columns(self) -> List[ValidationError]: existing_col_titles: Set[str] = set() for col in self.columns: if col.csv_column_title in existing_col_titles: - errors.append(ValidationError(f"Duplicate column title '{col.csv_column_title}'")) + errors.append( + ValidationError(f"Duplicate column title '{col.csv_column_title}'") + ) maybe_column_data = None if self.data is not None: if col.csv_column_title in self.data.columns: maybe_column_data = self.data[col.csv_column_title] else: - errors.append(ValidationError(f"Column '{col.csv_column_title}' not found in data provided.")) + errors.append( + ValidationError( + f"Column '{col.csv_column_title}' not found in data provided." + ) + ) errors += col.validate(pandas_input_to_columnar(maybe_column_data)) @@ -43,6 +51,10 @@ def _validate_columns(self) -> List[ValidationError]: defined_column_titles = [c.csv_column_title for c in self.columns] for column in list(self.data.columns): if column not in defined_column_titles: - errors.append(ValidationError(f"Column '{column}' does not have a mapping defined.")) + errors.append( + ValidationError( + f"Column '{column}' does not have a mapping defined." + ) + ) return errors diff --git a/csvqb/csvqb/models/rdf/conceptschemeincatalog.py b/csvqb/csvqb/models/rdf/conceptschemeincatalog.py index a117a2692..15c0992fb 100644 --- a/csvqb/csvqb/models/rdf/conceptschemeincatalog.py +++ b/csvqb/csvqb/models/rdf/conceptschemeincatalog.py @@ -3,8 +3,8 @@ class ConceptSchemeInCatalog(skos.ConceptScheme, dcat.Dataset): """ - Represents both a skos:ConceptScheme and a dcat:Dataset in one node. Means that we don't have to link - between the two. + Represents both a skos:ConceptScheme and a dcat:Dataset in one node. Means that we don't have to link + between the two. """ def __init__(self, uri: str): diff --git a/csvqb/csvqb/tests/behaviour/steps/skoscodelists.py b/csvqb/csvqb/tests/behaviour/steps/skoscodelists.py index 6a6f37f3a..a3feeeccf 100644 --- a/csvqb/csvqb/tests/behaviour/steps/skoscodelists.py +++ b/csvqb/csvqb/tests/behaviour/steps/skoscodelists.py @@ -6,7 +6,7 @@ from devtools.behave.file import get_context_temp_dir_path -@Given("a NewQbCodeList named \"{code_list_name}\"") +@Given('a NewQbCodeList named "{code_list_name}"') def step_impl(context, code_list_name: str): metadata = CatalogMetadata( code_list_name, @@ -18,13 +18,20 @@ def step_impl(context, code_list_name: str): keywords=["Key word one", "Key word two"], landing_page_uri="http://example.org/landing-page", license_uri="http://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/", - public_contact_point_uri="something@example.org" + public_contact_point_uri="something@example.org", ) - context.code_list = NewQbCodeList(metadata, [ - NewQbConcept("First Concept", code="1st-concept", description="This is the first concept."), - NewQbConcept("Second Concept", parent_code="1st-concept", sort_order=20) - ]) + context.code_list = NewQbCodeList( + metadata, + [ + NewQbConcept( + "First Concept", + code="1st-concept", + description="This is the first concept.", + ), + NewQbConcept("Second Concept", parent_code="1st-concept", sort_order=20), + ], + ) @When("the code list is serialised to CSV-W") diff --git a/csvqb/csvqb/tests/unit/configloaders/infojsontests.py b/csvqb/csvqb/tests/unit/configloaders/infojsontests.py index cccc16798..f434e0a97 100644 --- a/csvqb/csvqb/tests/unit/configloaders/infojsontests.py +++ b/csvqb/csvqb/tests/unit/configloaders/infojsontests.py @@ -12,24 +12,30 @@ class InfoJsonLoaderTests(UnitTestBase): def test_csv_cols_assumed_dimensions(self): """ - If a column isn't defined, assume it is a new local dimension. + If a column isn't defined, assume it is a new local dimension. - Assume that if a column isn't defined in the info.json `transform.columns` section, then it is a - new locally defined dimension. + Assume that if a column isn't defined in the info.json `transform.columns` section, then it is a + new locally defined dimension. - Assert that the newly defined dimension has a codelist created from the values in the CSV. + Assert that the newly defined dimension has a codelist created from the values in the CSV. """ data = pd.read_csv(self.get_test_cases_dir() / "configloaders" / "data.csv") - cube = get_cube_from_info_json(self.get_test_cases_dir() / "configloaders" / "info.json", data) + cube = get_cube_from_info_json( + self.get_test_cases_dir() / "configloaders" / "info.json", data + ) - matching_columns = [c for c in cube.columns if c.csv_column_title == "Undefined Column"] + matching_columns = [ + c for c in cube.columns if c.csv_column_title == "Undefined Column" + ] self.assertEqual(1, len(matching_columns)) undefined_column_assumed_definition: CsvColumn = matching_columns[0] if not isinstance(undefined_column_assumed_definition, QbColumn): raise Exception("Incorrect type") - self.assertIsInstance(undefined_column_assumed_definition.component, NewQbDimension) + self.assertIsInstance( + undefined_column_assumed_definition.component, NewQbDimension + ) new_dimension: NewQbDimension = undefined_column_assumed_definition.component self.assertIsNotNone(new_dimension.code_list) @@ -56,16 +62,27 @@ def test_multiple_measures_and_units_loaded_in_uri_template(self): to be found at . """ - data = pd.read_csv(self.get_test_cases_dir() / "configloaders" / "bottles-test-files" / "bottles-data.csv") + data = pd.read_csv( + self.get_test_cases_dir() + / "configloaders" + / "bottles-test-files" + / "bottles-data.csv" + ) cube = get_cube_from_info_json( - self.get_test_cases_dir() / "configloaders" / "bottles-test-files" / "bottles-info.json", - data) + self.get_test_cases_dir() + / "configloaders" + / "bottles-test-files" + / "bottles-info.json", + data, + ) """Measure URI""" - expected_measure_uris = ['http://gss-data.org.uk/def/x/one-litre-and-less', - 'http://gss-data.org.uk/def/x/more-than-one-litre', - 'http://gss-data.org.uk/def/x/number-of-bottles'] + expected_measure_uris = [ + "http://gss-data.org.uk/def/x/one-litre-and-less", + "http://gss-data.org.uk/def/x/more-than-one-litre", + "http://gss-data.org.uk/def/x/number-of-bottles", + ] measure_column = cube.columns[1] self.assertIsInstance(measure_column, QbColumn) @@ -83,8 +100,10 @@ def test_multiple_measures_and_units_loaded_in_uri_template(self): self.assertIsInstance(unit_column, QbColumn) self.assertIsInstance(unit_column.component, QbMultiUnits) - expected_unit_uris = ['http://gss-data.org.uk/def/concept/measurement-units/count', - 'http://gss-data.org.uk/def/concept/measurement-units/percentage'] + expected_unit_uris = [ + "http://gss-data.org.uk/def/concept/measurement-units/count", + "http://gss-data.org.uk/def/concept/measurement-units/percentage", + ] actual_unit_uris = [x.unit_uri for x in unit_column.component.units] self.assertCountEqual(expected_unit_uris, actual_unit_uris) @@ -96,25 +115,38 @@ def test_multiple_measures_and_units_loaded_in_uri_template(self): def test_cube_metadata_extracted_from_info_json(self): - """Metadata - ['base_uri', 'creator', 'description', 'from_dict', 'issued', 'keywords', 'landing_page', - 'license', 'public_contact_point', 'publisher', 'summary', 'themes', 'title', + """Metadata - ['base_uri', 'creator', 'description', 'from_dict', 'issued', 'keywords', 'landing_page', + 'license', 'public_contact_point', 'publisher', 'summary', 'themes', 'title', 'uri_safe_identifier', 'validate']""" - data = pd.read_csv(self.get_test_cases_dir() / "configloaders" / "bottles-test-files" / "bottles-data.csv") + data = pd.read_csv( + self.get_test_cases_dir() + / "configloaders" + / "bottles-test-files" + / "bottles-data.csv" + ) cube = get_cube_from_info_json( - self.get_test_cases_dir() / "configloaders" / "bottles-test-files" / "bottles-info.json", - data) + self.get_test_cases_dir() + / "configloaders" + / "bottles-test-files" + / "bottles-info.json", + data, + ) # Creator - pass - expected_creator = "https://www.gov.uk/government/organisations/hm-revenue-customs" + expected_creator = ( + "https://www.gov.uk/government/organisations/hm-revenue-customs" + ) actual_creator = cube.metadata.creator_uri self.assertEqual(expected_creator, actual_creator) # Description - pass - expected_description = "All bulletins provide details on percentage of one litre or less & more than " \ - "one litre bottles. This information is provided on a yearly basis." + expected_description = ( + "All bulletins provide details on percentage of one litre or less & more than " + "one litre bottles. This information is provided on a yearly basis." + ) actual_description = cube.metadata.description self.assertEqual(expected_description, actual_description) @@ -132,7 +164,9 @@ def test_cube_metadata_extracted_from_info_json(self): # landingpage - pass - expected_landingpage = "https://www.gov.uk/government/statistics/bottles-bulletin" + expected_landingpage = ( + "https://www.gov.uk/government/statistics/bottles-bulletin" + ) actual_landingpage = cube.metadata.landing_page_uri self.assertEqual(expected_landingpage, actual_landingpage) @@ -151,7 +185,9 @@ def test_cube_metadata_extracted_from_info_json(self): # publisher - pass - expected_publisher = "https://www.gov.uk/government/organisations/hm-revenue-customs" + expected_publisher = ( + "https://www.gov.uk/government/organisations/hm-revenue-customs" + ) actual_publisher = cube.metadata.publisher_uri self.assertEqual(expected_publisher, actual_publisher) @@ -186,5 +222,6 @@ def test_cube_metadata_extracted_from_info_json(self): self.assert_no_validation_errors(errors) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/csvqb/csvqb/tests/unit/cube/errorvalidation.py b/csvqb/csvqb/tests/unit/cube/errorvalidation.py index 2b7377002..80eda5a68 100644 --- a/csvqb/csvqb/tests/unit/cube/errorvalidation.py +++ b/csvqb/csvqb/tests/unit/cube/errorvalidation.py @@ -7,15 +7,12 @@ class InternalApiLoaderTests(UnitTestBase): - def test_column_not_configured_error(self): """ - If the CSV data contains a column which is not defined, we get an error. + If the CSV data contains a column which is not defined, we get an error. """ - data = pd.DataFrame({ - "Some Dimension": ["A", "B", "C"] - }) + data = pd.DataFrame({"Some Dimension": ["A", "B", "C"]}) metadata = CatalogMetadata("Some Dataset") columns = [] @@ -28,15 +25,13 @@ def test_column_not_configured_error(self): def test_column_title_wrong_error(self): """ - If the Cube object contains a column title which is not defined in the CSV data, we get an error. + If the Cube object contains a column title which is not defined in the CSV data, we get an error. """ data = pd.DataFrame() metadata = CatalogMetadata("Some Dataset") - columns: List[CsvColumn] = [ - SuppressedCsvColumn("Some Column Title") - ] + columns: List[CsvColumn] = [SuppressedCsvColumn("Some Column Title")] cube = Cube(metadata, data, columns) validation_errors = cube.validate() @@ -45,5 +40,5 @@ def test_column_title_wrong_error(self): self.assertTrue("Some Column Title" in error.message) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/csvqb/csvqb/tests/unit/cube/qb/errorvalidation.py b/csvqb/csvqb/tests/unit/cube/qb/errorvalidation.py index 8b2367039..f99bad040 100644 --- a/csvqb/csvqb/tests/unit/cube/qb/errorvalidation.py +++ b/csvqb/csvqb/tests/unit/cube/qb/errorvalidation.py @@ -10,24 +10,38 @@ class InternalApiLoaderTests(UnitTestBase): def test_single_measure_qb_definition(self): """ - Single-measure Qbs can be defined. + Single-measure Qbs can be defined. """ - data = pd.DataFrame({ - "Existing Dimension": ["A", "B", "C"], - "Local Dimension": ["D", "E", "F"], - "Value": [2, 2, 2] - }) + data = pd.DataFrame( + { + "Existing Dimension": ["A", "B", "C"], + "Local Dimension": ["D", "E", "F"], + "Value": [2, 2, 2], + } + ) metadata = CatalogMetadata("Some Dataset") columns = [ - QbColumn("Existing Dimension", ExistingQbDimension("https://example.org/dimensions/existing_dimension"), - output_uri_template="https://example.org/concept-scheme/existing_scheme/{+existing_dimension}"), - QbColumn("Local Dimension", NewQbDimension.from_data("Dimension of letters", data["Local Dimension"])), - QbColumn("Value", - QbSingleMeasureObservationValue( - ExistingQbMeasure("http://example.com/measures/existing_measure"), - NewQbUnit("some new unit") - )) + QbColumn( + "Existing Dimension", + ExistingQbDimension( + "https://example.org/dimensions/existing_dimension" + ), + output_uri_template="https://example.org/concept-scheme/existing_scheme/{+existing_dimension}", + ), + QbColumn( + "Local Dimension", + NewQbDimension.from_data( + "Dimension of letters", data["Local Dimension"] + ), + ), + QbColumn( + "Value", + QbSingleMeasureObservationValue( + ExistingQbMeasure("http://example.com/measures/existing_measure"), + NewQbUnit("some new unit"), + ), + ), ] cube = Cube(metadata, data, columns) @@ -38,22 +52,32 @@ def test_single_measure_qb_definition(self): def test_multi_measure_qb_definition(self): """ - Multi-measure Qbs can be defined. + Multi-measure Qbs can be defined. """ - data = pd.DataFrame({ - "Existing Dimension": ["A", "B", "C"], - "Value": [2, 2, 2], - "Measure": ["People", "Children", "Adults"], - "Units": ["Percent", "People", "People"] - }) + data = pd.DataFrame( + { + "Existing Dimension": ["A", "B", "C"], + "Value": [2, 2, 2], + "Measure": ["People", "Children", "Adults"], + "Units": ["Percent", "People", "People"], + } + ) metadata = CatalogMetadata("Some Dataset") columns = [ - QbColumn("Existing Dimension", ExistingQbDimension("https://example.org/dimensions/existing_dimension"), - output_uri_template="https://example.org/concept-scheme/existing_scheme/{+existing_dimension}"), + QbColumn( + "Existing Dimension", + ExistingQbDimension( + "https://example.org/dimensions/existing_dimension" + ), + output_uri_template="https://example.org/concept-scheme/existing_scheme/{+existing_dimension}", + ), QbColumn("Value", QbMultiMeasureObservationValue("number")), - QbColumn("Measure", QbMultiMeasureDimension.new_measures_from_data(data["Measure"])), - QbColumn("Units", QbMultiUnits.new_units_from_data(data["Units"])) + QbColumn( + "Measure", + QbMultiMeasureDimension.new_measures_from_data(data["Measure"]), + ), + QbColumn("Units", QbMultiUnits.new_units_from_data(data["Units"])), ] cube = Cube(metadata, data, columns) @@ -68,16 +92,24 @@ def test_existing_dimension_output_uri_template(self): """ - data = pd.DataFrame({ - "Existing Dimension": ["A", "B", "C"], - "Value": [1, 2, 3] - }) - cube = Cube(CatalogMetadata("Cube's name"), data, [ - QbColumn("Existing Dimension", ExistingQbDimension("http://example.org/dimensions/location")), - QbColumn("Value", - QbSingleMeasureObservationValue(ExistingQbUnit("http://some/unit"), - ExistingQbMeasure("http://some/measure"))) - ]) + data = pd.DataFrame({"Existing Dimension": ["A", "B", "C"], "Value": [1, 2, 3]}) + cube = Cube( + CatalogMetadata("Cube's name"), + data, + [ + QbColumn( + "Existing Dimension", + ExistingQbDimension("http://example.org/dimensions/location"), + ), + QbColumn( + "Value", + QbSingleMeasureObservationValue( + ExistingQbUnit("http://some/unit"), + ExistingQbMeasure("http://some/measure"), + ), + ), + ], + ) errors = cube.validate() errors += validate_qb_component_constraints(cube) @@ -86,8 +118,9 @@ def test_existing_dimension_output_uri_template(self): validation_errors = errors[0] self.assertTrue( "'Existing Dimension' - an ExistingQbDimension must have an output_uri_template defined." - in validation_errors.message) + in validation_errors.message + ) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/csvqb/csvqb/tests/unit/writers/qbwritertests.py b/csvqb/csvqb/tests/unit/writers/qbwritertests.py index 001d46848..cd38c27b7 100644 --- a/csvqb/csvqb/tests/unit/writers/qbwritertests.py +++ b/csvqb/csvqb/tests/unit/writers/qbwritertests.py @@ -11,25 +11,36 @@ def _get_standard_cube_for_columns(columns: List[CsvColumn]) -> Cube: - data = pd.DataFrame({ - "Country": ["Wales", "Scotland", "England", "Northern Ireland"], - "Observed Value": [101.5, 56.2, 12.4, 77.8], - "Marker": ["Provisional", "Provisional", "Provisional", "Provisional"] - }) + data = pd.DataFrame( + { + "Country": ["Wales", "Scotland", "England", "Northern Ireland"], + "Observed Value": [101.5, 56.2, 12.4, 77.8], + "Marker": ["Provisional", "Provisional", "Provisional", "Provisional"], + } + ) metadata: CatalogMetadata = CatalogMetadata("Cube Name") return Cube(deepcopy(metadata), data.copy(deep=True), columns) -def _assert_component_defined(dataset: qb.DataSet, name: str) -> qb.ComponentSpecification: - component = first(dataset.structure.components, lambda x: str(x.uri) == f"./cube-name.csv#component/{name}") - assert(component is not None) +def _assert_component_defined( + dataset: qb.DataSet, name: str +) -> qb.ComponentSpecification: + component = first( + dataset.structure.components, + lambda x: str(x.uri) == f"./cube-name.csv#component/{name}", + ) + assert component is not None return component -def _assert_component_property_defined(component: qb.ComponentSpecification, property_uri: str) -> None: - property = first(component.componentProperties, lambda x: str(x.uri) == property_uri) - assert(property is not None) +def _assert_component_property_defined( + component: qb.ComponentSpecification, property_uri: str +) -> None: + property = first( + component.componentProperties, lambda x: str(x.uri) == property_uri + ) + assert property is not None return property @@ -39,13 +50,27 @@ def _assert_component_property_defined(component: qb.ComponentSpecification, pro class QbWriterTests(UnitTestBase): def test_structure_defined(self): - cube = _get_standard_cube_for_columns([ - QbColumn("Country", ExistingQbDimension("http://example.org/dimensions/country")), - QbColumn("Marker", ExistingQbAttribute("http://example.org/attributes/marker")), - QbColumn("Observed Value", QbSingleMeasureObservationValue( - ExistingQbMeasure("http://example.org/units/some-existing-measure"), - ExistingQbUnit("http://example.org/units/some-existing-unit"))) - ]) + cube = _get_standard_cube_for_columns( + [ + QbColumn( + "Country", + ExistingQbDimension("http://example.org/dimensions/country"), + ), + QbColumn( + "Marker", + ExistingQbAttribute("http://example.org/attributes/marker"), + ), + QbColumn( + "Observed Value", + QbSingleMeasureObservationValue( + ExistingQbMeasure( + "http://example.org/units/some-existing-measure" + ), + ExistingQbUnit("http://example.org/units/some-existing-unit"), + ), + ), + ] + ) qbwriter = QbWriter(cube) dataset = qbwriter._generate_qb_dataset_dsd_definitions() @@ -64,207 +89,347 @@ def test_structure_defined(self): def test_generating_concept_uri_template_from_global_concept_scheme_uri(self): """ - Given a globally defined skos:ConceptScheme's URI, generate the URI template for a column which maps the - column's value to a concept defined inside the concept scheme. - """ - code_list = ExistingQbCodeList("http://base-uri/concept-scheme/this-concept-scheme-name") - column = QbColumn("Some Column", NewQbDimension("Some Dimension", code_list=code_list)) - - actual_concept_template_uri = empty_qbwriter._get_default_value_uri_for_code_list_concepts(column, code_list) - self.assertEqual("http://base-uri/concept-scheme/this-concept-scheme-name/{+some_column}", - actual_concept_template_uri) + Given a globally defined skos:ConceptScheme's URI, generate the URI template for a column which maps the + column's value to a concept defined inside the concept scheme. + """ + code_list = ExistingQbCodeList( + "http://base-uri/concept-scheme/this-concept-scheme-name" + ) + column = QbColumn( + "Some Column", NewQbDimension("Some Dimension", code_list=code_list) + ) + + actual_concept_template_uri = ( + empty_qbwriter._get_default_value_uri_for_code_list_concepts( + column, code_list + ) + ) + self.assertEqual( + "http://base-uri/concept-scheme/this-concept-scheme-name/{+some_column}", + actual_concept_template_uri, + ) def test_generating_concept_uri_template_from_local_concept_scheme_uri(self): """ - Given a dataset-local skos:ConceptScheme's URI, generate the URI template for a column which maps the - column's value to a concept defined inside the concept scheme. + Given a dataset-local skos:ConceptScheme's URI, generate the URI template for a column which maps the + column's value to a concept defined inside the concept scheme. """ column = SuppressedCsvColumn("Some Column") - code_list = ExistingQbCodeList("http://base-uri/dataset-name#scheme/that-concept-scheme-name") - - actual_concept_template_uri = empty_qbwriter._get_default_value_uri_for_code_list_concepts(column, code_list) - self.assertEqual("http://base-uri/dataset-name#concept/that-concept-scheme-name/{+some_column}", - actual_concept_template_uri) + code_list = ExistingQbCodeList( + "http://base-uri/dataset-name#scheme/that-concept-scheme-name" + ) + + actual_concept_template_uri = ( + empty_qbwriter._get_default_value_uri_for_code_list_concepts( + column, code_list + ) + ) + self.assertEqual( + "http://base-uri/dataset-name#concept/that-concept-scheme-name/{+some_column}", + actual_concept_template_uri, + ) def test_generating_concept_uri_template_from_unexpected_concept_scheme_uri(self): """ - Given a skos:ConceptScheme's URI *that does not follow the global or dataset-local conventions* used in our - tooling, return the column's value as our best guess at the concept's URI. + Given a skos:ConceptScheme's URI *that does not follow the global or dataset-local conventions* used in our + tooling, return the column's value as our best guess at the concept's URI. """ column = SuppressedCsvColumn("Some Column") - code_list = ExistingQbCodeList("http://base-uri/dataset-name#codes/that-concept-scheme-name") - - actual_concept_template_uri = empty_qbwriter._get_default_value_uri_for_code_list_concepts(column, code_list) + code_list = ExistingQbCodeList( + "http://base-uri/dataset-name#codes/that-concept-scheme-name" + ) + + actual_concept_template_uri = ( + empty_qbwriter._get_default_value_uri_for_code_list_concepts( + column, code_list + ) + ) self.assertEqual("{+some_column}", actual_concept_template_uri) def test_default_property_value_uris_existing_dimension_column(self): """ - When an existing dimension is used, we can provide the `propertyUrl`, but we cannot guess the `valueUrl`. - """ - column = QbColumn("Some Column", ExistingQbDimension("http://base-uri/dimensions/existing-dimension")) - default_property_uri, default_value_uri = empty_qbwriter._get_default_property_value_uris_for_column(column) - self.assertEqual("http://base-uri/dimensions/existing-dimension", default_property_uri) + When an existing dimension is used, we can provide the `propertyUrl`, but we cannot guess the `valueUrl`. + """ + column = QbColumn( + "Some Column", + ExistingQbDimension("http://base-uri/dimensions/existing-dimension"), + ) + ( + default_property_uri, + default_value_uri, + ) = empty_qbwriter._get_default_property_value_uris_for_column(column) + self.assertEqual( + "http://base-uri/dimensions/existing-dimension", default_property_uri + ) self.assertEqual("{+some_column}", default_value_uri) def test_default_property_value_uris_new_dimension_column_without_code_list(self): """ - When a new dimension is defined without a code list, we can provide the `propertyUrl`, - but we cannot guess the `valueUrl`. + When a new dimension is defined without a code list, we can provide the `propertyUrl`, + but we cannot guess the `valueUrl`. """ column = QbColumn("Some Column", NewQbDimension("Some New Dimension")) - default_property_uri, default_value_uri = empty_qbwriter._get_default_property_value_uris_for_column(column) - self.assertEqual("./cube-name.csv#dimension/some-new-dimension", default_property_uri) + ( + default_property_uri, + default_value_uri, + ) = empty_qbwriter._get_default_property_value_uris_for_column(column) + self.assertEqual( + "./cube-name.csv#dimension/some-new-dimension", default_property_uri + ) self.assertEqual("{+some_column}", default_value_uri) def test_default_property_value_uris_new_dimension_column_with_code_list(self): """ - When an new dimension is defined with a code list, we can provide the `propertyUrl` and the `valueUrl`. - """ - column = QbColumn("Some Column", - NewQbDimension("Some New Dimension", - code_list=ExistingQbCodeList("http://base-uri/concept-scheme/this-scheme"))) - default_property_uri, default_value_uri = empty_qbwriter._get_default_property_value_uris_for_column(column) - self.assertEqual("./cube-name.csv#dimension/some-new-dimension", default_property_uri) - self.assertEqual("http://base-uri/concept-scheme/this-scheme/{+some_column}", default_value_uri) + When an new dimension is defined with a code list, we can provide the `propertyUrl` and the `valueUrl`. + """ + column = QbColumn( + "Some Column", + NewQbDimension( + "Some New Dimension", + code_list=ExistingQbCodeList( + "http://base-uri/concept-scheme/this-scheme" + ), + ), + ) + ( + default_property_uri, + default_value_uri, + ) = empty_qbwriter._get_default_property_value_uris_for_column(column) + self.assertEqual( + "./cube-name.csv#dimension/some-new-dimension", default_property_uri + ) + self.assertEqual( + "http://base-uri/concept-scheme/this-scheme/{+some_column}", + default_value_uri, + ) def test_default_property_value_uris_existing_attribute_column(self): """ - When an existing attribute is used, we can provide the `propertyUrl`, but we cannot guess the `valueUrl`. - """ - column = QbColumn("Some Column", ExistingQbAttribute("http://base-uri/attributes/existing-attribute")) - default_property_uri, default_value_uri = empty_qbwriter._get_default_property_value_uris_for_column(column) - self.assertEqual("http://base-uri/attributes/existing-attribute", default_property_uri) + When an existing attribute is used, we can provide the `propertyUrl`, but we cannot guess the `valueUrl`. + """ + column = QbColumn( + "Some Column", + ExistingQbAttribute("http://base-uri/attributes/existing-attribute"), + ) + ( + default_property_uri, + default_value_uri, + ) = empty_qbwriter._get_default_property_value_uris_for_column(column) + self.assertEqual( + "http://base-uri/attributes/existing-attribute", default_property_uri + ) self.assertEqual("{+some_column}", default_value_uri) def test_default_property_value_uris_existing_attribute_column(self): """ - When a new attribute is defined, we can provide the `propertyUrl`, but we cannot guess the `valueUrl`. + When a new attribute is defined, we can provide the `propertyUrl`, but we cannot guess the `valueUrl`. """ column = QbColumn("Some Column", NewQbAttribute("This New Attribute")) - default_property_uri, default_value_uri = empty_qbwriter._get_default_property_value_uris_for_column(column) - self.assertEqual("./cube-name.csv#attribute/this-new-attribute", default_property_uri) + ( + default_property_uri, + default_value_uri, + ) = empty_qbwriter._get_default_property_value_uris_for_column(column) + self.assertEqual( + "./cube-name.csv#attribute/this-new-attribute", default_property_uri + ) self.assertEqual("{+some_column}", default_value_uri) def test_default_property_value_uris_multi_units_all_new(self): """ - When a QbMultiUnits component is defined using only new/locally defined units, we can provide the - `propertyUrl` and the `valueUrl`. + When a QbMultiUnits component is defined using only new/locally defined units, we can provide the + `propertyUrl` and the `valueUrl`. """ column = QbColumn("Some Column", QbMultiUnits([NewQbUnit("Some New Unit")])) - default_property_uri, default_value_uri = empty_qbwriter._get_default_property_value_uris_for_column(column) - self.assertEqual("http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure", default_property_uri) + ( + default_property_uri, + default_value_uri, + ) = empty_qbwriter._get_default_property_value_uris_for_column(column) + self.assertEqual( + "http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure", + default_property_uri, + ) self.assertEqual("./cube-name.csv#unit/{+some_column}", default_value_uri) def test_default_property_value_uris_multi_units_all_existing(self): """ - When a QbMultiUnits component is defined using just existing units, we can provide the `propertyUrl` and - `valueUrl`. - """ - column = QbColumn("Some Column", QbMultiUnits([ExistingQbUnit("http://base-uri/units/existing-unit")])) - default_property_uri, default_value_uri = empty_qbwriter._get_default_property_value_uris_for_column(column) - self.assertEqual("http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure", default_property_uri) + When a QbMultiUnits component is defined using just existing units, we can provide the `propertyUrl` and + `valueUrl`. + """ + column = QbColumn( + "Some Column", + QbMultiUnits([ExistingQbUnit("http://base-uri/units/existing-unit")]), + ) + ( + default_property_uri, + default_value_uri, + ) = empty_qbwriter._get_default_property_value_uris_for_column(column) + self.assertEqual( + "http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure", + default_property_uri, + ) self.assertEqual("{+some_column}", default_value_uri) def test_default_property_value_uris_multi_units_local_and_existing(self): """ - When a QbMultiUnits component is defined using a mixture of existing units and new units, we can't provide - an appropriate and consistent `valueUrl`. - - An exception is raised when this is attempted. - """ - column = QbColumn("Some Column", QbMultiUnits([NewQbUnit("Some New Unit"), - ExistingQbUnit("http://base-uri/units/existing-unit")])) - self.assertRaises(Exception, lambda: empty_qbwriter._get_default_property_value_uris_for_column(column)) + When a QbMultiUnits component is defined using a mixture of existing units and new units, we can't provide + an appropriate and consistent `valueUrl`. + + An exception is raised when this is attempted. + """ + column = QbColumn( + "Some Column", + QbMultiUnits( + [ + NewQbUnit("Some New Unit"), + ExistingQbUnit("http://base-uri/units/existing-unit"), + ] + ), + ) + self.assertRaises( + Exception, + lambda: empty_qbwriter._get_default_property_value_uris_for_column(column), + ) def test_default_property_value_uris_multi_measure_all_new(self): """ - When a QbMultiMeasureDimension component is defined using only new/locally defined measures, - we can provide the `propertyUrl` and the `valueUrl`. - """ - column = QbColumn("Some Column", QbMultiMeasureDimension([NewQbMeasure("Some New Measure")])) - default_property_uri, default_value_uri = empty_qbwriter._get_default_property_value_uris_for_column(column) - self.assertEqual("http://purl.org/linked-data/cube#measureType", default_property_uri) + When a QbMultiMeasureDimension component is defined using only new/locally defined measures, + we can provide the `propertyUrl` and the `valueUrl`. + """ + column = QbColumn( + "Some Column", QbMultiMeasureDimension([NewQbMeasure("Some New Measure")]) + ) + ( + default_property_uri, + default_value_uri, + ) = empty_qbwriter._get_default_property_value_uris_for_column(column) + self.assertEqual( + "http://purl.org/linked-data/cube#measureType", default_property_uri + ) self.assertEqual("./cube-name.csv#measure/{+some_column}", default_value_uri) def test_default_property_value_uris_multi_measure_all_existing(self): """ - When a QbMultiUnits component is defined using just existing units, we can provide the `propertyUrl` and - `valueUrl`. - """ - column = QbColumn("Some Column", - QbMultiMeasureDimension([ExistingQbMeasure("http://base-uri/measures/existing-measure")])) - default_property_uri, default_value_uri = empty_qbwriter._get_default_property_value_uris_for_column(column) - self.assertEqual("http://purl.org/linked-data/cube#measureType", default_property_uri) + When a QbMultiUnits component is defined using just existing units, we can provide the `propertyUrl` and + `valueUrl`. + """ + column = QbColumn( + "Some Column", + QbMultiMeasureDimension( + [ExistingQbMeasure("http://base-uri/measures/existing-measure")] + ), + ) + ( + default_property_uri, + default_value_uri, + ) = empty_qbwriter._get_default_property_value_uris_for_column(column) + self.assertEqual( + "http://purl.org/linked-data/cube#measureType", default_property_uri + ) self.assertEqual("{+some_column}", default_value_uri) def test_default_property_value_uris_multi_measure_local_and_existing(self): """ - When a QbMultiUnits component is defined using a mixture of existing units and new units, we can't provide - an appropriate and consistent `valueUrl`. - - An exception is raised when this is attempted. - """ - column = QbColumn("Some Column", - QbMultiMeasureDimension([NewQbMeasure("Some New Measure"), - ExistingQbMeasure("http://base-uri/measures/existing-measure")])) - self.assertRaises(Exception, lambda: empty_qbwriter._get_default_property_value_uris_for_column(column)) + When a QbMultiUnits component is defined using a mixture of existing units and new units, we can't provide + an appropriate and consistent `valueUrl`. + + An exception is raised when this is attempted. + """ + column = QbColumn( + "Some Column", + QbMultiMeasureDimension( + [ + NewQbMeasure("Some New Measure"), + ExistingQbMeasure("http://base-uri/measures/existing-measure"), + ] + ), + ) + self.assertRaises( + Exception, + lambda: empty_qbwriter._get_default_property_value_uris_for_column(column), + ) def test_default_property_value_uris_single_measure_obs_val(self): """ - There should be no `propertyUrl` or `valueUrl` for a `QbSingleMeasureObservationValue`. - """ - column = QbColumn("Some Column", QbSingleMeasureObservationValue(NewQbUnit("New Unit"), - NewQbMeasure("New Qb Measure"))) - default_property_uri, default_value_uri = empty_qbwriter._get_default_property_value_uris_for_column(column) + There should be no `propertyUrl` or `valueUrl` for a `QbSingleMeasureObservationValue`. + """ + column = QbColumn( + "Some Column", + QbSingleMeasureObservationValue( + NewQbUnit("New Unit"), NewQbMeasure("New Qb Measure") + ), + ) + ( + default_property_uri, + default_value_uri, + ) = empty_qbwriter._get_default_property_value_uris_for_column(column) self.assertIsNone(default_property_uri) self.assertIsNone(default_value_uri) def test_default_property_value_uris_multi_measure_obs_val(self): """ - There should be no `propertyUrl` or `valueUrl` for a `QbMultiMeasureObservationValue`. + There should be no `propertyUrl` or `valueUrl` for a `QbMultiMeasureObservationValue`. """ column = QbColumn("Some Column", QbMultiMeasureObservationValue()) - default_property_uri, default_value_uri = empty_qbwriter._get_default_property_value_uris_for_column(column) + ( + default_property_uri, + default_value_uri, + ) = empty_qbwriter._get_default_property_value_uris_for_column(column) self.assertIsNone(default_property_uri) self.assertIsNone(default_value_uri) def test_csv_col_definition_default_property_value_urls(self): """ - When configuring a CSV-W column definition, if the user has not specified an `output_uri_template` - against the `QbColumn` then the `propertyUrl` and `valueUrl`s should both be populated by the default - values inferred from the component. + When configuring a CSV-W column definition, if the user has not specified an `output_uri_template` + against the `QbColumn` then the `propertyUrl` and `valueUrl`s should both be populated by the default + values inferred from the component. """ column = QbColumn("Some Column", QbMultiUnits([NewQbUnit("Some Unit")])) csv_col = empty_qbwriter._generate_csvqb_column(column) - self.assertEqual("http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure", csv_col["propertyUrl"]) + self.assertEqual( + "http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure", + csv_col["propertyUrl"], + ) self.assertEqual("./cube-name.csv#unit/{+some_column}", csv_col["valueUrl"]) def test_csv_col_definition_output_uri_template_override(self): """ - When configuring a CSV-W column definition, if the user has specified an `output_uri_template` against the - `QbColumn` then this should end up as the resulting CSV-W column's `valueUrl`. + When configuring a CSV-W column definition, if the user has specified an `output_uri_template` against the + `QbColumn` then this should end up as the resulting CSV-W column's `valueUrl`. """ - column = QbColumn("Some Column", ExistingQbDimension("http://base-uri/dimensions/some-dimension"), - output_uri_template="http://base-uri/some-alternative-output-uri/{+some_column}") + column = QbColumn( + "Some Column", + ExistingQbDimension("http://base-uri/dimensions/some-dimension"), + output_uri_template="http://base-uri/some-alternative-output-uri/{+some_column}", + ) csv_col = empty_qbwriter._generate_csvqb_column(column) - self.assertEqual("http://base-uri/dimensions/some-dimension", csv_col["propertyUrl"]) - self.assertEqual("http://base-uri/some-alternative-output-uri/{+some_column}", csv_col["valueUrl"]) + self.assertEqual( + "http://base-uri/dimensions/some-dimension", csv_col["propertyUrl"] + ) + self.assertEqual( + "http://base-uri/some-alternative-output-uri/{+some_column}", + csv_col["valueUrl"], + ) def test_csv_col_definition(self): """ - Test basic configuration of a CSV-W column definition. + Test basic configuration of a CSV-W column definition. """ - column = QbColumn("Some Column", ExistingQbDimension("http://base-uri/dimensions/some-dimension")) + column = QbColumn( + "Some Column", + ExistingQbDimension("http://base-uri/dimensions/some-dimension"), + ) csv_col = empty_qbwriter._generate_csvqb_column(column) self.assertFalse("suppressOutput" in csv_col) self.assertEqual("Some Column", csv_col["titles"]) self.assertEqual("some_column", csv_col["name"]) - self.assertEqual("http://base-uri/dimensions/some-dimension", csv_col["propertyUrl"]) + self.assertEqual( + "http://base-uri/dimensions/some-dimension", csv_col["propertyUrl"] + ) self.assertEqual("{+some_column}", csv_col["valueUrl"]) def test_csv_col_definition_suppressed(self): """ - Test basic configuration of a *suppressed* CSV-W column definition. + Test basic configuration of a *suppressed* CSV-W column definition. """ column = SuppressedCsvColumn("Some Column") csv_col = empty_qbwriter._generate_csvqb_column(column) @@ -276,28 +441,37 @@ def test_csv_col_definition_suppressed(self): def test_virtual_columns_generated_for_single_obs_val(self): """ - Ensure that the virtual columns generated for a `QbSingleMeasureObservationValue`'s unit and measure are - correct. + Ensure that the virtual columns generated for a `QbSingleMeasureObservationValue`'s unit and measure are + correct. """ - obs_val = QbSingleMeasureObservationValue(NewQbMeasure("Some Measure"), NewQbUnit("Some Unit")) + obs_val = QbSingleMeasureObservationValue( + NewQbMeasure("Some Measure"), NewQbUnit("Some Unit") + ) virtual_columns = empty_qbwriter._generate_virtual_columns_for_obs_val(obs_val) virt_unit = first(virtual_columns, lambda x: x["name"] == "virt_unit") self.assertIsNotNone(virt_unit) self.assertTrue(virt_unit["virtual"]) - self.assertEqual("http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure", virt_unit["propertyUrl"]) + self.assertEqual( + "http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure", + virt_unit["propertyUrl"], + ) self.assertEqual("./cube-name.csv#unit/some-unit", virt_unit["valueUrl"]) virt_measure = first(virtual_columns, lambda x: x["name"] == "virt_measure") self.assertIsNotNone(virt_measure) self.assertTrue(virt_measure["virtual"]) - self.assertEqual("http://purl.org/linked-data/cube#measureType", virt_measure["propertyUrl"]) - self.assertEqual("./cube-name.csv#measure/some-measure", virt_measure["valueUrl"]) + self.assertEqual( + "http://purl.org/linked-data/cube#measureType", virt_measure["propertyUrl"] + ) + self.assertEqual( + "./cube-name.csv#measure/some-measure", virt_measure["valueUrl"] + ) def test_virtual_columns_generated_for_multi_meas_obs_val(self): """ - Ensure that the virtual column generated for a `QbMultiMeasureObservationValue`'s unit and measure are - correct. + Ensure that the virtual column generated for a `QbMultiMeasureObservationValue`'s unit and measure are + correct. """ obs_val = QbMultiMeasureObservationValue(unit=NewQbUnit("Some Unit")) virtual_columns = empty_qbwriter._generate_virtual_columns_for_obs_val(obs_val) @@ -305,9 +479,12 @@ def test_virtual_columns_generated_for_multi_meas_obs_val(self): virt_unit = first(virtual_columns, lambda x: x["name"] == "virt_unit") self.assertIsNotNone(virt_unit) self.assertTrue(virt_unit["virtual"]) - self.assertEqual("http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure", virt_unit["propertyUrl"]) + self.assertEqual( + "http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure", + virt_unit["propertyUrl"], + ) self.assertEqual("./cube-name.csv#unit/some-unit", virt_unit["valueUrl"]) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/csvqb/csvqb/tests/unit/writers/skoscodelistwritertests.py b/csvqb/csvqb/tests/unit/writers/skoscodelistwritertests.py index b0c114f71..ee381b02e 100644 --- a/csvqb/csvqb/tests/unit/writers/skoscodelistwritertests.py +++ b/csvqb/csvqb/tests/unit/writers/skoscodelistwritertests.py @@ -6,23 +6,31 @@ from csvqb.tests.unit.unittestbase import UnitTestBase from csvqb.writers.skoscodelistwriter import SkosCodeListWriter -basic_code_list = NewQbCodeList(CatalogMetadata("Some CodeList"), [ - NewQbConcept("First Concept", code="1st-concept", description="This is the first concept."), - NewQbConcept("Second Concept", parent_code="1st-concept", sort_order=20) -]) +basic_code_list = NewQbCodeList( + CatalogMetadata("Some CodeList"), + [ + NewQbConcept( + "First Concept", + code="1st-concept", + description="This is the first concept.", + ), + NewQbConcept("Second Concept", parent_code="1st-concept", sort_order=20), + ], +) class CodeListWriterTests(UnitTestBase): - def test_code_list_data_mapping(self): """ - Test that a `pd.DataFrame` containing the codes is correctly generated from a `NewQbCodeList`. + Test that a `pd.DataFrame` containing the codes is correctly generated from a `NewQbCodeList`. """ writer = SkosCodeListWriter(basic_code_list) data = writer._get_code_list_data() actual_column_names = list(data.columns) - self.assertCountEqual(["Label", "Notation", "Parent Notation", "Sort Priority", "Description"], - actual_column_names) + self.assertCountEqual( + ["Label", "Notation", "Parent Notation", "Sort Priority", "Description"], + actual_column_names, + ) first_concept: Dict[str, Any] = data.iloc[[0]].to_dict("records")[0] self.assertEqual("First Concept", first_concept["Label"]) diff --git a/csvqb/csvqb/utils/dict.py b/csvqb/csvqb/utils/dict.py index 9bb301fae..459b3f2b5 100644 --- a/csvqb/csvqb/utils/dict.py +++ b/csvqb/csvqb/utils/dict.py @@ -12,13 +12,15 @@ def get_from_dict_ensure_exists(config: dict, key: str) -> Any: return val -def get_with_func_or_none(d: dict, prop_name: str, func: Callable[[Any], Any]) -> Optional[Any]: +def get_with_func_or_none( + d: dict, prop_name: str, func: Callable[[Any], Any] +) -> Optional[Any]: return func(d[prop_name]) if d.get(prop_name) is not None else None def rdf_resource_to_json_ld_dict(resource: NewResource) -> dict: """ - Converts a `NewResource` RDF model into a dictionary containing json-ld + Converts a `NewResource` RDF model into a dictionary containing json-ld """ g = rdflib.Graph() resource.to_graph(g) diff --git a/csvqb/csvqb/utils/uri.py b/csvqb/csvqb/utils/uri.py index 68a8066a9..b7ea7c857 100644 --- a/csvqb/csvqb/utils/uri.py +++ b/csvqb/csvqb/utils/uri.py @@ -8,13 +8,13 @@ def uri_safe(label: str) -> str: """ - Convert a label into something that can be used in a URI path segment. + Convert a label into something that can be used in a URI path segment. - The function formerly known as `pathify`. + The function formerly known as `pathify`. """ - return re.sub(r'-$', '', - re.sub(r'-+', '-', - re.sub(r'[^\w/]', '-', unidecode(label).lower()))) + return re.sub( + r"-$", "", re.sub(r"-+", "-", re.sub(r"[^\w/]", "-", unidecode(label).lower())) + ) def csvw_column_name_safe(label: str) -> str: @@ -26,4 +26,4 @@ def get_last_uri_part(uri: str) -> str: if maybe_match: return maybe_match.group(1) - raise Exception("Could not match last URI part") \ No newline at end of file + raise Exception("Could not match last URI part") diff --git a/csvqb/csvqb/writers/qbwriter.py b/csvqb/csvqb/writers/qbwriter.py index d141a00c6..df4aff831 100644 --- a/csvqb/csvqb/writers/qbwriter.py +++ b/csvqb/csvqb/writers/qbwriter.py @@ -5,7 +5,11 @@ from typing import Optional, Tuple, Dict, Any, List, Iterable, Callable import rdflib from sharedmodels.rdf import qb, skos -from sharedmodels.rdf.resource import Resource, ExistingResource, maybe_existing_resource +from sharedmodels.rdf.resource import ( + Resource, + ExistingResource, + maybe_existing_resource, +) from csvqb.models.cube import * @@ -21,7 +25,6 @@ class QbWriter(WriterBase): - def __init__(self, cube: QbCube): self.cube: QbCube = cube self.csv_file_name: str = f"{cube.metadata.uri_safe_identifier}.csv" @@ -30,9 +33,7 @@ def write(self, output_folder: Path): tables = [ { "url": self.csv_file_name, - "tableSchema": { - "columns": self._generate_csvw_columns_for_cube() - } + "tableSchema": {"columns": self._generate_csvw_columns_for_cube()}, } ] @@ -55,7 +56,7 @@ def write(self, output_folder: Path): "dc:title": self.cube.metadata.title, "rdfs:comment": self.cube.metadata.summary, "dc:description": self.cube.metadata.description, - "rdfs:seeAlso": dataset_definition + "rdfs:seeAlso": dataset_definition, } with open(output_folder / f"{self.csv_file_name}-metadata.json", "w+") as f: @@ -63,11 +64,11 @@ def write(self, output_folder: Path): def _doc_rel_uri(self, uri_fragment: str) -> str: """ - URIs declared in the `columns` section of the CSV-W are relative to the CSV's location. - URIs declared in the JSON-LD metadata section of the CSV-W are relative to the metadata file's location. + URIs declared in the `columns` section of the CSV-W are relative to the CSV's location. + URIs declared in the JSON-LD metadata section of the CSV-W are relative to the metadata file's location. - This function makes both point to the same base location - the CSV file's location. This ensures that we - can talk about the same resources in the `columns` section and the JSON-LD metadata section. + This function makes both point to the same base location - the CSV file's location. This ensures that we + can talk about the same resources in the `columns` section and the JSON-LD metadata section. """ return f"./{self.csv_file_name}#{uri_fragment}" @@ -87,28 +88,36 @@ def _generate_virtual_columns_for_cube(self) -> List[Dict[str, Any]]: for column in self.cube.columns: if isinstance(column, QbColumn): if isinstance(column.component, QbObservationValue): - virtual_columns += self._generate_virtual_columns_for_obs_val(column.component) + virtual_columns += self._generate_virtual_columns_for_obs_val( + column.component + ) return virtual_columns - def _generate_virtual_columns_for_obs_val(self, obs_val: QbObservationValue) -> List[Dict[str, Any]]: + def _generate_virtual_columns_for_obs_val( + self, obs_val: QbObservationValue + ) -> List[Dict[str, Any]]: virtual_columns: List[dict] = [] if obs_val.unit is not None: - virtual_columns.append({ - "name": VIRT_UNIT_COLUMN_NAME, - "virtual": True, - "propertyUrl": "http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure", - "valueUrl": self._get_unit_uri(obs_val.unit) - }) + virtual_columns.append( + { + "name": VIRT_UNIT_COLUMN_NAME, + "virtual": True, + "propertyUrl": "http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure", + "valueUrl": self._get_unit_uri(obs_val.unit), + } + ) # todo: We can't do the same thing with unti multipler unfortunately. Perhaps we should attach the unit # measure to the qb:DataSet as per the normalised standard and then de-normalise it when we upload to PMD? if isinstance(obs_val, QbSingleMeasureObservationValue): - virtual_columns.append({ - "name": "virt_measure", - "virtual": True, - "propertyUrl": "http://purl.org/linked-data/cube#measureType", - "valueUrl": self._get_measure_uri(obs_val.measure) - }) + virtual_columns.append( + { + "name": "virt_measure", + "virtual": True, + "propertyUrl": "http://purl.org/linked-data/cube#measureType", + "valueUrl": self._get_measure_uri(obs_val.measure), + } + ) return virtual_columns def _generate_qb_metadata_dict(self) -> dict: @@ -120,20 +129,29 @@ def _generate_qb_dataset_dsd_definitions(self): dataset.structure = qb.DataStructureDefinition(self._doc_rel_uri("structure")) for column in self.cube.columns: if isinstance(column, QbColumn): - component_specs_for_col = self._get_qb_component_specs_for_col(column.uri_safe_identifier, - column.component) - component_properties_for_col = [p for s in component_specs_for_col for p in s.componentProperties] - dataset.structure.componentProperties |= set(component_properties_for_col) + component_specs_for_col = self._get_qb_component_specs_for_col( + column.uri_safe_identifier, column.component + ) + component_properties_for_col = [ + p for s in component_specs_for_col for p in s.componentProperties + ] + dataset.structure.componentProperties |= set( + component_properties_for_col + ) dataset.structure.components |= set(component_specs_for_col) return dataset - def _get_qb_component_specs_for_col(self, - column_name_uri_safe: str, - component: QbDataStructureDefinition) -> Iterable[qb.ComponentSpecification]: + def _get_qb_component_specs_for_col( + self, column_name_uri_safe: str, component: QbDataStructureDefinition + ) -> Iterable[qb.ComponentSpecification]: if isinstance(component, QbDimension): - return [self._get_qb_dimension_specification(column_name_uri_safe, component)] + return [ + self._get_qb_dimension_specification(column_name_uri_safe, component) + ] elif isinstance(component, QbAttribute): - return [self._get_qb_attribute_specification(column_name_uri_safe, component)] + return [ + self._get_qb_attribute_specification(column_name_uri_safe, component) + ] elif isinstance(component, QbMultiUnits): return [self._get_qb_units_column_specification(column_name_uri_safe)] elif isinstance(component, QbMultiMeasureDimension): @@ -143,28 +161,43 @@ def _get_qb_component_specs_for_col(self, else: raise Exception(f"Unhandled component type {type(component)}") - def _get_qb_units_column_specification(self, column_name_uri_safe: str) -> qb.AttributeComponentSpecification: - component = qb.AttributeComponentSpecification(self._doc_rel_uri(f"component/{column_name_uri_safe}")) + def _get_qb_units_column_specification( + self, column_name_uri_safe: str + ) -> qb.AttributeComponentSpecification: + component = qb.AttributeComponentSpecification( + self._doc_rel_uri(f"component/{column_name_uri_safe}") + ) component.componentRequired = True - component.attribute = ExistingResource("http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure") + component.attribute = ExistingResource( + "http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure" + ) component.componentProperties.add(component.attribute) return component - def _get_qb_obs_val_specifications(self, observation_value: QbObservationValue) -> \ - List[qb.ComponentSpecification]: + def _get_qb_obs_val_specifications( + self, observation_value: QbObservationValue + ) -> List[qb.ComponentSpecification]: specs: List[qb.ComponentSpecification] = [] if observation_value.unit is not None: - unit_uri_safe_identifier = self._get_unit_uri_safe_identifier(observation_value.unit) - specs.append(self._get_qb_units_column_specification(unit_uri_safe_identifier)) + unit_uri_safe_identifier = self._get_unit_uri_safe_identifier( + observation_value.unit + ) + specs.append( + self._get_qb_units_column_specification(unit_uri_safe_identifier) + ) if isinstance(observation_value, QbSingleMeasureObservationValue): - specs.append(self._get_qb_measure_component_specification(observation_value.measure)) + specs.append( + self._get_qb_measure_component_specification(observation_value.measure) + ) elif isinstance(observation_value, QbMultiMeasureObservationValue): pass else: - raise Exception(f"Unmatched Observation value component of type {type(observation_value)}.") + raise Exception( + f"Unmatched Observation value component of type {type(observation_value)}." + ) return specs @@ -177,54 +210,73 @@ def _get_unit_uri_safe_identifier(unit: QbUnit) -> str: else: raise Exception(f"Unhandled unit type {type(unit)}") - def _get_qb_measure_dimension_specifications(self, measure_dimension: QbMultiMeasureDimension) -> \ - List[qb.MeasureComponentSpecification]: + def _get_qb_measure_dimension_specifications( + self, measure_dimension: QbMultiMeasureDimension + ) -> List[qb.MeasureComponentSpecification]: measure_specs: List[qb.MeasureComponentSpecification] = [] for measure in measure_dimension.measures: measure_specs.append(self._get_qb_measure_component_specification(measure)) return measure_specs - def _get_qb_measure_component_specification(self, measure: QbMeasure) -> qb.MeasureComponentSpecification: + def _get_qb_measure_component_specification( + self, measure: QbMeasure + ) -> qb.MeasureComponentSpecification: if isinstance(measure, ExistingQbMeasure): # todo: ideally we would find the measures's label, however, we want to support offline-only working too. # Offline-first is a good approach. - component_uri = self._doc_rel_uri(f"component/{get_last_uri_part(measure.measure_uri)}") + component_uri = self._doc_rel_uri( + f"component/{get_last_uri_part(measure.measure_uri)}" + ) component = qb.MeasureComponentSpecification(component_uri) component.measure = ExistingResource(measure.measure_uri) component.componentProperties.add(component.measure) return component elif isinstance(measure, NewQbMeasure): - component = qb.MeasureComponentSpecification(self._doc_rel_uri(f"component/{measure.uri_safe_identifier}")) - component.measure = qb.MeasureProperty(self._doc_rel_uri(f"measure/{measure.uri_safe_identifier}")) + component = qb.MeasureComponentSpecification( + self._doc_rel_uri(f"component/{measure.uri_safe_identifier}") + ) + component.measure = qb.MeasureProperty( + self._doc_rel_uri(f"measure/{measure.uri_safe_identifier}") + ) component.measure.label = measure.label component.measure.comment = measure.description - component.measure.subPropertyOf = maybe_existing_resource(measure.parent_measure_uri) + component.measure.subPropertyOf = maybe_existing_resource( + measure.parent_measure_uri + ) component.measure.source = maybe_existing_resource(measure.source_uri) component.componentProperties.add(component.measure) return component else: raise Exception(f"Unhandled measure type {type(measure)}") - def _get_qb_dimension_specification(self, - column_name_uri_safe: str, - dimension: QbDimension) -> qb.DimensionComponentSpecification: + def _get_qb_dimension_specification( + self, column_name_uri_safe: str, dimension: QbDimension + ) -> qb.DimensionComponentSpecification: if isinstance(dimension, ExistingQbDimension): - component = qb.DimensionComponentSpecification(self._doc_rel_uri(f"component/{column_name_uri_safe}")) + component = qb.DimensionComponentSpecification( + self._doc_rel_uri(f"component/{column_name_uri_safe}") + ) component.dimension = ExistingResource(dimension.dimension_uri) elif isinstance(dimension, NewQbDimension): component = qb.DimensionComponentSpecification( self._doc_rel_uri(f"component/{dimension.uri_safe_identifier}") ) - component.dimension = qb.DimensionProperty(self._doc_rel_uri(f"dimension/{dimension.uri_safe_identifier}")) + component.dimension = qb.DimensionProperty( + self._doc_rel_uri(f"dimension/{dimension.uri_safe_identifier}") + ) component.dimension.label = dimension.label component.dimension.comment = dimension.description - component.dimension.subPropertyOf = maybe_existing_resource(dimension.parent_dimension_uri) + component.dimension.subPropertyOf = maybe_existing_resource( + dimension.parent_dimension_uri + ) component.dimension.source = maybe_existing_resource(dimension.source_uri) component.dimension.range = ExistingResource(rdflib.SKOS.Concept) if dimension.code_list is not None: - component.dimension.code_list = self._get_code_list_resource(dimension.code_list) + component.dimension.code_list = self._get_code_list_resource( + dimension.code_list + ) else: raise Exception(f"Unhandled dimension component type {type(dimension)}.") @@ -233,7 +285,9 @@ def _get_qb_dimension_specification(self, return component - def _get_code_list_resource(self, code_list: QbCodeList) -> Resource[skos.ConceptScheme]: + def _get_code_list_resource( + self, code_list: QbCodeList + ) -> Resource[skos.ConceptScheme]: if isinstance(code_list, ExistingQbCodeList): return ExistingResource(code_list.concept_scheme_uri) elif isinstance(code_list, NewQbCodeList): @@ -242,20 +296,26 @@ def _get_code_list_resource(self, code_list: QbCodeList) -> Resource[skos.Concep else: raise Exception(f"Unhandled code list type {type(code_list)}") - def _get_qb_attribute_specification(self, - column_name_uri_safe: str, - attribute: QbAttribute) -> qb.AttributeComponentSpecification: + def _get_qb_attribute_specification( + self, column_name_uri_safe: str, attribute: QbAttribute + ) -> qb.AttributeComponentSpecification: if isinstance(attribute, ExistingQbAttribute): - component = qb.AttributeComponentSpecification(self._doc_rel_uri(f"component/{column_name_uri_safe}")) + component = qb.AttributeComponentSpecification( + self._doc_rel_uri(f"component/{column_name_uri_safe}") + ) component.attribute = ExistingResource(attribute.attribute_uri) elif isinstance(attribute, NewQbAttribute): component = qb.AttributeComponentSpecification( self._doc_rel_uri(f"component/{attribute.uri_safe_identifier}") ) - component.attribute = qb.AttributeProperty(self._doc_rel_uri(f"attribute/{attribute.uri_safe_identifier}")) + component.attribute = qb.AttributeProperty( + self._doc_rel_uri(f"attribute/{attribute.uri_safe_identifier}") + ) component.attribute.label = attribute.label component.attribute.comment = attribute.description - component.attribute.subPropertyOf = maybe_existing_resource(attribute.parent_attribute_uri) + component.attribute.subPropertyOf = maybe_existing_resource( + attribute.parent_attribute_uri + ) component.attribute.source = maybe_existing_resource(attribute.source_uri) # todo: Find some way to link the codelist we have to the # ComponentProperty? @@ -270,7 +330,7 @@ def _get_qb_attribute_specification(self, def _generate_csvqb_column(self, column: CsvColumn) -> Dict[str, Any]: csvw_col: Dict[str, Any] = { "titles": column.csv_column_title, - "name": csvw_column_name_safe(column.uri_safe_identifier) + "name": csvw_column_name_safe(column.uri_safe_identifier), } if isinstance(column, SuppressedCsvColumn): @@ -278,12 +338,19 @@ def _generate_csvqb_column(self, column: CsvColumn) -> Dict[str, Any]: elif isinstance(column, QbColumn): self._define_csvw_column_for_qb_column(csvw_col, column) else: - raise Exception(f"Unhandled column type ({type(column)}) with title '{column.csv_column_title}'") + raise Exception( + f"Unhandled column type ({type(column)}) with title '{column.csv_column_title}'" + ) return csvw_col - def _define_csvw_column_for_qb_column(self, csvw_col: dict, column: QbColumn) -> None: - (property_url, default_value_url) = self._get_default_property_value_uris_for_column(column) + def _define_csvw_column_for_qb_column( + self, csvw_col: dict, column: QbColumn + ) -> None: + ( + property_url, + default_value_url, + ) = self._get_default_property_value_uris_for_column(column) if property_url is not None: csvw_col["propertyUrl"] = property_url @@ -296,12 +363,14 @@ def _define_csvw_column_for_qb_column(self, csvw_col: dict, column: QbColumn) -> if isinstance(column.component, QbObservationValue): csvw_col["datatype"] = column.component.data_type - def _get_default_property_value_uris_for_multi_units(self, - column: QbColumn, - multi_units: QbMultiUnits) -> Tuple[str, str]: + def _get_default_property_value_uris_for_multi_units( + self, column: QbColumn, multi_units: QbMultiUnits + ) -> Tuple[str, str]: column_template_fragment = self._get_column_uri_template_fragment(column) all_units_new = all([isinstance(u, NewQbUnit) for u in multi_units.units]) - all_units_existing = all([isinstance(u, ExistingQbUnit) for u in multi_units.units]) + all_units_existing = all( + [isinstance(u, ExistingQbUnit) for u in multi_units.units] + ) unit_value_uri: str if all_units_new: @@ -310,16 +379,25 @@ def _get_default_property_value_uris_for_multi_units(self, unit_value_uri = column_template_fragment else: # todo: Come up with a solution for this! - raise Exception("Cannot handle a mix of new units and existing defined units.") + raise Exception( + "Cannot handle a mix of new units and existing defined units." + ) - return "http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure", unit_value_uri + return ( + "http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure", + unit_value_uri, + ) - def _get_default_property_value_uris_for_multi_measure(self, - column: QbColumn, - measure_dimension: QbMultiMeasureDimension) -> Tuple[str, str]: + def _get_default_property_value_uris_for_multi_measure( + self, column: QbColumn, measure_dimension: QbMultiMeasureDimension + ) -> Tuple[str, str]: column_template_fragment = self._get_column_uri_template_fragment(column) - all_measures_new = all([isinstance(m, NewQbMeasure) for m in measure_dimension.measures]) - all_measures_existing = all([isinstance(m, ExistingQbMeasure) for m in measure_dimension.measures]) + all_measures_new = all( + [isinstance(m, NewQbMeasure) for m in measure_dimension.measures] + ) + all_measures_existing = all( + [isinstance(m, ExistingQbMeasure) for m in measure_dimension.measures] + ) measure_value_uri: str if all_measures_new: @@ -328,51 +406,74 @@ def _get_default_property_value_uris_for_multi_measure(self, measure_value_uri = column_template_fragment else: # todo: Come up with a solution for this! - raise Exception("Cannot handle a mix of new measures and existing defined measures.") + raise Exception( + "Cannot handle a mix of new measures and existing defined measures." + ) return "http://purl.org/linked-data/cube#measureType", measure_value_uri - def _get_default_property_value_uris_for_column(self, column: QbColumn) -> \ - Tuple[Optional[str], Optional[str]]: + def _get_default_property_value_uris_for_column( + self, column: QbColumn + ) -> Tuple[Optional[str], Optional[str]]: if isinstance(column.component, QbDimension): return self._get_default_property_value_uris_for_dimension(column) elif isinstance(column.component, QbAttribute): return self._get_default_property_value_uris_for_attribute(column) elif isinstance(column.component, QbMultiUnits): - return self._get_default_property_value_uris_for_multi_units(column, column.component) + return self._get_default_property_value_uris_for_multi_units( + column, column.component + ) elif isinstance(column.component, QbMultiMeasureDimension): - return self._get_default_property_value_uris_for_multi_measure(column, column.component) + return self._get_default_property_value_uris_for_multi_measure( + column, column.component + ) elif isinstance(column.component, QbObservationValue): return None, None else: raise Exception(f"Unhandled component type {type(column.component)}") - def _get_default_property_value_uris_for_dimension(self, column: QbColumn[QbDimension]) -> Tuple[str, Optional[str]]: + def _get_default_property_value_uris_for_dimension( + self, column: QbColumn[QbDimension] + ) -> Tuple[str, Optional[str]]: dimension = column.component if isinstance(dimension, ExistingQbDimension): - return dimension.dimension_uri, self._get_column_uri_template_fragment(column) + return dimension.dimension_uri, self._get_column_uri_template_fragment( + column + ) elif isinstance(dimension, NewQbDimension): - local_dimension_uri = self._doc_rel_uri(f"dimension/{dimension.uri_safe_identifier}") + local_dimension_uri = self._doc_rel_uri( + f"dimension/{dimension.uri_safe_identifier}" + ) value_uri = self._get_column_uri_template_fragment(column) if dimension.code_list is not None: - value_uri = self._get_default_value_uri_for_code_list_concepts(column, dimension.code_list) + value_uri = self._get_default_value_uri_for_code_list_concepts( + column, dimension.code_list + ) return local_dimension_uri, value_uri else: raise Exception(f"Unhandled dimension type {type(dimension)}") - def _get_default_property_value_uris_for_attribute(self, column: QbColumn[QbAttribute]) -> Tuple[str, str]: + def _get_default_property_value_uris_for_attribute( + self, column: QbColumn[QbAttribute] + ) -> Tuple[str, str]: attribute = column.component if isinstance(attribute, ExistingQbAttribute): - return attribute.attribute_uri, self._get_column_uri_template_fragment(column) + return attribute.attribute_uri, self._get_column_uri_template_fragment( + column + ) elif isinstance(attribute, NewQbAttribute): - local_attribute_uri = self._doc_rel_uri(f"attribute/{attribute.uri_safe_identifier}") + local_attribute_uri = self._doc_rel_uri( + f"attribute/{attribute.uri_safe_identifier}" + ) value_uri = self._get_column_uri_template_fragment(column) return local_attribute_uri, value_uri else: raise Exception(f"Unhandled attribute type {type(attribute)}") - def _get_column_uri_template_fragment(self, column: CsvColumn, escape_value: bool = False) -> str: + def _get_column_uri_template_fragment( + self, column: CsvColumn, escape_value: bool = False + ) -> str: if escape_value: return "{" + csvw_column_name_safe(column.uri_safe_identifier) + "}" @@ -384,11 +485,17 @@ def _get_new_code_list_scheme_uri(self, code_list: NewQbCodeList) -> str: external_code_list_pattern = re.compile("^(.*)/concept-scheme/(.*)$") dataset_local_code_list_pattern = re.compile("^(.*)#scheme/(.*)$") - def _get_default_value_uri_for_code_list_concepts(self, column: CsvColumn, code_list: QbCodeList) -> str: + def _get_default_value_uri_for_code_list_concepts( + self, column: CsvColumn, code_list: QbCodeList + ) -> str: column_uri_fragment = self._get_column_uri_template_fragment(column) if isinstance(code_list, ExistingQbCodeList): - external_match = self.external_code_list_pattern.match(code_list.concept_scheme_uri) - local_match = self.dataset_local_code_list_pattern.match(code_list.concept_scheme_uri) + external_match = self.external_code_list_pattern.match( + code_list.concept_scheme_uri + ) + local_match = self.dataset_local_code_list_pattern.match( + code_list.concept_scheme_uri + ) if external_match: m: re.Match = external_match # ConceptScheme URI: @@ -408,7 +515,9 @@ def _get_default_value_uri_for_code_list_concepts(self, column: CsvColumn, code_ return column_uri_fragment elif isinstance(code_list, NewQbCodeList): - return self._doc_rel_uri(f"concept/{code_list.metadata.uri_safe_identifier}/{column_uri_fragment}") + return self._doc_rel_uri( + f"concept/{code_list.metadata.uri_safe_identifier}/{column_uri_fragment}" + ) else: raise Exception(f"Unhandled codelist type {type(code_list)}") diff --git a/csvqb/csvqb/writers/skoscodelistwriter.py b/csvqb/csvqb/writers/skoscodelistwriter.py index 130d81d9d..ce90e8fc2 100644 --- a/csvqb/csvqb/writers/skoscodelistwriter.py +++ b/csvqb/csvqb/writers/skoscodelistwriter.py @@ -15,14 +15,15 @@ class SkosCodeListWriter(WriterBase): - def __init__(self, new_code_list: NewQbCodeList): self.csv_file_name = f"{new_code_list.metadata.uri_safe_identifier}.csv" self.new_code_list: NewQbCodeList = new_code_list def write(self, output_directory: Path) -> None: csv_file_path = (output_directory / self.csv_file_name).absolute() - metadata_file_path = (output_directory / f"{self.csv_file_name}-metadata.json").absolute() + metadata_file_path = ( + output_directory / f"{self.csv_file_name}-metadata.json" + ).absolute() csvw_metadata, data = self._new_code_list_to_csvw_parts() @@ -33,11 +34,11 @@ def write(self, output_directory: Path) -> None: def _doc_rel_uri(self, fragment: str) -> str: """ - URIs declared in the `columns` section of the CSV-W are relative to the CSV's location. - URIs declared in the JSON-LD metadata section of the CSV-W are relative to the metadata file's location. + URIs declared in the `columns` section of the CSV-W are relative to the CSV's location. + URIs declared in the JSON-LD metadata section of the CSV-W are relative to the metadata file's location. - This function makes both point to the same base location - the CSV file's location. This ensures that we - can talk about the same resources in the `columns` section and the JSON-LD metadata section. + This function makes both point to the same base location - the CSV file's location. This ensures that we + can talk about the same resources in the `columns` section and the JSON-LD metadata section. """ return f"./{self.csv_file_name}#{fragment}" @@ -55,41 +56,41 @@ def _get_csvw_metadata(self) -> dict: "titles": "Label", "name": "label", "required": True, - "propertyUrl": "rdfs:label" + "propertyUrl": "rdfs:label", }, { "titles": "Notation", "name": "notation", "required": True, - "propertyUrl": "skos:notation" + "propertyUrl": "skos:notation", }, { "titles": "Parent Notation", "name": "parent_notation", "required": False, "propertyUrl": "skos:broader", - "valueUrl": self._doc_rel_uri("concept/{+parent_notation}") + "valueUrl": self._doc_rel_uri("concept/{+parent_notation}"), }, { "titles": "Sort Priority", "name": "sort_priority", "required": False, "datatype": "integer", - "propertyUrl": "http://www.w3.org/ns/ui#sortPriority" + "propertyUrl": "http://www.w3.org/ns/ui#sortPriority", }, { "titles": "Description", "name": "description", "required": False, - "propertyUrl": "rdfs:comment" + "propertyUrl": "rdfs:comment", }, { "virtual": True, "name": "virt_inScheme", "required": False, "propertyUrl": "skos:inScheme", - "valueUrl": self._doc_rel_uri("scheme") - } + "valueUrl": self._doc_rel_uri("scheme"), + }, ] csvw_metadata = { @@ -98,9 +99,9 @@ def _get_csvw_metadata(self) -> dict: "url": self.csv_file_name, "tableSchema": { "columns": csvw_columns, - "aboutUrl": self._doc_rel_uri("concept/{+notation}") + "aboutUrl": self._doc_rel_uri("concept/{+notation}"), }, - "rdfs:seeAlso": rdf_resource_to_json_ld_dict(additional_metadata) + "rdfs:seeAlso": rdf_resource_to_json_ld_dict(additional_metadata), } return csvw_metadata @@ -124,10 +125,14 @@ def _get_catalog_metadata(self) -> ConceptSchemeInCatalog: return concept_scheme def _get_code_list_data(self) -> pd.DataFrame: - return pd.DataFrame({ - "Label": [c.label for c in self.new_code_list.concepts], - "Notation": [c.code for c in self.new_code_list.concepts], - "Parent Notation": [c.parent_code for c in self.new_code_list.concepts], - "Sort Priority": [c.sort_order or i for i, c in enumerate(self.new_code_list.concepts)], - "Description": [c.description for c in self.new_code_list.concepts] - }) + return pd.DataFrame( + { + "Label": [c.label for c in self.new_code_list.concepts], + "Notation": [c.code for c in self.new_code_list.concepts], + "Parent Notation": [c.parent_code for c in self.new_code_list.concepts], + "Sort Priority": [ + c.sort_order or i for i, c in enumerate(self.new_code_list.concepts) + ], + "Description": [c.description for c in self.new_code_list.concepts], + } + ) diff --git a/devtools/devtools/behave/csv2rdf.py b/devtools/devtools/behave/csv2rdf.py index 0bafc4467..ce4e0065f 100644 --- a/devtools/devtools/behave/csv2rdf.py +++ b/devtools/devtools/behave/csv2rdf.py @@ -19,17 +19,17 @@ def _run_csv2rdf(context, metadata_file_path: Path) -> Tuple[int, str, Optional[ tmp_dir = Path(tmp_dir) client = docker.from_env() csv2rdf = client.containers.create( - 'gsscogs/csv2rdf', - command=f'csv2rdf -u /tmp/{metadata_file_path.name} -o /tmp/csv2rdf.ttl -m annotated' + "gsscogs/csv2rdf", + command=f"csv2rdf -u /tmp/{metadata_file_path.name} -o /tmp/csv2rdf.ttl -m annotated", ) csv2rdf.put_archive("/tmp", dir_to_tar(metadata_file_path.parent)) csv2rdf.start() response: dict = csv2rdf.wait() exit_code = response["StatusCode"] - sys.stdout.write(csv2rdf.logs().decode('utf-8')) + sys.stdout.write(csv2rdf.logs().decode("utf-8")) - output_stream, output_stat = csv2rdf.get_archive('/tmp/csv2rdf.ttl') + output_stream, output_stat = csv2rdf.get_archive("/tmp/csv2rdf.ttl") extract_tar(output_stream, tmp_dir) maybe_output_file = tmp_dir / "csv2rdf.ttl" if maybe_output_file.exists(): @@ -40,10 +40,10 @@ def _run_csv2rdf(context, metadata_file_path: Path) -> Tuple[int, str, Optional[ context.turtle = ttl_out - return exit_code, csv2rdf.logs().decode('utf-8'), ttl_out + return exit_code, csv2rdf.logs().decode("utf-8"), ttl_out -@step("csv2rdf on \"{file}\" should succeed") +@step('csv2rdf on "{file}" should succeed') def step_impl(context, file: str): temp_dir = get_context_temp_dir_path(context) exit_code, logs, ttl_out = _run_csv2rdf(context, temp_dir / file) @@ -52,7 +52,7 @@ def step_impl(context, file: str): context.turtle = ttl_out -@step('csv2rdf on \"{file}\" should fail with "{expected}"') +@step('csv2rdf on "{file}" should fail with "{expected}"') def step_impl(context, file: str, expected: str): temp_dir = get_context_temp_dir_path(context) exit_code, logs, ttl_out = _run_csv2rdf(context, temp_dir / file) diff --git a/devtools/devtools/behave/csvlint.py b/devtools/devtools/behave/csvlint.py index c5c7ae7ee..2224da0f0 100644 --- a/devtools/devtools/behave/csvlint.py +++ b/devtools/devtools/behave/csvlint.py @@ -14,26 +14,25 @@ def _run_csvlint(metadata_file_path: Path) -> Tuple[int, str]: client = docker.from_env() - csvlint = client.containers.create( - 'gsscogs/csvlint', - command=f"csvlint -s '/tmp/{metadata_file_path.name}'" + csvlint = client.containers.create( + "gsscogs/csvlint", command=f"csvlint -s '/tmp/{metadata_file_path.name}'" ) csvlint.put_archive("/tmp", dir_to_tar(metadata_file_path.parent)) csvlint.start() response: dict = csvlint.wait() exit_code = response["StatusCode"] - sys.stdout.write(csvlint.logs().decode('utf-8')) - return exit_code, csvlint.logs().decode('utf-8') + sys.stdout.write(csvlint.logs().decode("utf-8")) + return exit_code, csvlint.logs().decode("utf-8") -@step("csvlint validation of \"{file}\" should succeed") +@step('csvlint validation of "{file}" should succeed') def step_impl(context, file: str): temp_dir = get_context_temp_dir_path(context) exit_code, logs = _run_csvlint(temp_dir / file) assert_equal(exit_code, 0) -@step('csvlint validation of \"{file}\" should fail with "{expected}"') +@step('csvlint validation of "{file}" should fail with "{expected}"') def step_impl(context, file: str, expected: str): temp_dir = get_context_temp_dir_path(context) exit_code, logs = _run_csvlint(temp_dir / file) diff --git a/devtools/devtools/behave/rdf.py b/devtools/devtools/behave/rdf.py index d4a483a49..69157386f 100644 --- a/devtools/devtools/behave/rdf.py +++ b/devtools/devtools/behave/rdf.py @@ -7,23 +7,28 @@ def test_graph_diff(g1, g2): - in_both, only_in_first, only_in_second = graph_diff(to_isomorphic(g1), to_isomorphic(g2)) + in_both, only_in_first, only_in_second = graph_diff( + to_isomorphic(g1), to_isomorphic(g2) + ) only_in_first.namespace_manager = g1.namespace_manager only_in_second.namespace_manager = g2.namespace_manager - ok_(len(only_in_second) == 0, f""" + ok_( + len(only_in_second) == 0, + f""" <<< {only_in_first.serialize(format='n3').decode('utf-8')} === {only_in_second.serialize(format='n3').decode('utf-8')} >>> -""") +""", + ) @step("the RDF should contain") def step_impl(context): test_graph_diff( - Graph().parse(format='turtle', data=context.turtle), - Graph().parse(format='turtle', data=context.text) + Graph().parse(format="turtle", data=context.turtle), + Graph().parse(format="turtle", data=context.text), ) @@ -32,8 +37,8 @@ def step_impl(context, query_file: str, expected_query_result: str): query_file = Path(query_file) with open(query_file) as f: query = f.read() - g = Graph().parse(format='turtle', data=context.turtle) + g = Graph().parse(format="turtle", data=context.turtle) results = list(g.query(query)) ask_result = results[0] expected_ask_result = bool(distutils.util.strtobool(expected_query_result)) - assert(ask_result == expected_ask_result) + assert ask_result == expected_ask_result diff --git a/devtools/devtools/behave/sparqltests.py b/devtools/devtools/behave/sparqltests.py index 99ffbb292..267f4a8b0 100644 --- a/devtools/devtools/behave/sparqltests.py +++ b/devtools/devtools/behave/sparqltests.py @@ -25,8 +25,8 @@ def _run_sparql_tests(context, tests_to_run: List[str] = []) -> Tuple[int, str]: test_dir_params = " ".join([f"-t '/usr/local/tests/{t}'" for t in tests_to_run]) sparql_test_runner = client.containers.create( - 'gsscogs/gdp-sparql-tests', - command=f'sparql-test-runner {test_dir_params} /tmp/content.ttl' + "gsscogs/gdp-sparql-tests", + command=f"sparql-test-runner {test_dir_params} /tmp/content.ttl", ) sparql_test_runner.put_archive("/tmp", dir_to_tar(temp_dir)) @@ -34,18 +34,18 @@ def _run_sparql_tests(context, tests_to_run: List[str] = []) -> Tuple[int, str]: sparql_test_runner.start() response: dict = sparql_test_runner.wait() exit_code = response["StatusCode"] - sys.stdout.write(sparql_test_runner.logs().decode('utf-8')) + sys.stdout.write(sparql_test_runner.logs().decode("utf-8")) - return exit_code, sparql_test_runner.logs().decode('utf-8') + return exit_code, sparql_test_runner.logs().decode("utf-8") -@step("the RDF should pass \"{test_types}\" SPARQL tests") +@step('the RDF should pass "{test_types}" SPARQL tests') def step_impl(context, test_types: str): exit_code, logs = _run_sparql_tests(context, test_types.split(", ")) nose.assert_equal(exit_code, 0) -@step("the RDF should fail \"{test_types}\" SPARQL tests with \"{expected}\"") +@step('the RDF should fail "{test_types}" SPARQL tests with "{expected}"') def step_impl(context, test_types: str, expected: str): exit_code, logs = _run_sparql_tests(context, test_types.split(", ")) nose.assert_equal(exit_code, 1) diff --git a/devtools/devtools/helpers/csvwhelpers.py b/devtools/devtools/helpers/csvwhelpers.py index 675e7aafc..0170b8b24 100644 --- a/devtools/devtools/helpers/csvwhelpers.py +++ b/devtools/devtools/helpers/csvwhelpers.py @@ -24,4 +24,3 @@ def delete_csvw(metadata_file: Path): csv_file.unlink() metadata_file.unlink() - diff --git a/devtools/devtools/helpers/tar.py b/devtools/devtools/helpers/tar.py index 585028216..b851f795f 100644 --- a/devtools/devtools/helpers/tar.py +++ b/devtools/devtools/helpers/tar.py @@ -27,6 +27,5 @@ def extract_tar(output_stream: Iterable[bytes], output_directory: Path) -> None: for line in output_stream: output_archive.write(line) output_archive.seek(0) - with TarFile(fileobj=output_archive, mode='r') as t: + with TarFile(fileobj=output_archive, mode="r") as t: t.extractall(path=output_directory) -