diff --git a/src/csvcubed/writers/helpers/qbwriter/dsdtordfmodelshelper.py b/src/csvcubed/writers/helpers/qbwriter/dsdtordfmodelshelper.py index e41ad506d..2f8730f55 100644 --- a/src/csvcubed/writers/helpers/qbwriter/dsdtordfmodelshelper.py +++ b/src/csvcubed/writers/helpers/qbwriter/dsdtordfmodelshelper.py @@ -5,7 +5,7 @@ Help Generate the DSD necessary for an RDF Data Cube. """ import logging -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Iterable, List, Set from csvcubedmodels import rdf @@ -73,6 +73,11 @@ class DsdToRdfModelsHelper: cube: QbCube _uris: UriHelper + _units_component_already_defined: bool = field(init=False, default=False) + """ + Records whether or not a units component has already been defined in this cube. + If it has, don't define it again. + """ def generate_data_structure_definitions(self) -> List[dict]: """ @@ -301,7 +306,7 @@ def _get_qb_component_specs_for_col( self._get_qb_attribute_specification(column_name_uri_safe, component) ] elif isinstance(component, QbMultiUnits): - return [self._get_qb_units_column_specification(column_name_uri_safe)] + return self._get_qb_units_column_specification(column_name_uri_safe) elif isinstance(component, QbMultiMeasureDimension): return self._get_qb_measure_dimension_specifications(component) elif isinstance(component, QbObservationValue): @@ -311,7 +316,18 @@ def _get_qb_component_specs_for_col( def _get_qb_units_column_specification( self, column_name_uri_safe: str - ) -> rdf.qb.AttributeComponentSpecification: + ) -> List[rdf.qb.AttributeComponentSpecification]: + if self._units_component_already_defined: + _logger.debug( + "Units component already generated. Not generating a second one, %s.", + column_name_uri_safe, + ) + # Don't define a second units component, the first one will work just fine. 
+ + return [] + + self._units_component_already_defined = True + component = rdf.qb.AttributeComponentSpecification( self._uris.get_component_uri(column_name_uri_safe) ) @@ -326,7 +342,7 @@ def _get_qb_units_column_specification( component.uri, ) - return component + return [component] def _get_qb_obs_val_specifications( self, observation_value: QbObservationValue @@ -339,7 +355,7 @@ def _get_qb_obs_val_specifications( unit = observation_value.unit if unit is not None: - specs.append(self._get_qb_units_column_specification("unit")) + specs += self._get_qb_units_column_specification("unit") if observation_value.is_pivoted_shape_observation: assert observation_value.measure is not None diff --git a/tests/unit/writers/qbwriter/test_dsdtordfmodelshelper.py b/tests/unit/writers/qbwriter/test_dsdtordfmodelshelper.py index 5256752b7..e466ff04a 100644 --- a/tests/unit/writers/qbwriter/test_dsdtordfmodelshelper.py +++ b/tests/unit/writers/qbwriter/test_dsdtordfmodelshelper.py @@ -1,6 +1,7 @@ import pandas as pd import pytest from csvcubedmodels import rdf +from csvcubedmodels.rdf import qb from rdflib import RDFS, Graph, Literal, URIRef from csvcubed.models.cube.cube import Cube @@ -577,5 +578,62 @@ def test_qb_order_of_components(): ) in graph +def test_units_component_duplication(): + """This test checks that when there are multiple observation value columns with units, only one units component is generated. 
+ link to the issue: https://github.com/GSS-Cogs/csvcubed/issues/755 + """ + + data = pd.DataFrame( + { + "Existing Dimension": ["A", "B", "C"], + "Value": [1, 2, 3], + "Existing Attribute": ["Provisional", "Final", "Provisional"], + } + ) + + cube = Cube( + CatalogMetadata("Some Dataset"), + data, + [ + QbColumn( + "Existing Dimension", + ExistingQbDimension( + "https://example.org/dimensions/existing_dimension", + arbitrary_rdf=[ + TripleFragment(RDFS.label, "Existing Dimension Component") + ], + ), + ), + QbColumn( + "Value", + QbObservationValue( + NewQbMeasure("Some Measure"), NewQbUnit("Some Unit") + ), + ), + QbColumn( + "Other Value", + QbObservationValue( + NewQbMeasure("Some Other Measure"), NewQbUnit("Some Other Unit") + ), + ), + ], + ) + + dsd_helper = DsdToRdfModelsHelper(cube, UriHelper(cube)) + dataset = dsd_helper._generate_qb_dataset_dsd_definitions() + + list_of_units = [ + component + for component in dataset.structure.components + if ( + isinstance(component, qb.AttributeComponentSpecification) + and str(component.attribute.uri) + == "http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure" + ) + ] + + assert len(list_of_units) == 1 + + if __name__ == "__main__": pytest.main()