Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#755 FIX unit component duplication BUG #756

Merged
merged 3 commits into from
Mar 9, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions src/csvcubed/writers/helpers/qbwriter/dsdtordfmodelshelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,17 +301,29 @@ def _get_qb_component_specs_for_col(
self._get_qb_attribute_specification(column_name_uri_safe, component)
]
elif isinstance(component, QbMultiUnits):
return [self._get_qb_units_column_specification(column_name_uri_safe)]
return self._get_qb_units_column_specification(column_name_uri_safe)
elif isinstance(component, QbMultiMeasureDimension):
return self._get_qb_measure_dimension_specifications(component)
elif isinstance(component, QbObservationValue):
return self._get_qb_obs_val_specifications(component)
else:
raise TypeError(f"Unhandled component type {type(component)}")

_units_component_already_defined: bool = False
nimshi89 marked this conversation as resolved.
Show resolved Hide resolved
"""
nimshi89 marked this conversation as resolved.
Show resolved Hide resolved
Records whether or not a units component has already been defined in this cube.
If it has, don't define it again.
"""

def _get_qb_units_column_specification(
self, column_name_uri_safe: str
) -> rdf.qb.AttributeComponentSpecification:
) -> List[rdf.qb.AttributeComponentSpecification]:
if self._units_component_already_defined:
# Don't define a second units component, the first one will work just fine.
return []
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add some debug logging here so we can track that we're avoiding serialising a second units component?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

        self, column_name_uri_safe: str
    ) -> List[rdf.qb.AttributeComponentSpecification]:
        if self._units_component_already_defined:
            _logger.debug(
                "The component type has already been serialised, %s.",
                column_name_uri_safe,
            )
            # Don't define a second units component, the first one will work just fine.

            return []```

is this the right place for it?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that's definitely the right place, but could it say something like Units component already generated. Not generating a second one.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah cool I'll sort it out


self._units_component_already_defined = True

component = rdf.qb.AttributeComponentSpecification(
self._uris.get_component_uri(column_name_uri_safe)
)
Expand All @@ -326,7 +338,7 @@ def _get_qb_units_column_specification(
component.uri,
)

return component
return [component]

def _get_qb_obs_val_specifications(
self, observation_value: QbObservationValue
Expand All @@ -339,7 +351,7 @@ def _get_qb_obs_val_specifications(

unit = observation_value.unit
if unit is not None:
specs.append(self._get_qb_units_column_specification("unit"))
specs += self._get_qb_units_column_specification("unit")

if observation_value.is_pivoted_shape_observation:
assert observation_value.measure is not None
Expand Down
65 changes: 65 additions & 0 deletions tests/unit/writers/qbwriter/test_dsdtordfmodelshelper.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pandas as pd
import pytest
from csvcubedmodels import rdf
from csvcubedmodels.rdf import qb
from rdflib import RDFS, Graph, Literal, URIRef

from csvcubed.models.cube.cube import Cube
Expand Down Expand Up @@ -577,5 +578,69 @@ def test_qb_order_of_components():
) in graph


def test_units_component_duplication():
"""This test checks if there are multiple observation value columns with units, only one unit component will be generated.
link to the issue: https://github.com/GSS-Cogs/csvcubed/issues/755
"""

data = pd.DataFrame(
{
"Existing Dimension": ["A", "B", "C"],
"Value": [1, 2, 3],
"Existing Attribute": ["Provisional", "Final", "Provisional"],
}
)

cube = Cube(
CatalogMetadata("Some Dataset"),
data,
[
QbColumn(
"Existing Dimension",
ExistingQbDimension(
"https://example.org/dimensions/existing_dimension",
arbitrary_rdf=[
TripleFragment(RDFS.label, "Existing Dimension Component")
],
),
),
QbColumn(
"Value",
QbObservationValue(
NewQbMeasure("Some Measure"), NewQbUnit("Some Unit")
),
),
QbColumn(
"Other Value",
QbObservationValue(
NewQbMeasure("Some Other Measure"), NewQbUnit("Some Other Unit")
),
),
],
)

dsd_helper = DsdToRdfModelsHelper(cube, UriHelper(cube))
dataset = dsd_helper._generate_qb_dataset_dsd_definitions()
graph = dataset.to_graph(Graph())
nimshi89 marked this conversation as resolved.
Show resolved Hide resolved

assert (
URIRef("some-dataset.csv#component/unit"),
rdf.QB.order,
Literal(3),
) in graph

list_of_units = [
component
for component in dataset.structure.components
if (
isinstance(component, qb.AttributeComponentSpecification)
and str(component.attribute.uri)
== "http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure"
)
]

assert len(list_of_units) == 1


if __name__ == "__main__":
pytest.main()