Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

aboutUrl function #98

Merged
merged 7 commits into from
Jul 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions csvqb/csvqb/tests/unit/writers/test_qbwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,65 @@ def test_virtual_columns_generated_for_multi_meas_obs_val():
)
assert "./cube-name.csv#unit/some-unit" == virt_unit["valueUrl"]

def test_about_url_generation():
    """
    For a cube with a single measure, the generated aboutUrl template must list
    the dimension columns in the same order in which they are declared on the cube.
    """
    data = pd.DataFrame(
        {
            "Existing Dimension": ["A", "B", "C"],
            "Local Dimension": ["D", "E", "F"],
            "Value": [2, 2, 2],
        }
    )

    # Describe each column's structural component up-front so the column list
    # below reads as a plain "CSV column title -> component" mapping.
    existing_dimension = ExistingQbDimension(
        "https://example.org/dimensions/existing_dimension"
    )
    local_dimension = NewQbDimension.from_data(
        "Name of New Dimension", data["Local Dimension"]
    )
    observed_value = QbSingleMeasureObservationValue(
        ExistingQbMeasure("http://example.com/measures/existing_measure"),
        NewQbUnit("New Unit"),
    )

    cube = Cube(
        CatalogMetadata("Some Dataset"),
        data,
        [
            QbColumn("Existing Dimension", existing_dimension),
            QbColumn("Local Dimension", local_dimension),
            QbColumn("Value", observed_value),
        ],
    )

    # Only the two dimension columns contribute a path segment; the observed
    # value column does not appear in the expected template.
    assert (
        QbWriter(cube)._get_about_url()
        == "./some-dataset.csv#obs/{+existing_dimension}/{+local_dimension}"
    )

def test_about_url_generation_with_multiple_measures():
    """
    For a multi-measure cube, the generated aboutUrl template must list the
    ordinary dimension columns in cube order, with the multi-measure dimension
    column placed at the very end of the URL.
    """
    data = pd.DataFrame(
        {
            "Measure": ["People", "Children", "Adults"],
            "Existing Dimension": ["A", "B", "C"],
            "Value": [2, 2, 2],
            "Local Dimension": ["D", "E", "F"],
            "Units": ["Percent", "People", "People"],
        }
    )

    # The measure column is deliberately declared first: the expectation below
    # checks that it is nevertheless emitted last in the template.
    measure_dimension = QbMultiMeasureDimension.new_measures_from_data(data["Measure"])
    existing_dimension = ExistingQbDimension(
        "https://example.org/dimensions/existing_dimension"
    )
    local_dimension = NewQbDimension.from_data(
        "Name of New Dimension", data["Local Dimension"]
    )
    observed_value = QbMultiMeasureObservationValue("number")
    units = QbMultiUnits.new_units_from_data(data["Units"])

    cube = Cube(
        CatalogMetadata("Some Dataset"),
        data,
        [
            QbColumn("Measure", measure_dimension),
            QbColumn("Existing Dimension", existing_dimension),
            QbColumn("Local Dimension", local_dimension),
            QbColumn("Value", observed_value),
            QbColumn("Units", units),
        ],
    )

    assert (
        QbWriter(cube)._get_about_url()
        == "./some-dataset.csv#obs/{+existing_dimension}/{+local_dimension}/{+measure}"
    )

# Script entry point: when this module is executed directly (rather than
# imported), hand control to pytest via `pytest.main()`.
if __name__ == "__main__":
pytest.main()
23 changes: 20 additions & 3 deletions csvqb/csvqb/writers/qbwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,13 @@
from pathlib import Path
from typing import Optional, Tuple, Dict, Any, List, Iterable, Callable
import rdflib
from sharedmodels.rdf import qb, skos
from sharedmodels.rdf import qb, rdfs, skos, namespaces
from sharedmodels.rdf.resource import (
Resource,
ExistingResource,
maybe_existing_resource,
)


from csvqb.models.cube import *
from csvqb.utils.uri import get_last_uri_part, csvw_column_name_safe, looks_like_uri
from csvqb.utils.qb.cube import get_columns_of_dsd_type
Expand All @@ -20,6 +19,7 @@
from .writerbase import WriterBase
from ..models.rdf.qbdatasetincatalog import QbDataSetInCatalog


VIRT_UNIT_COLUMN_NAME = "virt_unit"


Expand Down Expand Up @@ -549,4 +549,21 @@ def _get_measure_uri(self, measure: QbMeasure) -> str:
elif isinstance(measure, NewQbMeasure):
return self._doc_rel_uri(f"measure/{measure.uri_safe_identifier}")
else:
raise Exception(f"Unmatched unit type {type(unit)}")
raise Exception(f"Unmatched measure type {type(measure)}")

def _get_about_url(self) -> str:
    """
    Build the `aboutUrl` URI template used to identify each observation row.

    The template starts from `self._doc_rel_uri("obs")` and gains one
    `/{+column_name}` segment for every `QbColumn` on the cube whose component
    is a `QbDimension`, in the order the columns appear on the cube. If the
    cube has a column whose component is a `QbMultiMeasureDimension`, its
    segment is added last, regardless of where that column sits in the cube.
    When several such columns exist, only the last one encountered is used.

    :return: The assembled aboutUrl template string.
    """
    # TODO: Dimensions are currently appended in the order in which they appear
    #   in the cube. We may want to alter this in the future so that the
    #   ordering is from least entropic dimension -> most entropic.
    #   E.g. http://base-uri/observations/male/1996/all-males-1996
    url_parts = [self._doc_rel_uri("obs")]
    measure_column_name = ""

    for column in self.cube.columns:
        if not isinstance(column, QbColumn):
            # Only QbColumn entries are inspected for a structural component.
            continue

        if isinstance(column.component, QbDimension):
            dimension_name = csvw_column_name_safe(column.uri_safe_identifier)
            url_parts.append(f"/{{+{dimension_name}}}")
        elif isinstance(column.component, QbMultiMeasureDimension):
            # Held back so the measure segment can be emitted after every
            # ordinary dimension segment.
            measure_column_name = csvw_column_name_safe(column.uri_safe_identifier)

    if measure_column_name:
        url_parts.append(f"/{{+{measure_column_name}}}")

    return "".join(url_parts)