Skip to content

Commit

Permalink
Issue-68 Behaviour test info.json config.loader (#165)
Browse files Browse the repository at this point in the history
* 1.A cube is loaded using the info.json with a CSV.
2.Cube&qb validation succeeds.
3.Cube is serialised to CSV-W
4.csvlint validation succeeds
5.csv2rdf succeeds
6.RDF passes "SKOS, qb" SPARQL tests
7.Output ttl is compared with ttl as we do in gss-utils
8.Additional ttl is appended to the resulting RDF for multi-measure-data.csv, as the Measure dimension and unit attribute defined in info.json are an existing dimension and attribute.

* 1. Refactoring of "some-existing-unit" to "unit"
  • Loading branch information
santhosh-thangavel authored Sep 8, 2021
1 parent 7e542e5 commit 8629341
Show file tree
Hide file tree
Showing 8 changed files with 202 additions and 9 deletions.
87 changes: 86 additions & 1 deletion csvqb/csvqb/tests/behaviour/qbwriter.feature
Original file line number Diff line number Diff line change
Expand Up @@ -335,4 +335,89 @@ Feature: Test outputting CSV-Ws with Qb flavouring.
And the file at "code-list.csv" should exist
And csvlint validation of all CSV-Ws should succeed
And csv2rdf on all CSV-Ws should succeed
And the RDF should pass "skos, qb" SPARQL tests
And the RDF should pass "skos, qb" SPARQL tests

Scenario: Using the info.json config loader, a single-measure csv can be correctly serialised and converted to the correct RDF
Given the existing test-case file "configloaders/single-measure-info-json-test-files/single-measure-data.csv"
And the existing test-case file "configloaders/single-measure-info-json-test-files/single-measure-info.json"
And we load a cube using the info.json from "configloaders/single-measure-info-json-test-files/single-measure-info.json" with CSV from "configloaders/single-measure-info-json-test-files/single-measure-data.csv"
Then the CSVqb should pass all validations
When the cube is serialised to CSV-W
Then csvlint validation of "single-measure-bulletin.csv-metadata.json" should succeed
Then csv2rdf on all CSV-Ws should succeed
And the RDF should pass "skos, qb" SPARQL tests
And the RDF should contain
"""
@prefix qb: <http://purl.org/linked-data/cube#>.
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
@prefix : <file:/tmp/single-measure-bulletin.csv#>.
@prefix dimension: <file:/tmp/single-measure-bulletin.csv#dimension/>.
@prefix attribute: <file:/tmp/single-measure-bulletin.csv#attribute/>.
@prefix markervalues: <file:/tmp/single-measure-bulletin.csv#attribute/marker/>.
@prefix component:<file:/tmp/single-measure-bulletin.csv#component/>.
@prefix measure: <file:/tmp/single-measure-bulletin.csv#measure/>.
:dataset a qb:DataSet;
qb:structure :structure.
:structure qb:component component:period, component:one-litre-and-less, component:unit, component:marker.
component:period qb:dimension dimension:period.
dimension:period a qb:DimensionProperty.
component:one-litre-and-less qb:measure measure:one-litre-and-less.
measure:one-litre-and-less a qb:MeasureProperty.
component:marker qb:attribute attribute:marker.
markervalues:provisional a rdfs:Resource.
component:unit qb:attribute <http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure>.
"""

Scenario: Using the info.json config loader, a multi-measure csv can be correctly serialised and converted to the correct RDF
Given the existing test-case file "configloaders/multi-measure-info-json-test-files/multi-measure-data.csv"
And the existing test-case file "configloaders/multi-measure-info-json-test-files/multi-measure-info.json"
And we load a cube using the info.json from "configloaders/multi-measure-info-json-test-files/multi-measure-info.json" with CSV from "configloaders/multi-measure-info-json-test-files/multi-measure-data.csv"
Then the CSVqb should pass all validations
When the cube is serialised to CSV-W
Then csvlint validation of "multi-measure-bulletin.csv-metadata.json" should succeed
And csv2rdf on all CSV-Ws should succeed
And some additional turtle is appended to the resulting RDF
"""
@prefix qb: <http://purl.org/linked-data/cube#>.
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
<http://gss-data.org.uk/def/x/number-of-bottles> a qb:MeasureProperty;
rdfs:label "Number of bottles"@en.
<http://gss-data.org.uk/def/x/more-than-one-litre> a qb:MeasureProperty;
rdfs:label "Number of bottles more than one litre"@en.
<http://gss-data.org.uk/def/x/one-litre-and-less> a qb:MeasureProperty;
rdfs:label "Number of bottles one litre and less"@en.
"""
And the RDF should pass "skos, qb" SPARQL tests
And the RDF should contain
"""
@prefix qb: <http://purl.org/linked-data/cube#>.
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
@prefix : <file:/tmp/multi-measure-bulletin.csv#>.
@prefix dimension: <file:/tmp/multi-measure-bulletin.csv#dimension/>.
@prefix component: <file:/tmp/multi-measure-bulletin.csv#component/>.
@prefix measure: <http://gss-data.org.uk/def/x/>.
:dataset a qb:DataSet;
qb:structure :structure.
:structure qb:component component:period, component:one-litre-and-less, component:more-than-one-litre,
component:number-of-bottles, component:unit.
component:period qb:dimension dimension:period.
dimension:period a qb:DimensionProperty.
component:one-litre-and-less qb:measure measure:one-litre-and-less.
component:more-than-one-litre qb:measure measure:more-than-one-litre.
component:number-of-bottles qb:measure measure:number-of-bottles.
component:unit qb:attribute <http://purl.org/linked-data/sdmx/2009/attribute#unitMeasure>.
"""

14 changes: 14 additions & 0 deletions csvqb/csvqb/tests/behaviour/steps/qbwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from csvqb.writers.qbwriter import QbWriter
from csvqb.utils.qb.cube import validate_qb_component_constraints
from csvqb.utils.csvw import get_first_table_schema
import csvqb.configloaders.infojson as infojsonloader


def get_standard_catalog_metadata_for_name(
Expand Down Expand Up @@ -605,3 +606,16 @@ def step_impl(context, cube_name: str):
def step_impl(context):
rdf_to_add = context.text
context.turtle += rdf_to_add


@Step(
    'we load a cube using the info.json from "{some_json}" with CSV from "{some_csv}"'
)
def step_impl(context, some_json, some_csv):
    """Load a cube from an info.json config plus a CSV and keep it on the context.

    Both ``some_json`` and ``some_csv`` are joined onto the directory returned
    by ``get_context_temp_dir_path(context)``. The CSV is read with
    ``pd.read_csv`` and passed, together with the info.json path, to
    ``infojsonloader.get_cube_from_info_json``; the result is stored as
    ``context.cube`` for later steps.
    """
    base_dir = get_context_temp_dir_path(context)

    # Read the data first, then resolve the config path (same order as before).
    csv_frame = pd.read_csv(base_dir / some_csv)
    info_json_path = base_dir / some_json

    context.cube = infojsonloader.get_cube_from_info_json(info_json_path, csv_frame)
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Period,Measure,Unit,Value
2021,one-litre-and-less,percentage,4
2022,more-than-one-litre,percentage,6
2023,number-of-bottles,count,5
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"id": "multi-measure-bulletin",
"title": "multi-measure-bottles",
"publisher": "HM Revenue & Customs",
"description": "All bulletins provide details on percentage of one litre or less & more than one litre bottles. This information is provided on a yearly basis.",
"landingPage": "https://www.gov.uk/government/statistics/bottles-bulletin",
"datasetNotes": [
"\"UK bottles-bulletin Tables\" Excel file, latest version on page"
],
"published": "2019-02-28",
"families": [
"Trade"
],
"extract": {
"source": "XLS",
"stage": "Done"
},
"transform": {
"airtable": "recys4OhEtE0gE14P",
"columns": {
"Period": {
"parent": "http://purl.org/linked-data/sdmx/2009/dimension#refPeriod",
"value": "http://reference.data.gov.uk/id/{+period}",
"codelist": false
},
"Measure": {
"type": "measures",
"value": "http://gss-data.org.uk/def/x/{+measure}"
},
"Unit": {
"type": "units",
"value": "http://gss-data.org.uk/def/concept/measurement-units/{+unit}"
},
"Value": {
"datatype": "integer"
}
},
"main_issue": 67
},
"sizingNotes": "",
"notes": ""
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Period,Value,Marker
2021,40,Provisional
2022,50,Provisional
2023,60,Provisional
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"id": "single-measure-bulletin",
"title": "single-measure-bottles-bulletin",
"publisher": "HM Revenue & Customs",
"description": "All bulletins provide details on percentage of one litre or less bottles. This information is provided on a yearly basis.",
"landingPage": "https://www.gov.uk/government/statistics/bottles-bulletin",
"datasetNotes": [
"\"UK bottles-bulletin Tables\" Excel file, latest version on page"
],
"published": "2019-02-28",
"families": [
"Trade"
],
"extract": {
"source": "XLS",
"stage": "Done"
},
"transform": {
"airtable": "recys4OhEtE0gE14P",
"columns": {
"Period": {
"type": "dimension",
"new": {
"subPropertyOf": "http://purl.org/linked-data/sdmx/2009/dimension#refPeriod",
"codelist": false
},
"value": "http://reference.data.gov.uk/id/{+period}"
},
"Marker": {
"type": "attribute"
},
"Value": {
"type": "observations",
"datatype": "integer",
"measure": {
"label": "One litre and less"
},
"unit": "http://gss-data.org.uk/def/concept/measurement-units/percentage"
}
},
"main_issue": 67
},
"sizingNotes": "",
"notes": ""
}
4 changes: 2 additions & 2 deletions csvqb/csvqb/tests/unit/writers/test_qbwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def test_structure_defined():
"Observed Value",
QbSingleMeasureObservationValue(
ExistingQbMeasure("http://example.org/units/some-existing-measure"),
ExistingQbUnit("http://example.org/units/some-existing-unit"),
ExistingQbUnit("http://example.org/units/some-exisiting-unit"),
),
),
]
Expand All @@ -85,7 +85,7 @@ def test_structure_defined():

_assert_component_defined(dataset, "country")
_assert_component_defined(dataset, "marker")
_assert_component_defined(dataset, "some-existing-unit")
_assert_component_defined(dataset, "unit")
_assert_component_defined(dataset, "some-existing-measure")


Expand Down
11 changes: 5 additions & 6 deletions csvqb/csvqb/writers/qbwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from typing import Tuple, Dict, Any, List, Iterable, Set
import rdflib
from sharedmodels import rdf
from sharedmodels.rdf import skos
from sharedmodels.rdf import skos, rdfs
from sharedmodels.rdf.resource import (
ExistingResourceWithLiteral,
Resource,
Expand Down Expand Up @@ -318,10 +318,7 @@ def _get_qb_obs_val_specifications(

unit = observation_value.unit
if unit is not None:
unit_uri_safe_identifier = self._get_unit_uri_safe_identifier(unit)
specs.append(
self._get_qb_units_column_specification(unit_uri_safe_identifier)
)
specs.append(self._get_qb_units_column_specification("unit"))

if isinstance(observation_value, QbSingleMeasureObservationValue):
specs.append(
Expand Down Expand Up @@ -433,7 +430,9 @@ def _get_qb_dimension_specification(
dimension.parent_dimension_uri
)
component.dimension.source = maybe_existing_resource(dimension.source_uri)
component.dimension.range = ExistingResource(rdflib.SKOS.Concept)
component.dimension.range = rdfs.Class(
self._doc_rel_uri(f"class/{dimension.uri_safe_identifier}")
)

dimension.copy_arbitrary_triple_fragments_to_resources(
{
Expand Down

0 comments on commit 8629341

Please sign in to comment.