From f8d3f2727845b9a25a94c10cfb21b800902a19d0 Mon Sep 17 00:00:00 2001 From: Rob Barry Date: Thu, 19 May 2022 13:04:11 +0100 Subject: [PATCH 1/2] Defining the SHACL shapes for code lists. --- csvcubed/shacl/README.md | 74 +++++++ csvcubed/shacl/v1/0/code-list.json | 99 ++++++++++ csvcubed/shacl/v1/0/deps/dcat.json | 133 +++++++++++++ csvcubed/shacl/v1/0/deps/shacl-context.json | 44 +++++ csvcubed/shacl/v1/0/shapes-diagram.svg | 2 + csvcubed/shacl/v1/0/ttl/README.md | 3 + csvcubed/shacl/v1/0/ttl/shapes.ttl | 205 ++++++++++++++++++++ csvcubed/shacl/v1/0/ttl/to-turtle.sh | 12 ++ 8 files changed, 572 insertions(+) create mode 100644 csvcubed/shacl/README.md create mode 100644 csvcubed/shacl/v1/0/code-list.json create mode 100644 csvcubed/shacl/v1/0/deps/dcat.json create mode 100644 csvcubed/shacl/v1/0/deps/shacl-context.json create mode 100644 csvcubed/shacl/v1/0/shapes-diagram.svg create mode 100644 csvcubed/shacl/v1/0/ttl/README.md create mode 100644 csvcubed/shacl/v1/0/ttl/shapes.ttl create mode 100755 csvcubed/shacl/v1/0/ttl/to-turtle.sh diff --git a/csvcubed/shacl/README.md b/csvcubed/shacl/README.md new file mode 100644 index 000000000..800d40801 --- /dev/null +++ b/csvcubed/shacl/README.md @@ -0,0 +1,74 @@ +# SHACL Shapes + +**N.B. SHACL Shapes are a work-in-progress.** + +This directory contains a versioned history of the RDF shape of csvcubed's outputs. + +The purpose of these SHACL shape definitions is to provide a well-defined and versioned description of the RDF which is +produced by csvcubed. These definitions specify the interface (or API) between csvcubed and applications which may wish to +process, consume or display the contents of CSV-Ws generated by the application. + +The SHACL shapes defined herein can also be used to check/validate whether a given CSV-W matches the csvcubed +specifications. It is also possible to [visualise](#visualisation) these SHACL constraints to get an overview of the key +RDF relationships. 
+ +## Versioning + +Versioning of csvcubed's outputs uses a `MAJOR.minor` approach to versioning. + +**Any changes to the RDF structure of said output files must be accompanied by a new version in this directory.** + +### Major Versions + +Major versions are for any and all breaking changes, including defining new triples which fundamentally change +how existing triples should be interpreted. + +### Minor Versions + +Minor versions may only **add** triple definitions which are **optional** and can safely be ignored by applications +reading and interpreting CSV-Ws. + +## Folder Structure + +Each major version should have a `v*` folder with folders for each minor version inside, e.g. for version `1.0`, +the following folder structure exists: + +``` +v1 - MAJOR version +└── 0 - Minor version + └── ...SHACL shape files... +``` + +### Files + +Each version folder should: + +* Contain one JSON-LD file (at the top level) for each type of CSV-W output that csvcubed is capable of generating. + * For example a user should be able to locate the file defining a code-list's shape and validate their RDF directly against that. +* Any shared or extraneous files should be defined in a sub-directory so as not to clutter up the definitions. + +## Visualisation + +The SHACL shapes defined in turtle format can be visualised using the [SHACL Play!](https://shacl-play.sparna.fr/play/draw) tool. + +## Validation + +### Apache Jena + +You can use the [Apache Jena shacl command](https://jena.apache.org/documentation/tools/#other-handy-command-line-tools) +to validate SHACL shapes in turtle format against RDF data. + + +```bash +shacl validate --shapes shapes.ttl --data subsector.ttl +``` + +### pySHACL + +[pySHACL](https://github.com/RDFLib/pySHACL) supports directly validating RDF against JSON-LD definitions: + +```bash +pyshacl -s code-list.json subsector.ttl --imports --metashacl +``` + +N.B. 
The `imports` flag above is important when using the JSON-LD definitions since they are spread across multiple files. \ No newline at end of file diff --git a/csvcubed/shacl/v1/0/code-list.json b/csvcubed/shacl/v1/0/code-list.json new file mode 100644 index 000000000..0fd521af2 --- /dev/null +++ b/csvcubed/shacl/v1/0/code-list.json @@ -0,0 +1,99 @@ +{ + "@context": [ + "deps/shacl-context.json", + { + "this": { + "@id": "https://purl.org/csv-cubed/shacl/v1.0/code-list.json#", + "prefix": true + }, + "ui": "http://www.w3.org/ns/ui#", + "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#", + "dcatshapes": { + "@id": "https://purl.org/csv-cubed/shacl/v1.0/deps/dcat.json#" + } + } + ], + "@id": "https://purl.org/csv-cubed/shacl/v1.0/code-list.json#ontology", + "@type": "owl:Ontology", + "owl:imports": "deps/dcat.json", + "shape": [ + { + "@id": "this:ConceptSchemeShape", + "@type": "sh:NodeShape", + "targetClass": "skos:ConceptScheme", + "property": [ + { + "path": "rdfs:label", + "name": "Code list's label", + "description": "The code list's label must be a non-zero length `rdf:languageString` with no duplicate languages present. 
Each label has a matching `dct:title` triple.", + "datatype": "rdf:langString", + "uniqueLang": true, + "minCount": 1, + "minLength": 1 + }, + { + "path": "xkos:variant", + "name": "Variant of", + "description": "The concept scheme is a variant of another existing code list", + "minCount": 0, + "nodeKind": "sh:IRI", + "class": "skos:ConceptScheme", + "node": "this:ConceptSchemeShape" + } + ], + "and": [ + "dcatshapes:CatalogDatasetShape" + ] + }, + { + "@id": "this:ConceptShape", + "@type": "sh:NodeShape", + "targetClass": "skos:Concept", + "property": [ + { + "path": "rdfs:label", + "name": "Label", + "description": "A concept's label must be a non-zero length `xsd:string`.", + "minCount": 1, + "datatype": "xsd:string", + "minLength": 1 + }, + { + "path": "skos:notation", + "name": "Notation", + "description": "A concept must have a single notation. The notation must be a non-zero length `xsd:string`.", + "minCount": 1, + "maxCount": 1, + "datatype": "xsd:string", + "minLength": 1 + }, + { + "path": "skos:inScheme", + "name": "In Scheme", + "description": "A concept must be part of at least one concept scheme (code list). The concept scheme must be referred to by IRI.", + "minCount": 1, + "nodeKind": "sh:IRI", + "class": "skos:ConceptScheme", + "node": "this:ConceptSchemeShape" + }, + { + "path": "ui:sortPriority", + "name": "Sort Priority", + "description": "A concept must have a single integer sort priority to guide user interfaces on the order in which to sort concepts for display.", + "minCount": 1, + "maxCount": 1, + "datatype": "xsd:integer" + }, + { + "path": "skos:broader", + "name": "Broader Concept", + "description": "A concept may have one or more concepts which are broader than itself. 
These concepts must be referred to by IRI.", + "minCount": 0, + "nodeKind": "sh:IRI", + "class": "skos:Concept", + "node": "this:ConceptShape" + } + ] + } + ] +} \ No newline at end of file diff --git a/csvcubed/shacl/v1/0/deps/dcat.json b/csvcubed/shacl/v1/0/deps/dcat.json new file mode 100644 index 000000000..379e4250a --- /dev/null +++ b/csvcubed/shacl/v1/0/deps/dcat.json @@ -0,0 +1,133 @@ +{ + "@context": [ + "shacl-context.json", + { + "this": { + "@id": "https://purl.org/csv-cubed/shacl/v1.0/deps/dcat.json#", + "prefix": true + } + } + ], + "@id": "https://purl.org/csv-cubed/shacl/v1.0/deps/dcat.json#ontology", + "@type": "owl:Ontology", + "shape": [ + { + "@id": "this:CatalogDatasetShape", + "@type": "sh:NodeShape", + "targetClass": "dcat:Dataset", + "property": [ + { + "path": "dct:title", + "name": "Title", + "equals": "rdfs:label", + "description": "A short title describing the catalogued item.", + "datatype": "rdf:langString", + "uniqueLang": true, + "minCount": 1, + "maxCount": 1, + "minLength": 1 + }, + { + "path": "dct:identifier", + "name": "Identifier", + "description": "An identifier for this catalogued item.", + "datatype": "xsd:string", + "minCount": 1, + "maxCount": 1, + "minLength": 1 + }, + { + "path": "dct:issued", + "name": "Date-time Issued", + "description": "The point in time when the catalogued item was first issued.", + "datatype": "xsd:dateTime", + "minCount": 1, + "maxCount": 1 + }, + { + "path": "dct:modified", + "name": "Date-time Modified", + "description": "The point in time when the catalogued item was last updated.", + "datatype": "xsd:dateTime", + "minCount": 1, + "maxCount": 1 + }, + { + "path": "rdfs:comment", + "name": "Comment", + "description": "Short sentence describing the item being catalogued.", + "datatype": "rdf:langString", + "uniqueLang": true, + "minCount": 0, + "maxCount": 1, + "minLength": 1 + }, + { + "path": "dct:description", + "name": "Description", + "description": "Longer multi-paragraph description of 
what this catalogued item represents.", + "datatype": "rdf:langString", + "uniqueLang": true, + "minCount": 0, + "maxCount": 1, + "minLength": 1 + }, + { + "path": "dct:license", + "name": "License", + "description": "The license under which the catalogue item is available.", + "nodeKind": "sh:IRI", + "minCount": 0, + "maxCount": 1 + }, + { + "path": "dct:creator", + "name": "Creator", + "description": "The original creator of the catalogued item.", + "nodeKind": "sh:IRI", + "minCount": 0, + "maxCount": 1 + }, + { + "path": "dct:publisher", + "name": "Publisher", + "description": "The publisher of the catalogued item.", + "nodeKind": "sh:IRI", + "minCount": 0, + "maxCount": 1 + }, + { + "path": "dcat:landingPage", + "name": "Landing Page", + "description": "Landing page where a copy of the catalogued item's data can be found for download.", + "nodeKind": "sh:IRI", + "minCount": 0 + }, + { + "path": "dcat:theme", + "name": "Theme", + "description": "Pre-defined theme URIs which the catalogued item sits within.", + "nodeKind": "sh:IRI", + "minCount": 0 + }, + { + "path": "dcat:keyword", + "name": "Keyword", + "description": "Free-text key words which catagorise the catalogued item.", + "datatype": "rdf:langString", + "uniqueLang": true, + "minCount": 0, + "minLength": 1 + }, + { + "path": "dcat:contactPoint", + "name": "Contact Point", + "description": "A URI which can be used to get in contact to discuss the catalogued item.", + "nodeKind": "sh:IRI", + "minCount": 0, + "maxCount": 1 + } + ] + } + ] +} \ No newline at end of file diff --git a/csvcubed/shacl/v1/0/deps/shacl-context.json b/csvcubed/shacl/v1/0/deps/shacl-context.json new file mode 100644 index 000000000..f8df88cca --- /dev/null +++ b/csvcubed/shacl/v1/0/deps/shacl-context.json @@ -0,0 +1,44 @@ +{ + "@context": [ + "https://raw.githubusercontent.com/w3c/shacl/master/shacl-jsonld-context/shacl.context.ld.json", + { + "uniqueLang": { + "@id": "sh:uniqueLang", + "@type": "xsd:boolean" + }, + "description": 
{ + "@id": "sh:description", + "@type": "xsd:string" + }, + "name": { + "@id": "sh:name", + "@type": "xsd:string" + }, + + "skos": "http://www.w3.org/2004/02/skos/core#", + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "xsd": "http://www.w3.org/2001/XMLSchema#", + "dct": "http://purl.org/dc/terms/", + "owl": "http://www.w3.org/2002/07/owl#", + "dcat": "http://www.w3.org/ns/dcat#", + + "@base": "https://purl.org/csv-cubed/shacl/v1.0/deps/shacl-context.json", + "@vocab": "https://purl.org/csv-cubed/shacl/v1.0/deps/shacl-context.json#", + "shape": { + "@type": "@id" + }, + "owl:imports": { + "@type": "@id" + } + } + ], + "@id": "https://purl.org/csv-cubed/shacl/v1.0/deps/shacl-context.json", + "rdfs:seeAlso": [ + { + "@id": "#shape", + "@type": "rdf:Property", + "rdfs:label": "Defines a SHACL shape." + } + ] +} \ No newline at end of file diff --git a/csvcubed/shacl/v1/0/shapes-diagram.svg b/csvcubed/shacl/v1/0/shapes-diagram.svg new file mode 100644 index 000000000..e96e13d6b --- /dev/null +++ b/csvcubed/shacl/v1/0/shapes-diagram.svg @@ -0,0 +1,2 @@ + +https://purl.org/csv-cubed/shacl/v1.0/deps/dcathttps://purl.org/csv-cubed/shacl/v1.0/code-listjson#CatalogDatasetShapehttp://purl.org/dc/terms/creator [0..1]http:purl.org/dc/terms/description : http:www.w3.org/1999/02/22-rdf-syntax-ns#langString [0..1] uniqueLanghttp:purl.org/dc/terms/identifier : http:www.w3.org/2001/XMLSchema#string [1..1]http:purl.org/dc/terms/issued : http:www.w3.org/2001/XMLSchema#dateTime [1..1]http://purl.org/dc/terms/license [0..1]http:purl.org/dc/terms/modified : http:www.w3.org/2001/XMLSchema#dateTime [1..1]http://purl.org/dc/terms/publisher [0..1]http:purl.org/dc/terms/title : http:www.w3.org/1999/02/22-rdf-syntax-ns#langString [1..1] uniqueLanghttp:www.w3.org/2000/01/rdf-schema#comment : http:www.w3.org/1999/02/22-rdf-syntax-ns#langString [0..1] uniqueLanghttp://www.w3.org/ns/dcat#contactPoint [0..1]http:www.w3.org/ns/dcat#keyword 
: http:www.w3.org/1999/02/22-rdf-syntax-ns#langString [0..*] uniqueLanghttp://www.w3.org/ns/dcat#landingPage [0..*]http://www.w3.org/ns/dcat#theme [0..*]json#ConceptSchemeShapehttp:www.w3.org/2000/01/rdf-schema#label : http:www.w3.org/1999/02/22-rdf-syntax-ns#langString [1..*] uniqueLangjson#ConceptShapehttp:www.w3.org/2000/01/rdf-schema#label : http:www.w3.org/2001/XMLSchema#string [1..*]http:www.w3.org/2004/02/skos/core#notation : http:www.w3.org/2001/XMLSchema#string [1..1]http:www.w3.org/ns/ui#sortPriority : http:www.w3.org/2001/XMLSchema#integer [1..1]http:rdf-vocabulary.ddialliance.org/xkos#variant [0..*] http:www.w3.org/ns/shacl#IRIhttp:www.w3.org/2004/02/skos/core#broader [0..*] http:www.w3.org/ns/shacl#IRIhttp:www.w3.org/2004/02/skos/core#inScheme [1..*] http:www.w3.org/ns/shacl#IRI \ No newline at end of file diff --git a/csvcubed/shacl/v1/0/ttl/README.md b/csvcubed/shacl/v1/0/ttl/README.md new file mode 100644 index 000000000..ad8e81252 --- /dev/null +++ b/csvcubed/shacl/v1/0/ttl/README.md @@ -0,0 +1,3 @@ +# Turtle Shapes + +The parent directory's SHACL shapes defined in turtle format in a single file for ease of use. diff --git a/csvcubed/shacl/v1/0/ttl/shapes.ttl b/csvcubed/shacl/v1/0/ttl/shapes.ttl new file mode 100644 index 000000000..6a9902c68 --- /dev/null +++ b/csvcubed/shacl/v1/0/ttl/shapes.ttl @@ -0,0 +1,205 @@ + + . + + + a ; + "Defines a SHACL shape." . + + + a ; + ; + . + + + a ; + _:b0 . + +_:b0 ; + . + + + _:b1 ; + _:b2 ; + . + +_:b1 ; + "The code list's label must be a non-zero length `rdf:languageString` with no duplicate languages present. Each label has a matching `dct:title` triple." ; + 1 ; + 1 ; + "Code list's label" ; + ; + true . + +_:b2 ; + "The concept scheme is a variant of another existing code list" ; + 0 ; + "Variant of" ; + ; + ; + . + + + a ; + _:b3 ; + _:b4 ; + _:b5 ; + _:b6 ; + _:b7 ; + . + +_:b3 ; + "A concept's label must be a non-zero length `xsd:string`." ; + 1 ; + 1 ; + "Label" ; + . 
+ +_:b4 ; + "A concept must have a single notation. The notation must be a non-zero length `xsd:string`." ; + 1 ; + 1 ; + 1 ; + "Notation" ; + . + +_:b5 ; + "A concept must be part of at least one concept scheme (code list). The concept scheme must be referred to by IRI." ; + 1 ; + "In Scheme" ; + ; + ; + . + +_:b6 ; + "A concept must have a single integer sort priority to guide user interfaces on the order in which to sort concepts for display." ; + 1 ; + 1 ; + "Sort Priority" ; + . + +_:b7 ; + "A concept may have one or more concepts which are broader than itself. These concepts must be referred to by IRI." ; + 0 ; + "Broader Concept" ; + ; + ; + . + + + a ; + . + + + a ; + _:b8 ; + _:b9 ; + _:b10 ; + _:b11 ; + _:b12 ; + _:b13 ; + _:b14 ; + _:b15 ; + _:b16 ; + _:b17 ; + _:b18 ; + _:b19 ; + _:b20 ; + . + +_:b8 ; + "A short title describing the catalogued item." ; + ; + 1 ; + 1 ; + 1 ; + "Title" ; + ; + true . + +_:b9 ; + "An identifier for this catalogued item." ; + 1 ; + 1 ; + 1 ; + "Identifier" ; + . + +_:b10 ; + "The point in time when the catalogued item was first issued." ; + 1 ; + 1 ; + "Date-time Issued" ; + . + +_:b11 ; + "The point in time when the catalogued item was last updated." ; + 1 ; + 1 ; + "Date-time Modified" ; + . + +_:b12 ; + "Short sentence describing the item being catalogued." ; + 1 ; + 0 ; + 1 ; + "Comment" ; + ; + true . + +_:b13 ; + "Longer multi-paragraph description of what this catalogued item represents." ; + 1 ; + 0 ; + 1 ; + "Description" ; + ; + true . + +_:b14 "The license under which the catalogue item is available." ; + 1 ; + 0 ; + "License" ; + ; + . + +_:b15 "The original creator of the catalogued item." ; + 1 ; + 0 ; + "Creator" ; + ; + . + +_:b16 "The publisher of the catalogued item." ; + 1 ; + 0 ; + "Publisher" ; + ; + . + +_:b17 "Landing page where a copy of the catalogued item's data can be found for download." ; + 0 ; + "Landing Page" ; + ; + . + +_:b18 "Pre-defined theme URIs which the catalogued item sits within." 
; + 0 ; + "Theme" ; + ; + . + +_:b19 ; + "Free-text key words which catagorise the catalogued item." ; + 0 ; + 1 ; + "Keyword" ; + ; + true . + +_:b20 "A URI which can be used to get in contact to discuss the catalogued item." ; + 1 ; + 0 ; + "Contact Point" ; + ; + . diff --git a/csvcubed/shacl/v1/0/ttl/to-turtle.sh b/csvcubed/shacl/v1/0/ttl/to-turtle.sh new file mode 100755 index 000000000..54889844c --- /dev/null +++ b/csvcubed/shacl/v1/0/ttl/to-turtle.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +# A script to build a single `shapes.ttl` file from the shapes defined in many JSON files. + +riot --syntax json-ld --out ttl ../deps/shacl-context.json > context.ttl || exit 1 +riot --syntax json-ld --out ttl ../deps/dcat.json > dcat.ttl || exit 1 +riot --syntax json-ld --out ttl ../code-list.json > code-list.ttl || exit 1 + +riot --output=nt context.ttl code-list.ttl dcat.ttl | grep -v "http://www.w3.org/2002/07/owl#imports" | riot --syntax nt --output ttl > shapes.ttl + +rm -rf context.ttl dcat.ttl code-list.ttl + From 13e80fb0a27179e49e73b59ab31c8d66790e1e5e Mon Sep 17 00:00:00 2001 From: Rob Barry Date: Thu, 19 May 2022 13:06:40 +0100 Subject: [PATCH 2/2] Oops, slight correction. --- csvcubed/shacl/v1/0/code-list.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/csvcubed/shacl/v1/0/code-list.json b/csvcubed/shacl/v1/0/code-list.json index 0fd521af2..9b7fa05c9 100644 --- a/csvcubed/shacl/v1/0/code-list.json +++ b/csvcubed/shacl/v1/0/code-list.json @@ -9,7 +9,8 @@ "ui": "http://www.w3.org/ns/ui#", "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#", "dcatshapes": { - "@id": "https://purl.org/csv-cubed/shacl/v1.0/deps/dcat.json#" + "@id": "https://purl.org/csv-cubed/shacl/v1.0/deps/dcat.json#", + "prefix": true } } ],