From 63b192402c3078feb9e7620a07ade0e29a24ad7a Mon Sep 17 00:00:00 2001 From: nicholascar Date: Fri, 3 Dec 2021 11:38:12 +1000 Subject: [PATCH 01/25] tidy plugin.py file formatting --- rdflib/plugin.py | 335 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 283 insertions(+), 52 deletions(-) diff --git a/rdflib/plugin.py b/rdflib/plugin.py index c5ffd0939..22293d13a 100644 --- a/rdflib/plugin.py +++ b/rdflib/plugin.py @@ -169,16 +169,54 @@ def plugins( yield p -# Register Store Plugins -register("default", Store, "rdflib.plugins.stores.memory", "Memory") -register("Memory", Store, "rdflib.plugins.stores.memory", "Memory") -register("SimpleMemory", Store, "rdflib.plugins.stores.memory", "SimpleMemory") -register("Auditable", Store, "rdflib.plugins.stores.auditable", "AuditableStore") -register("Concurrent", Store, "rdflib.plugins.stores.concurrent", "ConcurrentStore") -register("BerkeleyDB", Store, "rdflib.plugins.stores.berkeleydb", "BerkeleyDB") -register("SPARQLStore", Store, "rdflib.plugins.stores.sparqlstore", "SPARQLStore") +# Register Stores register( - "SPARQLUpdateStore", Store, "rdflib.plugins.stores.sparqlstore", "SPARQLUpdateStore" + "default", + Store, + "rdflib.plugins.stores.memory", + "Memory", +) +register( + "Memory", + Store, + "rdflib.plugins.stores.memory", + "Memory", +) +register( + "SimpleMemory", + Store, + "rdflib.plugins.stores.memory", + "SimpleMemory", +) +register( + "Auditable", + Store, + "rdflib.plugins.stores.auditable", + "AuditableStore", +) +register( + "Concurrent", + Store, + "rdflib.plugins.stores.concurrent", + "ConcurrentStore", +) +register( + "BerkeleyDB", + Store, + "rdflib.plugins.stores.berkeleydb", + "BerkeleyDB", +) +register( + "SPARQLStore", + Store, + "rdflib.plugins.stores.sparqlstore", + "SPARQLStore", +) +register( + "SPARQLUpdateStore", + Store, + "rdflib.plugins.stores.sparqlstore", + "SPARQLUpdateStore", ) # Register Triple Serializers @@ -188,25 +226,84 @@ def plugins( 
"rdflib.plugins.serializers.rdfxml", "XMLSerializer", ) -register("xml", Serializer, "rdflib.plugins.serializers.rdfxml", "XMLSerializer") register( - "pretty-xml", Serializer, "rdflib.plugins.serializers.rdfxml", "PrettyXMLSerializer" + "xml", + Serializer, + "rdflib.plugins.serializers.rdfxml", + "XMLSerializer", +) +register( + "pretty-xml", + Serializer, + "rdflib.plugins.serializers.rdfxml", + "PrettyXMLSerializer", +) +register( + "text/n3", + Serializer, + "rdflib.plugins.serializers.n3", + "N3Serializer", ) -register("text/n3", Serializer, "rdflib.plugins.serializers.n3", "N3Serializer") -register("n3", Serializer, "rdflib.plugins.serializers.n3", "N3Serializer") register( - "text/turtle", Serializer, "rdflib.plugins.serializers.turtle", "TurtleSerializer" + "n3", + Serializer, + "rdflib.plugins.serializers.n3", + "N3Serializer", +) +register( + "text/turtle", + Serializer, + "rdflib.plugins.serializers.turtle", + "TurtleSerializer", ) -register("turtle", Serializer, "rdflib.plugins.serializers.turtle", "TurtleSerializer") -register("turtle2", Serializer, "rdflib.plugins.serializers.turtle2", "TurtleSerializer2") -register("ttl", Serializer, "rdflib.plugins.serializers.turtle", "TurtleSerializer") register( - "application/n-triples", Serializer, "rdflib.plugins.serializers.nt", "NTSerializer" + "turtle", + Serializer, + "rdflib.plugins.serializers.turtle", + "TurtleSerializer", +) +register( + "turtle2", + Serializer, + "rdflib.plugins.serializers.turtle2", + "TurtleSerializer2", +) +register( + "ttl", + Serializer, + "rdflib.plugins.serializers.turtle", + "TurtleSerializer", +) +register( + "application/n-triples", + Serializer, + "rdflib.plugins.serializers.nt", + "NTSerializer", +) +register( + "ntriples", + Serializer, + "rdflib.plugins.serializers.nt", + "NTSerializer", +) +register( + "nt", + Serializer, + "rdflib.plugins.serializers.nt", + "NTSerializer", +) +register( + "nt11", + Serializer, + "rdflib.plugins.serializers.nt", + "NT11Serializer", +) 
+register( + "json-ld", + Serializer, + "rdflib.plugins.serializers.jsonld", + "JsonLDSerializer", ) -register("ntriples", Serializer, "rdflib.plugins.serializers.nt", "NTSerializer") -register("nt", Serializer, "rdflib.plugins.serializers.nt", "NTSerializer") -register("nt11", Serializer, "rdflib.plugins.serializers.nt", "NT11Serializer") -register("json-ld", Serializer, "rdflib.plugins.serializers.jsonld", "JsonLDSerializer") register( "application/ld+json", Serializer, @@ -221,43 +318,168 @@ def plugins( "rdflib.plugins.serializers.nquads", "NQuadsSerializer", ) -register("nquads", Serializer, "rdflib.plugins.serializers.nquads", "NQuadsSerializer") register( - "application/trix", Serializer, "rdflib.plugins.serializers.trix", "TriXSerializer" + "nquads", + Serializer, + "rdflib.plugins.serializers.nquads", + "NQuadsSerializer", +) +register( + "application/trix", + Serializer, + "rdflib.plugins.serializers.trix", + "TriXSerializer", +) +register( + "trix", + Serializer, + "rdflib.plugins.serializers.trix", + "TriXSerializer", +) +register( + "application/trig", + Serializer, + "rdflib.plugins.serializers.trig", + "TrigSerializer", ) -register("trix", Serializer, "rdflib.plugins.serializers.trix", "TriXSerializer") register( - "application/trig", Serializer, "rdflib.plugins.serializers.trig", "TrigSerializer" + "trig", + Serializer, + "rdflib.plugins.serializers.trig", + "TrigSerializer", ) -register("trig", Serializer, "rdflib.plugins.serializers.trig", "TrigSerializer") # Register Triple Parsers -register("application/rdf+xml", Parser, "rdflib.plugins.parsers.rdfxml", "RDFXMLParser") -register("xml", Parser, "rdflib.plugins.parsers.rdfxml", "RDFXMLParser") -register("text/n3", Parser, "rdflib.plugins.parsers.notation3", "N3Parser") -register("n3", Parser, "rdflib.plugins.parsers.notation3", "N3Parser") -register("text/turtle", Parser, "rdflib.plugins.parsers.notation3", "TurtleParser") -register("turtle", Parser, "rdflib.plugins.parsers.notation3", 
"TurtleParser") -register("ttl", Parser, "rdflib.plugins.parsers.notation3", "TurtleParser") -register("application/n-triples", Parser, "rdflib.plugins.parsers.ntriples", "NTParser") -register("ntriples", Parser, "rdflib.plugins.parsers.ntriples", "NTParser") -register("nt", Parser, "rdflib.plugins.parsers.ntriples", "NTParser") -register("nt11", Parser, "rdflib.plugins.parsers.ntriples", "NTParser") -register("application/ld+json", Parser, "rdflib.plugins.parsers.jsonld", "JsonLDParser") -register("json-ld", Parser, "rdflib.plugins.parsers.jsonld", "JsonLDParser") - +register( + "application/rdf+xml", + Parser, + "rdflib.plugins.parsers.rdfxml", + "RDFXMLParser", +) +register( + "xml", + Parser, + "rdflib.plugins.parsers.rdfxml", + "RDFXMLParser", +) +register( + "text/n3", + Parser, + "rdflib.plugins.parsers.notation3", + "N3Parser", +) +register( + "n3", + Parser, + "rdflib.plugins.parsers.notation3", + "N3Parser", +) +register( + "text/turtle", + Parser, + "rdflib.plugins.parsers.notation3", + "TurtleParser", +) +register( + "turtle", + Parser, + "rdflib.plugins.parsers.notation3", + "TurtleParser", +) +register( + "ttl", + Parser, + "rdflib.plugins.parsers.notation3", + "TurtleParser", +) +register( + "application/n-triples", + Parser, + "rdflib.plugins.parsers.ntriples", + "NTParser", +) +register( + "ntriples", + Parser, + "rdflib.plugins.parsers.ntriples", + "NTParser", +) +register( + "nt", + Parser, + "rdflib.plugins.parsers.ntriples", + "NTParser", +) +register( + "nt11", + Parser, + "rdflib.plugins.parsers.ntriples", + "NTParser", +) +register( + "application/ld+json", + Parser, + "rdflib.plugins.parsers.jsonld", + "JsonLDParser", +) +register( + "json-ld", + Parser, + "rdflib.plugins.parsers.jsonld", + "JsonLDParser", +) # Register Quad Parsers -register("application/n-quads", Parser, "rdflib.plugins.parsers.nquads", "NQuadsParser") -register("nquads", Parser, "rdflib.plugins.parsers.nquads", "NQuadsParser") -register("application/trix", Parser, 
"rdflib.plugins.parsers.trix", "TriXParser") -register("trix", Parser, "rdflib.plugins.parsers.trix", "TriXParser") -register("application/trig", Parser, "rdflib.plugins.parsers.trig", "TrigParser") -register("trig", Parser, "rdflib.plugins.parsers.trig", "TrigParser") +register( + "application/n-quads", + Parser, + "rdflib.plugins.parsers.nquads", + "NQuadsParser", +) +register( + "nquads", + Parser, + "rdflib.plugins.parsers.nquads", + "NQuadsParser", +) +register( + "application/trix", + Parser, + "rdflib.plugins.parsers.trix", + "TriXParser", +) +register( + "trix", + Parser, + "rdflib.plugins.parsers.trix", + "TriXParser", +) +register( + "application/trig", + Parser, + "rdflib.plugins.parsers.trig", + "TrigParser", +) +register( + "trig", + Parser, + "rdflib.plugins.parsers.trig", + "TrigParser", +) # Register SPARQL Processors -register("sparql", Result, "rdflib.plugins.sparql.processor", "SPARQLResult") -register("sparql", Processor, "rdflib.plugins.sparql.processor", "SPARQLProcessor") +register( + "sparql", + Result, + "rdflib.plugins.sparql.processor", + "SPARQLResult", +) +register( + "sparql", + Processor, + "rdflib.plugins.sparql.processor", + "SPARQLProcessor", +) register( "sparql", UpdateProcessor, @@ -311,7 +533,10 @@ def plugins( # Register SPARQL Result Parsers register( - "xml", ResultParser, "rdflib.plugins.sparql.results.xmlresults", "XMLResultParser" + "xml", + ResultParser, + "rdflib.plugins.sparql.results.xmlresults", + "XMLResultParser", ) register( "application/sparql-results+xml", @@ -344,7 +569,10 @@ def plugins( "JSONResultParser", ) register( - "csv", ResultParser, "rdflib.plugins.sparql.results.csvresults", "CSVResultParser" + "csv", + ResultParser, + "rdflib.plugins.sparql.results.csvresults", + "CSVResultParser", ) register( "text/csv", @@ -353,7 +581,10 @@ def plugins( "CSVResultParser", ) register( - "tsv", ResultParser, "rdflib.plugins.sparql.results.tsvresults", "TSVResultParser" + "tsv", + ResultParser, + 
"rdflib.plugins.sparql.results.tsvresults", + "TSVResultParser", ) register( "text/tab-separated-values", From 3a783a5886b7d699f13c5b9762b1bfc46e2a97b5 Mon Sep 17 00:00:00 2001 From: nicholascar Date: Fri, 3 Dec 2021 11:52:56 +1000 Subject: [PATCH 02/25] rename turtle2 longturtle; update parser & plubing docco --- docs/plugin_parsers.rst | 9 ++------ docs/plugin_serializers.rst | 5 ++-- rdflib/plugin.py | 12 +++++----- .../serializers/{turtle2.py => longturtle.py} | 23 ++++++++----------- test/test_rdfxml.py | 2 +- ...urtle2.py => test_serialize_longturtle.py} | 6 ++--- ...ix_serialize.py => test_serialize_trix.py} | 0 ..._serialize.py => test_serialize_turtle.py} | 0 ..._serializexml.py => test_serialize_xml.py} | 0 9 files changed, 25 insertions(+), 32 deletions(-) rename rdflib/plugins/serializers/{turtle2.py => longturtle.py} (93%) rename test/{test_turtle2.py => test_serialize_longturtle.py} (98%) rename test/{test_trix_serialize.py => test_serialize_trix.py} (100%) rename test/{test_turtle_serialize.py => test_serialize_turtle.py} (100%) rename test/{test_serializexml.py => test_serialize_xml.py} (100%) diff --git a/docs/plugin_parsers.rst b/docs/plugin_parsers.rst index 81ab7ae6b..a89e08fab 100644 --- a/docs/plugin_parsers.rst +++ b/docs/plugin_parsers.rst @@ -13,23 +13,18 @@ The ``html`` parser will auto-detect RDFa, HTurtle or Microdata. It is also possible to pass a mime-type for the ``format`` parameter:: - graph.parse(my_url, format='application/rdf+xml') + graph.parse(my_url, format='application/rdf+xml') If you are not sure what format your file will be, you can use :func:`rdflib.util.guess_format` which will guess based on the file extension. 
========= ==================================================================== Name Class ========= ==================================================================== +json-ld :class:`~rdflib.plugins.parsers.jsonld.JsonLDParser` html :class:`~rdflib.plugins.parsers.structureddata.StructuredDataParser` -hturtle :class:`~rdflib.plugins.parsers.hturtle.HTurtleParser` -mdata :class:`~rdflib.plugins.parsers.structureddata.MicrodataParser` -microdata :class:`~rdflib.plugins.parsers.structureddata.MicrodataParser` n3 :class:`~rdflib.plugins.parsers.notation3.N3Parser` nquads :class:`~rdflib.plugins.parsers.nquads.NQuadsParser` nt :class:`~rdflib.plugins.parsers.ntriples.NTParser` -rdfa :class:`~rdflib.plugins.parsers.structureddata.RDFaParser` -rdfa1.0 :class:`~rdflib.plugins.parsers.structureddata.RDFa10Parser` -rdfa1.1 :class:`~rdflib.plugins.parsers.structureddata.RDFaParser` trix :class:`~rdflib.plugins.parsers.trix.TriXParser` turtle :class:`~rdflib.plugins.parsers.notation3.TurtleParser` xml :class:`~rdflib.plugins.parsers.rdfxml.RDFXMLParser` diff --git a/docs/plugin_serializers.rst b/docs/plugin_serializers.rst index 0aedd566f..9da4f7aca 100644 --- a/docs/plugin_serializers.rst +++ b/docs/plugin_serializers.rst @@ -11,11 +11,12 @@ passing the name to a graph's :meth:`~rdflib.graph.Graph.serialize` method:: It is also possible to pass a mime-type for the ``format`` parameter:: - graph.serialize(my_url, format='application/rdf+xml') + graph.serialize(my_url, format='application/rdf+xml') ========== =============================================================== Name Class ========== =============================================================== +json-ld :class:`~rdflib.plugins.serializers.jsonld.JsonLDSerializer` n3 :class:`~rdflib.plugins.serializers.n3.N3Serializer` nquads :class:`~rdflib.plugins.serializers.nquads.NQuadsSerializer` nt :class:`~rdflib.plugins.serializers.nt.NTSerializer` @@ -23,6 +24,6 @@ pretty-xml 
:class:`~rdflib.plugins.serializers.rdfxml.PrettyXMLSerializer` trig :class:`~rdflib.plugins.serializers.trig.TrigSerializer` trix :class:`~rdflib.plugins.serializers.trix.TriXSerializer` turtle :class:`~rdflib.plugins.serializers.turtle.TurtleSerializer` +longturtle :class:`~rdflib.plugins.serializers.turtle.LongTurtleSerializer` xml :class:`~rdflib.plugins.serializers.rdfxml.XMLSerializer` ========== =============================================================== - diff --git a/rdflib/plugin.py b/rdflib/plugin.py index 22293d13a..63c3ead77 100644 --- a/rdflib/plugin.py +++ b/rdflib/plugin.py @@ -262,18 +262,18 @@ def plugins( "rdflib.plugins.serializers.turtle", "TurtleSerializer", ) -register( - "turtle2", - Serializer, - "rdflib.plugins.serializers.turtle2", - "TurtleSerializer2", -) register( "ttl", Serializer, "rdflib.plugins.serializers.turtle", "TurtleSerializer", ) +register( + "longturtle", + Serializer, + "rdflib.plugins.serializers.longturtle", + "LongTurtleSerializer", +) register( "application/n-triples", Serializer, diff --git a/rdflib/plugins/serializers/turtle2.py b/rdflib/plugins/serializers/longturtle.py similarity index 93% rename from rdflib/plugins/serializers/turtle2.py rename to rdflib/plugins/serializers/longturtle.py index 49e9cbadb..0d50e47a3 100644 --- a/rdflib/plugins/serializers/turtle2.py +++ b/rdflib/plugins/serializers/longturtle.py @@ -1,8 +1,8 @@ """ -Turtle2 RDF graph serializer for RDFLib. +LongTurtle RDF graph serializer for RDFLib. See for syntax specification. -This variant, turtle2 as opposed to just turtle, makes some small format changes +This variant, longturtle as opposed to just turtle, makes some small format changes to turtle - the original turtle serializer. 
It: * uses PREFIX instead of @prefix @@ -21,7 +21,7 @@ from .turtle import RecursiveSerializer from rdflib.namespace import RDF -__all__ = ["TurtleSerializer2"] +__all__ = ["LongTurtleSerializer"] SUBJECT = 0 VERB = 1 @@ -31,14 +31,14 @@ _SPACIOUS_OUTPUT = False -class TurtleSerializer2(RecursiveSerializer): +class LongTurtleSerializer(RecursiveSerializer): - short_name = "turtle2" + short_name = "longturtle" indentString = " " def __init__(self, store): self._ns_rewrite = {} - super(TurtleSerializer2, self).__init__(store) + super(LongTurtleSerializer, self).__init__(store) self.keywords = {RDF.type: "a"} self.reset() self.stream = None @@ -66,11 +66,11 @@ def addNamespace(self, prefix, namespace): prefix = self._ns_rewrite.get(prefix, prefix) - super(TurtleSerializer2, self).addNamespace(prefix, namespace) + super(LongTurtleSerializer, self).addNamespace(prefix, namespace) return prefix def reset(self): - super(TurtleSerializer2, self).reset() + super(LongTurtleSerializer, self).reset() self._shortNames = {} self._started = False self._ns_rewrite = {} @@ -78,7 +78,7 @@ def reset(self): def serialize(self, stream, base=None, encoding=None, spacious=None, **args): self.reset() self.stream = stream - # if base is given here, use that, if not and a base is set for the graph use that + # if base is given here, use, if not and a base is set for the graph use that if base is not None: self.base = base elif self.store.base is not None: @@ -107,7 +107,7 @@ def serialize(self, stream, base=None, encoding=None, spacious=None, **args): self.base = None def preprocessTriple(self, triple): - super(TurtleSerializer2, self).preprocessTriple(triple) + super(LongTurtleSerializer, self).preprocessTriple(triple) for i, node in enumerate(triple): if node in self.keywords: continue @@ -123,12 +123,9 @@ def getQName(self, uri, gen_prefix=True): if not isinstance(uri, URIRef): return None - parts = None - try: parts = self.store.compute_qname(uri, generate=gen_prefix) except: - # is the 
uri a namespace in itself? pfx = self.store.store.prefix(uri) diff --git a/test/test_rdfxml.py b/test/test_rdfxml.py index b0b222dc5..e695355c4 100644 --- a/test/test_rdfxml.py +++ b/test/test_rdfxml.py @@ -84,7 +84,7 @@ def cached_file(url): def relative(url): - return url[len(RDFCOREBASE) :] + return url[len(RDFCOREBASE):] def resolve(rel): diff --git a/test/test_turtle2.py b/test/test_serialize_longturtle.py similarity index 98% rename from test/test_turtle2.py rename to test/test_serialize_longturtle.py index fef0efab7..cc184787a 100644 --- a/test/test_turtle2.py +++ b/test/test_serialize_longturtle.py @@ -1,9 +1,9 @@ -# tests for the turtle2 serializer +# tests for the longturtle Serializer from rdflib import Graph -def test_turtle2(): +def test_longturtle(): g = Graph() g.parse( @@ -86,7 +86,7 @@ def test_turtle2(): """, format="turtle", ) - s = g.serialize(format="turtle2") + s = g.serialize(format="longturtle") lines = s.split("\n") assert "ex:b" in lines diff --git a/test/test_trix_serialize.py b/test/test_serialize_trix.py similarity index 100% rename from test/test_trix_serialize.py rename to test/test_serialize_trix.py diff --git a/test/test_turtle_serialize.py b/test/test_serialize_turtle.py similarity index 100% rename from test/test_turtle_serialize.py rename to test/test_serialize_turtle.py diff --git a/test/test_serializexml.py b/test/test_serialize_xml.py similarity index 100% rename from test/test_serializexml.py rename to test/test_serialize_xml.py From 7c3353d5678cff6beab20bb7cbfb3a48af8e107a Mon Sep 17 00:00:00 2001 From: nicholascar Date: Fri, 3 Dec 2021 17:54:42 +1000 Subject: [PATCH 03/25] basic hextuples serializer --- rdflib/plugin.py | 6 ++++ rdflib/plugins/serializers/hext.py | 54 ++++++++++++++++++++++++++++++ test/test_serialize_hext.py | 54 ++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+) create mode 100644 rdflib/plugins/serializers/hext.py create mode 100644 test/test_serialize_hext.py diff --git a/rdflib/plugin.py 
b/rdflib/plugin.py index 63c3ead77..96a2b6108 100644 --- a/rdflib/plugin.py +++ b/rdflib/plugin.py @@ -348,6 +348,12 @@ def plugins( "rdflib.plugins.serializers.trig", "TrigSerializer", ) +register( + "hext", + Serializer, + "rdflib.plugins.serializers.hext", + "HextuplesSerializer", +) # Register Triple Parsers register( diff --git a/rdflib/plugins/serializers/hext.py b/rdflib/plugins/serializers/hext.py new file mode 100644 index 000000000..85a3b151b --- /dev/null +++ b/rdflib/plugins/serializers/hext.py @@ -0,0 +1,54 @@ +""" +HextuplesSerializer RDF graph serializer for RDFLib. +See for details about the format. +""" +from typing import IO, Optional + +from rdflib.graph import Graph +from rdflib.term import Literal, URIRef, BNode +from rdflib.serializer import Serializer + +__all__ = ["HextuplesSerializer"] + + +class HextuplesSerializer(Serializer): + """ + Serializes RDF graphs to NTriples format. + """ + + def __init__(self, store: Graph): + Serializer.__init__(self, store) + self.encoding = "utf-8" + + def serialize( + self, + stream: IO[bytes], + base: Optional[str] = None, + encoding: Optional[str] = None, + **args + ): + self.encoding = encoding + for context in self.store.contexts(): + for triple in context: + stream.write( + _hex_line(triple, context.identifier).encode(self.encoding) + ) + stream.write("\n".encode(self.encoding)) + + +def _hex_line(triple, context): + return "[%s, %s, %s, %s, %s, %s]\n" % ( + _iri_or_bn(triple[0]), + _iri_or_bn(triple[1]), + triple[2] if type(triple[2]) == Literal else _iri_or_bn(triple[2]), + (f'"{triple[2].datatype}"' if triple[2].datatype is not None else '""') if type(triple[2]) == Literal else '""', + (f'"{triple[2].language}"' if triple[2].language is not None else '""') if type(triple[2]) == Literal else '""', + _iri_or_bn(context) + ) + + +def _iri_or_bn(i_): + if type(i_) == URIRef: + return f"\"{i_}\"" + else: + return f"\"{i_.n3()}\"" diff --git a/test/test_serialize_hext.py b/test/test_serialize_hext.py new 
file mode 100644 index 000000000..100d2aa9f --- /dev/null +++ b/test/test_serialize_hext.py @@ -0,0 +1,54 @@ +from rdflib import Dataset, URIRef, Namespace, Literal, BNode +from test import TEST_DIR + + +def test_hext_01(): + d = Dataset() + trig = """ + PREFIX ex: + PREFIX owl: + PREFIX rdf: + PREFIX rdfs: + PREFIX xsd: + + ex:g1 { + ex:s1 + ex:p1 ex:o1 , ex:o2 ; + ex:p2 [ + a owl:Thing ; + rdf:value "thingy" ; + ] ; + ex:p3 "Object 3" , "Object 4 - English"@en ; + ex:p4 "2021-12-03"^^xsd:date ; + ex:p5 42 ; + ex:p6 "42" ; + . + } + + ex:g2 { + ex:s1 + ex:p1 ex:o1 , ex:o2 ; + . + ex:s11 ex:p11 ex:o11 , ex:o12 . + } + + # default graph triples + ex:s1 ex:p1 ex:o1 , ex:o2 . + ex:s21 ex:p21 ex:o21 , ex:o22 . + + """ + d.parse(data=trig, format="trig") + out = d.serialize(format="hext") + testing_lines = [ + [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "", "", "http://example.com/g2"]'], + [False, '["http://example.com/s1", "http://example.com/p3", Object 3, "", "", "http://example.com/g1"]'], + [False, '["http://example.com/s1", "http://example.com/p5", 42, "http://www.w3.org/2001/XMLSchema#integer", "", "http://example.com/g1"]'], + [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#value", thingy, "", "", "http://example.com/g1"]'], + [False, '["http://example.com/s1", "http://example.com/p4", 2021-12-03, "http://www.w3.org/2001/XMLSchema#date", "", "http://example.com/g1"]'] + ] + for line in out.splitlines(): + for test in testing_lines: + if test[1] in line: + test[0] = True + + assert all([x[0] for x in testing_lines]) From da5e015631aaac31a5a5e8e0eed4f6f37b683a73 Mon Sep 17 00:00:00 2001 From: nicholascar Date: Fri, 3 Dec 2021 17:59:02 +1000 Subject: [PATCH 04/25] hext docco --- docs/plugin_serializers.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/plugin_serializers.rst b/docs/plugin_serializers.rst index 9da4f7aca..a6fc74bda 100644 --- a/docs/plugin_serializers.rst +++ 
b/docs/plugin_serializers.rst @@ -20,6 +20,7 @@ json-ld :class:`~rdflib.plugins.serializers.jsonld.JsonLDSerializer` n3 :class:`~rdflib.plugins.serializers.n3.N3Serializer` nquads :class:`~rdflib.plugins.serializers.nquads.NQuadsSerializer` nt :class:`~rdflib.plugins.serializers.nt.NTSerializer` +hext :class:`~rdflib.plugins.serializers.hext.HextuplesSerializer` pretty-xml :class:`~rdflib.plugins.serializers.rdfxml.PrettyXMLSerializer` trig :class:`~rdflib.plugins.serializers.trig.TrigSerializer` trix :class:`~rdflib.plugins.serializers.trix.TriXSerializer` @@ -27,3 +28,12 @@ turtle :class:`~rdflib.plugins.serializers.turtle.TurtleSerializer` longturtle :class:`~rdflib.plugins.serializers.turtle.LongTurtleSerializer` xml :class:`~rdflib.plugins.serializers.rdfxml.XMLSerializer` ========== =============================================================== + + +JSON-LD +------- +JSON-LD - 'json-ld' - has been incorporated in rdflib since v6.0.0. + +HexTuples +--------- +The HexTuples Serializer - 'hext' - uses the HexTuples format defined at https://github.com/ontola/hextuples From 64e3e311fe0550e62573713eb9eeb86b3f799dcb Mon Sep 17 00:00:00 2001 From: nicholascar Date: Fri, 3 Dec 2021 22:06:34 +1000 Subject: [PATCH 05/25] ensure both context aware and not work --- rdflib/plugins/serializers/hext.py | 23 ++++++++++++++----- test/test_serialize_hext.py | 36 +++++++++++++++++++++++++----- 2 files changed, 47 insertions(+), 12 deletions(-) diff --git a/rdflib/plugins/serializers/hext.py b/rdflib/plugins/serializers/hext.py index 85a3b151b..aa2448207 100644 --- a/rdflib/plugins/serializers/hext.py +++ b/rdflib/plugins/serializers/hext.py @@ -2,10 +2,9 @@ HextuplesSerializer RDF graph serializer for RDFLib. See for details about the format. 
""" -from typing import IO, Optional - -from rdflib.graph import Graph -from rdflib.term import Literal, URIRef, BNode +from typing import IO, TYPE_CHECKING, Optional, Union +from rdflib.graph import Graph, ConjunctiveGraph +from rdflib.term import Literal, URIRef, Node from rdflib.serializer import Serializer __all__ = ["HextuplesSerializer"] @@ -16,7 +15,19 @@ class HextuplesSerializer(Serializer): Serializes RDF graphs to NTriples format. """ - def __init__(self, store: Graph): + def __init__(self, store: Union[Graph, ConjunctiveGraph]): + self.default_context: Optional[Node] + if store.context_aware: + if TYPE_CHECKING: + assert isinstance(store, ConjunctiveGraph) + self.contexts = list(store.contexts()) + self.default_context = store.default_context.identifier + if store.default_context: + self.contexts.append(store.default_context) + else: + self.contexts = [store] + self.default_context = None + Serializer.__init__(self, store) self.encoding = "utf-8" @@ -28,7 +39,7 @@ def serialize( **args ): self.encoding = encoding - for context in self.store.contexts(): + for context in self.contexts: for triple in context: stream.write( _hex_line(triple, context.identifier).encode(self.encoding) diff --git a/test/test_serialize_hext.py b/test/test_serialize_hext.py index 100d2aa9f..94394a7c9 100644 --- a/test/test_serialize_hext.py +++ b/test/test_serialize_hext.py @@ -1,14 +1,38 @@ -from rdflib import Dataset, URIRef, Namespace, Literal, BNode -from test import TEST_DIR +from rdflib import Dataset, Graph -def test_hext_01(): +def test_hext_graph(): + g = Graph() + turtle_data = """ + PREFIX ex: + PREFIX owl: + PREFIX rdf: + PREFIX xsd: + + ex:s1 + ex:p1 ex:o1 , ex:o2 ; + ex:p2 [ + a owl:Thing ; + rdf:value "thingy" ; + ] ; + ex:p3 "Object 3" , "Object 4 - English"@en ; + ex:p4 "2021-12-03"^^xsd:date ; + ex:p5 42 ; + ex:p6 "42" ; + . 
+ """ + + g.parse(data=turtle_data, format="turtle") + out = g.serialize(format="hext") + print(out) + + +def test_hext_dataset(): d = Dataset() - trig = """ + trig_data = """ PREFIX ex: PREFIX owl: PREFIX rdf: - PREFIX rdfs: PREFIX xsd: ex:g1 { @@ -37,7 +61,7 @@ def test_hext_01(): ex:s21 ex:p21 ex:o21 , ex:o22 . """ - d.parse(data=trig, format="trig") + d.parse(data=trig_data, format="trig") out = d.serialize(format="hext") testing_lines = [ [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "", "", "http://example.com/g2"]'], From e0a636c90f957121c9db5eb2f4e35f5aa295161a Mon Sep 17 00:00:00 2001 From: nicholascar Date: Fri, 3 Dec 2021 22:49:08 +1000 Subject: [PATCH 06/25] ensure both tests actually test --- test/test_serialize_hext.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/test/test_serialize_hext.py b/test/test_serialize_hext.py index 94394a7c9..0d6fe992b 100644 --- a/test/test_serialize_hext.py +++ b/test/test_serialize_hext.py @@ -24,7 +24,19 @@ def test_hext_graph(): g.parse(data=turtle_data, format="turtle") out = g.serialize(format="hext") - print(out) + testing_lines = [ + [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "", ""'], + [False, '["http://example.com/s1", "http://example.com/p3", Object 3, "", ""'], + [False, '["http://example.com/s1", "http://example.com/p5", 42, "http://www.w3.org/2001/XMLSchema#integer", ""'], + [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#value", thingy, "", ""'], + [False, '["http://example.com/s1", "http://example.com/p4", 2021-12-03, "http://www.w3.org/2001/XMLSchema#date", ""'] + ] + for line in out.splitlines(): + for test in testing_lines: + if test[1] in line: + test[0] = True + + assert all([x[0] for x in testing_lines]) def test_hext_dataset(): From 6c47908c026112adecc2fc31dda5c34bf9894a00 Mon Sep 17 00:00:00 2001 From: nicholascar Date: Sat, 4 Dec 2021 11:19:13 +1000 Subject: [PATCH 07/25] 
improve hext and nt encoding - all utf-8 --- rdflib/plugins/serializers/hext.py | 12 ++++++++---- rdflib/plugins/serializers/nt.py | 13 ++++++------- test/test_nt_misc.py | 10 +++++----- test/test_roundtrip.py | 2 +- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/rdflib/plugins/serializers/hext.py b/rdflib/plugins/serializers/hext.py index aa2448207..b15619590 100644 --- a/rdflib/plugins/serializers/hext.py +++ b/rdflib/plugins/serializers/hext.py @@ -6,6 +6,7 @@ from rdflib.graph import Graph, ConjunctiveGraph from rdflib.term import Literal, URIRef, Node from rdflib.serializer import Serializer +import warnings __all__ = ["HextuplesSerializer"] @@ -29,7 +30,6 @@ def __init__(self, store: Union[Graph, ConjunctiveGraph]): self.default_context = None Serializer.__init__(self, store) - self.encoding = "utf-8" def serialize( self, @@ -38,13 +38,17 @@ def serialize( encoding: Optional[str] = None, **args ): - self.encoding = encoding + if base is not None: + warnings.warn("HextuplesSerializer does not support base.") + if encoding != "utf-8": + warnings.warn("NTSerializer always uses UTF-8 encoding.") + for context in self.contexts: for triple in context: stream.write( - _hex_line(triple, context.identifier).encode(self.encoding) + _hex_line(triple, context.identifier).encode() ) - stream.write("\n".encode(self.encoding)) + stream.write("\n".encode()) def _hex_line(triple, context): diff --git a/rdflib/plugins/serializers/nt.py b/rdflib/plugins/serializers/nt.py index 467de4613..dfb73b1f6 100644 --- a/rdflib/plugins/serializers/nt.py +++ b/rdflib/plugins/serializers/nt.py @@ -22,23 +22,22 @@ class NTSerializer(Serializer): def __init__(self, store: Graph): Serializer.__init__(self, store) - self.encoding = "ascii" # n-triples are ascii encoded def serialize( self, stream: IO[bytes], base: Optional[str] = None, - encoding: Optional[str] = None, + encoding: Optional[str] = "utf-8", **args ): if base is not None: warnings.warn("NTSerializer does not 
support base.") - if encoding is not None and encoding.lower() != self.encoding.lower(): - warnings.warn("NTSerializer does not use custom encoding.") - encoding = self.encoding + if encoding != "utf-8": + warnings.warn("NTSerializer always uses UTF-8 encoding.") + for triple in self.store: - stream.write(_nt_row(triple).encode(self.encoding, "_rdflib_nt_escape")) - stream.write("\n".encode("latin-1")) + stream.write(_nt_row(triple).encode()) + stream.write("\n".encode()) class NT11Serializer(NTSerializer): diff --git a/test/test_nt_misc.py b/test/test_nt_misc.py index a90798591..710774871 100644 --- a/test/test_nt_misc.py +++ b/test/test_nt_misc.py @@ -33,15 +33,15 @@ def testIssue859(self): def testIssue78(self): g = Graph() g.add((URIRef("foo"), URIRef("foo"), Literal("R\u00E4ksm\u00F6rg\u00E5s"))) - s = g.serialize(format="nt", encoding="latin-1") - self.assertEqual(type(s), bytes) - self.assertTrue(r"R\u00E4ksm\u00F6rg\u00E5s".encode("latin-1") in s) + s = g.serialize(format="nt") + self.assertEqual(type(s), str) + self.assertTrue("R\u00E4ksm\u00F6rg\u00E5s" in s) def testIssue146(self): g = Graph() g.add((URIRef("foo"), URIRef("foo"), Literal("test\n", lang="en"))) - s = g.serialize(format="nt", encoding="latin-1").strip() - self.assertEqual(s, b' "test\\n"@en .') + s = g.serialize(format="nt").strip() + self.assertEqual(s, ' "test\\n"@en .') def testIssue1144_rdflib(self): fname = "test/nt/lists-02.nt" diff --git a/test/test_roundtrip.py b/test/test_roundtrip.py index f076e576a..2729beafc 100644 --- a/test/test_roundtrip.py +++ b/test/test_roundtrip.py @@ -15,7 +15,7 @@ from test.test_n3_suite import all_n3_files """ -Test round-tripping by all serializers/parser that are registerd. +Test round-tripping by all serializers/parser that are registered. This means, you may test more than just core rdflib! 
run with no arguments to test all formats + all files From 1f565ef9c4194c95a21cbe6697e477ef182740f9 Mon Sep 17 00:00:00 2001 From: nicholascar Date: Sat, 4 Dec 2021 17:38:04 +1000 Subject: [PATCH 08/25] fixed boolean JSOn representation --- rdflib/plugins/serializers/hext.py | 34 +++++++++++++++++++++++++++++- test/test_serialize_hext.py | 18 ++++++++++------ 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/rdflib/plugins/serializers/hext.py b/rdflib/plugins/serializers/hext.py index b15619590..6977e1b04 100644 --- a/rdflib/plugins/serializers/hext.py +++ b/rdflib/plugins/serializers/hext.py @@ -55,7 +55,7 @@ def _hex_line(triple, context): return "[%s, %s, %s, %s, %s, %s]\n" % ( _iri_or_bn(triple[0]), _iri_or_bn(triple[1]), - triple[2] if type(triple[2]) == Literal else _iri_or_bn(triple[2]), + _literal(triple[2]) if type(triple[2]) == Literal else _iri_or_bn(triple[2]), (f'"{triple[2].datatype}"' if triple[2].datatype is not None else '""') if type(triple[2]) == Literal else '""', (f'"{triple[2].language}"' if triple[2].language is not None else '""') if type(triple[2]) == Literal else '""', _iri_or_bn(context) @@ -67,3 +67,35 @@ def _iri_or_bn(i_): return f"\"{i_}\"" else: return f"\"{i_.n3()}\"" + + +def _literal(i_): + raw_datatype = [ + "http://www.w3.org/2001/XMLSchema#integer", + "http://www.w3.org/2001/XMLSchema#long", + "http://www.w3.org/2001/XMLSchema#int", + "http://www.w3.org/2001/XMLSchema#short", + "http://www.w3.org/2001/XMLSchema#positiveInteger", + "http://www.w3.org/2001/XMLSchema#negativeInteger", + "http://www.w3.org/2001/XMLSchema#nonPositiveInteger", + "http://www.w3.org/2001/XMLSchema#nonNegativeInteger", + "http://www.w3.org/2001/XMLSchema#unsignedLong", + "http://www.w3.org/2001/XMLSchema#unsignedInt", + "http://www.w3.org/2001/XMLSchema#unsignedShort", + + "http://www.w3.org/2001/XMLSchema#float", + "http://www.w3.org/2001/XMLSchema#double", + "http://www.w3.org/2001/XMLSchema#decimal", + + 
"http://www.w3.org/2001/XMLSchema#boolean" + ] + if hasattr(i_, "datatype"): + if str(i_.datatype) in raw_datatype: + return f"{i_}" + else: + return f"\"{i_}\"" + else: + if str(i_) in ["true", "false"]: + return f"{i_}" + else: + return f"\"{i_}\"" diff --git a/test/test_serialize_hext.py b/test/test_serialize_hext.py index 0d6fe992b..9c6addcb2 100644 --- a/test/test_serialize_hext.py +++ b/test/test_serialize_hext.py @@ -19,6 +19,8 @@ def test_hext_graph(): ex:p4 "2021-12-03"^^xsd:date ; ex:p5 42 ; ex:p6 "42" ; + ex:p7 true ; + ex:p8 "false"^^xsd:boolean ; . """ @@ -26,10 +28,13 @@ def test_hext_graph(): out = g.serialize(format="hext") testing_lines = [ [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "", ""'], - [False, '["http://example.com/s1", "http://example.com/p3", Object 3, "", ""'], + [False, '["http://example.com/s1", "http://example.com/p3", "Object 3", "", ""'], [False, '["http://example.com/s1", "http://example.com/p5", 42, "http://www.w3.org/2001/XMLSchema#integer", ""'], - [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#value", thingy, "", ""'], - [False, '["http://example.com/s1", "http://example.com/p4", 2021-12-03, "http://www.w3.org/2001/XMLSchema#date", ""'] + [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#value", "thingy", "", ""'], + [False, '["http://example.com/s1", "http://example.com/p4", "2021-12-03", "http://www.w3.org/2001/XMLSchema#date", ""'], + [False, '["http://example.com/s1", "http://example.com/p6", "42", "", ""'], + [False, '["http://example.com/s1", "http://example.com/p7", true, "http://www.w3.org/2001/XMLSchema#boolean", ""'], + [False, '["http://example.com/s1", "http://example.com/p8", false, "http://www.w3.org/2001/XMLSchema#boolean", ""'], ] for line in out.splitlines(): for test in testing_lines: @@ -71,16 +76,15 @@ def test_hext_dataset(): # default graph triples ex:s1 ex:p1 ex:o1 , ex:o2 . ex:s21 ex:p21 ex:o21 , ex:o22 . 
- """ d.parse(data=trig_data, format="trig") out = d.serialize(format="hext") testing_lines = [ [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "", "", "http://example.com/g2"]'], - [False, '["http://example.com/s1", "http://example.com/p3", Object 3, "", "", "http://example.com/g1"]'], + [False, '["http://example.com/s1", "http://example.com/p3", "Object 3", "", "", "http://example.com/g1"]'], [False, '["http://example.com/s1", "http://example.com/p5", 42, "http://www.w3.org/2001/XMLSchema#integer", "", "http://example.com/g1"]'], - [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#value", thingy, "", "", "http://example.com/g1"]'], - [False, '["http://example.com/s1", "http://example.com/p4", 2021-12-03, "http://www.w3.org/2001/XMLSchema#date", "", "http://example.com/g1"]'] + [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#value", "thingy", "", "", "http://example.com/g1"]'], + [False, '["http://example.com/s1", "http://example.com/p4", "2021-12-03", "http://www.w3.org/2001/XMLSchema#date", "", "http://example.com/g1"]'] ] for line in out.splitlines(): for test in testing_lines: From 60af6c371033256a935703146576e51fb888af00 Mon Sep 17 00:00:00 2001 From: nicholascar Date: Sat, 4 Dec 2021 17:40:36 +1000 Subject: [PATCH 09/25] fixed boolean JSOn representation --- test/test_serialize_hext.py | 41 +++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/test/test_serialize_hext.py b/test/test_serialize_hext.py index 9c6addcb2..e5ad3e2c9 100644 --- a/test/test_serialize_hext.py +++ b/test/test_serialize_hext.py @@ -1,4 +1,5 @@ from rdflib import Dataset, Graph +import json def test_hext_graph(): @@ -92,3 +93,43 @@ def test_hext_dataset(): test[0] = True assert all([x[0] for x in testing_lines]) + + +def test_hext_json_representation(): + d = Dataset() + trig_data = """ + PREFIX ex: + PREFIX owl: + PREFIX rdf: + PREFIX xsd: + + ex:g1 { + ex:s1 + ex:p1 ex:o1 , ex:o2 ; + ex:p2 [ + a owl:Thing ; + 
rdf:value "thingy" ; + ] ; + ex:p3 "Object 3" , "Object 4 - English"@en ; + ex:p4 "2021-12-03"^^xsd:date ; + ex:p5 42 ; + ex:p6 "42" ; + . + } + + ex:g2 { + ex:s1 + ex:p1 ex:o1 , ex:o2 ; + . + ex:s11 ex:p11 ex:o11 , ex:o12 . + } + + # default graph triples + ex:s1 ex:p1 ex:o1 , ex:o2 . + ex:s21 ex:p21 ex:o21 , ex:o22 . + """ + d.parse(data=trig_data, format="trig") + out = d.serialize(format="hext") + for line in out.splitlines(): + j = json.loads(line) + assert type(j, list) From bd5dc689bc5b564a54f1f2ad5d8fcb499225791c Mon Sep 17 00:00:00 2001 From: nicholascar Date: Sat, 4 Dec 2021 17:47:00 +1000 Subject: [PATCH 10/25] fixed multiline JSON representation --- rdflib/plugins/serializers/hext.py | 1 - test/test_serialize_hext.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/rdflib/plugins/serializers/hext.py b/rdflib/plugins/serializers/hext.py index 6977e1b04..aaba9ca1b 100644 --- a/rdflib/plugins/serializers/hext.py +++ b/rdflib/plugins/serializers/hext.py @@ -48,7 +48,6 @@ def serialize( stream.write( _hex_line(triple, context.identifier).encode() ) - stream.write("\n".encode()) def _hex_line(triple, context): diff --git a/test/test_serialize_hext.py b/test/test_serialize_hext.py index e5ad3e2c9..359553770 100644 --- a/test/test_serialize_hext.py +++ b/test/test_serialize_hext.py @@ -132,4 +132,4 @@ def test_hext_json_representation(): out = d.serialize(format="hext") for line in out.splitlines(): j = json.loads(line) - assert type(j, list) + assert isinstance(j, list) From 2fdffd195c23cd19ef4cb300bfe1e9ea49201e74 Mon Sep 17 00:00:00 2001 From: nicholascar Date: Mon, 6 Dec 2021 11:41:41 +1000 Subject: [PATCH 11/25] small code style touch-ups --- rdflib/plugins/parsers/ntriples.py | 3 +-- rdflib/plugins/shared/jsonld/util.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/rdflib/plugins/parsers/ntriples.py b/rdflib/plugins/parsers/ntriples.py index 9a66df7f4..a86bd66f0 100644 --- a/rdflib/plugins/parsers/ntriples.py +++ 
b/rdflib/plugins/parsers/ntriples.py @@ -54,7 +54,6 @@ def triple(self, s, p, o): def unquote(s): """Unquote an N-Triples string.""" if not validate: - if isinstance(s, str): # nquads s = decodeUnicodeEscape(s) else: @@ -187,7 +186,7 @@ def readline(self): while True: m = r_line.match(self.buffer) if m: # the more likely prospect - self.buffer = self.buffer[m.end() :] + self.buffer = self.buffer[m.end():] return m.group(1) else: buffer = self.file.read(bufsiz) diff --git a/rdflib/plugins/shared/jsonld/util.py b/rdflib/plugins/shared/jsonld/util.py index cf71742f2..707b9ce24 100644 --- a/rdflib/plugins/shared/jsonld/util.py +++ b/rdflib/plugins/shared/jsonld/util.py @@ -88,4 +88,4 @@ def context_from_urlinputsource(source): if ' rel="http://www.w3.org/ns/json-ld#context"' in link: i, j = link.index("<"), link.index(">") if i > -1 and j > -1: - return urljoin(source.url, link[i + 1 : j]) + return urljoin(source.url, link[i + 1: j]) From f6529bf0805858d7a31c759187d89f6ab7018621 Mon Sep 17 00:00:00 2001 From: nicholascar Date: Mon, 6 Dec 2021 11:47:14 +1000 Subject: [PATCH 12/25] first version of parser --- rdflib/plugin.py | 6 +++ rdflib/plugins/parsers/hext.py | 78 +++++++++++++++++++++++++++++++++ test/test_parser_hext.py | 32 ++++++++++++++ test/test_parser_hext_01.ndjson | 10 +++++ 4 files changed, 126 insertions(+) create mode 100644 rdflib/plugins/parsers/hext.py create mode 100644 test/test_parser_hext.py create mode 100644 test/test_parser_hext_01.ndjson diff --git a/rdflib/plugin.py b/rdflib/plugin.py index 96a2b6108..b7edbc624 100644 --- a/rdflib/plugin.py +++ b/rdflib/plugin.py @@ -472,6 +472,12 @@ def plugins( "rdflib.plugins.parsers.trig", "TrigParser", ) +register( + "hext", + Parser, + "rdflib.plugins.parsers.hext", + "HextuplesParser", +) # Register SPARQL Processors register( diff --git a/rdflib/plugins/parsers/hext.py b/rdflib/plugins/parsers/hext.py new file mode 100644 index 000000000..b83ff7217 --- /dev/null +++ b/rdflib/plugins/parsers/hext.py @@ 
-0,0 +1,78 @@ +""" +This is a rdflib plugin for parsing Hextuple files, which are Newline-Delimited JSON +(ndjson) files, into Conjunctive. The store that backs the graph *must* be able to +handle contexts, i.e. multiple graphs. +""" +import json + +from rdflib.parser import Parser +from rdflib import ConjunctiveGraph, URIRef, Literal, BNode +import warnings + + +__all__ = ["HextuplesParser"] + + +class HextuplesParser(Parser): + """ + An RDFLib parser for Hextuples + + """ + + def __init__(self): + pass + + def _load_json_line(self, line: str): + return [x if x != "" else None for x in json.loads(line)] + + def _parse_hextuple(self, cg: ConjunctiveGraph, tup: [str]): + # 1 - subject + if tup[0].startswith("_"): + s = BNode(value=tup[0]) + else: + s = URIRef(tup[0]) + + # 2 - predicate + p = URIRef(tup[1]) + + # 3 - value + if tup[3] is None: + if tup[0].startswith("_"): + o = BNode(value=tup[2]) + else: + o = URIRef(tup[2]) + else: + if tup[4] is None: + o = Literal(tup[2], datatype=URIRef(tup[3])) + else: + o = Literal(tup[2], lang=tup[4]) + + # 6 - context + if tup[5] is not None: + c = URIRef(tup[5]) + cg.add((s, p, o, c)) + else: + cg.add((s, p, o)) + + def parse(self, source, graph, **kwargs): + if kwargs.get("encoding") not in [None, "utf-8"]: + warnings.warn( + f"Hextuples files are always utf-8 encoded, " + f"I was passed: {kwargs.get('encoding')}, " + "but I'm still going to use utf-8" + ) + + assert graph.store.context_aware, \ + "Hextuples Parser needs a context-aware store!" 
+ + cg = ConjunctiveGraph(store=graph.store, identifier=graph.identifier) + cg.default_context = graph + + if hasattr(source, "file"): + with open(source.file.name) as fp: + for l in fp: + self._parse_hextuple(cg, self._load_json_line(l)) + elif hasattr(source, "_InputSource__bytefile"): + if hasattr(source._InputSource__bytefile, "wrapped"): + for l in source._InputSource__bytefile.wrapped.strip().splitlines(): + self._parse_hextuple(cg, self._load_json_line(l)) diff --git a/test/test_parser_hext.py b/test/test_parser_hext.py new file mode 100644 index 000000000..0f9a2f489 --- /dev/null +++ b/test/test_parser_hext.py @@ -0,0 +1,32 @@ +import sys +from pathlib import Path +sys.path.append(str(Path(__file__).parent.parent.absolute())) +from rdflib import Dataset + + +def test_small_string(): + s = """ + ["http://example.com/s01", "http://example.com/a", "http://example.com/Type1", "", "", ""] + ["http://example.com/s01", "http://example.com/label", "This is a Label", "http://www.w3.org/2001/XMLSchema#string", "en", ""] + ["http://example.com/s01", "http://example.com/comment", "This is a comment", "http://www.w3.org/2001/XMLSchema#string", "", ""] + ["http://example.com/s01", "http://example.com/creationDate", "2021-12-01", "http://www.w3.org/2001/XMLSchema#date", "", ""] + ["http://example.com/s01", "http://example.com/creationTime", "2021-12-01T12:13", "http://www.w3.org/2001/XMLSchema#dateTime", "", ""] + ["http://example.com/s01", "http://example.com/age", 42, "http://www.w3.org/2001/XMLSchema#integer", "", ""] + ["http://example.com/s01", "http://example.com/trueFalse", false, "http://www.w3.org/2001/XMLSchema#boolean", "", ""] + ["http://example.com/s01", "http://example.com/op1", "http://example.com/o1", "", "", ""] + ["http://example.com/s01", "http://example.com/op1", "http://example.com/o2", "", "", ""] + ["http://example.com/s01", "http://example.com/op2", "http://example.com/o3", "", "", ""] + """ + d = Dataset().parse(data=s, format="hext") + assert 
len(d) == 10 + + +def test_small_file(): + d = Dataset().parse(Path(__file__).parent / "test_parser_hext_01.ndjson", format="hext") + print(d.serialize()) + assert len(d) == 10 + + +if __name__ == "__main__": + test_small_string() + test_small_file() diff --git a/test/test_parser_hext_01.ndjson b/test/test_parser_hext_01.ndjson new file mode 100644 index 000000000..c0a7c6eea --- /dev/null +++ b/test/test_parser_hext_01.ndjson @@ -0,0 +1,10 @@ +["http://example.com/s01", "http://example.com/a", "http://example.com/Type1", "", "", ""] +["http://example.com/s01", "http://example.com/label", "This is a Label", "http://www.w3.org/2001/XMLSchema#string", "en", ""] +["http://example.com/s01", "http://example.com/comment", "This is a comment", "http://www.w3.org/2001/XMLSchema#string", "", ""] +["http://example.com/s01", "http://example.com/creationDate", "2021-12-01", "http://www.w3.org/2001/XMLSchema#date", "", ""] +["http://example.com/s01", "http://example.com/creationTime", "2021-12-01T12:13", "http://www.w3.org/2001/XMLSchema#dateTime", "", ""] +["http://example.com/s01", "http://example.com/age", 42, "http://www.w3.org/2001/XMLSchema#integer", "", ""] +["http://example.com/s01", "http://example.com/trueFalse", false, "http://www.w3.org/2001/XMLSchema#boolean", "", ""] +["http://example.com/s01", "http://example.com/op1", "http://example.com/o1", "", "", ""] +["http://example.com/s01", "http://example.com/op1", "http://example.com/o2", "", "", ""] +["http://example.com/s01", "http://example.com/op2", "http://example.com/o3", "", "", ""] From a91dde6c5738d7c3a26c3f5a9d16412b27a31fcb Mon Sep 17 00:00:00 2001 From: nicholascar Date: Mon, 6 Dec 2021 11:47:44 +1000 Subject: [PATCH 13/25] fix hext serializer to always indicate string literals with datatype --- rdflib/plugins/serializers/hext.py | 13 +++++---- test/test_serialize_hext.py | 44 ++++++++++++++++++++++++++---- 2 files changed, 46 insertions(+), 11 deletions(-) diff --git a/rdflib/plugins/serializers/hext.py 
b/rdflib/plugins/serializers/hext.py index aaba9ca1b..820a49e8d 100644 --- a/rdflib/plugins/serializers/hext.py +++ b/rdflib/plugins/serializers/hext.py @@ -35,13 +35,16 @@ def serialize( self, stream: IO[bytes], base: Optional[str] = None, - encoding: Optional[str] = None, - **args + **kwargs ): if base is not None: warnings.warn("HextuplesSerializer does not support base.") - if encoding != "utf-8": - warnings.warn("NTSerializer always uses UTF-8 encoding.") + if kwargs.get("encoding") not in [None, "utf-8"]: + warnings.warn( + f"Hextuples files are always utf-8 encoded. " + f"I was passed: {kwargs.get('encoding')}, " + "but I'm still going to use utf-8 anyway!" + ) for context in self.contexts: for triple in context: @@ -55,7 +58,7 @@ def _hex_line(triple, context): _iri_or_bn(triple[0]), _iri_or_bn(triple[1]), _literal(triple[2]) if type(triple[2]) == Literal else _iri_or_bn(triple[2]), - (f'"{triple[2].datatype}"' if triple[2].datatype is not None else '""') if type(triple[2]) == Literal else '""', + (f'"{triple[2].datatype}"' if triple[2].datatype is not None else '"http://www.w3.org/2001/XMLSchema#string"') if type(triple[2]) == Literal else '""', (f'"{triple[2].language}"' if triple[2].language is not None else '""') if type(triple[2]) == Literal else '""', _iri_or_bn(context) ) diff --git a/test/test_serialize_hext.py b/test/test_serialize_hext.py index 359553770..dc5c8d1c3 100644 --- a/test/test_serialize_hext.py +++ b/test/test_serialize_hext.py @@ -1,4 +1,7 @@ -from rdflib import Dataset, Graph +import sys +from pathlib import Path +sys.path.append(str(Path(__file__).parent.parent.absolute())) +from rdflib import Dataset, Graph, Namespace, Literal import json @@ -29,11 +32,11 @@ def test_hext_graph(): out = g.serialize(format="hext") testing_lines = [ [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "", ""'], - [False, '["http://example.com/s1", "http://example.com/p3", "Object 3", "", ""'], + [False, 
'["http://example.com/s1", "http://example.com/p3", "Object 3", "http://www.w3.org/2001/XMLSchema#string", ""'], [False, '["http://example.com/s1", "http://example.com/p5", 42, "http://www.w3.org/2001/XMLSchema#integer", ""'], - [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#value", "thingy", "", ""'], + [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#value", "thingy", "http://www.w3.org/2001/XMLSchema#string", ""'], [False, '["http://example.com/s1", "http://example.com/p4", "2021-12-03", "http://www.w3.org/2001/XMLSchema#date", ""'], - [False, '["http://example.com/s1", "http://example.com/p6", "42", "", ""'], + [False, '["http://example.com/s1", "http://example.com/p6", "42", "http://www.w3.org/2001/XMLSchema#string", ""'], [False, '["http://example.com/s1", "http://example.com/p7", true, "http://www.w3.org/2001/XMLSchema#boolean", ""'], [False, '["http://example.com/s1", "http://example.com/p8", false, "http://www.w3.org/2001/XMLSchema#boolean", ""'], ] @@ -82,9 +85,10 @@ def test_hext_dataset(): out = d.serialize(format="hext") testing_lines = [ [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "", "", "http://example.com/g2"]'], - [False, '["http://example.com/s1", "http://example.com/p3", "Object 3", "", "", "http://example.com/g1"]'], + [False, '["http://example.com/s1", "http://example.com/p3", "Object 3", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"]'], + [False, '["http://example.com/s1", "http://example.com/p3", "Object 4 - English", "http://www.w3.org/2001/XMLSchema#string", "en", "http://example.com/g1"]'], [False, '["http://example.com/s1", "http://example.com/p5", 42, "http://www.w3.org/2001/XMLSchema#integer", "", "http://example.com/g1"]'], - [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#value", "thingy", "", "", "http://example.com/g1"]'], + [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#value", "thingy", "http://www.w3.org/2001/XMLSchema#string", "", 
"http://example.com/g1"]'], [False, '["http://example.com/s1", "http://example.com/p4", "2021-12-03", "http://www.w3.org/2001/XMLSchema#date", "", "http://example.com/g1"]'] ] for line in out.splitlines(): @@ -133,3 +137,31 @@ def test_hext_json_representation(): for line in out.splitlines(): j = json.loads(line) assert isinstance(j, list) + + +# def _make_large_graph(): +# import random +# +# EX = Namespace("http://example.com/") +# g = Graph() +# +# for i in range(1000): +# s = EX["s" + str(random.randint(1, 10000)).zfill(5)] +# p = EX["p" + str(random.randint(1, 10000)).zfill(5)] +# o_r = random.randint(1, 10000) +# if o_r > 5000: +# o = EX["p" + str(o_r).zfill(5)] +# else: +# o = Literal("p" + str(o_r).zfill(5)) +# g.add((s, p, o)) +# return g +# +# +# def test_hext_scaling(): +# g = _make_large_graph() +# g.serialize(destination="large.ndjson", format="hext") +# +# +# if __name__ == "__main__": +# import cProfile +# cProfile.run("test_hext_scaling()", sort=1) From 16aa03f28b029c8e2a03cb1aeab09da29fdb4122 Mon Sep 17 00:00:00 2001 From: nicholascar Date: Mon, 6 Dec 2021 12:24:54 +1000 Subject: [PATCH 14/25] some roundftripling tests; better default graph IRI ('""') --- rdflib/plugins/parsers/hext.py | 1 + rdflib/plugins/serializers/hext.py | 2 +- test/test_parser_hext.py | 3 +-- test/test_parser_hext_01.ndjson | 2 +- test/test_serialize_hext.py | 16 +++++++++++++--- 5 files changed, 17 insertions(+), 7 deletions(-) diff --git a/rdflib/plugins/parsers/hext.py b/rdflib/plugins/parsers/hext.py index b83ff7217..9f34ac830 100644 --- a/rdflib/plugins/parsers/hext.py +++ b/rdflib/plugins/parsers/hext.py @@ -68,6 +68,7 @@ def parse(self, source, graph, **kwargs): cg = ConjunctiveGraph(store=graph.store, identifier=graph.identifier) cg.default_context = graph + # handle different source types - only file and string (data) for now if hasattr(source, "file"): with open(source.file.name) as fp: for l in fp: diff --git a/rdflib/plugins/serializers/hext.py 
b/rdflib/plugins/serializers/hext.py index 820a49e8d..3dfabb1b6 100644 --- a/rdflib/plugins/serializers/hext.py +++ b/rdflib/plugins/serializers/hext.py @@ -60,7 +60,7 @@ def _hex_line(triple, context): _literal(triple[2]) if type(triple[2]) == Literal else _iri_or_bn(triple[2]), (f'"{triple[2].datatype}"' if triple[2].datatype is not None else '"http://www.w3.org/2001/XMLSchema#string"') if type(triple[2]) == Literal else '""', (f'"{triple[2].language}"' if triple[2].language is not None else '""') if type(triple[2]) == Literal else '""', - _iri_or_bn(context) + _iri_or_bn(context) if not str(context).startswith(("_", "file://")) else '""' ) diff --git a/test/test_parser_hext.py b/test/test_parser_hext.py index 0f9a2f489..ec211475f 100644 --- a/test/test_parser_hext.py +++ b/test/test_parser_hext.py @@ -10,7 +10,7 @@ def test_small_string(): ["http://example.com/s01", "http://example.com/label", "This is a Label", "http://www.w3.org/2001/XMLSchema#string", "en", ""] ["http://example.com/s01", "http://example.com/comment", "This is a comment", "http://www.w3.org/2001/XMLSchema#string", "", ""] ["http://example.com/s01", "http://example.com/creationDate", "2021-12-01", "http://www.w3.org/2001/XMLSchema#date", "", ""] - ["http://example.com/s01", "http://example.com/creationTime", "2021-12-01T12:13", "http://www.w3.org/2001/XMLSchema#dateTime", "", ""] + ["http://example.com/s01", "http://example.com/creationTime", "2021-12-01T12:13:00", "http://www.w3.org/2001/XMLSchema#dateTime", "", ""] ["http://example.com/s01", "http://example.com/age", 42, "http://www.w3.org/2001/XMLSchema#integer", "", ""] ["http://example.com/s01", "http://example.com/trueFalse", false, "http://www.w3.org/2001/XMLSchema#boolean", "", ""] ["http://example.com/s01", "http://example.com/op1", "http://example.com/o1", "", "", ""] @@ -23,7 +23,6 @@ def test_small_string(): def test_small_file(): d = Dataset().parse(Path(__file__).parent / "test_parser_hext_01.ndjson", format="hext") - 
print(d.serialize()) assert len(d) == 10 diff --git a/test/test_parser_hext_01.ndjson b/test/test_parser_hext_01.ndjson index c0a7c6eea..5d8f67c86 100644 --- a/test/test_parser_hext_01.ndjson +++ b/test/test_parser_hext_01.ndjson @@ -2,7 +2,7 @@ ["http://example.com/s01", "http://example.com/label", "This is a Label", "http://www.w3.org/2001/XMLSchema#string", "en", ""] ["http://example.com/s01", "http://example.com/comment", "This is a comment", "http://www.w3.org/2001/XMLSchema#string", "", ""] ["http://example.com/s01", "http://example.com/creationDate", "2021-12-01", "http://www.w3.org/2001/XMLSchema#date", "", ""] -["http://example.com/s01", "http://example.com/creationTime", "2021-12-01T12:13", "http://www.w3.org/2001/XMLSchema#dateTime", "", ""] +["http://example.com/s01", "http://example.com/creationTime", "2021-12-01T12:13:00", "http://www.w3.org/2001/XMLSchema#dateTime", "", ""] ["http://example.com/s01", "http://example.com/age", 42, "http://www.w3.org/2001/XMLSchema#integer", "", ""] ["http://example.com/s01", "http://example.com/trueFalse", false, "http://www.w3.org/2001/XMLSchema#boolean", "", ""] ["http://example.com/s01", "http://example.com/op1", "http://example.com/o1", "", "", ""] diff --git a/test/test_serialize_hext.py b/test/test_serialize_hext.py index dc5c8d1c3..2f1072e16 100644 --- a/test/test_serialize_hext.py +++ b/test/test_serialize_hext.py @@ -139,6 +139,16 @@ def test_hext_json_representation(): assert isinstance(j, list) +def test_roundtrip(): + d = Dataset().parse(Path(__file__).parent / "test_parser_hext_01.ndjson", format="hext") + with open(str(Path(__file__).parent / "test_parser_hext_01.ndjson")) as i: + ordered_input = "".join(sorted(i.readlines())).strip() + + ordered_output = "\n".join(sorted(d.serialize(format="hext").split("\n"))).strip() + + assert ordered_output == ordered_input + + # def _make_large_graph(): # import random # @@ -162,6 +172,6 @@ def test_hext_json_representation(): # 
g.serialize(destination="large.ndjson", format="hext") # # -# if __name__ == "__main__": -# import cProfile -# cProfile.run("test_hext_scaling()", sort=1) +if __name__ == "__main__": + test_roundtrip() + From 7f3f5b6eb11e10e7355bed9f2dd8892a499e4410 Mon Sep 17 00:00:00 2001 From: nicholascar Date: Tue, 7 Dec 2021 13:16:50 +1000 Subject: [PATCH 15/25] exclude hext from rountrip testing; add hext own troundrip tests, update hext format --- docs/plugin_parsers.rst | 16 +++ docs/plugin_serializers.rst | 8 +- rdflib/plugins/parsers/hext.py | 13 +- rdflib/plugins/serializers/hext.py | 122 ++++++++++-------- test/n3/n3-writer-test-29.n3 | 46 +++---- test/n3/strquot.n3 | 12 +- test/test_n3_suite.py | 1 - test/test_parser_hext.py | 107 +++++++++++++-- test/test_parser_hext_multigraph.ndjson | 22 ++++ ...on => test_parser_hext_singlegraph.ndjson} | 14 +- test/test_roundtrip.py | 46 ++++--- .../{test_serialize.py => test_serializer.py} | 0 ...ialize_hext.py => test_serializer_hext.py} | 82 +++++++++--- ...urtle.py => test_serializer_longturtle.py} | 0 ...ialize_trix.py => test_serializer_trix.py} | 0 ...ze_turtle.py => test_serializer_turtle.py} | 0 ...erialize_xml.py => test_serializer_xml.py} | 0 17 files changed, 343 insertions(+), 146 deletions(-) create mode 100644 test/test_parser_hext_multigraph.ndjson rename test/{test_parser_hext_01.ndjson => test_parser_hext_singlegraph.ndjson} (61%) rename test/{test_serialize.py => test_serializer.py} (100%) rename test/{test_serialize_hext.py => test_serializer_hext.py} (57%) rename test/{test_serialize_longturtle.py => test_serializer_longturtle.py} (100%) rename test/{test_serialize_trix.py => test_serializer_trix.py} (100%) rename test/{test_serialize_turtle.py => test_serializer_turtle.py} (100%) rename test/{test_serialize_xml.py => test_serializer_xml.py} (100%) diff --git a/docs/plugin_parsers.rst b/docs/plugin_parsers.rst index a89e08fab..ed351c0ce 100644 --- a/docs/plugin_parsers.rst +++ b/docs/plugin_parsers.rst @@ 
-21,6 +21,7 @@ If you are not sure what format your file will be, you can use :func:`rdflib.uti Name Class ========= ==================================================================== json-ld :class:`~rdflib.plugins.parsers.jsonld.JsonLDParser` +hext :class:`~rdflib.plugins.parsers.hext.HextuplesParser` html :class:`~rdflib.plugins.parsers.structureddata.StructuredDataParser` n3 :class:`~rdflib.plugins.parsers.notation3.N3Parser` nquads :class:`~rdflib.plugins.parsers.nquads.NQuadsParser` @@ -29,3 +30,18 @@ trix :class:`~rdflib.plugins.parsers.trix.TriXParser` turtle :class:`~rdflib.plugins.parsers.notation3.TurtleParser` xml :class:`~rdflib.plugins.parsers.rdfxml.RDFXMLParser` ========= ==================================================================== + +Multi-graph IDs +--------------- +Note that for correct parsing of multi-graph data, e.g. Trig, HexT, etc., into a ``ConjunctiveGraph`` or a ``Dataset``, +as opposed to a context-unaware ``Graph``, you will need to set the ``publicID`` of the ``ConjunctiveGraph`` a +``Dataset`` to the identifier of the ``default_context`` (default graph), for example:: + + d = Dataset() + d.parse( + data=""" ... """, + format="trig", + publicID=d.default_context.identifier + ) + +(from the file tests/test_serializer_hext.py) diff --git a/docs/plugin_serializers.rst b/docs/plugin_serializers.rst index a6fc74bda..249b0aede 100644 --- a/docs/plugin_serializers.rst +++ b/docs/plugin_serializers.rst @@ -36,4 +36,10 @@ JSON-LD - 'json-ld' - has been incorprated in rdflib since v6.0.0. HexTuples --------- -The HexTuples Serializer - 'hext' - uses the HexTuples format defined at https://github.com/ontola/hextuples +The HexTuples Serializer - 'hext' - uses the HexTuples format defined at https://github.com/ontola/hextuples. + +For serialization of non-context-aware data sources, e.g. a single ``Graph``, the 'graph' field (6th variable in the +Hextuple) will be an empty string. 
+ +For context-aware (multi-graph) serialization, the 'graph' field of the default graph will be an empty string and +the values for other graphs will be Blank Node IDs or IRIs. diff --git a/rdflib/plugins/parsers/hext.py b/rdflib/plugins/parsers/hext.py index 9f34ac830..206ca0a3d 100644 --- a/rdflib/plugins/parsers/hext.py +++ b/rdflib/plugins/parsers/hext.py @@ -28,7 +28,7 @@ def _load_json_line(self, line: str): def _parse_hextuple(self, cg: ConjunctiveGraph, tup: [str]): # 1 - subject if tup[0].startswith("_"): - s = BNode(value=tup[0]) + s = BNode(value=tup[0].replace("_:", "")) else: s = URIRef(tup[0]) @@ -36,12 +36,11 @@ def _parse_hextuple(self, cg: ConjunctiveGraph, tup: [str]): p = URIRef(tup[1]) # 3 - value - if tup[3] is None: - if tup[0].startswith("_"): - o = BNode(value=tup[2]) - else: - o = URIRef(tup[2]) - else: + if tup[3] == "globalId": + o = URIRef(tup[2]) + elif tup[3] == "localId": + o = BNode(value=tup[2].replace("_:", "")) + else: # literal if tup[4] is None: o = Literal(tup[2], datatype=URIRef(tup[3])) else: diff --git a/rdflib/plugins/serializers/hext.py b/rdflib/plugins/serializers/hext.py index 3dfabb1b6..cec695b2d 100644 --- a/rdflib/plugins/serializers/hext.py +++ b/rdflib/plugins/serializers/hext.py @@ -4,8 +4,9 @@ """ from typing import IO, TYPE_CHECKING, Optional, Union from rdflib.graph import Graph, ConjunctiveGraph -from rdflib.term import Literal, URIRef, Node +from rdflib.term import Literal, URIRef, Node, BNode from rdflib.serializer import Serializer +from rdflib.namespace import RDF, XSD import warnings __all__ = ["HextuplesSerializer"] @@ -19,12 +20,12 @@ class HextuplesSerializer(Serializer): def __init__(self, store: Union[Graph, ConjunctiveGraph]): self.default_context: Optional[Node] if store.context_aware: - if TYPE_CHECKING: - assert isinstance(store, ConjunctiveGraph) self.contexts = list(store.contexts()) - self.default_context = store.default_context.identifier if store.default_context: + self.default_context = 
store.default_context self.contexts.append(store.default_context) + else: + self.default_context = None else: self.contexts = [store] self.default_context = None @@ -34,11 +35,8 @@ def __init__(self, store: Union[Graph, ConjunctiveGraph]): def serialize( self, stream: IO[bytes], - base: Optional[str] = None, **kwargs ): - if base is not None: - warnings.warn("HextuplesSerializer does not support base.") if kwargs.get("encoding") not in [None, "utf-8"]: warnings.warn( f"Hextuples files are always utf-8 encoded. " @@ -46,58 +44,76 @@ def serialize( "but I'm still going to use utf-8 anyway!" ) + if self.store.formula_aware is True: + raise Exception( + "Hextuple serialization can't (yet) handle formula-aware stores" + ) + for context in self.contexts: for triple in context: - stream.write( - _hex_line(triple, context.identifier).encode() - ) - - -def _hex_line(triple, context): - return "[%s, %s, %s, %s, %s, %s]\n" % ( - _iri_or_bn(triple[0]), - _iri_or_bn(triple[1]), - _literal(triple[2]) if type(triple[2]) == Literal else _iri_or_bn(triple[2]), - (f'"{triple[2].datatype}"' if triple[2].datatype is not None else '"http://www.w3.org/2001/XMLSchema#string"') if type(triple[2]) == Literal else '""', - (f'"{triple[2].language}"' if triple[2].language is not None else '""') if type(triple[2]) == Literal else '""', - _iri_or_bn(context) if not str(context).startswith(("_", "file://")) else '""' - ) - + hl = self._hex_line(triple, context) + if hl is not None: + stream.write(hl.encode()) -def _iri_or_bn(i_): - if type(i_) == URIRef: - return f"\"{i_}\"" - else: - return f"\"{i_.n3()}\"" + def _hex_line(self, triple, context): + if type(triple[0]) in [URIRef, BNode]: # exclude QuotedGraph and other objects + # value + value = triple[2] \ + if type(triple[2]) == Literal \ + else self._iri_or_bn(triple[2]) + # datatype + if type(triple[2]) == URIRef: + # datatype = "http://www.w3.org/1999/02/22-rdf-syntax-ns#namedNode" + datatype = "globalId" + elif type(triple[2]) == BNode: + 
# datatype = "http://www.w3.org/1999/02/22-rdf-syntax-ns#blankNode" + datatype = "localId" + elif type(triple[2]) == Literal: + if triple[2].datatype is not None: + datatype = f"{triple[2].datatype}" + else: + if triple[2].language is not None: # language + datatype = RDF.langString + else: + datatype = XSD.string + else: + return None # can't handle non URI, BN or Literal Object (QuotedGraph) -def _literal(i_): - raw_datatype = [ - "http://www.w3.org/2001/XMLSchema#integer", - "http://www.w3.org/2001/XMLSchema#long", - "http://www.w3.org/2001/XMLSchema#int", - "http://www.w3.org/2001/XMLSchema#short", - "http://www.w3.org/2001/XMLSchema#positiveInteger", - "http://www.w3.org/2001/XMLSchema#negativeInteger", - "http://www.w3.org/2001/XMLSchema#nonPositiveInteger", - "http://www.w3.org/2001/XMLSchema#nonNegativeInteger", - "http://www.w3.org/2001/XMLSchema#unsignedLong", - "http://www.w3.org/2001/XMLSchema#unsignedInt", - "http://www.w3.org/2001/XMLSchema#unsignedShort", + # language + if type(triple[2]) == Literal: + if triple[2].language is not None: + language = f"{triple[2].language}" + else: + language = "" + else: + language = "" - "http://www.w3.org/2001/XMLSchema#float", - "http://www.w3.org/2001/XMLSchema#double", - "http://www.w3.org/2001/XMLSchema#decimal", + return '["%s", "%s", "%s", "%s", "%s", "%s"]\n' % ( + self._iri_or_bn(triple[0]), + triple[1], + value, + datatype, + language, + self._context(context) + ) + else: # do not return anything for non-IRIs or BNs, e.g. 
QuotedGraph, Subjects + return None - "http://www.w3.org/2001/XMLSchema#boolean" - ] - if hasattr(i_, "datatype"): - if str(i_.datatype) in raw_datatype: - return f"{i_}" - else: - return f"\"{i_}\"" - else: - if str(i_) in ["true", "false"]: + def _iri_or_bn(self, i_): + if type(i_) == URIRef: return f"{i_}" + elif type(i_) == BNode: + return f"{i_.n3()}" else: - return f"\"{i_}\"" + return None + + def _context(self, context): + if self.default_context is None: + return "" + if context.identifier == "urn:x-rdflib:default": + return "" + elif context is not None and self.default_context is not None: + if context.identifier == self.default_context.identifier: + return "" + return context.identifier diff --git a/test/n3/n3-writer-test-29.n3 b/test/n3/n3-writer-test-29.n3 index 86cf56e7a..b6590cbd8 100644 --- a/test/n3/n3-writer-test-29.n3 +++ b/test/n3/n3-writer-test-29.n3 @@ -1,23 +1,23 @@ -# Test qname-ization - -@prefix : . -@prefix ns: . -@prefix ns2: . -@prefix ex: . - -# Ensure we don't write ns:p1/p2 (illegal URI) -:x "1" . - -# Legal URI -:x "1" . - -# Numeric namespace prefix: gives a warning on reading -# as Jena models work on XML rules. -#@prefix 1: . -:x "1" . - -# Numberic localname is allowed. -:x ex:1 "2" . - -# As is _1 -:x ex:_1 "rdf:_1 test" . +# Test qname-ization + +@prefix : . +@prefix ns: . +@prefix ns2: . +@prefix ex: . + +# Ensure we don't write ns:p1/p2 (illegal URI) +:x "1" . + +# Legal URI +:x "1" . + +# Numeric namespace prefix: gives a warning on reading +# as Jena models work on XML rules. +#@prefix 1: . +:x "1" . + +# Numeric localname is allowed. +:x ex:1 "2" . + +# As is _1 +:x ex:_1 "rdf:_1 test" . diff --git a/test/n3/strquot.n3 b/test/n3/strquot.n3 index c421c11f6..13da792e7 100644 --- a/test/n3/strquot.n3 +++ b/test/n3/strquot.n3 @@ -1,5 +1,5 @@ - @prefix : <#> . - +@prefix : <#> . + <> """testing string parsing in N3. Hmm... how much of this is in the primer? How much should be there? 
@@ -9,14 +9,14 @@ in python is sufficiently deployed nor does pythonwin on TimBL's laptop). """ . - + :martin :familyName "D\u00FCrst" . - + :x :prop "simple string" . - + :y :prop """triple quoted string with newlines in it.""" . - + :z :prop """string with " escaped quote marks""" . :zz :escapes "\\\"\a\b\f\r\t\v" . diff --git a/test/test_n3_suite.py b/test/test_n3_suite.py index b4a96abb0..a294d2bc4 100644 --- a/test/test_n3_suite.py +++ b/test/test_n3_suite.py @@ -1,5 +1,4 @@ import os -import sys import logging import pytest diff --git a/test/test_parser_hext.py b/test/test_parser_hext.py index ec211475f..50d09fc05 100644 --- a/test/test_parser_hext.py +++ b/test/test_parser_hext.py @@ -1,31 +1,114 @@ import sys from pathlib import Path sys.path.append(str(Path(__file__).parent.parent.absolute())) -from rdflib import Dataset +from rdflib import Dataset, ConjunctiveGraph, Literal +from rdflib.namespace import XSD def test_small_string(): s = """ - ["http://example.com/s01", "http://example.com/a", "http://example.com/Type1", "", "", ""] - ["http://example.com/s01", "http://example.com/label", "This is a Label", "http://www.w3.org/2001/XMLSchema#string", "en", ""] + ["http://example.com/s01", "http://example.com/a", "http://example.com/Type1", "globalId", "", ""] + ["http://example.com/s01", "http://example.com/label", "This is a Label", "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", "en", ""] ["http://example.com/s01", "http://example.com/comment", "This is a comment", "http://www.w3.org/2001/XMLSchema#string", "", ""] ["http://example.com/s01", "http://example.com/creationDate", "2021-12-01", "http://www.w3.org/2001/XMLSchema#date", "", ""] ["http://example.com/s01", "http://example.com/creationTime", "2021-12-01T12:13:00", "http://www.w3.org/2001/XMLSchema#dateTime", "", ""] - ["http://example.com/s01", "http://example.com/age", 42, "http://www.w3.org/2001/XMLSchema#integer", "", ""] - ["http://example.com/s01", "http://example.com/trueFalse", 
false, "http://www.w3.org/2001/XMLSchema#boolean", "", ""] - ["http://example.com/s01", "http://example.com/op1", "http://example.com/o1", "", "", ""] - ["http://example.com/s01", "http://example.com/op1", "http://example.com/o2", "", "", ""] - ["http://example.com/s01", "http://example.com/op2", "http://example.com/o3", "", "", ""] + ["http://example.com/s01", "http://example.com/age", "42", "http://www.w3.org/2001/XMLSchema#integer", "", ""] + ["http://example.com/s01", "http://example.com/trueFalse", "false", ",http://www.w3.org/2001/XMLSchema#boolean", "", ""] + ["http://example.com/s01", "http://example.com/op1", "http://example.com/o1", "globalId", "", ""] + ["http://example.com/s01", "http://example.com/op1", "http://example.com/o2", "globalId", "", ""] + ["http://example.com/s01", "http://example.com/op2", "http://example.com/o3", "globalId", "", ""] """ d = Dataset().parse(data=s, format="hext") assert len(d) == 10 -def test_small_file(): - d = Dataset().parse(Path(__file__).parent / "test_parser_hext_01.ndjson", format="hext") +def test_small_file_singlegraph(): + d = Dataset().parse(Path(__file__).parent / "test_parser_hext_singlegraph.ndjson", format="hext") assert len(d) == 10 +def test_small_file_multigraph(): + d = ConjunctiveGraph() + assert len(d) == 0 + d.parse( + Path(__file__).parent / "test_parser_hext_multigraph.ndjson", + format="hext", + publicID=d.default_context.identifier + ) + + """There are 22 lines in the file test_parser_hext_multigraph.ndjson. 
When loaded + into a Dataset, we get only 18 quads since the the dataset can contextualise + the triples and thus deduplicate 4.""" + total_triples = 0 + # count all the triples in the Dataset + for context in d.contexts(): + for triple in context.triples((None, None, None)): + total_triples += 1 + assert total_triples == 18 + + +def test_roundtrip(): + # these are some RDF files that HexT can round-trip since the have no + # literals with no datatype declared: + TEST_DIR = Path(__file__).parent.absolute() / "nt" + files_to_skip = { + "paths-04.nt": "subject literal", + "even_more_literals.nt": "JSON decoding error", + "literals-02.nt": "JSON decoding error", + "more_literals.nt": "JSON decoding error", + "test.ntriples": "JSON decoding error", + "literals-05.nt": "JSON decoding error", + "i18n-01.nt": "JSON decoding error", + "literals-04.nt": "JSON decoding error", + "rdflibtest01.nt": "JSON decoding error", + "rdflibtest05.nt": "JSON decoding error", + } + tests = 0 + skipped = 0 + skip = False + print() + p = TEST_DIR.glob("**/*") + for f in [x for x in p if x.is_file()]: + tests += 1 + print(f"Test {tests}: {f}") + if f.name not in files_to_skip.keys(): + try: + cg = ConjunctiveGraph().parse(f, format="nt") + # print(cg.serialize(format="n3")) + except: + print(f"Skipping: could not NT parse") + skipped += 1 + skip = True + if not skip: + cg2 = ConjunctiveGraph() + cg2.parse( + data=cg.serialize(format="hext"), + format="hext", + publicID=cg2.default_context.identifier + ) + if cg2.context_aware: + for context in cg2.contexts(): + for triple in context.triples((None, None, None)): + if type(triple[2]) == Literal: + if triple[2].datatype == XSD.string: + context.remove((triple[0], triple[1], triple[2])) + context.add((triple[0], triple[1], Literal(str(triple[2])))) + else: + for triple in cg2.triples((None, None, None)): + if type(triple[2]) == Literal: + if triple[2].datatype == XSD.string: + cg2.remove((triple[0], triple[1], triple[2])) + cg2.add((triple[0], 
triple[1], Literal(str(triple[2])))) + + # print(cg2.serialize(format="trig")) + assert cg.isomorphic(cg2) + skip = False + else: + print(f"Skipping: {files_to_skip[f.name]}") + + print(f"No. tests: {tests}") + print(f"No. tests skipped: {skipped}") + + if __name__ == "__main__": - test_small_string() - test_small_file() + test_roundtrip() diff --git a/test/test_parser_hext_multigraph.ndjson b/test/test_parser_hext_multigraph.ndjson new file mode 100644 index 000000000..45d086e0b --- /dev/null +++ b/test/test_parser_hext_multigraph.ndjson @@ -0,0 +1,22 @@ +["_:n4d7dd184c5824f35aa064f17bd5d1440b1", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "http://www.w3.org/2002/07/owl#Thing", "globalId", "", "http://example.com/g1"] +["_:n4d7dd184c5824f35aa064f17bd5d1440b1", "http://www.w3.org/1999/02/22-rdf-syntax-ns#value", "thingy", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"] +["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", ""] +["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", ""] +["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", "http://example.com/g1"] +["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", "http://example.com/g2"] +["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", ""] +["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", ""] +["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", "http://example.com/g1"] +["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", "http://example.com/g2"] +["http://example.com/s1", "http://example.com/p2", "_:n4d7dd184c5824f35aa064f17bd5d1440b1", "localId", "", "http://example.com/g1"] +["http://example.com/s1", "http://example.com/p3", "Object 3", 
"http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"] +["http://example.com/s1", "http://example.com/p3", "Object 4 - English", "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", "en", "http://example.com/g1"] +["http://example.com/s1", "http://example.com/p4", "2021-12-03", "http://www.w3.org/2001/XMLSchema#date", "", "http://example.com/g1"] +["http://example.com/s1", "http://example.com/p5", "42", "http://www.w3.org/2001/XMLSchema#integer", "", "http://example.com/g1"] +["http://example.com/s1", "http://example.com/p6", "42", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"] +["http://example.com/s11", "http://example.com/p11", "http://example.com/o11", "globalId", "", "http://example.com/g2"] +["http://example.com/s11", "http://example.com/p11", "http://example.com/o12", "globalId", "", "http://example.com/g2"] +["http://example.com/s21", "http://example.com/p21", "http://example.com/o21", "globalId", "", ""] +["http://example.com/s21", "http://example.com/p21", "http://example.com/o21", "globalId", "", ""] +["http://example.com/s21", "http://example.com/p21", "http://example.com/o22", "globalId", "", ""] +["http://example.com/s21", "http://example.com/p21", "http://example.com/o22", "globalId", "", ""] diff --git a/test/test_parser_hext_01.ndjson b/test/test_parser_hext_singlegraph.ndjson similarity index 61% rename from test/test_parser_hext_01.ndjson rename to test/test_parser_hext_singlegraph.ndjson index 5d8f67c86..bde2774d5 100644 --- a/test/test_parser_hext_01.ndjson +++ b/test/test_parser_hext_singlegraph.ndjson @@ -1,10 +1,10 @@ -["http://example.com/s01", "http://example.com/a", "http://example.com/Type1", "", "", ""] -["http://example.com/s01", "http://example.com/label", "This is a Label", "http://www.w3.org/2001/XMLSchema#string", "en", ""] +["http://example.com/s01", "http://example.com/a", "http://example.com/Type1", "globalId", "", ""] +["http://example.com/s01", "http://example.com/label", "This is 
a Label", "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", "en", ""] ["http://example.com/s01", "http://example.com/comment", "This is a comment", "http://www.w3.org/2001/XMLSchema#string", "", ""] ["http://example.com/s01", "http://example.com/creationDate", "2021-12-01", "http://www.w3.org/2001/XMLSchema#date", "", ""] ["http://example.com/s01", "http://example.com/creationTime", "2021-12-01T12:13:00", "http://www.w3.org/2001/XMLSchema#dateTime", "", ""] -["http://example.com/s01", "http://example.com/age", 42, "http://www.w3.org/2001/XMLSchema#integer", "", ""] -["http://example.com/s01", "http://example.com/trueFalse", false, "http://www.w3.org/2001/XMLSchema#boolean", "", ""] -["http://example.com/s01", "http://example.com/op1", "http://example.com/o1", "", "", ""] -["http://example.com/s01", "http://example.com/op1", "http://example.com/o2", "", "", ""] -["http://example.com/s01", "http://example.com/op2", "http://example.com/o3", "", "", ""] +["http://example.com/s01", "http://example.com/age", "42", "http://www.w3.org/2001/XMLSchema#integer", "", ""] +["http://example.com/s01", "http://example.com/trueFalse", "false", ",http://www.w3.org/2001/XMLSchema#boolean", "", ""] +["http://example.com/s01", "http://example.com/op1", "http://example.com/o1", "globalId", "", ""] +["http://example.com/s01", "http://example.com/op1", "http://example.com/o2", "globalId", "", ""] +["http://example.com/s01", "http://example.com/op2", "http://example.com/o3", "globalId", "", ""] diff --git a/test/test_roundtrip.py b/test/test_roundtrip.py index 2729beafc..ec03a54d1 100644 --- a/test/test_roundtrip.py +++ b/test/test_roundtrip.py @@ -5,10 +5,9 @@ try: from .test_nt_suite import all_nt_files - assert all_nt_files - from .test_n3_suite import all_n3_files + from .test_n3_suite import all_n3_files assert all_n3_files except: from test.test_nt_suite import all_nt_files @@ -27,17 +26,22 @@ tests roundtripping through rdf/xml with only the literals-02 file +HexTuples 
format, "hext", cannot be used in all roundtrips due to its +addition of xsd:string to literals of no declared type as this breaks +(rdflib) graph isomorphism, and given that its JSON serialization is +simple (lacking), so hext has been excluded from roundtripping here +but provides some roundtrip test functions of its own (see test_parser_hext.py +& test_serializer_hext.py) + """ SKIP = [ - ( - "xml", - "test/n3/n3-writer-test-29.n3", - ), # has predicates that cannot be shortened to strict qnames + ("xml", "test/n3/n3-writer-test-29.n3"), + # has predicates that cannot be shortened to strict qnames ("xml", "test/nt/qname-02.nt"), # uses a property that cannot be qname'd - ("trix", "test/n3/strquot.n3"), # contains charachters forbidden by the xml spec - ("xml", "test/n3/strquot.n3"), # contains charachters forbidden by the xml spec + ("trix", "test/n3/strquot.n3"), # contains characters forbidden by the xml spec + ("xml", "test/n3/strquot.n3"), # contains characters forbidden by the xml spec ("json-ld", "test/nt/keywords-04.nt"), # known NT->JSONLD problem ("json-ld", "test/n3/example-misc.n3"), # known N3->JSONLD problem ("json-ld", "test/n3/n3-writer-test-16.n3"), # known N3->JSONLD problem @@ -98,11 +102,12 @@ def get_cases(): formats = parsers.intersection(serializers) for testfmt in formats: - if "/" in testfmt: - continue # skip double testing - for f, infmt in all_nt_files(): - if (testfmt, f) not in SKIP: - yield roundtrip, (infmt, testfmt, f) + if testfmt != "hext": + if "/" in testfmt: + continue # skip double testing + for f, infmt in all_nt_files(): + if (testfmt, f) not in SKIP: + yield roundtrip, (infmt, testfmt, f) @pytest.mark.parametrize("checker, args", get_cases()) @@ -120,13 +125,18 @@ def get_n3_test(): formats = parsers.intersection(serializers) for testfmt in formats: - if "/" in testfmt: - continue # skip double testing - for f, infmt in all_n3_files(): - if (testfmt, f) not in SKIP: - yield roundtrip, (infmt, testfmt, f) + if testfmt != 
"hext": + if "/" in testfmt: + continue # skip double testing + for f, infmt in all_n3_files(): + if (testfmt, f) not in SKIP: + yield roundtrip, (infmt, testfmt, f) @pytest.mark.parametrize("checker, args", get_n3_test()) def test_n3(checker, args): checker(args) + + +if __name__ == "__main__": + print("hi") diff --git a/test/test_serialize.py b/test/test_serializer.py similarity index 100% rename from test/test_serialize.py rename to test/test_serializer.py diff --git a/test/test_serialize_hext.py b/test/test_serializer_hext.py similarity index 57% rename from test/test_serialize_hext.py rename to test/test_serializer_hext.py index 2f1072e16..c322a211c 100644 --- a/test/test_serialize_hext.py +++ b/test/test_serializer_hext.py @@ -1,11 +1,12 @@ import sys from pathlib import Path sys.path.append(str(Path(__file__).parent.parent.absolute())) -from rdflib import Dataset, Graph, Namespace, Literal +from rdflib import Dataset, Graph import json def test_hext_graph(): + """Tests single-grant (not context-aware) data""" g = Graph() turtle_data = """ PREFIX ex: @@ -30,15 +31,20 @@ def test_hext_graph(): g.parse(data=turtle_data, format="turtle") out = g.serialize(format="hext") + # note: cant' test for BNs in result as they will be different ever time testing_lines = [ - [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "", ""'], - [False, '["http://example.com/s1", "http://example.com/p3", "Object 3", "http://www.w3.org/2001/XMLSchema#string", ""'], - [False, '["http://example.com/s1", "http://example.com/p5", 42, "http://www.w3.org/2001/XMLSchema#integer", ""'], - [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#value", "thingy", "http://www.w3.org/2001/XMLSchema#string", ""'], - [False, '["http://example.com/s1", "http://example.com/p4", "2021-12-03", "http://www.w3.org/2001/XMLSchema#date", ""'], - [False, '["http://example.com/s1", "http://example.com/p6", "42", "http://www.w3.org/2001/XMLSchema#string", ""'], - [False, 
'["http://example.com/s1", "http://example.com/p7", true, "http://www.w3.org/2001/XMLSchema#boolean", ""'], - [False, '["http://example.com/s1", "http://example.com/p8", false, "http://www.w3.org/2001/XMLSchema#boolean", ""'], + [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", ""]'], + [False, '["http://example.com/s1", "http://example.com/p3", "Object 3", "http://www.w3.org/2001/XMLSchema#string", "", ""]'], + [False, '["http://example.com/s1", "http://example.com/p3", "Object 4 - English", "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", "en", ""]'], + [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", ""]'], + [False, '["http://example.com/s1", "http://example.com/p4", "2021-12-03", "http://www.w3.org/2001/XMLSchema#date", "", ""]'], + [False, '["http://example.com/s1", "http://example.com/p6", "42", "http://www.w3.org/2001/XMLSchema#string", "", ""]'], + [False, '["http://example.com/s1", "http://example.com/p7", "true", "http://www.w3.org/2001/XMLSchema#boolean", "", ""]'], + [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#value", "thingy", "http://www.w3.org/2001/XMLSchema#string", "", ""]'], + [False, '["http://example.com/s1", "http://example.com/p2"'], + [False, '["http://example.com/s1", "http://example.com/p5", "42", "http://www.w3.org/2001/XMLSchema#integer", "", ""]'], + [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "http://www.w3.org/2002/07/owl#Thing", "globalId", "", ""]'], + [False, '["http://example.com/s1", "http://example.com/p8", "false", "http://www.w3.org/2001/XMLSchema#boolean", "", ""]'], ] for line in out.splitlines(): for test in testing_lines: @@ -49,6 +55,7 @@ def test_hext_graph(): def test_hext_dataset(): + """Tests context-aware (multigraph) data""" d = Dataset() trig_data = """ PREFIX ex: @@ -81,15 +88,28 @@ def test_hext_dataset(): ex:s1 ex:p1 ex:o1 , ex:o2 . ex:s21 ex:p21 ex:o21 , ex:o22 . 
""" - d.parse(data=trig_data, format="trig") + d.parse(data=trig_data, format="trig", publicID=d.default_context.identifier) out = d.serialize(format="hext") + # note: cant' test for BNs in result as they will be different ever time testing_lines = [ - [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "", "", "http://example.com/g2"]'], - [False, '["http://example.com/s1", "http://example.com/p3", "Object 3", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"]'], - [False, '["http://example.com/s1", "http://example.com/p3", "Object 4 - English", "http://www.w3.org/2001/XMLSchema#string", "en", "http://example.com/g1"]'], - [False, '["http://example.com/s1", "http://example.com/p5", 42, "http://www.w3.org/2001/XMLSchema#integer", "", "http://example.com/g1"]'], + [False, '["http://example.com/s21", "http://example.com/p21", "http://example.com/o21", "globalId", "", ""]'], + [False, '["http://example.com/s21", "http://example.com/p21", "http://example.com/o22", "globalId", "", ""]'], + [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", ""]'], + [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", ""]'], + [False, '["http://example.com/s11", "http://example.com/p11", "http://example.com/o12", "globalId", "", "http://example.com/g2"]'], + [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", "http://example.com/g2"]'], + [False, '["http://example.com/s11", "http://example.com/p11", "http://example.com/o11", "globalId", "", "http://example.com/g2"]'], + [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", "http://example.com/g2"]'], + [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", "http://example.com/g1"]'], + [False, '["http://example.com/s1", "http://example.com/p2"'], 
[False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#value", "thingy", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"]'], - [False, '["http://example.com/s1", "http://example.com/p4", "2021-12-03", "http://www.w3.org/2001/XMLSchema#date", "", "http://example.com/g1"]'] + [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "http://www.w3.org/2002/07/owl#Thing", "globalId", "", "http://example.com/g1"]'], + [False, '["http://example.com/s1", "http://example.com/p3", "Object 4 - English", "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", "en", "http://example.com/g1"]'], + [False, '["http://example.com/s1", "http://example.com/p6", "42", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"]'], + [False, '["http://example.com/s1", "http://example.com/p4", "2021-12-03", "http://www.w3.org/2001/XMLSchema#date", "", "http://example.com/g1"]'], + [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", "http://example.com/g1"]'], + [False, '["http://example.com/s1", "http://example.com/p5", "42", "http://www.w3.org/2001/XMLSchema#integer", "", "http://example.com/g1"]'], + [False, '["http://example.com/s1", "http://example.com/p3", "Object 3", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"]'], ] for line in out.splitlines(): for test in testing_lines: @@ -100,6 +120,7 @@ def test_hext_dataset(): def test_hext_json_representation(): + """Tests to see if every link in the ND-JSON Hextuple result is, in fact, JSON""" d = Dataset() trig_data = """ PREFIX ex: @@ -139,9 +160,35 @@ def test_hext_json_representation(): assert isinstance(j, list) +def test_hext_dataset_linecount(): + d = Dataset() + assert len(d) == 0 + d.parse( + Path(__file__).parent / "test_parser_hext_multigraph.ndjson", + format="hext", + publicID=d.default_context.identifier + ) + total_triples = 0 + # count all the triples in the Dataset + for context in d.contexts(): + for triple in 
context.triples((None, None, None)): + total_triples += 1 + assert total_triples == 18 + + # count the number of serialized Hextuples, should be 22, as per the original file + lc = len(d.serialize(format="hext").splitlines()) + assert lc == 22 + + def test_roundtrip(): - d = Dataset().parse(Path(__file__).parent / "test_parser_hext_01.ndjson", format="hext") - with open(str(Path(__file__).parent / "test_parser_hext_01.ndjson")) as i: + d = Dataset() + d.parse( + Path(__file__).parent / "test_parser_hext_multigraph.ndjson", + format="hext", + publicID=d.default_context.identifier + ) + d.default_union = True + with open(str(Path(__file__).parent / "test_parser_hext_multigraph.ndjson")) as i: ordered_input = "".join(sorted(i.readlines())).strip() ordered_output = "\n".join(sorted(d.serialize(format="hext").split("\n"))).strip() @@ -174,4 +221,3 @@ def test_roundtrip(): # if __name__ == "__main__": test_roundtrip() - diff --git a/test/test_serialize_longturtle.py b/test/test_serializer_longturtle.py similarity index 100% rename from test/test_serialize_longturtle.py rename to test/test_serializer_longturtle.py diff --git a/test/test_serialize_trix.py b/test/test_serializer_trix.py similarity index 100% rename from test/test_serialize_trix.py rename to test/test_serializer_trix.py diff --git a/test/test_serialize_turtle.py b/test/test_serializer_turtle.py similarity index 100% rename from test/test_serialize_turtle.py rename to test/test_serializer_turtle.py diff --git a/test/test_serialize_xml.py b/test/test_serializer_xml.py similarity index 100% rename from test/test_serialize_xml.py rename to test/test_serializer_xml.py From 75b797e0aacdd098d5ad7c863fc628d468f4dfe9 Mon Sep 17 00:00:00 2001 From: nicholascar Date: Tue, 7 Dec 2021 16:15:51 +1000 Subject: [PATCH 16/25] isinstance() instead of type() --- rdflib/plugins/serializers/hext.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/rdflib/plugins/serializers/hext.py 
b/rdflib/plugins/serializers/hext.py index cec695b2d..456dd0144 100644 --- a/rdflib/plugins/serializers/hext.py +++ b/rdflib/plugins/serializers/hext.py @@ -2,7 +2,7 @@ HextuplesSerializer RDF graph serializer for RDFLib. See for details about the format. """ -from typing import IO, TYPE_CHECKING, Optional, Union +from typing import IO, Optional, Union from rdflib.graph import Graph, ConjunctiveGraph from rdflib.term import Literal, URIRef, Node, BNode from rdflib.serializer import Serializer @@ -56,20 +56,20 @@ def serialize( stream.write(hl.encode()) def _hex_line(self, triple, context): - if type(triple[0]) in [URIRef, BNode]: # exclude QuotedGraph and other objects + if isinstance(triple[0], (URIRef, BNode)): # exclude QuotedGraph and other objects # value value = triple[2] \ - if type(triple[2]) == Literal \ + if isinstance(triple[2], Literal) \ else self._iri_or_bn(triple[2]) # datatype - if type(triple[2]) == URIRef: + if isinstance(triple[2], URIRef): # datatype = "http://www.w3.org/1999/02/22-rdf-syntax-ns#namedNode" datatype = "globalId" - elif type(triple[2]) == BNode: + elif isinstance(triple[2], BNode): # datatype = "http://www.w3.org/1999/02/22-rdf-syntax-ns#blankNode" datatype = "localId" - elif type(triple[2]) == Literal: + elif isinstance(triple[2], Literal): if triple[2].datatype is not None: datatype = f"{triple[2].datatype}" else: @@ -81,7 +81,7 @@ def _hex_line(self, triple, context): return None # can't handle non URI, BN or Literal Object (QuotedGraph) # language - if type(triple[2]) == Literal: + if isinstance(triple[2], Literal): if triple[2].language is not None: language = f"{triple[2].language}" else: @@ -101,9 +101,9 @@ def _hex_line(self, triple, context): return None def _iri_or_bn(self, i_): - if type(i_) == URIRef: + if isinstance(i_, URIRef): return f"{i_}" - elif type(i_) == BNode: + elif isinstance(i_, BNode): return f"{i_.n3()}" else: return None From 15dec4abd49c110ad5249ddf0668a72afd2cd6b4 Mon Sep 17 00:00:00 2001 From: 
nicholascar Date: Tue, 7 Dec 2021 16:20:26 +1000 Subject: [PATCH 17/25] Flake8 style improvements --- rdflib/namespace/__init__.py | 7 +++--- rdflib/plugins/parsers/notation3.py | 36 ++++++++++++++--------------- rdflib/plugins/parsers/ntriples.py | 6 ++--- rdflib/plugins/parsers/trig.py | 6 ++--- 4 files changed, 27 insertions(+), 28 deletions(-) diff --git a/rdflib/namespace/__init__.py b/rdflib/namespace/__init__.py index 0487a55da..68bc17267 100644 --- a/rdflib/namespace/__init__.py +++ b/rdflib/namespace/__init__.py @@ -81,7 +81,6 @@ __all__ = ["is_ncname", "split_uri", "Namespace", "ClosedNamespace", "NamespaceManager"] - logger = logging.getLogger(__name__) @@ -192,7 +191,7 @@ def __getitem__(cls, name, default=None): name = str(name) if str(name).startswith("__"): return super().__getitem__(name, default) - if (cls._warn or cls._fail) and not name in cls: + if (cls._warn or cls._fail) and name not in cls: if cls._fail: raise AttributeError(f"term '{name}' not in namespace '{cls._NS}'") else: @@ -220,7 +219,7 @@ def __contains__(cls, item): if item_str.startswith("__"): return super().__contains__(item) if item_str.startswith(str(cls._NS)): - item_str = item_str[len(str(cls._NS)) :] + item_str = item_str[len(str(cls._NS)):] return any( item_str in c.__annotations__ or item_str in c._extras @@ -419,7 +418,7 @@ def compute_qname(self, uri, generate=True): pl_namespace = get_longest_namespace(self.__strie[namespace], uri) if pl_namespace is not None: namespace = pl_namespace - name = uri[len(namespace) :] + name = uri[len(namespace):] namespace = URIRef(namespace) prefix = self.store.prefix(namespace) # warning multiple prefixes problem diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py index cfdc8568d..df2e87580 100755 --- a/rdflib/plugins/parsers/notation3.py +++ b/rdflib/plugins/parsers/notation3.py @@ -143,7 +143,7 @@ def join(here, there): "colon - with relative '%s'." 
% (here, there) ) - if here[bcolonl + 1 : bcolonl + 3] == "//": + if here[bcolonl + 1: bcolonl + 3] == "//": bpath = here.find("/", bcolonl + 3) else: bpath = bcolonl + 1 @@ -530,7 +530,7 @@ def sparqlTok(self, tok, argstr, i): assert tok[0] not in _notNameChars # not for punctuation len_tok = len(tok) - if argstr[i : i + len_tok].lower() == tok.lower() and ( + if argstr[i: i + len_tok].lower() == tok.lower() and ( argstr[i + len_tok] in _notQNameChars ): i += len_tok @@ -798,7 +798,7 @@ def verb(self, argstr, i, res): res.append(("->", RDF_type)) return j - if argstr[i : i + 2] == "<=": + if argstr[i: i + 2] == "<=": if self.turtle: self.BadSyntax(argstr, i, "Found '<=' in Turtle mode. ") @@ -814,7 +814,7 @@ def verb(self, argstr, i, res): res.append(("->", DAML_sameAs)) return i + 1 - if argstr[i : i + 2] == ":=": + if argstr[i: i + 2] == ":=": if self.turtle: self.BadSyntax(argstr, i, "Found ':=' in Turtle mode") @@ -827,7 +827,7 @@ def verb(self, argstr, i, res): res.append(("->", r[0])) return j - if argstr[i : i + 2] == ">-" or argstr[i : i + 2] == "<-": + if argstr[i: i + 2] == ">-" or argstr[i: i + 2] == "<-": self.BadSyntax(argstr, j, ">- ... 
-> syntax is obsolete.") return -1 @@ -945,7 +945,7 @@ def node(self, argstr, i, res, subjectAlready=None): i = self.skipSpace(argstr, j) if i < 0: self.BadSyntax(argstr, i, "needed '$}', found end.") - if argstr[i : i + 2] == "$}": + if argstr[i: i + 2] == "$}": j = i + 2 break @@ -1068,7 +1068,7 @@ def property_list(self, argstr, i, subj): break i = j + 1 - if argstr[j : j + 2] == ":-": + if argstr[j: j + 2] == ":-": if self.turtle: self.BadSyntax(argstr, j, "Found in ':-' in Turtle mode") i = j + 2 @@ -1428,7 +1428,7 @@ def object(self, argstr, i, res): ch = argstr[i] if ch in self.string_delimiters: ch_three = ch * 3 - if argstr[i : i + 3] == ch_three: + if argstr[i: i + 3] == ch_three: delim = ch_three i += 3 else: @@ -1478,7 +1478,7 @@ def nodeOrLiteral(self, argstr, i, res): ch_three = ch * 3 if ch in self.string_delimiters: - if argstr[i : i + 3] == ch_three: + if argstr[i: i + 3] == ch_three: delim = ch_three i += 3 else: @@ -1499,9 +1499,9 @@ def nodeOrLiteral(self, argstr, i, res): "Bad language code syntax on string " + "literal, after @", ) i = m.end() - lang = argstr[j + 1 : i] + lang = argstr[j + 1: i] j = i - if argstr[j : j + 2] == "^^": + if argstr[j: j + 2] == "^^": res2 = [] j = self.uri_ref2(argstr, j + 2, res2) # Read datatype URI dt = res2[0] @@ -1535,15 +1535,15 @@ def strconst(self, argstr, i, delim): if ( delim == delim3 ): # done when delim is """ or ''' and, respectively ... - if argstr[j : j + 5] == delim5: # ... we have "" or '' before + if argstr[j: j + 5] == delim5: # ... we have "" or '' before i = j + 5 ustr += delim2 return i, ustr - if argstr[j : j + 4] == delim4: # ... we have " or ' before + if argstr[j: j + 4] == delim4: # ... 
we have " or ' before i = j + 4 ustr += delim1 return i, ustr - if argstr[j : j + 3] == delim3: # current " or ' is part of delim + if argstr[j: j + 3] == delim3: # current " or ' is part of delim i = j + 3 return i, ustr @@ -1555,8 +1555,8 @@ def strconst(self, argstr, i, delim): m = interesting.search(argstr, j) # was argstr[j:]. # Note for pos param to work, MUST be compiled ... re bug? assert m, "Quote expected in string at ^ in %s^%s" % ( - argstr[j - 20 : j], - argstr[j : j + 20], + argstr[j - 20: j], + argstr[j: j + 20], ) # at least need a quote i = m.start() @@ -1633,7 +1633,7 @@ def _unicodeEscape(self, argstr, i, startline, reg, n, prefix): self._thisDoc, startline, argstr, i, "unterminated string literal(3)" ) try: - return i + n, reg.sub(unicodeExpand, "\\" + prefix + argstr[i : i + n]) + return i + n, reg.sub(unicodeExpand, "\\" + prefix + argstr[i: i + n]) except: raise BadSyntax( self._thisDoc, @@ -1685,7 +1685,7 @@ def __str__(self): self._why, pre, argstr[st:i], - argstr[i : i + 60], + argstr[i: i + 60], post, ) diff --git a/rdflib/plugins/parsers/ntriples.py b/rdflib/plugins/parsers/ntriples.py index a86bd66f0..a5858bc26 100644 --- a/rdflib/plugins/parsers/ntriples.py +++ b/rdflib/plugins/parsers/ntriples.py @@ -65,7 +65,7 @@ def unquote(s): while s: m = r_safe.match(s) if m: - s = s[m.end() :] + s = s[m.end():] result.append(m.group(1)) continue @@ -77,7 +77,7 @@ def unquote(s): m = r_uniquot.match(s) if m: - s = s[m.end() :] + s = s[m.end():] u, U = m.groups() codepoint = int(u or U, 16) if codepoint > 0x10FFFF: @@ -224,7 +224,7 @@ def eat(self, pattern): # print(dir(pattern)) # print repr(self.line), type(self.line) raise ParseError("Failed to eat %s at %s" % (pattern.pattern, self.line)) - self.line = self.line[m.end() :] + self.line = self.line[m.end():] return m def subject(self, bnode_context=None): diff --git a/rdflib/plugins/parsers/trig.py b/rdflib/plugins/parsers/trig.py index 9caa0662b..1addb747c 100644 --- 
a/rdflib/plugins/parsers/trig.py +++ b/rdflib/plugins/parsers/trig.py @@ -80,7 +80,7 @@ def graph(self, argstr, i): if j < 0: self.BadSyntax(argstr, i, "EOF found when expected graph") - if argstr[j : j + 1] == "=": # optional = for legacy support + if argstr[j: j + 1] == "=": # optional = for legacy support i = self.skipSpace(argstr, j + 1) if i < 0: @@ -88,7 +88,7 @@ def graph(self, argstr, i): else: i = j - if argstr[i : i + 1] != "{": + if argstr[i: i + 1] != "{": return -1 # the node wasn't part of a graph j = i + 1 @@ -104,7 +104,7 @@ def graph(self, argstr, i): if i < 0: self.BadSyntax(argstr, i, "needed '}', found end.") - if argstr[i : i + 1] == "}": + if argstr[i: i + 1] == "}": j = i + 1 break From 509009aa23a40dfe8b0a989270e82ba15b1e613e Mon Sep 17 00:00:00 2001 From: nicholascar Date: Tue, 7 Dec 2021 16:32:47 +1000 Subject: [PATCH 18/25] match serialize() super function signature --- rdflib/plugins/parsers/hext.py | 3 ++- rdflib/plugins/serializers/hext.py | 14 +++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/rdflib/plugins/parsers/hext.py b/rdflib/plugins/parsers/hext.py index 206ca0a3d..e225186dd 100644 --- a/rdflib/plugins/parsers/hext.py +++ b/rdflib/plugins/parsers/hext.py @@ -5,6 +5,7 @@ """ import json +from typing import List from rdflib.parser import Parser from rdflib import ConjunctiveGraph, URIRef, Literal, BNode import warnings @@ -25,7 +26,7 @@ def __init__(self): def _load_json_line(self, line: str): return [x if x != "" else None for x in json.loads(line)] - def _parse_hextuple(self, cg: ConjunctiveGraph, tup: [str]): + def _parse_hextuple(self, cg: ConjunctiveGraph, tup: List[str]): # 1 - subject if tup[0].startswith("_"): s = BNode(value=tup[0].replace("_:", "")) diff --git a/rdflib/plugins/serializers/hext.py b/rdflib/plugins/serializers/hext.py index 456dd0144..f32a1014c 100644 --- a/rdflib/plugins/serializers/hext.py +++ b/rdflib/plugins/serializers/hext.py @@ -19,7 +19,7 @@ class 
HextuplesSerializer(Serializer): def __init__(self, store: Union[Graph, ConjunctiveGraph]): self.default_context: Optional[Node] - if store.context_aware: + if type(store) != Graph: self.contexts = list(store.contexts()) if store.default_context: self.default_context = store.default_context @@ -35,12 +35,20 @@ def __init__(self, store: Union[Graph, ConjunctiveGraph]): def serialize( self, stream: IO[bytes], + base: Optional[str] = None, + encoding: Optional[str] = "utf-8", **kwargs ): - if kwargs.get("encoding") not in [None, "utf-8"]: + if base is not None: + warnings.warn( + f"base has no meaning for Hextuples serialization. " + f"I will ignore this value" + ) + + if encoding not in [None, "utf-8"]: warnings.warn( f"Hextuples files are always utf-8 encoded. " - f"I was passed: {kwargs.get('encoding')}, " + f"I was passed: {encoding}, " "but I'm still going to use utf-8 anyway!" ) From a93eb13dcf3b1de9f2589948f4416e4d485499eb Mon Sep 17 00:00:00 2001 From: nicholascar Date: Tue, 7 Dec 2021 16:35:47 +1000 Subject: [PATCH 19/25] blacked parsers & serializers --- rdflib/plugins/parsers/hext.py | 5 ++-- rdflib/plugins/parsers/notation3.py | 36 ++++++++++++++-------------- rdflib/plugins/parsers/ntriples.py | 8 +++---- rdflib/plugins/parsers/trig.py | 6 ++--- rdflib/plugins/serializers/hext.py | 14 +++++++---- rdflib/plugins/serializers/jsonld.py | 2 +- rdflib/plugins/serializers/nquads.py | 2 +- rdflib/plugins/serializers/nt.py | 2 +- rdflib/plugins/serializers/rdfxml.py | 4 ++-- rdflib/plugins/serializers/trig.py | 2 +- rdflib/plugins/serializers/trix.py | 2 +- 11 files changed, 44 insertions(+), 39 deletions(-) diff --git a/rdflib/plugins/parsers/hext.py b/rdflib/plugins/parsers/hext.py index e225186dd..06ae9b1ef 100644 --- a/rdflib/plugins/parsers/hext.py +++ b/rdflib/plugins/parsers/hext.py @@ -62,8 +62,9 @@ def parse(self, source, graph, **kwargs): "but I'm still going to use utf-8" ) - assert graph.store.context_aware, \ - "Hextuples Parser needs a context-aware 
store!" + assert ( + graph.store.context_aware + ), "Hextuples Parser needs a context-aware store!" cg = ConjunctiveGraph(store=graph.store, identifier=graph.identifier) cg.default_context = graph diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py index df2e87580..cfdc8568d 100755 --- a/rdflib/plugins/parsers/notation3.py +++ b/rdflib/plugins/parsers/notation3.py @@ -143,7 +143,7 @@ def join(here, there): "colon - with relative '%s'." % (here, there) ) - if here[bcolonl + 1: bcolonl + 3] == "//": + if here[bcolonl + 1 : bcolonl + 3] == "//": bpath = here.find("/", bcolonl + 3) else: bpath = bcolonl + 1 @@ -530,7 +530,7 @@ def sparqlTok(self, tok, argstr, i): assert tok[0] not in _notNameChars # not for punctuation len_tok = len(tok) - if argstr[i: i + len_tok].lower() == tok.lower() and ( + if argstr[i : i + len_tok].lower() == tok.lower() and ( argstr[i + len_tok] in _notQNameChars ): i += len_tok @@ -798,7 +798,7 @@ def verb(self, argstr, i, res): res.append(("->", RDF_type)) return j - if argstr[i: i + 2] == "<=": + if argstr[i : i + 2] == "<=": if self.turtle: self.BadSyntax(argstr, i, "Found '<=' in Turtle mode. ") @@ -814,7 +814,7 @@ def verb(self, argstr, i, res): res.append(("->", DAML_sameAs)) return i + 1 - if argstr[i: i + 2] == ":=": + if argstr[i : i + 2] == ":=": if self.turtle: self.BadSyntax(argstr, i, "Found ':=' in Turtle mode") @@ -827,7 +827,7 @@ def verb(self, argstr, i, res): res.append(("->", r[0])) return j - if argstr[i: i + 2] == ">-" or argstr[i: i + 2] == "<-": + if argstr[i : i + 2] == ">-" or argstr[i : i + 2] == "<-": self.BadSyntax(argstr, j, ">- ... 
-> syntax is obsolete.") return -1 @@ -945,7 +945,7 @@ def node(self, argstr, i, res, subjectAlready=None): i = self.skipSpace(argstr, j) if i < 0: self.BadSyntax(argstr, i, "needed '$}', found end.") - if argstr[i: i + 2] == "$}": + if argstr[i : i + 2] == "$}": j = i + 2 break @@ -1068,7 +1068,7 @@ def property_list(self, argstr, i, subj): break i = j + 1 - if argstr[j: j + 2] == ":-": + if argstr[j : j + 2] == ":-": if self.turtle: self.BadSyntax(argstr, j, "Found in ':-' in Turtle mode") i = j + 2 @@ -1428,7 +1428,7 @@ def object(self, argstr, i, res): ch = argstr[i] if ch in self.string_delimiters: ch_three = ch * 3 - if argstr[i: i + 3] == ch_three: + if argstr[i : i + 3] == ch_three: delim = ch_three i += 3 else: @@ -1478,7 +1478,7 @@ def nodeOrLiteral(self, argstr, i, res): ch_three = ch * 3 if ch in self.string_delimiters: - if argstr[i: i + 3] == ch_three: + if argstr[i : i + 3] == ch_three: delim = ch_three i += 3 else: @@ -1499,9 +1499,9 @@ def nodeOrLiteral(self, argstr, i, res): "Bad language code syntax on string " + "literal, after @", ) i = m.end() - lang = argstr[j + 1: i] + lang = argstr[j + 1 : i] j = i - if argstr[j: j + 2] == "^^": + if argstr[j : j + 2] == "^^": res2 = [] j = self.uri_ref2(argstr, j + 2, res2) # Read datatype URI dt = res2[0] @@ -1535,15 +1535,15 @@ def strconst(self, argstr, i, delim): if ( delim == delim3 ): # done when delim is """ or ''' and, respectively ... - if argstr[j: j + 5] == delim5: # ... we have "" or '' before + if argstr[j : j + 5] == delim5: # ... we have "" or '' before i = j + 5 ustr += delim2 return i, ustr - if argstr[j: j + 4] == delim4: # ... we have " or ' before + if argstr[j : j + 4] == delim4: # ... 
we have " or ' before i = j + 4 ustr += delim1 return i, ustr - if argstr[j: j + 3] == delim3: # current " or ' is part of delim + if argstr[j : j + 3] == delim3: # current " or ' is part of delim i = j + 3 return i, ustr @@ -1555,8 +1555,8 @@ def strconst(self, argstr, i, delim): m = interesting.search(argstr, j) # was argstr[j:]. # Note for pos param to work, MUST be compiled ... re bug? assert m, "Quote expected in string at ^ in %s^%s" % ( - argstr[j - 20: j], - argstr[j: j + 20], + argstr[j - 20 : j], + argstr[j : j + 20], ) # at least need a quote i = m.start() @@ -1633,7 +1633,7 @@ def _unicodeEscape(self, argstr, i, startline, reg, n, prefix): self._thisDoc, startline, argstr, i, "unterminated string literal(3)" ) try: - return i + n, reg.sub(unicodeExpand, "\\" + prefix + argstr[i: i + n]) + return i + n, reg.sub(unicodeExpand, "\\" + prefix + argstr[i : i + n]) except: raise BadSyntax( self._thisDoc, @@ -1685,7 +1685,7 @@ def __str__(self): self._why, pre, argstr[st:i], - argstr[i: i + 60], + argstr[i : i + 60], post, ) diff --git a/rdflib/plugins/parsers/ntriples.py b/rdflib/plugins/parsers/ntriples.py index a5858bc26..5e9ab7c82 100644 --- a/rdflib/plugins/parsers/ntriples.py +++ b/rdflib/plugins/parsers/ntriples.py @@ -65,7 +65,7 @@ def unquote(s): while s: m = r_safe.match(s) if m: - s = s[m.end():] + s = s[m.end() :] result.append(m.group(1)) continue @@ -77,7 +77,7 @@ def unquote(s): m = r_uniquot.match(s) if m: - s = s[m.end():] + s = s[m.end() :] u, U = m.groups() codepoint = int(u or U, 16) if codepoint > 0x10FFFF: @@ -186,7 +186,7 @@ def readline(self): while True: m = r_line.match(self.buffer) if m: # the more likely prospect - self.buffer = self.buffer[m.end():] + self.buffer = self.buffer[m.end() :] return m.group(1) else: buffer = self.file.read(bufsiz) @@ -224,7 +224,7 @@ def eat(self, pattern): # print(dir(pattern)) # print repr(self.line), type(self.line) raise ParseError("Failed to eat %s at %s" % (pattern.pattern, self.line)) - self.line 
= self.line[m.end():] + self.line = self.line[m.end() :] return m def subject(self, bnode_context=None): diff --git a/rdflib/plugins/parsers/trig.py b/rdflib/plugins/parsers/trig.py index 1addb747c..9caa0662b 100644 --- a/rdflib/plugins/parsers/trig.py +++ b/rdflib/plugins/parsers/trig.py @@ -80,7 +80,7 @@ def graph(self, argstr, i): if j < 0: self.BadSyntax(argstr, i, "EOF found when expected graph") - if argstr[j: j + 1] == "=": # optional = for legacy support + if argstr[j : j + 1] == "=": # optional = for legacy support i = self.skipSpace(argstr, j + 1) if i < 0: @@ -88,7 +88,7 @@ def graph(self, argstr, i): else: i = j - if argstr[i: i + 1] != "{": + if argstr[i : i + 1] != "{": return -1 # the node wasn't part of a graph j = i + 1 @@ -104,7 +104,7 @@ def graph(self, argstr, i): if i < 0: self.BadSyntax(argstr, i, "needed '}', found end.") - if argstr[i: i + 1] == "}": + if argstr[i : i + 1] == "}": j = i + 1 break diff --git a/rdflib/plugins/serializers/hext.py b/rdflib/plugins/serializers/hext.py index f32a1014c..c3a1d5da4 100644 --- a/rdflib/plugins/serializers/hext.py +++ b/rdflib/plugins/serializers/hext.py @@ -37,7 +37,7 @@ def serialize( stream: IO[bytes], base: Optional[str] = None, encoding: Optional[str] = "utf-8", - **kwargs + **kwargs, ): if base is not None: warnings.warn( @@ -64,11 +64,15 @@ def serialize( stream.write(hl.encode()) def _hex_line(self, triple, context): - if isinstance(triple[0], (URIRef, BNode)): # exclude QuotedGraph and other objects + if isinstance( + triple[0], (URIRef, BNode) + ): # exclude QuotedGraph and other objects # value - value = triple[2] \ - if isinstance(triple[2], Literal) \ + value = ( + triple[2] + if isinstance(triple[2], Literal) else self._iri_or_bn(triple[2]) + ) # datatype if isinstance(triple[2], URIRef): @@ -103,7 +107,7 @@ def _hex_line(self, triple, context): value, datatype, language, - self._context(context) + self._context(context), ) else: # do not return anything for non-IRIs or BNs, e.g. 
QuotedGraph, Subjects return None diff --git a/rdflib/plugins/serializers/jsonld.py b/rdflib/plugins/serializers/jsonld.py index f5067e287..d0bdd9078 100644 --- a/rdflib/plugins/serializers/jsonld.py +++ b/rdflib/plugins/serializers/jsonld.py @@ -62,7 +62,7 @@ def serialize( stream: IO[bytes], base: Optional[str] = None, encoding: Optional[str] = None, - **kwargs + **kwargs, ): # TODO: docstring w. args and return value encoding = encoding or "utf-8" diff --git a/rdflib/plugins/serializers/nquads.py b/rdflib/plugins/serializers/nquads.py index e76c747d4..79479cefc 100644 --- a/rdflib/plugins/serializers/nquads.py +++ b/rdflib/plugins/serializers/nquads.py @@ -25,7 +25,7 @@ def serialize( stream: IO[bytes], base: Optional[str] = None, encoding: Optional[str] = None, - **args + **args, ): if base is not None: warnings.warn("NQuadsSerializer does not support base.") diff --git a/rdflib/plugins/serializers/nt.py b/rdflib/plugins/serializers/nt.py index dfb73b1f6..34ecc9596 100644 --- a/rdflib/plugins/serializers/nt.py +++ b/rdflib/plugins/serializers/nt.py @@ -28,7 +28,7 @@ def serialize( stream: IO[bytes], base: Optional[str] = None, encoding: Optional[str] = "utf-8", - **args + **args, ): if base is not None: warnings.warn("NTSerializer does not support base.") diff --git a/rdflib/plugins/serializers/rdfxml.py b/rdflib/plugins/serializers/rdfxml.py index 901d911d9..7787d60cc 100644 --- a/rdflib/plugins/serializers/rdfxml.py +++ b/rdflib/plugins/serializers/rdfxml.py @@ -46,7 +46,7 @@ def serialize( stream: IO[bytes], base: Optional[str] = None, encoding: Optional[str] = None, - **args + **args, ): # if base is given here, use that, if not and a base is set for the graph use that if base is not None: @@ -171,7 +171,7 @@ def serialize( stream: IO[bytes], base: Optional[str] = None, encoding: Optional[str] = None, - **args + **args, ): self.__serialized: Dict[Identifier, int] = {} store = self.store diff --git a/rdflib/plugins/serializers/trig.py 
b/rdflib/plugins/serializers/trig.py index 5a606e401..3aa9ca689 100644 --- a/rdflib/plugins/serializers/trig.py +++ b/rdflib/plugins/serializers/trig.py @@ -60,7 +60,7 @@ def serialize( base: Optional[str] = None, encoding: Optional[str] = None, spacious: Optional[bool] = None, - **args + **args, ): self.reset() self.stream = stream diff --git a/rdflib/plugins/serializers/trix.py b/rdflib/plugins/serializers/trix.py index 1612d815c..63d58c44d 100644 --- a/rdflib/plugins/serializers/trix.py +++ b/rdflib/plugins/serializers/trix.py @@ -28,7 +28,7 @@ def serialize( stream: IO[bytes], base: Optional[str] = None, encoding: Optional[str] = None, - **args + **args, ): nm = self.store.namespace_manager From 4d09eb775d9542aba368d4dc6fedeb84a19f8411 Mon Sep 17 00:00:00 2001 From: nicholascar Date: Tue, 7 Dec 2021 17:28:19 +1000 Subject: [PATCH 20/25] blacked all files --- rdflib/namespace/__init__.py | 4 ++-- rdflib/parser.py | 3 +-- rdflib/plugins/shared/jsonld/util.py | 2 +- rdflib/serializer.py | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/rdflib/namespace/__init__.py b/rdflib/namespace/__init__.py index 68bc17267..0739e3722 100644 --- a/rdflib/namespace/__init__.py +++ b/rdflib/namespace/__init__.py @@ -219,7 +219,7 @@ def __contains__(cls, item): if item_str.startswith("__"): return super().__contains__(item) if item_str.startswith(str(cls._NS)): - item_str = item_str[len(str(cls._NS)):] + item_str = item_str[len(str(cls._NS)) :] return any( item_str in c.__annotations__ or item_str in c._extras @@ -418,7 +418,7 @@ def compute_qname(self, uri, generate=True): pl_namespace = get_longest_namespace(self.__strie[namespace], uri) if pl_namespace is not None: namespace = pl_namespace - name = uri[len(namespace):] + name = uri[len(namespace) :] namespace = URIRef(namespace) prefix = self.store.prefix(namespace) # warning multiple prefixes problem diff --git a/rdflib/parser.py b/rdflib/parser.py index 2a2875d99..8437a2e72 100644 --- a/rdflib/parser.py +++ 
b/rdflib/parser.py @@ -341,8 +341,7 @@ def create_input_source( input_source = StringInputSource(data) auto_close = True else: - raise RuntimeError( - f"parse data can only str, or bytes. not: {type(data)}") + raise RuntimeError(f"parse data can only str, or bytes. not: {type(data)}") if input_source is None: raise Exception("could not create InputSource") diff --git a/rdflib/plugins/shared/jsonld/util.py b/rdflib/plugins/shared/jsonld/util.py index 707b9ce24..cf71742f2 100644 --- a/rdflib/plugins/shared/jsonld/util.py +++ b/rdflib/plugins/shared/jsonld/util.py @@ -88,4 +88,4 @@ def context_from_urlinputsource(source): if ' rel="http://www.w3.org/ns/json-ld#context"' in link: i, j = link.index("<"), link.index(">") if i > -1 and j > -1: - return urljoin(source.url, link[i + 1: j]) + return urljoin(source.url, link[i + 1 : j]) diff --git a/rdflib/serializer.py b/rdflib/serializer.py index 74f29544b..15a91d968 100644 --- a/rdflib/serializer.py +++ b/rdflib/serializer.py @@ -30,7 +30,7 @@ def serialize( stream: IO[bytes], base: Optional[str] = None, encoding: Optional[str] = None, - **args + **args, ) -> None: """Abstract method""" From de1e07763854c10eaa1ecdad699f0c15dcbd2843 Mon Sep 17 00:00:00 2001 From: nicholascar Date: Tue, 7 Dec 2021 17:46:35 +1000 Subject: [PATCH 21/25] Flak8 improvements --- rdflib/compare.py | 3 +- rdflib/namespace/__init__.py | 4 +-- rdflib/plugins/parsers/jsonld.py | 2 +- rdflib/plugins/parsers/notation3.py | 46 +++++++++++-------------- rdflib/plugins/parsers/ntriples.py | 8 ++--- rdflib/plugins/parsers/trig.py | 6 ++-- rdflib/plugins/serializers/hext.py | 4 +-- rdflib/plugins/serializers/jsonld.py | 20 +++++------ rdflib/plugins/serializers/nt.py | 2 +- rdflib/plugins/serializers/rdfxml.py | 2 +- rdflib/plugins/shared/jsonld/context.py | 2 +- rdflib/plugins/shared/jsonld/util.py | 2 +- rdflib/plugins/stores/sparqlstore.py | 4 +-- rdflib/query.py | 3 +- rdflib/term.py | 2 +- rdflib/tools/rdf2dot.py | 8 ++--- rdflib/util.py | 4 +-- 17 
files changed, 57 insertions(+), 65 deletions(-) diff --git a/rdflib/compare.py b/rdflib/compare.py index 16994fa71..2d97fac36 100644 --- a/rdflib/compare.py +++ b/rdflib/compare.py @@ -366,7 +366,7 @@ def _refine(self, coloring: List[Color], sequence: List[Color]) -> List[Color]: coloring.extend(colors) try: si = sequence.index(c) - sequence = sequence[:si] + colors + sequence[si + 1 :] + sequence = sequence[:si] + colors + sequence[si + 1:] except ValueError: sequence = colors[1:] + sequence combined_colors: List[Color] = [] @@ -428,7 +428,6 @@ def _traces( candidates = self._get_candidates(coloring) best: List[List[Color]] = [] best_score = None - best_experimental = None best_experimental_score = None last_coloring = None generator: Dict[Node, Set[Node]] = defaultdict(set) diff --git a/rdflib/namespace/__init__.py b/rdflib/namespace/__init__.py index 0739e3722..68bc17267 100644 --- a/rdflib/namespace/__init__.py +++ b/rdflib/namespace/__init__.py @@ -219,7 +219,7 @@ def __contains__(cls, item): if item_str.startswith("__"): return super().__contains__(item) if item_str.startswith(str(cls._NS)): - item_str = item_str[len(str(cls._NS)) :] + item_str = item_str[len(str(cls._NS)):] return any( item_str in c.__annotations__ or item_str in c._extras @@ -418,7 +418,7 @@ def compute_qname(self, uri, generate=True): pl_namespace = get_longest_namespace(self.__strie[namespace], uri) if pl_namespace is not None: namespace = pl_namespace - name = uri[len(namespace) :] + name = uri[len(namespace):] namespace = URIRef(namespace) prefix = self.store.prefix(namespace) # warning multiple prefixes problem diff --git a/rdflib/plugins/parsers/jsonld.py b/rdflib/plugins/parsers/jsonld.py index 77fa8b4dc..c816947a4 100644 --- a/rdflib/plugins/parsers/jsonld.py +++ b/rdflib/plugins/parsers/jsonld.py @@ -94,7 +94,7 @@ def parse(self, source, sink, **kwargs): if encoding not in ("utf-8", "utf-16"): warnings.warn( "JSON should be encoded as unicode. 
" - + "Given encoding was: %s" % encoding + "Given encoding was: %s" % encoding ) base = kwargs.get("base") or sink.absolutize( diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py index cfdc8568d..9d39e970a 100755 --- a/rdflib/plugins/parsers/notation3.py +++ b/rdflib/plugins/parsers/notation3.py @@ -43,7 +43,6 @@ from rdflib.term import URIRef, BNode, Literal, Variable, _XSD_PFX, _unique_id from rdflib.graph import QuotedGraph, ConjunctiveGraph, Graph from rdflib.compat import long_type -from rdflib.compat import narrow_build __all__ = [ "BadSyntax", @@ -143,7 +142,7 @@ def join(here, there): "colon - with relative '%s'." % (here, there) ) - if here[bcolonl + 1 : bcolonl + 3] == "//": + if here[bcolonl + 1: bcolonl + 3] == "//": bpath = here.find("/", bcolonl + 3) else: bpath = bcolonl + 1 @@ -327,7 +326,7 @@ def unicodeExpand(m): N3CommentCharacter = "#" # For unix script # ! compatibility -########################################## Parse string to sink +# Parse string to sink # # Regular expressions: eol = re.compile(r"[ \t]*(#[^\n]*)?\r?\n") # end of line, poss. w/comment @@ -530,7 +529,7 @@ def sparqlTok(self, tok, argstr, i): assert tok[0] not in _notNameChars # not for punctuation len_tok = len(tok) - if argstr[i : i + len_tok].lower() == tok.lower() and ( + if argstr[i: i + len_tok].lower() == tok.lower() and ( argstr[i + len_tok] in _notQNameChars ): i += len_tok @@ -604,10 +603,7 @@ def directive(self, argstr, i): self.BadSyntax( argstr, j, - "With no base URI, cannot use " - + "relative URI in @prefix <" - + ns - + ">", + f"With no base URI, cannot use relative URI in @prefix <{ns}>", ) assert ":" in ns # must be absolute self._bindings[t[0][0]] = ns @@ -798,7 +794,7 @@ def verb(self, argstr, i, res): res.append(("->", RDF_type)) return j - if argstr[i : i + 2] == "<=": + if argstr[i: i + 2] == "<=": if self.turtle: self.BadSyntax(argstr, i, "Found '<=' in Turtle mode. 
") @@ -814,7 +810,7 @@ def verb(self, argstr, i, res): res.append(("->", DAML_sameAs)) return i + 1 - if argstr[i : i + 2] == ":=": + if argstr[i: i + 2] == ":=": if self.turtle: self.BadSyntax(argstr, i, "Found ':=' in Turtle mode") @@ -827,7 +823,7 @@ def verb(self, argstr, i, res): res.append(("->", r[0])) return j - if argstr[i : i + 2] == ">-" or argstr[i : i + 2] == "<-": + if argstr[i: i + 2] == ">-" or argstr[i: i + 2] == "<-": self.BadSyntax(argstr, j, ">- ... -> syntax is obsolete.") return -1 @@ -945,7 +941,7 @@ def node(self, argstr, i, res, subjectAlready=None): i = self.skipSpace(argstr, j) if i < 0: self.BadSyntax(argstr, i, "needed '$}', found end.") - if argstr[i : i + 2] == "$}": + if argstr[i: i + 2] == "$}": j = i + 2 break @@ -1068,7 +1064,7 @@ def property_list(self, argstr, i, subj): break i = j + 1 - if argstr[j : j + 2] == ":-": + if argstr[j: j + 2] == ":-": if self.turtle: self.BadSyntax(argstr, j, "Found in ':-' in Turtle mode") i = j + 2 @@ -1428,7 +1424,7 @@ def object(self, argstr, i, res): ch = argstr[i] if ch in self.string_delimiters: ch_three = ch * 3 - if argstr[i : i + 3] == ch_three: + if argstr[i: i + 3] == ch_three: delim = ch_three i += 3 else: @@ -1478,7 +1474,7 @@ def nodeOrLiteral(self, argstr, i, res): ch_three = ch * 3 if ch in self.string_delimiters: - if argstr[i : i + 3] == ch_three: + if argstr[i: i + 3] == ch_three: delim = ch_three i += 3 else: @@ -1499,9 +1495,9 @@ def nodeOrLiteral(self, argstr, i, res): "Bad language code syntax on string " + "literal, after @", ) i = m.end() - lang = argstr[j + 1 : i] + lang = argstr[j + 1: i] j = i - if argstr[j : j + 2] == "^^": + if argstr[j: j + 2] == "^^": res2 = [] j = self.uri_ref2(argstr, j + 2, res2) # Read datatype URI dt = res2[0] @@ -1535,15 +1531,15 @@ def strconst(self, argstr, i, delim): if ( delim == delim3 ): # done when delim is """ or ''' and, respectively ... - if argstr[j : j + 5] == delim5: # ... 
we have "" or '' before + if argstr[j: j + 5] == delim5: # ... we have "" or '' before i = j + 5 ustr += delim2 return i, ustr - if argstr[j : j + 4] == delim4: # ... we have " or ' before + if argstr[j: j + 4] == delim4: # ... we have " or ' before i = j + 4 ustr += delim1 return i, ustr - if argstr[j : j + 3] == delim3: # current " or ' is part of delim + if argstr[j: j + 3] == delim3: # current " or ' is part of delim i = j + 3 return i, ustr @@ -1555,8 +1551,8 @@ def strconst(self, argstr, i, delim): m = interesting.search(argstr, j) # was argstr[j:]. # Note for pos param to work, MUST be compiled ... re bug? assert m, "Quote expected in string at ^ in %s^%s" % ( - argstr[j - 20 : j], - argstr[j : j + 20], + argstr[j - 20: j], + argstr[j: j + 20], ) # at least need a quote i = m.start() @@ -1633,14 +1629,14 @@ def _unicodeEscape(self, argstr, i, startline, reg, n, prefix): self._thisDoc, startline, argstr, i, "unterminated string literal(3)" ) try: - return i + n, reg.sub(unicodeExpand, "\\" + prefix + argstr[i : i + n]) + return i + n, reg.sub(unicodeExpand, "\\" + prefix + argstr[i: i + n]) except: raise BadSyntax( self._thisDoc, startline, argstr, i, - "bad string literal hex escape: " + argstr[i : i + n], + "bad string literal hex escape: " + argstr[i: i + n], ) def uEscape(self, argstr, i, startline): @@ -1685,7 +1681,7 @@ def __str__(self): self._why, pre, argstr[st:i], - argstr[i : i + 60], + argstr[i: i + 60], post, ) diff --git a/rdflib/plugins/parsers/ntriples.py b/rdflib/plugins/parsers/ntriples.py index 5e9ab7c82..a5858bc26 100644 --- a/rdflib/plugins/parsers/ntriples.py +++ b/rdflib/plugins/parsers/ntriples.py @@ -65,7 +65,7 @@ def unquote(s): while s: m = r_safe.match(s) if m: - s = s[m.end() :] + s = s[m.end():] result.append(m.group(1)) continue @@ -77,7 +77,7 @@ def unquote(s): m = r_uniquot.match(s) if m: - s = s[m.end() :] + s = s[m.end():] u, U = m.groups() codepoint = int(u or U, 16) if codepoint > 0x10FFFF: @@ -186,7 +186,7 @@ def 
readline(self): while True: m = r_line.match(self.buffer) if m: # the more likely prospect - self.buffer = self.buffer[m.end() :] + self.buffer = self.buffer[m.end():] return m.group(1) else: buffer = self.file.read(bufsiz) @@ -224,7 +224,7 @@ def eat(self, pattern): # print(dir(pattern)) # print repr(self.line), type(self.line) raise ParseError("Failed to eat %s at %s" % (pattern.pattern, self.line)) - self.line = self.line[m.end() :] + self.line = self.line[m.end():] return m def subject(self, bnode_context=None): diff --git a/rdflib/plugins/parsers/trig.py b/rdflib/plugins/parsers/trig.py index 9caa0662b..1addb747c 100644 --- a/rdflib/plugins/parsers/trig.py +++ b/rdflib/plugins/parsers/trig.py @@ -80,7 +80,7 @@ def graph(self, argstr, i): if j < 0: self.BadSyntax(argstr, i, "EOF found when expected graph") - if argstr[j : j + 1] == "=": # optional = for legacy support + if argstr[j: j + 1] == "=": # optional = for legacy support i = self.skipSpace(argstr, j + 1) if i < 0: @@ -88,7 +88,7 @@ def graph(self, argstr, i): else: i = j - if argstr[i : i + 1] != "{": + if argstr[i: i + 1] != "{": return -1 # the node wasn't part of a graph j = i + 1 @@ -104,7 +104,7 @@ def graph(self, argstr, i): if i < 0: self.BadSyntax(argstr, i, "needed '}', found end.") - if argstr[i : i + 1] == "}": + if argstr[i: i + 1] == "}": j = i + 1 break diff --git a/rdflib/plugins/serializers/hext.py b/rdflib/plugins/serializers/hext.py index c3a1d5da4..858e2d4e8 100644 --- a/rdflib/plugins/serializers/hext.py +++ b/rdflib/plugins/serializers/hext.py @@ -41,8 +41,8 @@ def serialize( ): if base is not None: warnings.warn( - f"base has no meaning for Hextuples serialization. " - f"I will ignore this value" + "base has no meaning for Hextuples serialization. 
" + "I will ignore this value" ) if encoding not in [None, "utf-8"]: diff --git a/rdflib/plugins/serializers/jsonld.py b/rdflib/plugins/serializers/jsonld.py index d0bdd9078..002f04f75 100644 --- a/rdflib/plugins/serializers/jsonld.py +++ b/rdflib/plugins/serializers/jsonld.py @@ -364,18 +364,18 @@ def to_raw_value(self, graph, s, o, nodemap): else: return v - def to_collection(self, graph, l): - if l != RDF.nil and not graph.value(l, RDF.first): + def to_collection(self, graph, l_): + if l_ != RDF.nil and not graph.value(l_, RDF.first): return None list_nodes = [] - chain = set([l]) - while l: - if l == RDF.nil: + chain = set([l_]) + while l_: + if l_ == RDF.nil: return list_nodes - if isinstance(l, URIRef): + if isinstance(l_, URIRef): return None first, rest = None, None - for p, o in graph.predicate_objects(l): + for p, o in graph.predicate_objects(l_): if not first and p == RDF.first: first = o elif not rest and p == RDF.rest: @@ -383,7 +383,7 @@ def to_collection(self, graph, l): elif p != RDF.type or o != RDF.List: return None list_nodes.append(first) - l = rest - if l in chain: + l_ = rest + if l_ in chain: return None - chain.add(l) + chain.add(l_) diff --git a/rdflib/plugins/serializers/nt.py b/rdflib/plugins/serializers/nt.py index 34ecc9596..e1b60e436 100644 --- a/rdflib/plugins/serializers/nt.py +++ b/rdflib/plugins/serializers/nt.py @@ -95,7 +95,7 @@ def _replace_single(c): fmt = "\\u%04X" if c <= 0xFFFF else "\\U%08X" return fmt % c - string = err.object[err.start : err.end] + string = err.object[err.start: err.end] return "".join(_replace_single(c) for c in string), err.end diff --git a/rdflib/plugins/serializers/rdfxml.py b/rdflib/plugins/serializers/rdfxml.py index 7787d60cc..790c23939 100644 --- a/rdflib/plugins/serializers/rdfxml.py +++ b/rdflib/plugins/serializers/rdfxml.py @@ -1,4 +1,4 @@ -from typing import IO, Dict, Optional, Set, cast +from typing import IO, Dict, Optional, Set from rdflib.plugins.serializers.xmlwriter import XMLWriter from 
rdflib.namespace import Namespace, RDF, RDFS # , split_uri diff --git a/rdflib/plugins/shared/jsonld/context.py b/rdflib/plugins/shared/jsonld/context.py index 30bb02eab..e11bb447b 100644 --- a/rdflib/plugins/shared/jsonld/context.py +++ b/rdflib/plugins/shared/jsonld/context.py @@ -329,7 +329,7 @@ def shrink_iri(self, iri): if str(iri) == self._base: return "" elif iri.startswith(self._basedomain): - return iri[len(self._basedomain) :] + return iri[len(self._basedomain):] return iri def to_symbol(self, iri): diff --git a/rdflib/plugins/shared/jsonld/util.py b/rdflib/plugins/shared/jsonld/util.py index cf71742f2..707b9ce24 100644 --- a/rdflib/plugins/shared/jsonld/util.py +++ b/rdflib/plugins/shared/jsonld/util.py @@ -88,4 +88,4 @@ def context_from_urlinputsource(source): if ' rel="http://www.w3.org/ns/json-ld#context"' in link: i, j = link.index("<"), link.index(">") if i > -1 and j > -1: - return urljoin(source.url, link[i + 1 : j]) + return urljoin(source.url, link[i + 1: j]) diff --git a/rdflib/plugins/stores/sparqlstore.py b/rdflib/plugins/stores/sparqlstore.py index aed43088c..566360498 100644 --- a/rdflib/plugins/stores/sparqlstore.py +++ b/rdflib/plugins/stores/sparqlstore.py @@ -782,12 +782,12 @@ def _insert_named_graph(self, query, query_graph): if match.group("block_start") is not None: level += 1 if level == 1: - modified_query.append(query[pos : match.end()]) + modified_query.append(query[pos: match.end()]) modified_query.append(graph_block_open) pos = match.end() elif match.group("block_end") is not None: if level == 1: - since_previous_pos = query[pos : match.start()] + since_previous_pos = query[pos: match.start()] if modified_query[-1] is graph_block_open and ( since_previous_pos == "" or since_previous_pos.isspace() ): diff --git a/rdflib/query.py b/rdflib/query.py index 0bc842841..dd9117084 100644 --- a/rdflib/query.py +++ b/rdflib/query.py @@ -6,7 +6,7 @@ import types from typing import IO, TYPE_CHECKING, List, Optional, Union, cast -from io 
import BytesIO, BufferedIOBase +from io import BytesIO from urllib.parse import urlparse @@ -333,7 +333,6 @@ def __eq__(self, other): return self.vars == other.vars and self.bindings == other.bindings else: return self.graph == other.graph - except: return False diff --git a/rdflib/term.py b/rdflib/term.py index 796a76b34..fb7acd638 100644 --- a/rdflib/term.py +++ b/rdflib/term.py @@ -315,7 +315,7 @@ def de_skolemize(self): """ if isinstance(self, RDFLibGenid): parsed_uri = urlparse("%s" % self) - return BNode(value=parsed_uri.path[len(rdflib_skolem_genid) :]) + return BNode(value=parsed_uri.path[len(rdflib_skolem_genid):]) elif isinstance(self, Genid): bnode_id = "%s" % self if bnode_id in skolems: diff --git a/rdflib/tools/rdf2dot.py b/rdflib/tools/rdf2dot.py index b6bed525b..afaabd318 100644 --- a/rdflib/tools/rdf2dot.py +++ b/rdflib/tools/rdf2dot.py @@ -94,12 +94,10 @@ def node(x): return nodes[x] def label(x, g): - for labelProp in LABEL_PROPERTIES: - l = g.value(x, labelProp) - if l: - return l - + l_ = g.value(x, labelProp) + if l_: + return l_ try: return g.namespace_manager.compute_qname(x)[2] except: diff --git a/rdflib/util.py b/rdflib/util.py index 182c1fe3e..15c372885 100644 --- a/rdflib/util.py +++ b/rdflib/util.py @@ -176,7 +176,7 @@ def from_n3(s: str, default=None, backend=None, nsm=None): else: quotes = '"' value, rest = s.rsplit(quotes, 1) - value = value[len(quotes) :] # strip leading quotes + value = value[len(quotes):] # strip leading quotes datatype = None language = None @@ -187,7 +187,7 @@ def from_n3(s: str, default=None, backend=None, nsm=None): # datatype has to come after lang-tag so ignore everything before # see: http://www.w3.org/TR/2011/WD-turtle-20110809/ # #prod-turtle2-RDFLiteral - datatype = from_n3(rest[dtoffset + 2 :], default, backend, nsm) + datatype = from_n3(rest[dtoffset + 2:], default, backend, nsm) else: if rest.startswith("@"): language = rest[1:] # strip leading at sign From 2de6cf2aa3e4610613a10648f7c2d05629c8a37a 
Mon Sep 17 00:00:00 2001 From: nicholascar Date: Tue, 7 Dec 2021 18:34:21 +1000 Subject: [PATCH 22/25] backing all files --- rdflib/__init__.py | 2 +- rdflib/compare.py | 2 +- rdflib/namespace/__init__.py | 4 +-- rdflib/plugins/parsers/notation3.py | 38 ++++++++++++------------- rdflib/plugins/parsers/ntriples.py | 8 +++--- rdflib/plugins/parsers/trig.py | 6 ++-- rdflib/plugins/serializers/nt.py | 2 +- rdflib/plugins/shared/jsonld/context.py | 2 +- rdflib/plugins/shared/jsonld/util.py | 2 +- rdflib/plugins/stores/sparqlstore.py | 4 +-- rdflib/term.py | 2 +- rdflib/util.py | 4 +-- 12 files changed, 38 insertions(+), 38 deletions(-) diff --git a/rdflib/__init__.py b/rdflib/__init__.py index 9b8daeb0e..105418611 100644 --- a/rdflib/__init__.py +++ b/rdflib/__init__.py @@ -45,7 +45,7 @@ __docformat__ = "restructuredtext en" # The format of the __version__ line is matched by a regex in setup.py -__version__ = "6.03a" +__version__ = "6.0.3" __date__ = "2021-10-10" __all__ = [ diff --git a/rdflib/compare.py b/rdflib/compare.py index 2d97fac36..f82564390 100644 --- a/rdflib/compare.py +++ b/rdflib/compare.py @@ -366,7 +366,7 @@ def _refine(self, coloring: List[Color], sequence: List[Color]) -> List[Color]: coloring.extend(colors) try: si = sequence.index(c) - sequence = sequence[:si] + colors + sequence[si + 1:] + sequence = sequence[:si] + colors + sequence[si + 1 :] except ValueError: sequence = colors[1:] + sequence combined_colors: List[Color] = [] diff --git a/rdflib/namespace/__init__.py b/rdflib/namespace/__init__.py index 68bc17267..0739e3722 100644 --- a/rdflib/namespace/__init__.py +++ b/rdflib/namespace/__init__.py @@ -219,7 +219,7 @@ def __contains__(cls, item): if item_str.startswith("__"): return super().__contains__(item) if item_str.startswith(str(cls._NS)): - item_str = item_str[len(str(cls._NS)):] + item_str = item_str[len(str(cls._NS)) :] return any( item_str in c.__annotations__ or item_str in c._extras @@ -418,7 +418,7 @@ def compute_qname(self, uri, 
generate=True): pl_namespace = get_longest_namespace(self.__strie[namespace], uri) if pl_namespace is not None: namespace = pl_namespace - name = uri[len(namespace):] + name = uri[len(namespace) :] namespace = URIRef(namespace) prefix = self.store.prefix(namespace) # warning multiple prefixes problem diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py index 9d39e970a..31b20036a 100755 --- a/rdflib/plugins/parsers/notation3.py +++ b/rdflib/plugins/parsers/notation3.py @@ -142,7 +142,7 @@ def join(here, there): "colon - with relative '%s'." % (here, there) ) - if here[bcolonl + 1: bcolonl + 3] == "//": + if here[bcolonl + 1 : bcolonl + 3] == "//": bpath = here.find("/", bcolonl + 3) else: bpath = bcolonl + 1 @@ -529,7 +529,7 @@ def sparqlTok(self, tok, argstr, i): assert tok[0] not in _notNameChars # not for punctuation len_tok = len(tok) - if argstr[i: i + len_tok].lower() == tok.lower() and ( + if argstr[i : i + len_tok].lower() == tok.lower() and ( argstr[i + len_tok] in _notQNameChars ): i += len_tok @@ -794,7 +794,7 @@ def verb(self, argstr, i, res): res.append(("->", RDF_type)) return j - if argstr[i: i + 2] == "<=": + if argstr[i : i + 2] == "<=": if self.turtle: self.BadSyntax(argstr, i, "Found '<=' in Turtle mode. ") @@ -810,7 +810,7 @@ def verb(self, argstr, i, res): res.append(("->", DAML_sameAs)) return i + 1 - if argstr[i: i + 2] == ":=": + if argstr[i : i + 2] == ":=": if self.turtle: self.BadSyntax(argstr, i, "Found ':=' in Turtle mode") @@ -823,7 +823,7 @@ def verb(self, argstr, i, res): res.append(("->", r[0])) return j - if argstr[i: i + 2] == ">-" or argstr[i: i + 2] == "<-": + if argstr[i : i + 2] == ">-" or argstr[i : i + 2] == "<-": self.BadSyntax(argstr, j, ">- ... 
-> syntax is obsolete.") return -1 @@ -941,7 +941,7 @@ def node(self, argstr, i, res, subjectAlready=None): i = self.skipSpace(argstr, j) if i < 0: self.BadSyntax(argstr, i, "needed '$}', found end.") - if argstr[i: i + 2] == "$}": + if argstr[i : i + 2] == "$}": j = i + 2 break @@ -1064,7 +1064,7 @@ def property_list(self, argstr, i, subj): break i = j + 1 - if argstr[j: j + 2] == ":-": + if argstr[j : j + 2] == ":-": if self.turtle: self.BadSyntax(argstr, j, "Found in ':-' in Turtle mode") i = j + 2 @@ -1424,7 +1424,7 @@ def object(self, argstr, i, res): ch = argstr[i] if ch in self.string_delimiters: ch_three = ch * 3 - if argstr[i: i + 3] == ch_three: + if argstr[i : i + 3] == ch_three: delim = ch_three i += 3 else: @@ -1474,7 +1474,7 @@ def nodeOrLiteral(self, argstr, i, res): ch_three = ch * 3 if ch in self.string_delimiters: - if argstr[i: i + 3] == ch_three: + if argstr[i : i + 3] == ch_three: delim = ch_three i += 3 else: @@ -1495,9 +1495,9 @@ def nodeOrLiteral(self, argstr, i, res): "Bad language code syntax on string " + "literal, after @", ) i = m.end() - lang = argstr[j + 1: i] + lang = argstr[j + 1 : i] j = i - if argstr[j: j + 2] == "^^": + if argstr[j : j + 2] == "^^": res2 = [] j = self.uri_ref2(argstr, j + 2, res2) # Read datatype URI dt = res2[0] @@ -1531,15 +1531,15 @@ def strconst(self, argstr, i, delim): if ( delim == delim3 ): # done when delim is """ or ''' and, respectively ... - if argstr[j: j + 5] == delim5: # ... we have "" or '' before + if argstr[j : j + 5] == delim5: # ... we have "" or '' before i = j + 5 ustr += delim2 return i, ustr - if argstr[j: j + 4] == delim4: # ... we have " or ' before + if argstr[j : j + 4] == delim4: # ... 
we have " or ' before i = j + 4 ustr += delim1 return i, ustr - if argstr[j: j + 3] == delim3: # current " or ' is part of delim + if argstr[j : j + 3] == delim3: # current " or ' is part of delim i = j + 3 return i, ustr @@ -1551,8 +1551,8 @@ def strconst(self, argstr, i, delim): m = interesting.search(argstr, j) # was argstr[j:]. # Note for pos param to work, MUST be compiled ... re bug? assert m, "Quote expected in string at ^ in %s^%s" % ( - argstr[j - 20: j], - argstr[j: j + 20], + argstr[j - 20 : j], + argstr[j : j + 20], ) # at least need a quote i = m.start() @@ -1629,14 +1629,14 @@ def _unicodeEscape(self, argstr, i, startline, reg, n, prefix): self._thisDoc, startline, argstr, i, "unterminated string literal(3)" ) try: - return i + n, reg.sub(unicodeExpand, "\\" + prefix + argstr[i: i + n]) + return i + n, reg.sub(unicodeExpand, "\\" + prefix + argstr[i : i + n]) except: raise BadSyntax( self._thisDoc, startline, argstr, i, - "bad string literal hex escape: " + argstr[i: i + n], + "bad string literal hex escape: " + argstr[i : i + n], ) def uEscape(self, argstr, i, startline): @@ -1681,7 +1681,7 @@ def __str__(self): self._why, pre, argstr[st:i], - argstr[i: i + 60], + argstr[i : i + 60], post, ) diff --git a/rdflib/plugins/parsers/ntriples.py b/rdflib/plugins/parsers/ntriples.py index a5858bc26..5e9ab7c82 100644 --- a/rdflib/plugins/parsers/ntriples.py +++ b/rdflib/plugins/parsers/ntriples.py @@ -65,7 +65,7 @@ def unquote(s): while s: m = r_safe.match(s) if m: - s = s[m.end():] + s = s[m.end() :] result.append(m.group(1)) continue @@ -77,7 +77,7 @@ def unquote(s): m = r_uniquot.match(s) if m: - s = s[m.end():] + s = s[m.end() :] u, U = m.groups() codepoint = int(u or U, 16) if codepoint > 0x10FFFF: @@ -186,7 +186,7 @@ def readline(self): while True: m = r_line.match(self.buffer) if m: # the more likely prospect - self.buffer = self.buffer[m.end():] + self.buffer = self.buffer[m.end() :] return m.group(1) else: buffer = self.file.read(bufsiz) @@ -224,7 
+224,7 @@ def eat(self, pattern): # print(dir(pattern)) # print repr(self.line), type(self.line) raise ParseError("Failed to eat %s at %s" % (pattern.pattern, self.line)) - self.line = self.line[m.end():] + self.line = self.line[m.end() :] return m def subject(self, bnode_context=None): diff --git a/rdflib/plugins/parsers/trig.py b/rdflib/plugins/parsers/trig.py index 1addb747c..9caa0662b 100644 --- a/rdflib/plugins/parsers/trig.py +++ b/rdflib/plugins/parsers/trig.py @@ -80,7 +80,7 @@ def graph(self, argstr, i): if j < 0: self.BadSyntax(argstr, i, "EOF found when expected graph") - if argstr[j: j + 1] == "=": # optional = for legacy support + if argstr[j : j + 1] == "=": # optional = for legacy support i = self.skipSpace(argstr, j + 1) if i < 0: @@ -88,7 +88,7 @@ def graph(self, argstr, i): else: i = j - if argstr[i: i + 1] != "{": + if argstr[i : i + 1] != "{": return -1 # the node wasn't part of a graph j = i + 1 @@ -104,7 +104,7 @@ def graph(self, argstr, i): if i < 0: self.BadSyntax(argstr, i, "needed '}', found end.") - if argstr[i: i + 1] == "}": + if argstr[i : i + 1] == "}": j = i + 1 break diff --git a/rdflib/plugins/serializers/nt.py b/rdflib/plugins/serializers/nt.py index e1b60e436..34ecc9596 100644 --- a/rdflib/plugins/serializers/nt.py +++ b/rdflib/plugins/serializers/nt.py @@ -95,7 +95,7 @@ def _replace_single(c): fmt = "\\u%04X" if c <= 0xFFFF else "\\U%08X" return fmt % c - string = err.object[err.start: err.end] + string = err.object[err.start : err.end] return "".join(_replace_single(c) for c in string), err.end diff --git a/rdflib/plugins/shared/jsonld/context.py b/rdflib/plugins/shared/jsonld/context.py index e11bb447b..30bb02eab 100644 --- a/rdflib/plugins/shared/jsonld/context.py +++ b/rdflib/plugins/shared/jsonld/context.py @@ -329,7 +329,7 @@ def shrink_iri(self, iri): if str(iri) == self._base: return "" elif iri.startswith(self._basedomain): - return iri[len(self._basedomain):] + return iri[len(self._basedomain) :] return iri def 
to_symbol(self, iri): diff --git a/rdflib/plugins/shared/jsonld/util.py b/rdflib/plugins/shared/jsonld/util.py index 707b9ce24..cf71742f2 100644 --- a/rdflib/plugins/shared/jsonld/util.py +++ b/rdflib/plugins/shared/jsonld/util.py @@ -88,4 +88,4 @@ def context_from_urlinputsource(source): if ' rel="http://www.w3.org/ns/json-ld#context"' in link: i, j = link.index("<"), link.index(">") if i > -1 and j > -1: - return urljoin(source.url, link[i + 1: j]) + return urljoin(source.url, link[i + 1 : j]) diff --git a/rdflib/plugins/stores/sparqlstore.py b/rdflib/plugins/stores/sparqlstore.py index 566360498..aed43088c 100644 --- a/rdflib/plugins/stores/sparqlstore.py +++ b/rdflib/plugins/stores/sparqlstore.py @@ -782,12 +782,12 @@ def _insert_named_graph(self, query, query_graph): if match.group("block_start") is not None: level += 1 if level == 1: - modified_query.append(query[pos: match.end()]) + modified_query.append(query[pos : match.end()]) modified_query.append(graph_block_open) pos = match.end() elif match.group("block_end") is not None: if level == 1: - since_previous_pos = query[pos: match.start()] + since_previous_pos = query[pos : match.start()] if modified_query[-1] is graph_block_open and ( since_previous_pos == "" or since_previous_pos.isspace() ): diff --git a/rdflib/term.py b/rdflib/term.py index fb7acd638..796a76b34 100644 --- a/rdflib/term.py +++ b/rdflib/term.py @@ -315,7 +315,7 @@ def de_skolemize(self): """ if isinstance(self, RDFLibGenid): parsed_uri = urlparse("%s" % self) - return BNode(value=parsed_uri.path[len(rdflib_skolem_genid):]) + return BNode(value=parsed_uri.path[len(rdflib_skolem_genid) :]) elif isinstance(self, Genid): bnode_id = "%s" % self if bnode_id in skolems: diff --git a/rdflib/util.py b/rdflib/util.py index 15c372885..182c1fe3e 100644 --- a/rdflib/util.py +++ b/rdflib/util.py @@ -176,7 +176,7 @@ def from_n3(s: str, default=None, backend=None, nsm=None): else: quotes = '"' value, rest = s.rsplit(quotes, 1) - value = 
value[len(quotes):] # strip leading quotes + value = value[len(quotes) :] # strip leading quotes datatype = None language = None @@ -187,7 +187,7 @@ def from_n3(s: str, default=None, backend=None, nsm=None): # datatype has to come after lang-tag so ignore everything before # see: http://www.w3.org/TR/2011/WD-turtle-20110809/ # #prod-turtle2-RDFLiteral - datatype = from_n3(rest[dtoffset + 2:], default, backend, nsm) + datatype = from_n3(rest[dtoffset + 2 :], default, backend, nsm) else: if rest.startswith("@"): language = rest[1:] # strip leading at sign From 8e8071c8d1ce9db2216a7cdf83643027870417e1 Mon Sep 17 00:00:00 2001 From: nicholascar Date: Tue, 7 Dec 2021 19:40:59 +1000 Subject: [PATCH 23/25] ignore Flake Error W505 as soon it won't be considered an error --- .flake8 | 2 ++ rdflib/plugins/parsers/hext.py | 4 ++-- rdflib/plugins/serializers/xmlwriter.py | 4 ++-- test/test_parser_hext.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.flake8 b/.flake8 index 177f450c8..e44a31d27 100644 --- a/.flake8 +++ b/.flake8 @@ -4,3 +4,5 @@ extend-ignore = # E501: line too long # Disabled so that black can control line length. 
E501, + # Ignored since this is soon not going to be considered an error, see https://www.flake8rules.com/rules/W503.html + W503, diff --git a/rdflib/plugins/parsers/hext.py b/rdflib/plugins/parsers/hext.py index 06ae9b1ef..d30641ca2 100644 --- a/rdflib/plugins/parsers/hext.py +++ b/rdflib/plugins/parsers/hext.py @@ -5,7 +5,7 @@ """ import json -from typing import List +from typing import List, Optional from rdflib.parser import Parser from rdflib import ConjunctiveGraph, URIRef, Literal, BNode import warnings @@ -26,7 +26,7 @@ def __init__(self): def _load_json_line(self, line: str): return [x if x != "" else None for x in json.loads(line)] - def _parse_hextuple(self, cg: ConjunctiveGraph, tup: List[str]): + def _parse_hextuple(self, cg: ConjunctiveGraph, tup: List[Optional[str, None]]): # 1 - subject if tup[0].startswith("_"): s = BNode(value=tup[0].replace("_:", "")) diff --git a/rdflib/plugins/serializers/xmlwriter.py b/rdflib/plugins/serializers/xmlwriter.py index b0f1a06e9..99d1e7677 100644 --- a/rdflib/plugins/serializers/xmlwriter.py +++ b/rdflib/plugins/serializers/xmlwriter.py @@ -100,8 +100,8 @@ def qname(self, uri): for pre, ns in self.extra_ns.items(): if uri.startswith(ns): if pre != "": - return ":".join(pre, uri[len(ns) :]) + return ":".join(pre, uri[len(ns):]) else: - return uri[len(ns) :] + return uri[len(ns):] return self.nm.qname_strict(uri) diff --git a/test/test_parser_hext.py b/test/test_parser_hext.py index 50d09fc05..3253922f9 100644 --- a/test/test_parser_hext.py +++ b/test/test_parser_hext.py @@ -111,4 +111,4 @@ def test_roundtrip(): if __name__ == "__main__": - test_roundtrip() + test_small_file_multigraph() From 0891e2eab5ddf1ba2609edf7c74a2398fa2440c5 Mon Sep 17 00:00:00 2001 From: nicholascar Date: Tue, 7 Dec 2021 20:32:10 +1000 Subject: [PATCH 24/25] more Flak8 improvements --- rdflib/plugins/parsers/hext.py | 4 ++-- rdflib/plugins/serializers/nquads.py | 5 ++++- rdflib/plugins/serializers/nt.py | 5 ++++- test/test_prefixTypes.py | 7 
+++---- test/test_preflabel.py | 18 +++++++++--------- test/test_swap_n3.py | 1 - test/test_term.py | 8 ++++---- 7 files changed, 26 insertions(+), 22 deletions(-) diff --git a/rdflib/plugins/parsers/hext.py b/rdflib/plugins/parsers/hext.py index d30641ca2..b61fc0e43 100644 --- a/rdflib/plugins/parsers/hext.py +++ b/rdflib/plugins/parsers/hext.py @@ -5,7 +5,7 @@ """ import json -from typing import List, Optional +from typing import List, Union from rdflib.parser import Parser from rdflib import ConjunctiveGraph, URIRef, Literal, BNode import warnings @@ -26,7 +26,7 @@ def __init__(self): def _load_json_line(self, line: str): return [x if x != "" else None for x in json.loads(line)] - def _parse_hextuple(self, cg: ConjunctiveGraph, tup: List[Optional[str, None]]): + def _parse_hextuple(self, cg: ConjunctiveGraph, tup: List[Union[str, None]]): # 1 - subject if tup[0].startswith("_"): s = BNode(value=tup[0].replace("_:", "")) diff --git a/rdflib/plugins/serializers/nquads.py b/rdflib/plugins/serializers/nquads.py index 79479cefc..ce2d2f7c3 100644 --- a/rdflib/plugins/serializers/nquads.py +++ b/rdflib/plugins/serializers/nquads.py @@ -30,7 +30,10 @@ def serialize( if base is not None: warnings.warn("NQuadsSerializer does not support base.") if encoding is not None and encoding.lower() != self.encoding.lower(): - warnings.warn("NQuadsSerializer does not use custom encoding.") + warnings.warn( + "NQuadsSerializer does not use custom encoding." 
+ + "Given encoding was: %s" % encoding + ) encoding = self.encoding for context in self.store.contexts(): for triple in context: diff --git a/rdflib/plugins/serializers/nt.py b/rdflib/plugins/serializers/nt.py index 34ecc9596..7862528d5 100644 --- a/rdflib/plugins/serializers/nt.py +++ b/rdflib/plugins/serializers/nt.py @@ -33,7 +33,10 @@ def serialize( if base is not None: warnings.warn("NTSerializer does not support base.") if encoding != "utf-8": - warnings.warn("NTSerializer always uses UTF-8 encoding.") + warnings.warn( + "NTSerializer always uses UTF-8 encoding." + + "Given encoding was: %s" % encoding + ) for triple in self.store: stream.write(_nt_row(triple).encode()) diff --git a/test/test_prefixTypes.py b/test/test_prefixTypes.py index 045c6056d..57408d4f1 100644 --- a/test/test_prefixTypes.py +++ b/test/test_prefixTypes.py @@ -25,10 +25,9 @@ class PrefixTypesTest(unittest.TestCase): """ def test(self): - s = graph.serialize(format="n3", encoding="latin-1") - print(s) - self.assertTrue(b"foaf:Document" in s) - self.assertTrue(b"xsd:date" in s) + s = graph.serialize(format="n3") + self.assertTrue("foaf:Document" in s) + self.assertTrue("xsd:date" in s) if __name__ == "__main__": diff --git a/test/test_preflabel.py b/test/test_preflabel.py index 77ecbcd3d..39057d48e 100644 --- a/test/test_preflabel.py +++ b/test/test_preflabel.py @@ -10,9 +10,9 @@ class TestPrefLabel(unittest.TestCase): def setUp(self): self.g = ConjunctiveGraph() - self.u = URIRef("http://example.com/foo") - self.g.add([self.u, RDFS.label, Literal("foo")]) - self.g.add([self.u, RDFS.label, Literal("bar")]) + self.u = URIRef("https://example.com/foo") + self.g.add((self.u, RDFS.label, Literal("foo"))) + self.g.add((self.u, RDFS.label, Literal("bar"))) def test_default_label_sorting(self): res = sorted(self.g.preferredLabel(self.u)) @@ -29,7 +29,7 @@ def test_default_label_sorting(self): self.assertEqual(res, tgt) def test_default_preflabel_sorting(self): - self.g.add([self.u, 
SKOS.prefLabel, Literal("bla")]) + self.g.add((self.u, SKOS.prefLabel, Literal("bla"))) res = self.g.preferredLabel(self.u) tgt = [ ( @@ -40,8 +40,8 @@ def test_default_preflabel_sorting(self): self.assertEqual(res, tgt) def test_preflabel_lang_sorting_no_lang_attr(self): - self.g.add([self.u, SKOS.prefLabel, Literal("bla")]) - self.g.add([self.u, SKOS.prefLabel, Literal("blubb", lang="en")]) + self.g.add((self.u, SKOS.prefLabel, Literal("bla"))) + self.g.add((self.u, SKOS.prefLabel, Literal("blubb", lang="en"))) res = sorted(self.g.preferredLabel(self.u)) tgt = [ ( @@ -57,8 +57,8 @@ def test_preflabel_lang_sorting_no_lang_attr(self): self.assertEqual(res, tgt) def test_preflabel_lang_sorting_empty_lang_attr(self): - self.g.add([self.u, SKOS.prefLabel, Literal("bla")]) - self.g.add([self.u, SKOS.prefLabel, Literal("blubb", lang="en")]) + self.g.add((self.u, SKOS.prefLabel, Literal("bla"))) + self.g.add((self.u, SKOS.prefLabel, Literal("blubb", lang="en"))) res = self.g.preferredLabel(self.u, lang="") tgt = [ ( @@ -69,7 +69,7 @@ def test_preflabel_lang_sorting_empty_lang_attr(self): self.assertEqual(res, tgt) def test_preflabel_lang_sorting_en_lang_attr(self): - self.g.add([self.u, SKOS.prefLabel, Literal("blubb", lang="en")]) + self.g.add((self.u, SKOS.prefLabel, Literal("blubb", lang="en"))) res = self.g.preferredLabel(self.u, lang="en") tgt = [ ( diff --git a/test/test_swap_n3.py b/test/test_swap_n3.py index 1734806cb..c0b596cdd 100644 --- a/test/test_swap_n3.py +++ b/test/test_swap_n3.py @@ -1,6 +1,5 @@ import os import sys -import unittest import pytest diff --git a/test/test_term.py b/test/test_term.py index e154bd4eb..0f9dbea21 100644 --- a/test/test_term.py +++ b/test/test_term.py @@ -74,16 +74,16 @@ def test_total_order(self): sorted(literals) orderable = True except TypeError as e: - for l in literals: - print(repr(l), repr(l.value)) + for l_ in literals: + print(repr(l_), repr(l_.value)) print(e) orderable = False self.assertTrue(orderable) # also make 
sure that within a datetime things are still ordered: l1 = [ - Literal(l, datatype=XSD.dateTime) - for l in [ + Literal(l_, datatype=XSD.dateTime) + for l_ in [ "2001-01-01T00:00:00", "2001-01-01T01:00:00", "2001-01-01T01:00:01", From dabcacf3bfda83715e3bc4afb85d10e7ae2630ce Mon Sep 17 00:00:00 2001 From: nicholascar Date: Thu, 9 Dec 2021 15:29:15 +1000 Subject: [PATCH 25/25] fixing MyPy errors --- rdflib/plugins/parsers/hext.py | 8 ++++++++ rdflib/plugins/serializers/hext.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/rdflib/plugins/parsers/hext.py b/rdflib/plugins/parsers/hext.py index b61fc0e43..59e045cf4 100644 --- a/rdflib/plugins/parsers/hext.py +++ b/rdflib/plugins/parsers/hext.py @@ -27,7 +27,14 @@ def _load_json_line(self, line: str): return [x if x != "" else None for x in json.loads(line)] def _parse_hextuple(self, cg: ConjunctiveGraph, tup: List[Union[str, None]]): + # all values check + # subject, predicate, value, datatype cannot be None + # language and graph may be None + if tup[0] is None or tup[1] is None or tup[2] is None or tup[3] is None: + raise ValueError("subject, predicate, value, datatype cannot be None") + # 1 - subject + s: Union[URIRef, BNode] if tup[0].startswith("_"): s = BNode(value=tup[0].replace("_:", "")) else: @@ -37,6 +44,7 @@ def _parse_hextuple(self, cg: ConjunctiveGraph, tup: List[Union[str, None]]): p = URIRef(tup[1]) # 3 - value + o: Union[URIRef, BNode, Literal] if tup[3] == "globalId": o = URIRef(tup[2]) elif tup[3] == "localId": diff --git a/rdflib/plugins/serializers/hext.py b/rdflib/plugins/serializers/hext.py index 858e2d4e8..c86882a2b 100644 --- a/rdflib/plugins/serializers/hext.py +++ b/rdflib/plugins/serializers/hext.py @@ -19,7 +19,7 @@ class HextuplesSerializer(Serializer): def __init__(self, store: Union[Graph, ConjunctiveGraph]): self.default_context: Optional[Node] - if type(store) != Graph: + if isinstance(store, ConjunctiveGraph): self.contexts = list(store.contexts()) if 
store.default_context: self.default_context = store.default_context