diff --git a/.flake8 b/.flake8 index 177f450c8..e44a31d27 100644 --- a/.flake8 +++ b/.flake8 @@ -4,3 +4,5 @@ extend-ignore = # E501: line too long # Disabled so that black can control line length. E501, + # Ignored since this is soon not going to be considered an error, see https://www.flake8rules.com/rules/W503.html + W503, diff --git a/docs/plugin_parsers.rst b/docs/plugin_parsers.rst index 81ab7ae6b..ed351c0ce 100644 --- a/docs/plugin_parsers.rst +++ b/docs/plugin_parsers.rst @@ -13,24 +13,35 @@ The ``html`` parser will auto-detect RDFa, HTurtle or Microdata. It is also possible to pass a mime-type for the ``format`` parameter:: - graph.parse(my_url, format='application/rdf+xml') + graph.parse(my_url, format='application/rdf+xml') If you are not sure what format your file will be, you can use :func:`rdflib.util.guess_format` which will guess based on the file extension. ========= ==================================================================== Name Class ========= ==================================================================== +json-ld :class:`~rdflib.plugins.parsers.jsonld.JsonLDParser` +hext :class:`~rdflib.plugins.parsers.hext.HextuplesParser` html :class:`~rdflib.plugins.parsers.structureddata.StructuredDataParser` -hturtle :class:`~rdflib.plugins.parsers.hturtle.HTurtleParser` -mdata :class:`~rdflib.plugins.parsers.structureddata.MicrodataParser` -microdata :class:`~rdflib.plugins.parsers.structureddata.MicrodataParser` n3 :class:`~rdflib.plugins.parsers.notation3.N3Parser` nquads :class:`~rdflib.plugins.parsers.nquads.NQuadsParser` nt :class:`~rdflib.plugins.parsers.ntriples.NTParser` -rdfa :class:`~rdflib.plugins.parsers.structureddata.RDFaParser` -rdfa1.0 :class:`~rdflib.plugins.parsers.structureddata.RDFa10Parser` -rdfa1.1 :class:`~rdflib.plugins.parsers.structureddata.RDFaParser` trix :class:`~rdflib.plugins.parsers.trix.TriXParser` turtle :class:`~rdflib.plugins.parsers.notation3.TurtleParser` xml 
:class:`~rdflib.plugins.parsers.rdfxml.RDFXMLParser` ========= ==================================================================== + +Multi-graph IDs +--------------- +Note that for correct parsing of multi-graph data, e.g. Trig, HexT, etc., into a ``ConjunctiveGraph`` or a ``Dataset``, +as opposed to a context-unaware ``Graph``, you will need to set the ``publicID`` of the ``ConjunctiveGraph`` or +``Dataset`` to the identifier of the ``default_context`` (default graph), for example:: + + d = Dataset() + d.parse( + data=""" ... """, + format="trig", + publicID=d.default_context.identifier + ) + +(from the file test/test_serializer_hext.py) diff --git a/docs/plugin_serializers.rst b/docs/plugin_serializers.rst index 0aedd566f..249b0aede 100644 --- a/docs/plugin_serializers.rst +++ b/docs/plugin_serializers.rst @@ -11,18 +11,35 @@ passing the name to a graph's :meth:`~rdflib.graph.Graph.serialize` method:: It is also possible to pass a mime-type for the ``format`` parameter:: - graph.serialize(my_url, format='application/rdf+xml') + graph.serialize(my_url, format='application/rdf+xml') ========== =============================================================== Name Class ========== =============================================================== +json-ld :class:`~rdflib.plugins.serializers.jsonld.JsonLDSerializer` n3 :class:`~rdflib.plugins.serializers.n3.N3Serializer` nquads :class:`~rdflib.plugins.serializers.nquads.NQuadsSerializer` nt :class:`~rdflib.plugins.serializers.nt.NTSerializer` +hext :class:`~rdflib.plugins.serializers.hext.HextuplesSerializer` pretty-xml :class:`~rdflib.plugins.serializers.rdfxml.PrettyXMLSerializer` trig :class:`~rdflib.plugins.serializers.trig.TrigSerializer` trix :class:`~rdflib.plugins.serializers.trix.TriXSerializer` turtle :class:`~rdflib.plugins.serializers.turtle.TurtleSerializer` +longturtle :class:`~rdflib.plugins.serializers.longturtle.LongTurtleSerializer` xml :class:`~rdflib.plugins.serializers.rdfxml.XMLSerializer` ========== 
=============================================================== + +JSON-LD +------- +JSON-LD - 'json-ld' - has been incorporated in rdflib since v6.0.0. + +HexTuples +--------- +The HexTuples Serializer - 'hext' - uses the HexTuples format defined at https://github.com/ontola/hextuples. + +For serialization of non-context-aware data sources, e.g. a single ``Graph``, the 'graph' field (6th variable in the +Hextuple) will be an empty string. + +For context-aware (multi-graph) serialization, the 'graph' field of the default graph will be an empty string and +the values for other graphs will be Blank Node IDs or IRIs. diff --git a/rdflib/__init__.py b/rdflib/__init__.py index 9b8daeb0e..105418611 100644 --- a/rdflib/__init__.py +++ b/rdflib/__init__.py @@ -45,7 +45,7 @@ __docformat__ = "restructuredtext en" # The format of the __version__ line is matched by a regex in setup.py -__version__ = "6.03a" +__version__ = "6.0.3" __date__ = "2021-10-10" __all__ = [ diff --git a/rdflib/compare.py b/rdflib/compare.py index 16994fa71..f82564390 100644 --- a/rdflib/compare.py +++ b/rdflib/compare.py @@ -428,7 +428,6 @@ def _traces( candidates = self._get_candidates(coloring) best: List[List[Color]] = [] best_score = None - best_experimental = None best_experimental_score = None last_coloring = None generator: Dict[Node, Set[Node]] = defaultdict(set) diff --git a/rdflib/namespace/__init__.py b/rdflib/namespace/__init__.py index 4b76366f6..8ce4de79b 100644 --- a/rdflib/namespace/__init__.py +++ b/rdflib/namespace/__init__.py @@ -81,7 +81,6 @@ __all__ = ["is_ncname", "split_uri", "Namespace", "ClosedNamespace", "NamespaceManager"] - logger = logging.getLogger(__name__) @@ -192,7 +191,7 @@ def __getitem__(cls, name, default=None): name = str(name) if str(name).startswith("__"): return super().__getitem__(name, default) - if (cls._warn or cls._fail) and not name in cls: + if (cls._warn or cls._fail) and name not in cls: if cls._fail: raise AttributeError(f"term '{name}' not in namespace 
'{cls._NS}'") else: diff --git a/rdflib/parser.py b/rdflib/parser.py index 2a2875d99..8437a2e72 100644 --- a/rdflib/parser.py +++ b/rdflib/parser.py @@ -341,8 +341,7 @@ def create_input_source( input_source = StringInputSource(data) auto_close = True else: - raise RuntimeError( - f"parse data can only str, or bytes. not: {type(data)}") + raise RuntimeError(f"parse data can only str, or bytes. not: {type(data)}") if input_source is None: raise Exception("could not create InputSource") diff --git a/rdflib/plugin.py b/rdflib/plugin.py index c5ffd0939..b7edbc624 100644 --- a/rdflib/plugin.py +++ b/rdflib/plugin.py @@ -169,16 +169,54 @@ def plugins( yield p -# Register Store Plugins -register("default", Store, "rdflib.plugins.stores.memory", "Memory") -register("Memory", Store, "rdflib.plugins.stores.memory", "Memory") -register("SimpleMemory", Store, "rdflib.plugins.stores.memory", "SimpleMemory") -register("Auditable", Store, "rdflib.plugins.stores.auditable", "AuditableStore") -register("Concurrent", Store, "rdflib.plugins.stores.concurrent", "ConcurrentStore") -register("BerkeleyDB", Store, "rdflib.plugins.stores.berkeleydb", "BerkeleyDB") -register("SPARQLStore", Store, "rdflib.plugins.stores.sparqlstore", "SPARQLStore") +# Register Stores register( - "SPARQLUpdateStore", Store, "rdflib.plugins.stores.sparqlstore", "SPARQLUpdateStore" + "default", + Store, + "rdflib.plugins.stores.memory", + "Memory", +) +register( + "Memory", + Store, + "rdflib.plugins.stores.memory", + "Memory", +) +register( + "SimpleMemory", + Store, + "rdflib.plugins.stores.memory", + "SimpleMemory", +) +register( + "Auditable", + Store, + "rdflib.plugins.stores.auditable", + "AuditableStore", +) +register( + "Concurrent", + Store, + "rdflib.plugins.stores.concurrent", + "ConcurrentStore", +) +register( + "BerkeleyDB", + Store, + "rdflib.plugins.stores.berkeleydb", + "BerkeleyDB", +) +register( + "SPARQLStore", + Store, + "rdflib.plugins.stores.sparqlstore", + "SPARQLStore", +) +register( + 
"SPARQLUpdateStore", + Store, + "rdflib.plugins.stores.sparqlstore", + "SPARQLUpdateStore", ) # Register Triple Serializers @@ -188,25 +226,84 @@ def plugins( "rdflib.plugins.serializers.rdfxml", "XMLSerializer", ) -register("xml", Serializer, "rdflib.plugins.serializers.rdfxml", "XMLSerializer") register( - "pretty-xml", Serializer, "rdflib.plugins.serializers.rdfxml", "PrettyXMLSerializer" + "xml", + Serializer, + "rdflib.plugins.serializers.rdfxml", + "XMLSerializer", +) +register( + "pretty-xml", + Serializer, + "rdflib.plugins.serializers.rdfxml", + "PrettyXMLSerializer", +) +register( + "text/n3", + Serializer, + "rdflib.plugins.serializers.n3", + "N3Serializer", +) +register( + "n3", + Serializer, + "rdflib.plugins.serializers.n3", + "N3Serializer", +) +register( + "text/turtle", + Serializer, + "rdflib.plugins.serializers.turtle", + "TurtleSerializer", +) +register( + "turtle", + Serializer, + "rdflib.plugins.serializers.turtle", + "TurtleSerializer", +) +register( + "ttl", + Serializer, + "rdflib.plugins.serializers.turtle", + "TurtleSerializer", ) -register("text/n3", Serializer, "rdflib.plugins.serializers.n3", "N3Serializer") -register("n3", Serializer, "rdflib.plugins.serializers.n3", "N3Serializer") register( - "text/turtle", Serializer, "rdflib.plugins.serializers.turtle", "TurtleSerializer" + "longturtle", + Serializer, + "rdflib.plugins.serializers.longturtle", + "LongTurtleSerializer", ) -register("turtle", Serializer, "rdflib.plugins.serializers.turtle", "TurtleSerializer") -register("turtle2", Serializer, "rdflib.plugins.serializers.turtle2", "TurtleSerializer2") -register("ttl", Serializer, "rdflib.plugins.serializers.turtle", "TurtleSerializer") register( - "application/n-triples", Serializer, "rdflib.plugins.serializers.nt", "NTSerializer" + "application/n-triples", + Serializer, + "rdflib.plugins.serializers.nt", + "NTSerializer", +) +register( + "ntriples", + Serializer, + "rdflib.plugins.serializers.nt", + "NTSerializer", +) +register( + 
"nt", + Serializer, + "rdflib.plugins.serializers.nt", + "NTSerializer", +) +register( + "nt11", + Serializer, + "rdflib.plugins.serializers.nt", + "NT11Serializer", +) +register( + "json-ld", + Serializer, + "rdflib.plugins.serializers.jsonld", + "JsonLDSerializer", ) -register("ntriples", Serializer, "rdflib.plugins.serializers.nt", "NTSerializer") -register("nt", Serializer, "rdflib.plugins.serializers.nt", "NTSerializer") -register("nt11", Serializer, "rdflib.plugins.serializers.nt", "NT11Serializer") -register("json-ld", Serializer, "rdflib.plugins.serializers.jsonld", "JsonLDSerializer") register( "application/ld+json", Serializer, @@ -221,43 +318,180 @@ def plugins( "rdflib.plugins.serializers.nquads", "NQuadsSerializer", ) -register("nquads", Serializer, "rdflib.plugins.serializers.nquads", "NQuadsSerializer") register( - "application/trix", Serializer, "rdflib.plugins.serializers.trix", "TriXSerializer" + "nquads", + Serializer, + "rdflib.plugins.serializers.nquads", + "NQuadsSerializer", ) -register("trix", Serializer, "rdflib.plugins.serializers.trix", "TriXSerializer") register( - "application/trig", Serializer, "rdflib.plugins.serializers.trig", "TrigSerializer" + "application/trix", + Serializer, + "rdflib.plugins.serializers.trix", + "TriXSerializer", +) +register( + "trix", + Serializer, + "rdflib.plugins.serializers.trix", + "TriXSerializer", +) +register( + "application/trig", + Serializer, + "rdflib.plugins.serializers.trig", + "TrigSerializer", +) +register( + "trig", + Serializer, + "rdflib.plugins.serializers.trig", + "TrigSerializer", +) +register( + "hext", + Serializer, + "rdflib.plugins.serializers.hext", + "HextuplesSerializer", ) -register("trig", Serializer, "rdflib.plugins.serializers.trig", "TrigSerializer") # Register Triple Parsers -register("application/rdf+xml", Parser, "rdflib.plugins.parsers.rdfxml", "RDFXMLParser") -register("xml", Parser, "rdflib.plugins.parsers.rdfxml", "RDFXMLParser") -register("text/n3", Parser, 
"rdflib.plugins.parsers.notation3", "N3Parser") -register("n3", Parser, "rdflib.plugins.parsers.notation3", "N3Parser") -register("text/turtle", Parser, "rdflib.plugins.parsers.notation3", "TurtleParser") -register("turtle", Parser, "rdflib.plugins.parsers.notation3", "TurtleParser") -register("ttl", Parser, "rdflib.plugins.parsers.notation3", "TurtleParser") -register("application/n-triples", Parser, "rdflib.plugins.parsers.ntriples", "NTParser") -register("ntriples", Parser, "rdflib.plugins.parsers.ntriples", "NTParser") -register("nt", Parser, "rdflib.plugins.parsers.ntriples", "NTParser") -register("nt11", Parser, "rdflib.plugins.parsers.ntriples", "NTParser") -register("application/ld+json", Parser, "rdflib.plugins.parsers.jsonld", "JsonLDParser") -register("json-ld", Parser, "rdflib.plugins.parsers.jsonld", "JsonLDParser") - +register( + "application/rdf+xml", + Parser, + "rdflib.plugins.parsers.rdfxml", + "RDFXMLParser", +) +register( + "xml", + Parser, + "rdflib.plugins.parsers.rdfxml", + "RDFXMLParser", +) +register( + "text/n3", + Parser, + "rdflib.plugins.parsers.notation3", + "N3Parser", +) +register( + "n3", + Parser, + "rdflib.plugins.parsers.notation3", + "N3Parser", +) +register( + "text/turtle", + Parser, + "rdflib.plugins.parsers.notation3", + "TurtleParser", +) +register( + "turtle", + Parser, + "rdflib.plugins.parsers.notation3", + "TurtleParser", +) +register( + "ttl", + Parser, + "rdflib.plugins.parsers.notation3", + "TurtleParser", +) +register( + "application/n-triples", + Parser, + "rdflib.plugins.parsers.ntriples", + "NTParser", +) +register( + "ntriples", + Parser, + "rdflib.plugins.parsers.ntriples", + "NTParser", +) +register( + "nt", + Parser, + "rdflib.plugins.parsers.ntriples", + "NTParser", +) +register( + "nt11", + Parser, + "rdflib.plugins.parsers.ntriples", + "NTParser", +) +register( + "application/ld+json", + Parser, + "rdflib.plugins.parsers.jsonld", + "JsonLDParser", +) +register( + "json-ld", + Parser, + 
"rdflib.plugins.parsers.jsonld", + "JsonLDParser", +) # Register Quad Parsers -register("application/n-quads", Parser, "rdflib.plugins.parsers.nquads", "NQuadsParser") -register("nquads", Parser, "rdflib.plugins.parsers.nquads", "NQuadsParser") -register("application/trix", Parser, "rdflib.plugins.parsers.trix", "TriXParser") -register("trix", Parser, "rdflib.plugins.parsers.trix", "TriXParser") -register("application/trig", Parser, "rdflib.plugins.parsers.trig", "TrigParser") -register("trig", Parser, "rdflib.plugins.parsers.trig", "TrigParser") +register( + "application/n-quads", + Parser, + "rdflib.plugins.parsers.nquads", + "NQuadsParser", +) +register( + "nquads", + Parser, + "rdflib.plugins.parsers.nquads", + "NQuadsParser", +) +register( + "application/trix", + Parser, + "rdflib.plugins.parsers.trix", + "TriXParser", +) +register( + "trix", + Parser, + "rdflib.plugins.parsers.trix", + "TriXParser", +) +register( + "application/trig", + Parser, + "rdflib.plugins.parsers.trig", + "TrigParser", +) +register( + "trig", + Parser, + "rdflib.plugins.parsers.trig", + "TrigParser", +) +register( + "hext", + Parser, + "rdflib.plugins.parsers.hext", + "HextuplesParser", +) # Register SPARQL Processors -register("sparql", Result, "rdflib.plugins.sparql.processor", "SPARQLResult") -register("sparql", Processor, "rdflib.plugins.sparql.processor", "SPARQLProcessor") +register( + "sparql", + Result, + "rdflib.plugins.sparql.processor", + "SPARQLResult", +) +register( + "sparql", + Processor, + "rdflib.plugins.sparql.processor", + "SPARQLProcessor", +) register( "sparql", UpdateProcessor, @@ -311,7 +545,10 @@ def plugins( # Register SPARQL Result Parsers register( - "xml", ResultParser, "rdflib.plugins.sparql.results.xmlresults", "XMLResultParser" + "xml", + ResultParser, + "rdflib.plugins.sparql.results.xmlresults", + "XMLResultParser", ) register( "application/sparql-results+xml", @@ -344,7 +581,10 @@ def plugins( "JSONResultParser", ) register( - "csv", ResultParser, 
"rdflib.plugins.sparql.results.csvresults", "CSVResultParser" + "csv", + ResultParser, + "rdflib.plugins.sparql.results.csvresults", + "CSVResultParser", ) register( "text/csv", @@ -353,7 +593,10 @@ def plugins( "CSVResultParser", ) register( - "tsv", ResultParser, "rdflib.plugins.sparql.results.tsvresults", "TSVResultParser" + "tsv", + ResultParser, + "rdflib.plugins.sparql.results.tsvresults", + "TSVResultParser", ) register( "text/tab-separated-values", diff --git a/rdflib/plugins/parsers/hext.py b/rdflib/plugins/parsers/hext.py new file mode 100644 index 000000000..59e045cf4 --- /dev/null +++ b/rdflib/plugins/parsers/hext.py @@ -0,0 +1,88 @@ +""" +This is a rdflib plugin for parsing Hextuple files, which are Newline-Delimited JSON +(ndjson) files, into Conjunctive. The store that backs the graph *must* be able to +handle contexts, i.e. multiple graphs. +""" +import json + +from typing import List, Union +from rdflib.parser import Parser +from rdflib import ConjunctiveGraph, URIRef, Literal, BNode +import warnings + + +__all__ = ["HextuplesParser"] + + +class HextuplesParser(Parser): + """ + An RDFLib parser for Hextuples + + """ + + def __init__(self): + pass + + def _load_json_line(self, line: str): + return [x if x != "" else None for x in json.loads(line)] + + def _parse_hextuple(self, cg: ConjunctiveGraph, tup: List[Union[str, None]]): + # all values check + # subject, predicate, value, datatype cannot be None + # language and graph may be None + if tup[0] is None or tup[1] is None or tup[2] is None or tup[3] is None: + raise ValueError("subject, predicate, value, datatype cannot be None") + + # 1 - subject + s: Union[URIRef, BNode] + if tup[0].startswith("_"): + s = BNode(value=tup[0].replace("_:", "")) + else: + s = URIRef(tup[0]) + + # 2 - predicate + p = URIRef(tup[1]) + + # 3 - value + o: Union[URIRef, BNode, Literal] + if tup[3] == "globalId": + o = URIRef(tup[2]) + elif tup[3] == "localId": + o = BNode(value=tup[2].replace("_:", "")) + else: # 
literal + if tup[4] is None: + o = Literal(tup[2], datatype=URIRef(tup[3])) + else: + o = Literal(tup[2], lang=tup[4]) + + # 6 - context + if tup[5] is not None: + c = URIRef(tup[5]) + cg.add((s, p, o, c)) + else: + cg.add((s, p, o)) + + def parse(self, source, graph, **kwargs): + if kwargs.get("encoding") not in [None, "utf-8"]: + warnings.warn( + f"Hextuples files are always utf-8 encoded, " + f"I was passed: {kwargs.get('encoding')}, " + "but I'm still going to use utf-8" + ) + + assert ( + graph.store.context_aware + ), "Hextuples Parser needs a context-aware store!" + + cg = ConjunctiveGraph(store=graph.store, identifier=graph.identifier) + cg.default_context = graph + + # handle different source types - only file and string (data) for now + if hasattr(source, "file"): + with open(source.file.name) as fp: + for l in fp: + self._parse_hextuple(cg, self._load_json_line(l)) + elif hasattr(source, "_InputSource__bytefile"): + if hasattr(source._InputSource__bytefile, "wrapped"): + for l in source._InputSource__bytefile.wrapped.strip().splitlines(): + self._parse_hextuple(cg, self._load_json_line(l)) diff --git a/rdflib/plugins/parsers/jsonld.py b/rdflib/plugins/parsers/jsonld.py index 77fa8b4dc..c816947a4 100644 --- a/rdflib/plugins/parsers/jsonld.py +++ b/rdflib/plugins/parsers/jsonld.py @@ -94,7 +94,7 @@ def parse(self, source, sink, **kwargs): if encoding not in ("utf-8", "utf-16"): warnings.warn( "JSON should be encoded as unicode. 
" - + "Given encoding was: %s" % encoding + "Given encoding was: %s" % encoding ) base = kwargs.get("base") or sink.absolutize( diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py index cfdc8568d..31b20036a 100755 --- a/rdflib/plugins/parsers/notation3.py +++ b/rdflib/plugins/parsers/notation3.py @@ -43,7 +43,6 @@ from rdflib.term import URIRef, BNode, Literal, Variable, _XSD_PFX, _unique_id from rdflib.graph import QuotedGraph, ConjunctiveGraph, Graph from rdflib.compat import long_type -from rdflib.compat import narrow_build __all__ = [ "BadSyntax", @@ -327,7 +326,7 @@ def unicodeExpand(m): N3CommentCharacter = "#" # For unix script # ! compatibility -########################################## Parse string to sink +# Parse string to sink # # Regular expressions: eol = re.compile(r"[ \t]*(#[^\n]*)?\r?\n") # end of line, poss. w/comment @@ -604,10 +603,7 @@ def directive(self, argstr, i): self.BadSyntax( argstr, j, - "With no base URI, cannot use " - + "relative URI in @prefix <" - + ns - + ">", + f"With no base URI, cannot use relative URI in @prefix <{ns}>", ) assert ":" in ns # must be absolute self._bindings[t[0][0]] = ns diff --git a/rdflib/plugins/parsers/ntriples.py b/rdflib/plugins/parsers/ntriples.py index 9a66df7f4..5e9ab7c82 100644 --- a/rdflib/plugins/parsers/ntriples.py +++ b/rdflib/plugins/parsers/ntriples.py @@ -54,7 +54,6 @@ def triple(self, s, p, o): def unquote(s): """Unquote an N-Triples string.""" if not validate: - if isinstance(s, str): # nquads s = decodeUnicodeEscape(s) else: diff --git a/rdflib/plugins/serializers/hext.py b/rdflib/plugins/serializers/hext.py new file mode 100644 index 000000000..c86882a2b --- /dev/null +++ b/rdflib/plugins/serializers/hext.py @@ -0,0 +1,131 @@ +""" +HextuplesSerializer RDF graph serializer for RDFLib. +See for details about the format. 
+""" +from typing import IO, Optional, Union +from rdflib.graph import Graph, ConjunctiveGraph +from rdflib.term import Literal, URIRef, Node, BNode +from rdflib.serializer import Serializer +from rdflib.namespace import RDF, XSD +import warnings + +__all__ = ["HextuplesSerializer"] + + +class HextuplesSerializer(Serializer): + """ + Serializes RDF graphs to NTriples format. + """ + + def __init__(self, store: Union[Graph, ConjunctiveGraph]): + self.default_context: Optional[Node] + if isinstance(store, ConjunctiveGraph): + self.contexts = list(store.contexts()) + if store.default_context: + self.default_context = store.default_context + self.contexts.append(store.default_context) + else: + self.default_context = None + else: + self.contexts = [store] + self.default_context = None + + Serializer.__init__(self, store) + + def serialize( + self, + stream: IO[bytes], + base: Optional[str] = None, + encoding: Optional[str] = "utf-8", + **kwargs, + ): + if base is not None: + warnings.warn( + "base has no meaning for Hextuples serialization. " + "I will ignore this value" + ) + + if encoding not in [None, "utf-8"]: + warnings.warn( + f"Hextuples files are always utf-8 encoded. " + f"I was passed: {encoding}, " + "but I'm still going to use utf-8 anyway!" 
+ ) + + if self.store.formula_aware is True: + raise Exception( + "Hextuple serialization can't (yet) handle formula-aware stores" + ) + + for context in self.contexts: + for triple in context: + hl = self._hex_line(triple, context) + if hl is not None: + stream.write(hl.encode()) + + def _hex_line(self, triple, context): + if isinstance( + triple[0], (URIRef, BNode) + ): # exclude QuotedGraph and other objects + # value + value = ( + triple[2] + if isinstance(triple[2], Literal) + else self._iri_or_bn(triple[2]) + ) + + # datatype + if isinstance(triple[2], URIRef): + # datatype = "http://www.w3.org/1999/02/22-rdf-syntax-ns#namedNode" + datatype = "globalId" + elif isinstance(triple[2], BNode): + # datatype = "http://www.w3.org/1999/02/22-rdf-syntax-ns#blankNode" + datatype = "localId" + elif isinstance(triple[2], Literal): + if triple[2].datatype is not None: + datatype = f"{triple[2].datatype}" + else: + if triple[2].language is not None: # language + datatype = RDF.langString + else: + datatype = XSD.string + else: + return None # can't handle non URI, BN or Literal Object (QuotedGraph) + + # language + if isinstance(triple[2], Literal): + if triple[2].language is not None: + language = f"{triple[2].language}" + else: + language = "" + else: + language = "" + + return '["%s", "%s", "%s", "%s", "%s", "%s"]\n' % ( + self._iri_or_bn(triple[0]), + triple[1], + value, + datatype, + language, + self._context(context), + ) + else: # do not return anything for non-IRIs or BNs, e.g. 
QuotedGraph, Subjects + return None + + def _iri_or_bn(self, i_): + if isinstance(i_, URIRef): + return f"{i_}" + elif isinstance(i_, BNode): + return f"{i_.n3()}" + else: + return None + + def _context(self, context): + if self.default_context is None: + return "" + if context.identifier == "urn:x-rdflib:default": + return "" + elif context is not None and self.default_context is not None: + if context.identifier == self.default_context.identifier: + return "" + return context.identifier diff --git a/rdflib/plugins/serializers/jsonld.py b/rdflib/plugins/serializers/jsonld.py index f5067e287..002f04f75 100644 --- a/rdflib/plugins/serializers/jsonld.py +++ b/rdflib/plugins/serializers/jsonld.py @@ -62,7 +62,7 @@ def serialize( stream: IO[bytes], base: Optional[str] = None, encoding: Optional[str] = None, - **kwargs + **kwargs, ): # TODO: docstring w. args and return value encoding = encoding or "utf-8" @@ -364,18 +364,18 @@ def to_raw_value(self, graph, s, o, nodemap): else: return v - def to_collection(self, graph, l): - if l != RDF.nil and not graph.value(l, RDF.first): + def to_collection(self, graph, l_): + if l_ != RDF.nil and not graph.value(l_, RDF.first): return None list_nodes = [] - chain = set([l]) - while l: - if l == RDF.nil: + chain = set([l_]) + while l_: + if l_ == RDF.nil: return list_nodes - if isinstance(l, URIRef): + if isinstance(l_, URIRef): return None first, rest = None, None - for p, o in graph.predicate_objects(l): + for p, o in graph.predicate_objects(l_): if not first and p == RDF.first: first = o elif not rest and p == RDF.rest: @@ -383,7 +383,7 @@ def to_collection(self, graph, l): elif p != RDF.type or o != RDF.List: return None list_nodes.append(first) - l = rest - if l in chain: + l_ = rest + if l_ in chain: return None - chain.add(l) + chain.add(l_) diff --git a/rdflib/plugins/serializers/turtle2.py b/rdflib/plugins/serializers/longturtle.py similarity index 93% rename from rdflib/plugins/serializers/turtle2.py rename to 
rdflib/plugins/serializers/longturtle.py index 49e9cbadb..0d50e47a3 100644 --- a/rdflib/plugins/serializers/turtle2.py +++ b/rdflib/plugins/serializers/longturtle.py @@ -1,8 +1,8 @@ """ -Turtle2 RDF graph serializer for RDFLib. +LongTurtle RDF graph serializer for RDFLib. See for syntax specification. -This variant, turtle2 as opposed to just turtle, makes some small format changes +This variant, longturtle as opposed to just turtle, makes some small format changes to turtle - the original turtle serializer. It: * uses PREFIX instead of @prefix @@ -21,7 +21,7 @@ from .turtle import RecursiveSerializer from rdflib.namespace import RDF -__all__ = ["TurtleSerializer2"] +__all__ = ["LongTurtleSerializer"] SUBJECT = 0 VERB = 1 @@ -31,14 +31,14 @@ _SPACIOUS_OUTPUT = False -class TurtleSerializer2(RecursiveSerializer): +class LongTurtleSerializer(RecursiveSerializer): - short_name = "turtle2" + short_name = "longturtle" indentString = " " def __init__(self, store): self._ns_rewrite = {} - super(TurtleSerializer2, self).__init__(store) + super(LongTurtleSerializer, self).__init__(store) self.keywords = {RDF.type: "a"} self.reset() self.stream = None @@ -66,11 +66,11 @@ def addNamespace(self, prefix, namespace): prefix = self._ns_rewrite.get(prefix, prefix) - super(TurtleSerializer2, self).addNamespace(prefix, namespace) + super(LongTurtleSerializer, self).addNamespace(prefix, namespace) return prefix def reset(self): - super(TurtleSerializer2, self).reset() + super(LongTurtleSerializer, self).reset() self._shortNames = {} self._started = False self._ns_rewrite = {} @@ -78,7 +78,7 @@ def reset(self): def serialize(self, stream, base=None, encoding=None, spacious=None, **args): self.reset() self.stream = stream - # if base is given here, use that, if not and a base is set for the graph use that + # if base is given here, use, if not and a base is set for the graph use that if base is not None: self.base = base elif self.store.base is not None: @@ -107,7 +107,7 @@ def 
serialize(self, stream, base=None, encoding=None, spacious=None, **args): self.base = None def preprocessTriple(self, triple): - super(TurtleSerializer2, self).preprocessTriple(triple) + super(LongTurtleSerializer, self).preprocessTriple(triple) for i, node in enumerate(triple): if node in self.keywords: continue @@ -123,12 +123,9 @@ def getQName(self, uri, gen_prefix=True): if not isinstance(uri, URIRef): return None - parts = None - try: parts = self.store.compute_qname(uri, generate=gen_prefix) except: - # is the uri a namespace in itself? pfx = self.store.store.prefix(uri) diff --git a/rdflib/plugins/serializers/nquads.py b/rdflib/plugins/serializers/nquads.py index e76c747d4..ce2d2f7c3 100644 --- a/rdflib/plugins/serializers/nquads.py +++ b/rdflib/plugins/serializers/nquads.py @@ -25,12 +25,15 @@ def serialize( stream: IO[bytes], base: Optional[str] = None, encoding: Optional[str] = None, - **args + **args, ): if base is not None: warnings.warn("NQuadsSerializer does not support base.") if encoding is not None and encoding.lower() != self.encoding.lower(): - warnings.warn("NQuadsSerializer does not use custom encoding.") + warnings.warn( + "NQuadsSerializer does not use custom encoding." 
+ + "Given encoding was: %s" % encoding + ) encoding = self.encoding for context in self.store.contexts(): for triple in context: diff --git a/rdflib/plugins/serializers/nt.py b/rdflib/plugins/serializers/nt.py index 467de4613..7862528d5 100644 --- a/rdflib/plugins/serializers/nt.py +++ b/rdflib/plugins/serializers/nt.py @@ -22,23 +22,25 @@ class NTSerializer(Serializer): def __init__(self, store: Graph): Serializer.__init__(self, store) - self.encoding = "ascii" # n-triples are ascii encoded def serialize( self, stream: IO[bytes], base: Optional[str] = None, - encoding: Optional[str] = None, - **args + encoding: Optional[str] = "utf-8", + **args, ): if base is not None: warnings.warn("NTSerializer does not support base.") - if encoding is not None and encoding.lower() != self.encoding.lower(): - warnings.warn("NTSerializer does not use custom encoding.") - encoding = self.encoding + if encoding != "utf-8": + warnings.warn( + "NTSerializer always uses UTF-8 encoding." + + "Given encoding was: %s" % encoding + ) + for triple in self.store: - stream.write(_nt_row(triple).encode(self.encoding, "_rdflib_nt_escape")) - stream.write("\n".encode("latin-1")) + stream.write(_nt_row(triple).encode()) + stream.write("\n".encode()) class NT11Serializer(NTSerializer): diff --git a/rdflib/plugins/serializers/rdfxml.py b/rdflib/plugins/serializers/rdfxml.py index 901d911d9..790c23939 100644 --- a/rdflib/plugins/serializers/rdfxml.py +++ b/rdflib/plugins/serializers/rdfxml.py @@ -1,4 +1,4 @@ -from typing import IO, Dict, Optional, Set, cast +from typing import IO, Dict, Optional, Set from rdflib.plugins.serializers.xmlwriter import XMLWriter from rdflib.namespace import Namespace, RDF, RDFS # , split_uri @@ -46,7 +46,7 @@ def serialize( stream: IO[bytes], base: Optional[str] = None, encoding: Optional[str] = None, - **args + **args, ): # if base is given here, use that, if not and a base is set for the graph use that if base is not None: @@ -171,7 +171,7 @@ def serialize( stream: 
IO[bytes], base: Optional[str] = None, encoding: Optional[str] = None, - **args + **args, ): self.__serialized: Dict[Identifier, int] = {} store = self.store diff --git a/rdflib/plugins/serializers/trig.py b/rdflib/plugins/serializers/trig.py index 5a606e401..3aa9ca689 100644 --- a/rdflib/plugins/serializers/trig.py +++ b/rdflib/plugins/serializers/trig.py @@ -60,7 +60,7 @@ def serialize( base: Optional[str] = None, encoding: Optional[str] = None, spacious: Optional[bool] = None, - **args + **args, ): self.reset() self.stream = stream diff --git a/rdflib/plugins/serializers/trix.py b/rdflib/plugins/serializers/trix.py index 1612d815c..63d58c44d 100644 --- a/rdflib/plugins/serializers/trix.py +++ b/rdflib/plugins/serializers/trix.py @@ -28,7 +28,7 @@ def serialize( stream: IO[bytes], base: Optional[str] = None, encoding: Optional[str] = None, - **args + **args, ): nm = self.store.namespace_manager diff --git a/rdflib/plugins/serializers/xmlwriter.py b/rdflib/plugins/serializers/xmlwriter.py index b0f1a06e9..99d1e7677 100644 --- a/rdflib/plugins/serializers/xmlwriter.py +++ b/rdflib/plugins/serializers/xmlwriter.py @@ -100,8 +100,8 @@ def qname(self, uri): for pre, ns in self.extra_ns.items(): if uri.startswith(ns): if pre != "": - return ":".join(pre, uri[len(ns) :]) + return ":".join(pre, uri[len(ns):]) else: - return uri[len(ns) :] + return uri[len(ns):] return self.nm.qname_strict(uri) diff --git a/rdflib/query.py b/rdflib/query.py index 0bc842841..dd9117084 100644 --- a/rdflib/query.py +++ b/rdflib/query.py @@ -6,7 +6,7 @@ import types from typing import IO, TYPE_CHECKING, List, Optional, Union, cast -from io import BytesIO, BufferedIOBase +from io import BytesIO from urllib.parse import urlparse @@ -333,7 +333,6 @@ def __eq__(self, other): return self.vars == other.vars and self.bindings == other.bindings else: return self.graph == other.graph - except: return False diff --git a/rdflib/serializer.py b/rdflib/serializer.py index 74f29544b..15a91d968 100644 --- 
a/rdflib/serializer.py +++ b/rdflib/serializer.py @@ -30,7 +30,7 @@ def serialize( stream: IO[bytes], base: Optional[str] = None, encoding: Optional[str] = None, - **args + **args, ) -> None: """Abstract method""" diff --git a/rdflib/tools/rdf2dot.py b/rdflib/tools/rdf2dot.py index b6bed525b..afaabd318 100644 --- a/rdflib/tools/rdf2dot.py +++ b/rdflib/tools/rdf2dot.py @@ -94,12 +94,10 @@ def node(x): return nodes[x] def label(x, g): - for labelProp in LABEL_PROPERTIES: - l = g.value(x, labelProp) - if l: - return l - + l_ = g.value(x, labelProp) + if l_: + return l_ try: return g.namespace_manager.compute_qname(x)[2] except: diff --git a/test/n3/n3-writer-test-29.n3 b/test/n3/n3-writer-test-29.n3 index 86cf56e7a..b6590cbd8 100644 --- a/test/n3/n3-writer-test-29.n3 +++ b/test/n3/n3-writer-test-29.n3 @@ -1,23 +1,23 @@ -# Test qname-ization - -@prefix : . -@prefix ns: . -@prefix ns2: . -@prefix ex: . - -# Ensure we don't write ns:p1/p2 (illegal URI) -:x "1" . - -# Legal URI -:x "1" . - -# Numeric namespace prefix: gives a warning on reading -# as Jena models work on XML rules. -#@prefix 1: . -:x "1" . - -# Numberic localname is allowed. -:x ex:1 "2" . - -# As is _1 -:x ex:_1 "rdf:_1 test" . +# Test qname-ization + +@prefix : . +@prefix ns: . +@prefix ns2: . +@prefix ex: . + +# Ensure we don't write ns:p1/p2 (illegal URI) +:x "1" . + +# Legal URI +:x "1" . + +# Numeric namespace prefix: gives a warning on reading +# as Jena models work on XML rules. +#@prefix 1: . +:x "1" . + +# Numeric localname is allowed. +:x ex:1 "2" . + +# As is _1 +:x ex:_1 "rdf:_1 test" . diff --git a/test/n3/strquot.n3 b/test/n3/strquot.n3 index c421c11f6..13da792e7 100644 --- a/test/n3/strquot.n3 +++ b/test/n3/strquot.n3 @@ -1,5 +1,5 @@ - @prefix : <#> . - +@prefix : <#> . + <> """testing string parsing in N3. Hmm... how much of this is in the primer? How much should be there? @@ -9,14 +9,14 @@ in python is sufficiently deployed nor does pythonwin on TimBL's laptop). """ . 
- + :martin :familyName "D\u00FCrst" . - + :x :prop "simple string" . - + :y :prop """triple quoted string with newlines in it.""" . - + :z :prop """string with " escaped quote marks""" . :zz :escapes "\\\"\a\b\f\r\t\v" . diff --git a/test/test_n3_suite.py b/test/test_n3_suite.py index b4a96abb0..a294d2bc4 100644 --- a/test/test_n3_suite.py +++ b/test/test_n3_suite.py @@ -1,5 +1,4 @@ import os -import sys import logging import pytest diff --git a/test/test_nt_misc.py b/test/test_nt_misc.py index a90798591..710774871 100644 --- a/test/test_nt_misc.py +++ b/test/test_nt_misc.py @@ -33,15 +33,15 @@ def testIssue859(self): def testIssue78(self): g = Graph() g.add((URIRef("foo"), URIRef("foo"), Literal("R\u00E4ksm\u00F6rg\u00E5s"))) - s = g.serialize(format="nt", encoding="latin-1") - self.assertEqual(type(s), bytes) - self.assertTrue(r"R\u00E4ksm\u00F6rg\u00E5s".encode("latin-1") in s) + s = g.serialize(format="nt") + self.assertEqual(type(s), str) + self.assertTrue("R\u00E4ksm\u00F6rg\u00E5s" in s) def testIssue146(self): g = Graph() g.add((URIRef("foo"), URIRef("foo"), Literal("test\n", lang="en"))) - s = g.serialize(format="nt", encoding="latin-1").strip() - self.assertEqual(s, b' "test\\n"@en .') + s = g.serialize(format="nt").strip() + self.assertEqual(s, ' "test\\n"@en .') def testIssue1144_rdflib(self): fname = "test/nt/lists-02.nt" diff --git a/test/test_parser_hext.py b/test/test_parser_hext.py new file mode 100644 index 000000000..3253922f9 --- /dev/null +++ b/test/test_parser_hext.py @@ -0,0 +1,114 @@ +import sys +from pathlib import Path +sys.path.append(str(Path(__file__).parent.parent.absolute())) +from rdflib import Dataset, ConjunctiveGraph, Literal +from rdflib.namespace import XSD + + +def test_small_string(): + s = """ + ["http://example.com/s01", "http://example.com/a", "http://example.com/Type1", "globalId", "", ""] + ["http://example.com/s01", "http://example.com/label", "This is a Label", "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", 
"en", ""] + ["http://example.com/s01", "http://example.com/comment", "This is a comment", "http://www.w3.org/2001/XMLSchema#string", "", ""] + ["http://example.com/s01", "http://example.com/creationDate", "2021-12-01", "http://www.w3.org/2001/XMLSchema#date", "", ""] + ["http://example.com/s01", "http://example.com/creationTime", "2021-12-01T12:13:00", "http://www.w3.org/2001/XMLSchema#dateTime", "", ""] + ["http://example.com/s01", "http://example.com/age", "42", "http://www.w3.org/2001/XMLSchema#integer", "", ""] + ["http://example.com/s01", "http://example.com/trueFalse", "false", ",http://www.w3.org/2001/XMLSchema#boolean", "", ""] + ["http://example.com/s01", "http://example.com/op1", "http://example.com/o1", "globalId", "", ""] + ["http://example.com/s01", "http://example.com/op1", "http://example.com/o2", "globalId", "", ""] + ["http://example.com/s01", "http://example.com/op2", "http://example.com/o3", "globalId", "", ""] + """ + d = Dataset().parse(data=s, format="hext") + assert len(d) == 10 + + +def test_small_file_singlegraph(): + d = Dataset().parse(Path(__file__).parent / "test_parser_hext_singlegraph.ndjson", format="hext") + assert len(d) == 10 + + +def test_small_file_multigraph(): + d = ConjunctiveGraph() + assert len(d) == 0 + d.parse( + Path(__file__).parent / "test_parser_hext_multigraph.ndjson", + format="hext", + publicID=d.default_context.identifier + ) + + """There are 22 lines in the file test_parser_hext_multigraph.ndjson. 
When loaded + into a Dataset, we get only 18 quads since the dataset can contextualise + the triples and thus deduplicate 4.""" + total_triples = 0 + # count all the triples in the Dataset + for context in d.contexts(): + for triple in context.triples((None, None, None)): + total_triples += 1 + assert total_triples == 18 + + +def test_roundtrip(): + # these are some RDF files that HexT can round-trip since they have no + # literals with no datatype declared: + TEST_DIR = Path(__file__).parent.absolute() / "nt" + files_to_skip = { + "paths-04.nt": "subject literal", + "even_more_literals.nt": "JSON decoding error", + "literals-02.nt": "JSON decoding error", + "more_literals.nt": "JSON decoding error", + "test.ntriples": "JSON decoding error", + "literals-05.nt": "JSON decoding error", + "i18n-01.nt": "JSON decoding error", + "literals-04.nt": "JSON decoding error", + "rdflibtest01.nt": "JSON decoding error", + "rdflibtest05.nt": "JSON decoding error", + } + tests = 0 + skipped = 0 + skip = False + print() + p = TEST_DIR.glob("**/*") + for f in [x for x in p if x.is_file()]: + tests += 1 + print(f"Test {tests}: {f}") + if f.name not in files_to_skip.keys(): + try: + cg = ConjunctiveGraph().parse(f, format="nt") + # print(cg.serialize(format="n3")) + except: + print(f"Skipping: could not NT parse") + skipped += 1 + skip = True + if not skip: + cg2 = ConjunctiveGraph() + cg2.parse( + data=cg.serialize(format="hext"), + format="hext", + publicID=cg2.default_context.identifier + ) + if cg2.context_aware: + for context in cg2.contexts(): + for triple in context.triples((None, None, None)): + if type(triple[2]) == Literal: + if triple[2].datatype == XSD.string: + context.remove((triple[0], triple[1], triple[2])) + context.add((triple[0], triple[1], Literal(str(triple[2])))) + else: + for triple in cg2.triples((None, None, None)): + if type(triple[2]) == Literal: + if triple[2].datatype == XSD.string: + cg2.remove((triple[0], triple[1], triple[2])) + cg2.add((triple[0],
triple[1], Literal(str(triple[2])))) + + # print(cg2.serialize(format="trig")) + assert cg.isomorphic(cg2) + skip = False + else: + print(f"Skipping: {files_to_skip[f.name]}") + + print(f"No. tests: {tests}") + print(f"No. tests skipped: {skipped}") + + +if __name__ == "__main__": + test_small_file_multigraph() diff --git a/test/test_parser_hext_multigraph.ndjson b/test/test_parser_hext_multigraph.ndjson new file mode 100644 index 000000000..45d086e0b --- /dev/null +++ b/test/test_parser_hext_multigraph.ndjson @@ -0,0 +1,22 @@ +["_:n4d7dd184c5824f35aa064f17bd5d1440b1", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "http://www.w3.org/2002/07/owl#Thing", "globalId", "", "http://example.com/g1"] +["_:n4d7dd184c5824f35aa064f17bd5d1440b1", "http://www.w3.org/1999/02/22-rdf-syntax-ns#value", "thingy", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"] +["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", ""] +["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", ""] +["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", "http://example.com/g1"] +["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", "http://example.com/g2"] +["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", ""] +["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", ""] +["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", "http://example.com/g1"] +["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", "http://example.com/g2"] +["http://example.com/s1", "http://example.com/p2", "_:n4d7dd184c5824f35aa064f17bd5d1440b1", "localId", "", "http://example.com/g1"] +["http://example.com/s1", "http://example.com/p3", "Object 3", "http://www.w3.org/2001/XMLSchema#string", "", 
"http://example.com/g1"] +["http://example.com/s1", "http://example.com/p3", "Object 4 - English", "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", "en", "http://example.com/g1"] +["http://example.com/s1", "http://example.com/p4", "2021-12-03", "http://www.w3.org/2001/XMLSchema#date", "", "http://example.com/g1"] +["http://example.com/s1", "http://example.com/p5", "42", "http://www.w3.org/2001/XMLSchema#integer", "", "http://example.com/g1"] +["http://example.com/s1", "http://example.com/p6", "42", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"] +["http://example.com/s11", "http://example.com/p11", "http://example.com/o11", "globalId", "", "http://example.com/g2"] +["http://example.com/s11", "http://example.com/p11", "http://example.com/o12", "globalId", "", "http://example.com/g2"] +["http://example.com/s21", "http://example.com/p21", "http://example.com/o21", "globalId", "", ""] +["http://example.com/s21", "http://example.com/p21", "http://example.com/o21", "globalId", "", ""] +["http://example.com/s21", "http://example.com/p21", "http://example.com/o22", "globalId", "", ""] +["http://example.com/s21", "http://example.com/p21", "http://example.com/o22", "globalId", "", ""] diff --git a/test/test_parser_hext_singlegraph.ndjson b/test/test_parser_hext_singlegraph.ndjson new file mode 100644 index 000000000..bde2774d5 --- /dev/null +++ b/test/test_parser_hext_singlegraph.ndjson @@ -0,0 +1,10 @@ +["http://example.com/s01", "http://example.com/a", "http://example.com/Type1", "globalId", "", ""] +["http://example.com/s01", "http://example.com/label", "This is a Label", "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", "en", ""] +["http://example.com/s01", "http://example.com/comment", "This is a comment", "http://www.w3.org/2001/XMLSchema#string", "", ""] +["http://example.com/s01", "http://example.com/creationDate", "2021-12-01", "http://www.w3.org/2001/XMLSchema#date", "", ""] +["http://example.com/s01", 
"http://example.com/creationTime", "2021-12-01T12:13:00", "http://www.w3.org/2001/XMLSchema#dateTime", "", ""] +["http://example.com/s01", "http://example.com/age", "42", "http://www.w3.org/2001/XMLSchema#integer", "", ""] +["http://example.com/s01", "http://example.com/trueFalse", "false", ",http://www.w3.org/2001/XMLSchema#boolean", "", ""] +["http://example.com/s01", "http://example.com/op1", "http://example.com/o1", "globalId", "", ""] +["http://example.com/s01", "http://example.com/op1", "http://example.com/o2", "globalId", "", ""] +["http://example.com/s01", "http://example.com/op2", "http://example.com/o3", "globalId", "", ""] diff --git a/test/test_prefixTypes.py b/test/test_prefixTypes.py index 045c6056d..57408d4f1 100644 --- a/test/test_prefixTypes.py +++ b/test/test_prefixTypes.py @@ -25,10 +25,9 @@ class PrefixTypesTest(unittest.TestCase): """ def test(self): - s = graph.serialize(format="n3", encoding="latin-1") - print(s) - self.assertTrue(b"foaf:Document" in s) - self.assertTrue(b"xsd:date" in s) + s = graph.serialize(format="n3") + self.assertTrue("foaf:Document" in s) + self.assertTrue("xsd:date" in s) if __name__ == "__main__": diff --git a/test/test_preflabel.py b/test/test_preflabel.py index 77ecbcd3d..39057d48e 100644 --- a/test/test_preflabel.py +++ b/test/test_preflabel.py @@ -10,9 +10,9 @@ class TestPrefLabel(unittest.TestCase): def setUp(self): self.g = ConjunctiveGraph() - self.u = URIRef("http://example.com/foo") - self.g.add([self.u, RDFS.label, Literal("foo")]) - self.g.add([self.u, RDFS.label, Literal("bar")]) + self.u = URIRef("https://example.com/foo") + self.g.add((self.u, RDFS.label, Literal("foo"))) + self.g.add((self.u, RDFS.label, Literal("bar"))) def test_default_label_sorting(self): res = sorted(self.g.preferredLabel(self.u)) @@ -29,7 +29,7 @@ def test_default_label_sorting(self): self.assertEqual(res, tgt) def test_default_preflabel_sorting(self): - self.g.add([self.u, SKOS.prefLabel, Literal("bla")]) + self.g.add((self.u, 
SKOS.prefLabel, Literal("bla"))) res = self.g.preferredLabel(self.u) tgt = [ ( @@ -40,8 +40,8 @@ def test_default_preflabel_sorting(self): self.assertEqual(res, tgt) def test_preflabel_lang_sorting_no_lang_attr(self): - self.g.add([self.u, SKOS.prefLabel, Literal("bla")]) - self.g.add([self.u, SKOS.prefLabel, Literal("blubb", lang="en")]) + self.g.add((self.u, SKOS.prefLabel, Literal("bla"))) + self.g.add((self.u, SKOS.prefLabel, Literal("blubb", lang="en"))) res = sorted(self.g.preferredLabel(self.u)) tgt = [ ( @@ -57,8 +57,8 @@ def test_preflabel_lang_sorting_no_lang_attr(self): self.assertEqual(res, tgt) def test_preflabel_lang_sorting_empty_lang_attr(self): - self.g.add([self.u, SKOS.prefLabel, Literal("bla")]) - self.g.add([self.u, SKOS.prefLabel, Literal("blubb", lang="en")]) + self.g.add((self.u, SKOS.prefLabel, Literal("bla"))) + self.g.add((self.u, SKOS.prefLabel, Literal("blubb", lang="en"))) res = self.g.preferredLabel(self.u, lang="") tgt = [ ( @@ -69,7 +69,7 @@ def test_preflabel_lang_sorting_empty_lang_attr(self): self.assertEqual(res, tgt) def test_preflabel_lang_sorting_en_lang_attr(self): - self.g.add([self.u, SKOS.prefLabel, Literal("blubb", lang="en")]) + self.g.add((self.u, SKOS.prefLabel, Literal("blubb", lang="en"))) res = self.g.preferredLabel(self.u, lang="en") tgt = [ ( diff --git a/test/test_rdfxml.py b/test/test_rdfxml.py index b0b222dc5..e695355c4 100644 --- a/test/test_rdfxml.py +++ b/test/test_rdfxml.py @@ -84,7 +84,7 @@ def cached_file(url): def relative(url): - return url[len(RDFCOREBASE) :] + return url[len(RDFCOREBASE):] def resolve(rel): diff --git a/test/test_roundtrip.py b/test/test_roundtrip.py index f076e576a..ec03a54d1 100644 --- a/test/test_roundtrip.py +++ b/test/test_roundtrip.py @@ -5,17 +5,16 @@ try: from .test_nt_suite import all_nt_files - assert all_nt_files - from .test_n3_suite import all_n3_files + from .test_n3_suite import all_n3_files assert all_n3_files except: from test.test_nt_suite import all_nt_files from 
test.test_n3_suite import all_n3_files """ -Test round-tripping by all serializers/parser that are registerd. +Test round-tripping by all serializers/parser that are registered. This means, you may test more than just core rdflib! run with no arguments to test all formats + all files @@ -27,17 +26,22 @@ tests roundtripping through rdf/xml with only the literals-02 file +HexTuples format, "hext", cannot be used in all roundtrips due to its +addition of xsd:string to literals of no declared type as this breaks +(rdflib) graph isomorphism, and given that its JSON serialization is +simple (lacking), so hext has been excluded from roundtripping here +but provides some roundtrip test functions of its own (see test_parser_hext.py +& test_serializer_hext.py) + """ SKIP = [ - ( - "xml", - "test/n3/n3-writer-test-29.n3", - ), # has predicates that cannot be shortened to strict qnames + ("xml", "test/n3/n3-writer-test-29.n3"), + # has predicates that cannot be shortened to strict qnames ("xml", "test/nt/qname-02.nt"), # uses a property that cannot be qname'd - ("trix", "test/n3/strquot.n3"), # contains charachters forbidden by the xml spec - ("xml", "test/n3/strquot.n3"), # contains charachters forbidden by the xml spec + ("trix", "test/n3/strquot.n3"), # contains characters forbidden by the xml spec + ("xml", "test/n3/strquot.n3"), # contains characters forbidden by the xml spec ("json-ld", "test/nt/keywords-04.nt"), # known NT->JSONLD problem ("json-ld", "test/n3/example-misc.n3"), # known N3->JSONLD problem ("json-ld", "test/n3/n3-writer-test-16.n3"), # known N3->JSONLD problem @@ -98,11 +102,12 @@ def get_cases(): formats = parsers.intersection(serializers) for testfmt in formats: - if "/" in testfmt: - continue # skip double testing - for f, infmt in all_nt_files(): - if (testfmt, f) not in SKIP: - yield roundtrip, (infmt, testfmt, f) + if testfmt != "hext": + if "/" in testfmt: + continue # skip double testing + for f, infmt in all_nt_files(): + if (testfmt, f) not in 
SKIP: + yield roundtrip, (infmt, testfmt, f) @pytest.mark.parametrize("checker, args", get_cases()) @@ -120,13 +125,18 @@ def get_n3_test(): formats = parsers.intersection(serializers) for testfmt in formats: - if "/" in testfmt: - continue # skip double testing - for f, infmt in all_n3_files(): - if (testfmt, f) not in SKIP: - yield roundtrip, (infmt, testfmt, f) + if testfmt != "hext": + if "/" in testfmt: + continue # skip double testing + for f, infmt in all_n3_files(): + if (testfmt, f) not in SKIP: + yield roundtrip, (infmt, testfmt, f) @pytest.mark.parametrize("checker, args", get_n3_test()) def test_n3(checker, args): checker(args) + + +if __name__ == "__main__": + print("hi") diff --git a/test/test_serialize.py b/test/test_serializer.py similarity index 100% rename from test/test_serialize.py rename to test/test_serializer.py diff --git a/test/test_serializer_hext.py b/test/test_serializer_hext.py new file mode 100644 index 000000000..c322a211c --- /dev/null +++ b/test/test_serializer_hext.py @@ -0,0 +1,223 @@ +import sys +from pathlib import Path +sys.path.append(str(Path(__file__).parent.parent.absolute())) +from rdflib import Dataset, Graph +import json + + +def test_hext_graph(): + """Tests single-graph (not context-aware) data""" + g = Graph() + turtle_data = """ + PREFIX ex: + PREFIX owl: + PREFIX rdf: + PREFIX xsd: + + ex:s1 + ex:p1 ex:o1 , ex:o2 ; + ex:p2 [ + a owl:Thing ; + rdf:value "thingy" ; + ] ; + ex:p3 "Object 3" , "Object 4 - English"@en ; + ex:p4 "2021-12-03"^^xsd:date ; + ex:p5 42 ; + ex:p6 "42" ; + ex:p7 true ; + ex:p8 "false"^^xsd:boolean ; + .
+ """ + + g.parse(data=turtle_data, format="turtle") + out = g.serialize(format="hext") + # note: cant' test for BNs in result as they will be different ever time + testing_lines = [ + [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", ""]'], + [False, '["http://example.com/s1", "http://example.com/p3", "Object 3", "http://www.w3.org/2001/XMLSchema#string", "", ""]'], + [False, '["http://example.com/s1", "http://example.com/p3", "Object 4 - English", "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", "en", ""]'], + [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", ""]'], + [False, '["http://example.com/s1", "http://example.com/p4", "2021-12-03", "http://www.w3.org/2001/XMLSchema#date", "", ""]'], + [False, '["http://example.com/s1", "http://example.com/p6", "42", "http://www.w3.org/2001/XMLSchema#string", "", ""]'], + [False, '["http://example.com/s1", "http://example.com/p7", "true", "http://www.w3.org/2001/XMLSchema#boolean", "", ""]'], + [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#value", "thingy", "http://www.w3.org/2001/XMLSchema#string", "", ""]'], + [False, '["http://example.com/s1", "http://example.com/p2"'], + [False, '["http://example.com/s1", "http://example.com/p5", "42", "http://www.w3.org/2001/XMLSchema#integer", "", ""]'], + [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "http://www.w3.org/2002/07/owl#Thing", "globalId", "", ""]'], + [False, '["http://example.com/s1", "http://example.com/p8", "false", "http://www.w3.org/2001/XMLSchema#boolean", "", ""]'], + ] + for line in out.splitlines(): + for test in testing_lines: + if test[1] in line: + test[0] = True + + assert all([x[0] for x in testing_lines]) + + +def test_hext_dataset(): + """Tests context-aware (multigraph) data""" + d = Dataset() + trig_data = """ + PREFIX ex: + PREFIX owl: + PREFIX rdf: + PREFIX xsd: + + ex:g1 { + ex:s1 + ex:p1 ex:o1 , ex:o2 ; + ex:p2 [ + a 
owl:Thing ; + rdf:value "thingy" ; + ] ; + ex:p3 "Object 3" , "Object 4 - English"@en ; + ex:p4 "2021-12-03"^^xsd:date ; + ex:p5 42 ; + ex:p6 "42" ; + . + } + + ex:g2 { + ex:s1 + ex:p1 ex:o1 , ex:o2 ; + . + ex:s11 ex:p11 ex:o11 , ex:o12 . + } + + # default graph triples + ex:s1 ex:p1 ex:o1 , ex:o2 . + ex:s21 ex:p21 ex:o21 , ex:o22 . + """ + d.parse(data=trig_data, format="trig", publicID=d.default_context.identifier) + out = d.serialize(format="hext") + # note: cant' test for BNs in result as they will be different ever time + testing_lines = [ + [False, '["http://example.com/s21", "http://example.com/p21", "http://example.com/o21", "globalId", "", ""]'], + [False, '["http://example.com/s21", "http://example.com/p21", "http://example.com/o22", "globalId", "", ""]'], + [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", ""]'], + [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", ""]'], + [False, '["http://example.com/s11", "http://example.com/p11", "http://example.com/o12", "globalId", "", "http://example.com/g2"]'], + [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", "http://example.com/g2"]'], + [False, '["http://example.com/s11", "http://example.com/p11", "http://example.com/o11", "globalId", "", "http://example.com/g2"]'], + [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", "http://example.com/g2"]'], + [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", "http://example.com/g1"]'], + [False, '["http://example.com/s1", "http://example.com/p2"'], + [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#value", "thingy", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"]'], + [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "http://www.w3.org/2002/07/owl#Thing", "globalId", "", 
"http://example.com/g1"]'], + [False, '["http://example.com/s1", "http://example.com/p3", "Object 4 - English", "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", "en", "http://example.com/g1"]'], + [False, '["http://example.com/s1", "http://example.com/p6", "42", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"]'], + [False, '["http://example.com/s1", "http://example.com/p4", "2021-12-03", "http://www.w3.org/2001/XMLSchema#date", "", "http://example.com/g1"]'], + [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", "http://example.com/g1"]'], + [False, '["http://example.com/s1", "http://example.com/p5", "42", "http://www.w3.org/2001/XMLSchema#integer", "", "http://example.com/g1"]'], + [False, '["http://example.com/s1", "http://example.com/p3", "Object 3", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"]'], + ] + for line in out.splitlines(): + for test in testing_lines: + if test[1] in line: + test[0] = True + + assert all([x[0] for x in testing_lines]) + + +def test_hext_json_representation(): + """Tests to see if every link in the ND-JSON Hextuple result is, in fact, JSON""" + d = Dataset() + trig_data = """ + PREFIX ex: + PREFIX owl: + PREFIX rdf: + PREFIX xsd: + + ex:g1 { + ex:s1 + ex:p1 ex:o1 , ex:o2 ; + ex:p2 [ + a owl:Thing ; + rdf:value "thingy" ; + ] ; + ex:p3 "Object 3" , "Object 4 - English"@en ; + ex:p4 "2021-12-03"^^xsd:date ; + ex:p5 42 ; + ex:p6 "42" ; + . + } + + ex:g2 { + ex:s1 + ex:p1 ex:o1 , ex:o2 ; + . + ex:s11 ex:p11 ex:o11 , ex:o12 . + } + + # default graph triples + ex:s1 ex:p1 ex:o1 , ex:o2 . + ex:s21 ex:p21 ex:o21 , ex:o22 . 
+ """ + d.parse(data=trig_data, format="trig") + out = d.serialize(format="hext") + for line in out.splitlines(): + j = json.loads(line) + assert isinstance(j, list) + + +def test_hext_dataset_linecount(): + d = Dataset() + assert len(d) == 0 + d.parse( + Path(__file__).parent / "test_parser_hext_multigraph.ndjson", + format="hext", + publicID=d.default_context.identifier + ) + total_triples = 0 + # count all the triples in the Dataset + for context in d.contexts(): + for triple in context.triples((None, None, None)): + total_triples += 1 + assert total_triples == 18 + + # count the number of serialized Hextuples, should be 22, as per the original file + lc = len(d.serialize(format="hext").splitlines()) + assert lc == 22 + + +def test_roundtrip(): + d = Dataset() + d.parse( + Path(__file__).parent / "test_parser_hext_multigraph.ndjson", + format="hext", + publicID=d.default_context.identifier + ) + d.default_union = True + with open(str(Path(__file__).parent / "test_parser_hext_multigraph.ndjson")) as i: + ordered_input = "".join(sorted(i.readlines())).strip() + + ordered_output = "\n".join(sorted(d.serialize(format="hext").split("\n"))).strip() + + assert ordered_output == ordered_input + + +# def _make_large_graph(): +# import random +# +# EX = Namespace("http://example.com/") +# g = Graph() +# +# for i in range(1000): +# s = EX["s" + str(random.randint(1, 10000)).zfill(5)] +# p = EX["p" + str(random.randint(1, 10000)).zfill(5)] +# o_r = random.randint(1, 10000) +# if o_r > 5000: +# o = EX["p" + str(o_r).zfill(5)] +# else: +# o = Literal("p" + str(o_r).zfill(5)) +# g.add((s, p, o)) +# return g +# +# +# def test_hext_scaling(): +# g = _make_large_graph() +# g.serialize(destination="large.ndjson", format="hext") +# +# +if __name__ == "__main__": + test_roundtrip() diff --git a/test/test_turtle2.py b/test/test_serializer_longturtle.py similarity index 98% rename from test/test_turtle2.py rename to test/test_serializer_longturtle.py index fef0efab7..cc184787a 100644 
--- a/test/test_turtle2.py +++ b/test/test_serializer_longturtle.py @@ -1,9 +1,9 @@ -# tests for the turtle2 serializer +# tests for the longturtle Serializer from rdflib import Graph -def test_turtle2(): +def test_longturtle(): g = Graph() g.parse( @@ -86,7 +86,7 @@ def test_turtle2(): """, format="turtle", ) - s = g.serialize(format="turtle2") + s = g.serialize(format="longturtle") lines = s.split("\n") assert "ex:b" in lines diff --git a/test/test_trix_serialize.py b/test/test_serializer_trix.py similarity index 100% rename from test/test_trix_serialize.py rename to test/test_serializer_trix.py diff --git a/test/test_turtle_serialize.py b/test/test_serializer_turtle.py similarity index 100% rename from test/test_turtle_serialize.py rename to test/test_serializer_turtle.py diff --git a/test/test_serializexml.py b/test/test_serializer_xml.py similarity index 100% rename from test/test_serializexml.py rename to test/test_serializer_xml.py diff --git a/test/test_swap_n3.py b/test/test_swap_n3.py index 1734806cb..c0b596cdd 100644 --- a/test/test_swap_n3.py +++ b/test/test_swap_n3.py @@ -1,6 +1,5 @@ import os import sys -import unittest import pytest diff --git a/test/test_term.py b/test/test_term.py index e154bd4eb..0f9dbea21 100644 --- a/test/test_term.py +++ b/test/test_term.py @@ -74,16 +74,16 @@ def test_total_order(self): sorted(literals) orderable = True except TypeError as e: - for l in literals: - print(repr(l), repr(l.value)) + for l_ in literals: + print(repr(l_), repr(l_.value)) print(e) orderable = False self.assertTrue(orderable) # also make sure that within a datetime things are still ordered: l1 = [ - Literal(l, datatype=XSD.dateTime) - for l in [ + Literal(l_, datatype=XSD.dateTime) + for l_ in [ "2001-01-01T00:00:00", "2001-01-01T01:00:00", "2001-01-01T01:00:01",