diff --git a/README.md b/README.md index 001ffe1..dc25cd6 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ context = RDF::Graph.new << [uri, RDF::Vocab::DC.title, "Some Title"] program = Ldpath::Program.parse my_program output = program.evaluate uri, context: context -# => { ... } +# => {"title"=>["Some Title"]} ``` ## Compatibility diff --git a/lib/ldpath/field_mapping.rb b/lib/ldpath/field_mapping.rb index 9dd6a01..d1dae1f 100644 --- a/lib/ldpath/field_mapping.rb +++ b/lib/ldpath/field_mapping.rb @@ -9,13 +9,13 @@ def initialize(name:, selector:, field_type: nil, options: {}) @options = options end - def evaluate(program, uri, context) + def evaluate(program, uri, context, maintain_literals: false) case selector when Ldpath::Selector - return to_enum(:evaluate, program, uri, context) unless block_given? + return to_enum(:evaluate, program, uri, context, maintain_literals: maintain_literals) unless block_given? - selector.evaluate(program, uri, context).each do |value| - yield transform_value(value) + selector.evaluate(program, uri, context, maintain_literals: maintain_literals).each do |value| + yield transform_value(value, maintain_literals: maintain_literals) end when RDF::Literal Array(selector.canonicalize.object) @@ -26,18 +26,30 @@ def evaluate(program, uri, context) private - def transform_value(value) - v = if value.is_a? RDF::Literal + def transform_value(value, maintain_literals: false) + v = if value.is_a?(RDF::Literal) && !maintain_literals value.canonicalize.object else value end - if field_type - RDF::Literal.new(v.to_s, datatype: field_type).canonicalize.object + if field_type && !same_type(v, field_type) + v_literal = RDF::Literal.new(v.to_s, datatype: field_type) + maintain_literals ? v_literal : v_literal.canonicalize.object else v end end + + def same_type(object, field_type) + case object + when RDF::Literal + object.comperable_datatype? field_type + when RDF::URI + field_type.to_s.end_with? 
'anyURI' + else + false + end + end end end diff --git a/lib/ldpath/parser.rb b/lib/ldpath/parser.rb index 13a39ee..4b5e6f2 100644 --- a/lib/ldpath/parser.rb +++ b/lib/ldpath/parser.rb @@ -1,3 +1,5 @@ +# Parslet parser for parsing ldpath programs. +# @see https://kschiess.github.io/parslet/parser.html Parslet parser documentation require 'parslet' module Ldpath diff --git a/lib/ldpath/program.rb b/lib/ldpath/program.rb index 99a3870..10d7e32 100644 --- a/lib/ldpath/program.rb +++ b/lib/ldpath/program.rb @@ -1,14 +1,31 @@ +# Parse and evaluate an ldpath program. +# @see https://kschiess.github.io/parslet/documentation.html Parslet Documentation +# @see https://marmotta.apache.org/ldpath/language.html LDPath Language Reference module Ldpath class Program ParseError = Class.new StandardError class << self + + # Parse ldpath program and apply transforms. + # @param program [String] the program to be parsed + # @param transform_context [Hash] see parslet documentation for more info + # @return [Ldpath::Program] instance of this class that can be evaluated on a graph def parse(program, transform_context = {}) ast = transform.apply load(program), transform_context Ldpath::Program.new ast.compact, transform_context end + # Load the ldpath program using the ldpath parser. 
+ # @param program [String] ldpath program + # @raise [ParseError] exception raised if parse fails + # @return [Hash, Array, Parslet::Slice] PORO (Plain old Ruby object) result tree + # @example ldpath program (see spec/ldpath_program_spec.rb for a more detailed example program) + # @prefix dcterms : ; + # title = dcterms:title :: xsd:string ; + # parent_title = dcterms:isPartOf / dcterms:title :: xsd:string ; + # int_value = [^^xsd:integer] :: xsd:integer ; def load(program) parser.parse(program, reporter: Parslet::ErrorReporter::Deepest.new) rescue Parslet::ParseFailed => e @@ -36,10 +53,18 @@ def initialize(mappings, default_loader: Ldpath::Loaders::Direct.new, prefixes: end - def evaluate(uri, context: nil, limit_to_context: false) - result = Ldpath::Result.new(self, uri, context: context, limit_to_context: limit_to_context) + # Evaluate an ldpath program returning values extracted from the graph and dereferencing the subject + # to get additional context unless limit_to_context is true. + # @param uri [RDF::URI] subject URI for matching triples from the graph + # @param context [RDF::Graph] the graph from which to extract values + # @param limit_to_context [Boolean] if true, only draw values from the passed in context; otherwise, will make curl requests to gather additional context + # @param maintain_literals [Boolean] if true, will return values that are RDF::Literals as RDF::Literals; otherwise, returns the canonicalized form (e.g. String, Integer, etc.) + # @return [Hash] the extracted values based on the ldpath, keyed by field name, with values that can be of type RDF::URI, RDF::Literal, String, Integer, etc., + # based on the value in the graph and the value of maintain_literals. + def evaluate(uri, context: nil, limit_to_context: false, maintain_literals: false) + result = Ldpath::Result.new(self, uri, context: context, limit_to_context: limit_to_context, maintain_literals: maintain_literals) unless filters.empty? - return {} unless filters.all? 
{ |f| f.evaluate(result, uri, result.context) } + return {} unless filters.all? { |f| f.evaluate(result, uri, result.context, maintain_literals: maintain_literals) } end result.to_hash diff --git a/lib/ldpath/result.rb b/lib/ldpath/result.rb index 78abb2b..a512251 100644 --- a/lib/ldpath/result.rb +++ b/lib/ldpath/result.rb @@ -3,13 +3,14 @@ class Result include Ldpath::Functions attr_reader :program, :uri, :cache, :loaded - def initialize(program, uri, cache: RDF::Util::Cache.new, context: nil, limit_to_context: false) + def initialize(program, uri, cache: RDF::Util::Cache.new, context: nil, limit_to_context: false, maintain_literals: false) @program = program @uri = uri @cache = cache @loaded = {} @context = context @limit_to_context = limit_to_context + @maintain_literals = maintain_literals end def loading(uri, context) @@ -59,7 +60,7 @@ def meta private def evaluate(mapping) - mapping.evaluate(self, uri, context) + mapping.evaluate(self, uri, context, maintain_literals: maintain_literals?) end def function_method?(function) @@ -73,5 +74,9 @@ def mappings def limit_to_context? @limit_to_context end + + def maintain_literals? + @maintain_literals + end end end diff --git a/lib/ldpath/selectors.rb b/lib/ldpath/selectors.rb index d511a1f..6c24c10 100644 --- a/lib/ldpath/selectors.rb +++ b/lib/ldpath/selectors.rb @@ -1,7 +1,7 @@ module Ldpath class Selector - def evaluate(program, uris, context) - return to_enum(:evaluate, program, uris, context) unless block_given? + def evaluate(program, uris, context, maintain_literals: false) + return to_enum(:evaluate, program, uris, context, maintain_literals: maintain_literals) unless block_given? enum_wrap(uris).map do |uri| loading program, uri, context enum_flatten_one(evaluate_one(uri, context)).each do |x| @@ -55,15 +55,15 @@ def initialize(fname, arguments = []) @arguments = Array(arguments) end - def evaluate(program, uris, context) - return to_enum(:evaluate, program, uris, context) unless block_given? 
+ def evaluate(program, uris, context, maintain_literals: false) + return to_enum(:evaluate, program, uris, context, maintain_literals: maintain_literals) unless block_given? enum_wrap(uris).map do |uri| loading program, uri, context args = arguments.map do |i| case i when Selector - i.evaluate(program, uri, context) + i.evaluate(program, uri, context, maintain_literals: maintain_literals) else i end @@ -138,14 +138,14 @@ def initialize(property, repeat) @repeat = repeat end - def evaluate(program, uris, context) - return to_enum(:evaluate, program, uris, context) unless block_given? + def evaluate(program, uris, context, maintain_literals: false) + return to_enum(:evaluate, program, uris, context, maintain_literals: maintain_literals) unless block_given? input = enum_wrap(uris) (0..repeat.max).each_with_index do |i, idx| break if input.none? || (repeat.max == Ldpath::Transform::Infinity && idx > 25) # we're probably lost.. - input = property.evaluate program, input, context + input = property.evaluate program, input, context, maintain_literals: maintain_literals next unless idx >= repeat.min @@ -165,19 +165,20 @@ def initialize(left, right) end class PathSelector < CompoundSelector - def evaluate(program, uris, context, &block) - return to_enum(:evaluate, program, uris, context) unless block_given? + def evaluate(program, uris, context, maintain_literals: false, &block) + return to_enum(:evaluate, program, uris, context, maintain_literals: maintain_literals) unless block_given? - output = left.evaluate(program, uris, context) - right.evaluate(program, output, context, &block) + output = left.evaluate(program, uris, context, maintain_literals: maintain_literals) + right.evaluate(program, output, context, maintain_literals: maintain_literals, &block) end end class UnionSelector < CompoundSelector - def evaluate(program, uris, context) - return to_enum(:evaluate, program, uris, context) unless block_given? 
+ def evaluate(program, uris, context, maintain_literals: false) + return to_enum(:evaluate, program, uris, context, maintain_literals: maintain_literals) unless block_given? - enum_union(left.evaluate(program, uris, context), right.evaluate(program, uris, context)).each do |x| + enum_union(left.evaluate(program, uris, context, maintain_literals: maintain_literals), + right.evaluate(program, uris, context, maintain_literals: maintain_literals)).each do |x| yield x end end @@ -198,10 +199,11 @@ def enum_union(left, right) end class IntersectionSelector < CompoundSelector - def evaluate(program, uris, context) - return to_enum(:evaluate, program, uris, context) unless block_given? + def evaluate(program, uris, context, maintain_literals: false) + return to_enum(:evaluate, program, uris, context, maintain_literals: maintain_literals) unless block_given? - result = left.evaluate(program, uris, context).to_a & right.evaluate(program, uris, context).to_a + result = left.evaluate(program, uris, context, maintain_literals: maintain_literals).to_a & + right.evaluate(program, uris, context, maintain_literals: maintain_literals).to_a result.each do |x| yield x @@ -216,10 +218,11 @@ def initialize(identifier, tap) @tap = tap end - def evaluate(program, uris, context) - return to_enum(:evaluate, program, uris, context) unless block_given? + def evaluate(program, uris, context, maintain_literals: false) + return to_enum(:evaluate, program, uris, context, maintain_literals: maintain_literals) unless block_given? 
- program.meta[identifier] = tap.evaluate(program, uris, context).map { |x| RDF::Literal.new(x.to_s).canonicalize.object } + program.meta[identifier] = tap.evaluate(program, uris, context, maintain_literals: maintain_literals) + .map { |x| RDF::Literal.new(x.to_s).canonicalize.object } enum_wrap(uris).map do |uri| loading program, uri, context diff --git a/lib/ldpath/tests.rb b/lib/ldpath/tests.rb index 48c420d..05ec16b 100644 --- a/lib/ldpath/tests.rb +++ b/lib/ldpath/tests.rb @@ -7,12 +7,12 @@ def initialize(delegate, test) @test = test end - def evaluate(program, uris, context) - return to_enum(:evaluate, program, uris, context) unless block_given? + def evaluate(program, uris, context, maintain_literals: false) + return to_enum(:evaluate, program, uris, context, maintain_literals: maintain_literals) unless block_given? - entries = delegate.evaluate program, uris, context + entries = delegate.evaluate program, uris, context, maintain_literals: maintain_literals entries.select do |uri| - result = enum_wrap(test.evaluate(program, uri, context)).any? do |x| + result = enum_wrap(test.evaluate(program, uri, context, maintain_literals: maintain_literals)).any? do |x| x end yield uri if result @@ -26,7 +26,7 @@ def initialize(lang) @lang = lang end - def evaluate(_program, uri, _context) + def evaluate(_program, uri, _context, maintain_literals: false) return unless uri.literal? uri if (lang.to_s == "none" && !uri.has_language?) || uri.language.to_s == lang.to_s @@ -39,7 +39,7 @@ def initialize(type) @type = type end - def evaluate(program, uri, _context) + def evaluate(program, uri, _context, maintain_literals: false) return unless uri.literal? uri if uri.has_datatype? && uri.datatype == type @@ -53,8 +53,8 @@ def initialize(delegate) @delegate = delegate end - def evaluate(program, uri, context) - !enum_wrap(delegate.evaluate(program, uri, context)).any? 
{ |x| x } + def evaluate(program, uri, context, maintain_literals: false) + !enum_wrap(delegate.evaluate(program, uri, context, maintain_literals: maintain_literals)).any? { |x| x } end end @@ -66,8 +66,9 @@ def initialize(left, right) @right = right end - def evaluate(program, uri, context) - left.evaluate(program, uri, context).any? || right.evaluate(program, uri, context).any? + def evaluate(program, uri, context, maintain_literals: false) + left.evaluate(program, uri, context, maintain_literals: maintain_literals).any? || + right.evaluate(program, uri, context, maintain_literals: maintain_literals).any? end end @@ -79,9 +80,9 @@ def initialize(left, right) @right = right end - def evaluate(program, uri, context) - left.evaluate(program, uri, context).any? && - right.evaluate(program, uri, context).any? + def evaluate(program, uri, context, maintain_literals: false) + left.evaluate(program, uri, context, maintain_literals: maintain_literals).any? && + right.evaluate(program, uri, context, maintain_literals: maintain_literals).any? end end @@ -93,8 +94,8 @@ def initialize(left, right) @right = right end - def evaluate(program, uri, context) - left.evaluate(program, uri, context).include?(right) + def evaluate(program, uri, context, maintain_literals: false) + left.evaluate(program, uri, context, maintain_literals: maintain_literals).include?(right) end end end diff --git a/lib/ldpath/transform.rb b/lib/ldpath/transform.rb index 216fc59..f0b6eb3 100644 --- a/lib/ldpath/transform.rb +++ b/lib/ldpath/transform.rb @@ -1,8 +1,11 @@ +# Support Parslet Hash transforms. +# @see https://kschiess.github.io/parslet/transform.html Parslet transform documentation module Ldpath class Transform < Parslet::Transform attr_reader :prefixes class << self + # Default set of prefixes that can be used in an ldpath program without defining. 
def default_prefixes @default_prefixes ||= { "rdf" => RDF::Vocabulary.new("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), @@ -20,6 +23,14 @@ def default_prefixes end end + # Applies transformations to a tree that is generated by Parslet::Parser + # or a simple parslet. Transformation will proceed down the tree, replacing + # parts/all of it with new objects. The resulting object will be returned. + # @param obj [Object] Plain Old Ruby Object (PORO) Abstract Syntax Tree (ast) to transform + # @param context [Hash, nil] start context to inject into the bindings. + # @return object from the resulting transformations + # @see https://kschiess.github.io/parslet/transform.html Parslet transform documentation for more information on parameters and processing + # @see https://en.wikipedia.org/wiki/Abstract_syntax_tree Abstract syntax tree description def apply(obj, context = nil) context ||= {} context[:filters] ||= [] diff --git a/spec/ldpath_program_spec.rb b/spec/ldpath_program_spec.rb index 1e059c0..9a8f1e0 100644 --- a/spec/ldpath_program_spec.rb +++ b/spec/ldpath_program_spec.rb @@ -11,7 +11,8 @@ titles = dcterms:title | (dcterms:isPartOf / dcterms:title) | (^dcterms:isPartOf / dcterms:title) :: xsd:string ; no_titles = dcterms:title & (dcterms:isPartOf / dcterms:title) & (^dcterms:isPartOf / dcterms:title) :: xsd:string ; self = . :: xsd:string ; -wildcard = * ::xsd:string ; +str_wildcard = * ::xsd:string ; +uri_wildcard = * ::xsd:anyURI ; child_title = ^dcterms:isPartOf / dcterms:title :: xsd:string ; child_description_en = ^dcterms:isPartOf / dcterms:description[@en] :: xsd:string ; recursive = (dcterms:isPartOf)* ; @@ -57,7 +58,8 @@ expect(result["parent_title"]).to match_array ["Parent title", "Parent English!", "Parent French!"] expect(result["parent_title_en"]).to match_array "Parent English!" 
expect(result["self"]).to match_array(object) - expect(result["wildcard"]).to include "Hello, world!", parent + expect(result["str_wildcard"]).to include "Hello, world!", parent + expect(result["uri_wildcard"]).to include parent expect(result["child_title"]).to match_array "Child title" expect(result["titles"]).to match_array ["Hello, world!", "Parent title", "Child title", "Parent English!", "Parent French!"] expect(result["no_titles"]).to be_empty @@ -73,6 +75,54 @@ expect(result["is_test"]).to match_array(object) expect(result["is_not_test"]).to be_empty end + + context "when requesting literals" do + let(:title) { RDF::Literal.new("Hello, world!") } + let(:parent_title) { RDF::Literal.new("Parent title") } + let(:child_title) { RDF::Literal.new("Child title") } + let(:en_description) { RDF::Literal.new("English!", language: "en") } + let(:fr_description) { RDF::Literal.new("French!", language: "fr") } + let(:en_parent_title) { RDF::Literal.new("Parent English!", language: "en") } + let(:fr_parent_title) { RDF::Literal.new("Parent French!", language: "fr") } + + it "should return literals" do + graph << [object, RDF::Vocab::DC.title, title.canonicalize.object] + graph << [object, RDF::Vocab::DC.isPartOf, parent] + graph << [object, RDF::Vocab::DC.description, en_description] + graph << [object, RDF::Vocab::DC.description, fr_description] + graph << [object, RDF::URI.new("info:intProperty"), 1] + graph << [object, RDF::URI.new("info:intProperty"), "garbage"] + graph << [object, RDF::URI.new("info:numericProperty"), "1"] + graph << [parent, RDF::Vocab::DC.title, parent_title.canonicalize.object] + graph << [child, RDF::Vocab::DC.isPartOf, object] + graph << [child, RDF::Vocab::DC.title, child_title.canonicalize.object] + graph << [parent, RDF::Vocab::DC.title, en_parent_title] + graph << [parent, RDF::Vocab::DC.title, fr_parent_title] + graph << [parent, RDF::Vocab::DC.isPartOf, grandparent] + + result = subject.evaluate object, context: graph, maintain_literals: 
true + expect(result["title"]).to match_array RDF::Literal.new("Hello, world!") + expect(result["parent_title"]).to match_array [parent_title, en_parent_title, fr_parent_title] + expect(result["parent_title_en"]).to match_array en_parent_title + expect(result["self"]).to match_array(object.to_s) + expect(result["str_wildcard"]).to include title, parent.to_s + expect(result["uri_wildcard"]).to include parent + expect(result["child_title"]).to match_array child_title + expect(result["titles"]).to match_array [title, parent_title, child_title, en_parent_title, fr_parent_title] + expect(result["no_titles"]).to be_empty + expect(result["recursive"]).to match_array [parent, grandparent] + expect(result["en_description"].first.to_s).to eq "English!" + expect(result["conditional"]).to match_array parent + expect(result["conditional_false"]).to be_empty + expect(result["int_value"]).to match_array RDF::Literal::Integer.new("1") + expect(result["numeric_value"]).to match_array RDF::Literal::Integer.new("1") + expect(result["escaped_string"]).to match_array '\"' + expect(result["and_test"]).to be_empty + expect(result["or_test"]).to match_array(object) + expect(result["is_test"]).to match_array(object) + expect(result["is_not_test"]).to be_empty + end + end end describe "functions" do