diff --git a/README.md b/README.md
index 001ffe1..dc25cd6 100644
--- a/README.md
+++ b/README.md
@@ -42,7 +42,7 @@ context = RDF::Graph.new << [uri, RDF::Vocab::DC.title, "Some Title"]
program = Ldpath::Program.parse my_program
output = program.evaluate uri, context: context
-# => { ... }
+# => {"title"=>["Some Title"]}
```
## Compatibility
diff --git a/lib/ldpath/field_mapping.rb b/lib/ldpath/field_mapping.rb
index 9dd6a01..d1dae1f 100644
--- a/lib/ldpath/field_mapping.rb
+++ b/lib/ldpath/field_mapping.rb
@@ -9,13 +9,13 @@ def initialize(name:, selector:, field_type: nil, options: {})
@options = options
end
- def evaluate(program, uri, context)
+ def evaluate(program, uri, context, maintain_literals: false)
case selector
when Ldpath::Selector
- return to_enum(:evaluate, program, uri, context) unless block_given?
+ return to_enum(:evaluate, program, uri, context, maintain_literals: maintain_literals) unless block_given?
- selector.evaluate(program, uri, context).each do |value|
- yield transform_value(value)
+ selector.evaluate(program, uri, context, maintain_literals: maintain_literals).each do |value|
+ yield transform_value(value, maintain_literals: maintain_literals)
end
when RDF::Literal
Array(selector.canonicalize.object)
@@ -26,18 +26,30 @@ def evaluate(program, uri, context)
private
- def transform_value(value)
- v = if value.is_a? RDF::Literal
+ def transform_value(value, maintain_literals: false)
+ v = if value.is_a?(RDF::Literal) && !maintain_literals
value.canonicalize.object
else
value
end
- if field_type
- RDF::Literal.new(v.to_s, datatype: field_type).canonicalize.object
+ if field_type && !same_type(v, field_type)
+ v_literal = RDF::Literal.new(v.to_s, datatype: field_type)
+ maintain_literals ? v_literal : v_literal.canonicalize.object
else
v
end
end
+
+ def same_type(object, field_type)
+ case object
+ when RDF::Literal
+ object.comperable_datatype? field_type
+ when RDF::URI
+ field_type.to_s.end_with? 'anyURI'
+ else
+ false
+ end
+ end
end
end
diff --git a/lib/ldpath/parser.rb b/lib/ldpath/parser.rb
index 13a39ee..4b5e6f2 100644
--- a/lib/ldpath/parser.rb
+++ b/lib/ldpath/parser.rb
@@ -1,3 +1,5 @@
+# Parslet parser for parsing ldpath programs.
+# @see https://kschiess.github.io/parslet/parser.html Parslet parser documentation
require 'parslet'
module Ldpath
diff --git a/lib/ldpath/program.rb b/lib/ldpath/program.rb
index 99a3870..10d7e32 100644
--- a/lib/ldpath/program.rb
+++ b/lib/ldpath/program.rb
@@ -1,14 +1,31 @@
+# Parse and evaluate an ldpath program.
+# @see https://kschiess.github.io/parslet/documentation.html Parslet Documentation
+# @see https://marmotta.apache.org/ldpath/language.html LDPath Language Reference
module Ldpath
class Program
ParseError = Class.new StandardError
class << self
+
+ # Parse ldpath program and apply transforms.
+ # @param program [String] the program to be parsed
+ # @param transform_context [Hash] see parslet documentation for more info
+ # @return [Ldpath::Program] instance of this class that can be evaluated on a graph
def parse(program, transform_context = {})
ast = transform.apply load(program), transform_context
Ldpath::Program.new ast.compact, transform_context
end
+ # Load the ldpath program using the ldpath parser.
+ # @param program [String] ldpath program
+ # @raise [ParseError] exception raised if parse fails
+ # @return [Hash, Array, Parslet::Slice] PORO (Plain old Ruby object) result tree
+ # @example ldpath program (see spec/ldpath_program_spec.rb for a more detailed example program)
+ # @prefix dcterms : ;
+ # title = dcterms:title :: xsd:string ;
+ # parent_title = dcterms:isPartOf / dcterms:title :: xsd:string ;
+ # int_value = [^^xsd:integer] :: xsd:integer ;
def load(program)
parser.parse(program, reporter: Parslet::ErrorReporter::Deepest.new)
rescue Parslet::ParseFailed => e
@@ -36,10 +53,18 @@ def initialize(mappings, default_loader: Ldpath::Loaders::Direct.new, prefixes:
end
- def evaluate(uri, context: nil, limit_to_context: false)
- result = Ldpath::Result.new(self, uri, context: context, limit_to_context: limit_to_context)
+ # Evaluate an ldpath program returning values extracted from the graph and dereferencing the subject
+ # to get additional context unless limit_to_context is true.
+ # @param uri [RDF::URI] subject URI for matching triples from the graph
+ # @param context [RDF::Graph] the graph from which to extract values
+ # @param limit_to_context [Boolean] if true, only draw values from the passed in context; otherwise, will make curl requests to gather additional context
+ # @param maintain_literals [Boolean] if true, will return values that are RDF::Literals as RDF::Literals; otherwise, returns their canonicalized form (e.g. String, Integer, etc.)
+ # @return [Hash] field names mapped to the extracted values, which can be of type RDF::URI, RDF::Literal, String, Integer, etc.,
+ # based on the value in the graph and the value of maintain_literals; an empty Hash is returned when a filter does not match.
+ def evaluate(uri, context: nil, limit_to_context: false, maintain_literals: false)
+ result = Ldpath::Result.new(self, uri, context: context, limit_to_context: limit_to_context, maintain_literals: maintain_literals)
unless filters.empty?
- return {} unless filters.all? { |f| f.evaluate(result, uri, result.context) }
+ return {} unless filters.all? { |f| f.evaluate(result, uri, result.context, maintain_literals: maintain_literals) }
end
result.to_hash
diff --git a/lib/ldpath/result.rb b/lib/ldpath/result.rb
index 78abb2b..a512251 100644
--- a/lib/ldpath/result.rb
+++ b/lib/ldpath/result.rb
@@ -3,13 +3,14 @@ class Result
include Ldpath::Functions
attr_reader :program, :uri, :cache, :loaded
- def initialize(program, uri, cache: RDF::Util::Cache.new, context: nil, limit_to_context: false)
+ def initialize(program, uri, cache: RDF::Util::Cache.new, context: nil, limit_to_context: false, maintain_literals: false)
@program = program
@uri = uri
@cache = cache
@loaded = {}
@context = context
@limit_to_context = limit_to_context
+ @maintain_literals = maintain_literals
end
def loading(uri, context)
@@ -59,7 +60,7 @@ def meta
private
def evaluate(mapping)
- mapping.evaluate(self, uri, context)
+ mapping.evaluate(self, uri, context, maintain_literals: maintain_literals?)
end
def function_method?(function)
@@ -73,5 +74,9 @@ def mappings
def limit_to_context?
@limit_to_context
end
+
+ def maintain_literals?
+ @maintain_literals
+ end
end
end
diff --git a/lib/ldpath/selectors.rb b/lib/ldpath/selectors.rb
index d511a1f..6c24c10 100644
--- a/lib/ldpath/selectors.rb
+++ b/lib/ldpath/selectors.rb
@@ -1,7 +1,7 @@
module Ldpath
class Selector
- def evaluate(program, uris, context)
- return to_enum(:evaluate, program, uris, context) unless block_given?
+ def evaluate(program, uris, context, maintain_literals: false)
+ return to_enum(:evaluate, program, uris, context, maintain_literals: maintain_literals) unless block_given?
enum_wrap(uris).map do |uri|
loading program, uri, context
enum_flatten_one(evaluate_one(uri, context)).each do |x|
@@ -55,15 +55,15 @@ def initialize(fname, arguments = [])
@arguments = Array(arguments)
end
- def evaluate(program, uris, context)
- return to_enum(:evaluate, program, uris, context) unless block_given?
+ def evaluate(program, uris, context, maintain_literals: false)
+ return to_enum(:evaluate, program, uris, context, maintain_literals: maintain_literals) unless block_given?
enum_wrap(uris).map do |uri|
loading program, uri, context
args = arguments.map do |i|
case i
when Selector
- i.evaluate(program, uri, context)
+ i.evaluate(program, uri, context, maintain_literals: maintain_literals)
else
i
end
@@ -138,14 +138,14 @@ def initialize(property, repeat)
@repeat = repeat
end
- def evaluate(program, uris, context)
- return to_enum(:evaluate, program, uris, context) unless block_given?
+ def evaluate(program, uris, context, maintain_literals: false)
+ return to_enum(:evaluate, program, uris, context, maintain_literals: maintain_literals) unless block_given?
input = enum_wrap(uris)
(0..repeat.max).each_with_index do |i, idx|
break if input.none? || (repeat.max == Ldpath::Transform::Infinity && idx > 25) # we're probably lost..
- input = property.evaluate program, input, context
+ input = property.evaluate program, input, context, maintain_literals: maintain_literals
next unless idx >= repeat.min
@@ -165,19 +165,20 @@ def initialize(left, right)
end
class PathSelector < CompoundSelector
- def evaluate(program, uris, context, &block)
- return to_enum(:evaluate, program, uris, context) unless block_given?
+ def evaluate(program, uris, context, maintain_literals: false, &block)
+ return to_enum(:evaluate, program, uris, context, maintain_literals: maintain_literals) unless block_given?
- output = left.evaluate(program, uris, context)
- right.evaluate(program, output, context, &block)
+ output = left.evaluate(program, uris, context, maintain_literals: maintain_literals)
+ right.evaluate(program, output, context, maintain_literals: maintain_literals, &block)
end
end
class UnionSelector < CompoundSelector
- def evaluate(program, uris, context)
- return to_enum(:evaluate, program, uris, context) unless block_given?
+ def evaluate(program, uris, context, maintain_literals: false)
+ return to_enum(:evaluate, program, uris, context, maintain_literals: maintain_literals) unless block_given?
- enum_union(left.evaluate(program, uris, context), right.evaluate(program, uris, context)).each do |x|
+ enum_union(left.evaluate(program, uris, context, maintain_literals: maintain_literals),
+ right.evaluate(program, uris, context, maintain_literals: maintain_literals)).each do |x|
yield x
end
end
@@ -198,10 +199,11 @@ def enum_union(left, right)
end
class IntersectionSelector < CompoundSelector
- def evaluate(program, uris, context)
- return to_enum(:evaluate, program, uris, context) unless block_given?
+ def evaluate(program, uris, context, maintain_literals: false)
+ return to_enum(:evaluate, program, uris, context, maintain_literals: maintain_literals) unless block_given?
- result = left.evaluate(program, uris, context).to_a & right.evaluate(program, uris, context).to_a
+ result = left.evaluate(program, uris, context, maintain_literals: maintain_literals).to_a &
+ right.evaluate(program, uris, context, maintain_literals: maintain_literals).to_a
result.each do |x|
yield x
@@ -216,10 +218,11 @@ def initialize(identifier, tap)
@tap = tap
end
- def evaluate(program, uris, context)
- return to_enum(:evaluate, program, uris, context) unless block_given?
+ def evaluate(program, uris, context, maintain_literals: false)
+ return to_enum(:evaluate, program, uris, context, maintain_literals: maintain_literals) unless block_given?
- program.meta[identifier] = tap.evaluate(program, uris, context).map { |x| RDF::Literal.new(x.to_s).canonicalize.object }
+ program.meta[identifier] = tap.evaluate(program, uris, context, maintain_literals: maintain_literals)
+ .map { |x| RDF::Literal.new(x.to_s).canonicalize.object }
enum_wrap(uris).map do |uri|
loading program, uri, context
diff --git a/lib/ldpath/tests.rb b/lib/ldpath/tests.rb
index 48c420d..05ec16b 100644
--- a/lib/ldpath/tests.rb
+++ b/lib/ldpath/tests.rb
@@ -7,12 +7,12 @@ def initialize(delegate, test)
@test = test
end
- def evaluate(program, uris, context)
- return to_enum(:evaluate, program, uris, context) unless block_given?
+ def evaluate(program, uris, context, maintain_literals: false)
+ return to_enum(:evaluate, program, uris, context, maintain_literals: maintain_literals) unless block_given?
- entries = delegate.evaluate program, uris, context
+ entries = delegate.evaluate program, uris, context, maintain_literals: maintain_literals
entries.select do |uri|
- result = enum_wrap(test.evaluate(program, uri, context)).any? do |x|
+ result = enum_wrap(test.evaluate(program, uri, context, maintain_literals: maintain_literals)).any? do |x|
x
end
yield uri if result
@@ -26,7 +26,7 @@ def initialize(lang)
@lang = lang
end
- def evaluate(_program, uri, _context)
+ def evaluate(_program, uri, _context, maintain_literals: false)
return unless uri.literal?
uri if (lang.to_s == "none" && !uri.has_language?) || uri.language.to_s == lang.to_s
@@ -39,7 +39,7 @@ def initialize(type)
@type = type
end
- def evaluate(program, uri, _context)
+ def evaluate(program, uri, _context, maintain_literals: false)
return unless uri.literal?
uri if uri.has_datatype? && uri.datatype == type
@@ -53,8 +53,8 @@ def initialize(delegate)
@delegate = delegate
end
- def evaluate(program, uri, context)
- !enum_wrap(delegate.evaluate(program, uri, context)).any? { |x| x }
+ def evaluate(program, uri, context, maintain_literals: false)
+ !enum_wrap(delegate.evaluate(program, uri, context, maintain_literals: maintain_literals)).any? { |x| x }
end
end
@@ -66,8 +66,9 @@ def initialize(left, right)
@right = right
end
- def evaluate(program, uri, context)
- left.evaluate(program, uri, context).any? || right.evaluate(program, uri, context).any?
+ def evaluate(program, uri, context, maintain_literals: false)
+ left.evaluate(program, uri, context, maintain_literals: maintain_literals).any? ||
+ right.evaluate(program, uri, context, maintain_literals: maintain_literals).any?
end
end
@@ -79,9 +80,9 @@ def initialize(left, right)
@right = right
end
- def evaluate(program, uri, context)
- left.evaluate(program, uri, context).any? &&
- right.evaluate(program, uri, context).any?
+ def evaluate(program, uri, context, maintain_literals: false)
+ left.evaluate(program, uri, context, maintain_literals: maintain_literals).any? &&
+ right.evaluate(program, uri, context, maintain_literals: maintain_literals).any?
end
end
@@ -93,8 +94,8 @@ def initialize(left, right)
@right = right
end
- def evaluate(program, uri, context)
- left.evaluate(program, uri, context).include?(right)
+ def evaluate(program, uri, context, maintain_literals: false)
+ left.evaluate(program, uri, context, maintain_literals: maintain_literals).include?(right)
end
end
end
diff --git a/lib/ldpath/transform.rb b/lib/ldpath/transform.rb
index 216fc59..f0b6eb3 100644
--- a/lib/ldpath/transform.rb
+++ b/lib/ldpath/transform.rb
@@ -1,8 +1,11 @@
+# Support Parslet Hash transforms.
+# @see https://kschiess.github.io/parslet/transform.html Parslet transform documentation
module Ldpath
class Transform < Parslet::Transform
attr_reader :prefixes
class << self
+ # Default set of prefixes that can be used in an ldpath program without being defined.
def default_prefixes
@default_prefixes ||= {
"rdf" => RDF::Vocabulary.new("http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
@@ -20,6 +23,14 @@ def default_prefixes
end
end
+ # Applies transformations to a tree that is generated by Parslet::Parser
+ # or a simple parslet. Transformation will proceed down the tree, replacing
+ # parts/all of it with new objects. The resulting object will be returned.
+ # @param obj [Object] Plain Old Ruby Object (PORO) Abstract Syntax Tree (ast) to transform
+ # @param context [Hash, nil] start context to inject into the bindings.
+ # @return object from the resulting transformations
+ # @see https://kschiess.github.io/parslet/transform.html Parslet transform documentation for more information on parameters and processing
+ # @see https://en.wikipedia.org/wiki/Abstract_syntax_tree Abstract syntax tree description
def apply(obj, context = nil)
context ||= {}
context[:filters] ||= []
diff --git a/spec/ldpath_program_spec.rb b/spec/ldpath_program_spec.rb
index 1e059c0..9a8f1e0 100644
--- a/spec/ldpath_program_spec.rb
+++ b/spec/ldpath_program_spec.rb
@@ -11,7 +11,8 @@
titles = dcterms:title | (dcterms:isPartOf / dcterms:title) | (^dcterms:isPartOf / dcterms:title) :: xsd:string ;
no_titles = dcterms:title & (dcterms:isPartOf / dcterms:title) & (^dcterms:isPartOf / dcterms:title) :: xsd:string ;
self = . :: xsd:string ;
-wildcard = * ::xsd:string ;
+str_wildcard = * ::xsd:string ;
+uri_wildcard = * ::xsd:anyURI ;
child_title = ^dcterms:isPartOf / dcterms:title :: xsd:string ;
child_description_en = ^dcterms:isPartOf / dcterms:description[@en] :: xsd:string ;
recursive = (dcterms:isPartOf)* ;
@@ -57,7 +58,8 @@
expect(result["parent_title"]).to match_array ["Parent title", "Parent English!", "Parent French!"]
expect(result["parent_title_en"]).to match_array "Parent English!"
expect(result["self"]).to match_array(object)
- expect(result["wildcard"]).to include "Hello, world!", parent
+ expect(result["str_wildcard"]).to include "Hello, world!", parent
+ expect(result["uri_wildcard"]).to include parent
expect(result["child_title"]).to match_array "Child title"
expect(result["titles"]).to match_array ["Hello, world!", "Parent title", "Child title", "Parent English!", "Parent French!"]
expect(result["no_titles"]).to be_empty
@@ -73,6 +75,54 @@
expect(result["is_test"]).to match_array(object)
expect(result["is_not_test"]).to be_empty
end
+
+ context "when requesting literals" do
+ let(:title) { RDF::Literal.new("Hello, world!") }
+ let(:parent_title) { RDF::Literal.new("Parent title") }
+ let(:child_title) { RDF::Literal.new("Child title") }
+ let(:en_description) { RDF::Literal.new("English!", language: "en") }
+ let(:fr_description) { RDF::Literal.new("French!", language: "fr") }
+ let(:en_parent_title) { RDF::Literal.new("Parent English!", language: "en") }
+ let(:fr_parent_title) { RDF::Literal.new("Parent French!", language: "fr") }
+
+ it "should return literals" do
+ graph << [object, RDF::Vocab::DC.title, title.canonicalize.object]
+ graph << [object, RDF::Vocab::DC.isPartOf, parent]
+ graph << [object, RDF::Vocab::DC.description, en_description]
+ graph << [object, RDF::Vocab::DC.description, fr_description]
+ graph << [object, RDF::URI.new("info:intProperty"), 1]
+ graph << [object, RDF::URI.new("info:intProperty"), "garbage"]
+ graph << [object, RDF::URI.new("info:numericProperty"), "1"]
+ graph << [parent, RDF::Vocab::DC.title, parent_title.canonicalize.object]
+ graph << [child, RDF::Vocab::DC.isPartOf, object]
+ graph << [child, RDF::Vocab::DC.title, child_title.canonicalize.object]
+ graph << [parent, RDF::Vocab::DC.title, en_parent_title]
+ graph << [parent, RDF::Vocab::DC.title, fr_parent_title]
+ graph << [parent, RDF::Vocab::DC.isPartOf, grandparent]
+
+ result = subject.evaluate object, context: graph, maintain_literals: true
+ expect(result["title"]).to match_array RDF::Literal.new("Hello, world!")
+ expect(result["parent_title"]).to match_array [parent_title, en_parent_title, fr_parent_title]
+ expect(result["parent_title_en"]).to match_array en_parent_title
+ expect(result["self"]).to match_array(object.to_s)
+ expect(result["str_wildcard"]).to include title, parent.to_s
+ expect(result["uri_wildcard"]).to include parent
+ expect(result["child_title"]).to match_array child_title
+ expect(result["titles"]).to match_array [title, parent_title, child_title, en_parent_title, fr_parent_title]
+ expect(result["no_titles"]).to be_empty
+ expect(result["recursive"]).to match_array [parent, grandparent]
+ expect(result["en_description"].first.to_s).to eq "English!"
+ expect(result["conditional"]).to match_array parent
+ expect(result["conditional_false"]).to be_empty
+ expect(result["int_value"]).to match_array RDF::Literal::Integer.new("1")
+ expect(result["numeric_value"]).to match_array RDF::Literal::Integer.new("1")
+ expect(result["escaped_string"]).to match_array '\"'
+ expect(result["and_test"]).to be_empty
+ expect(result["or_test"]).to match_array(object)
+ expect(result["is_test"]).to match_array(object)
+ expect(result["is_not_test"]).to be_empty
+ end
+ end
end
describe "functions" do