From 3db999c085c828b3f4b27cade6d14a60c644c2bd Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Fri, 26 Jul 2024 12:37:19 -0300 Subject: [PATCH] GH-5090 Lucene 9 version of the Lucene SAIL - Copied the Lucene SAIL implementation and upgraded it to Lucene 9 Signed-off-by: Richard Eckart de Castilho --- core/sail/lucene-v9/pom.xml | 85 ++ .../sail/lucene/impl/LuceneDocument.java | 199 +++ .../lucene/impl/LuceneDocumentDistance.java | 67 + .../lucene/impl/LuceneDocumentResult.java | 44 + .../sail/lucene/impl/LuceneDocumentScore.java | 55 + .../rdf4j/sail/lucene/impl/LuceneIndex.java | 1126 +++++++++++++++++ .../sail/lucene/impl/LuceneIndexNIOFS.java | 38 + .../rdf4j/sail/lucene/impl/LuceneQuery.java | 75 ++ .../rdf4j/sail/lucene/impl/ReaderMonitor.java | 75 ++ .../lucene/impl/config/LuceneSailConfig.java | 33 + .../lucene/impl/config/LuceneSailFactory.java | 65 + .../org.eclipse.rdf4j.sail.config.SailFactory | 1 + .../lucene/LuceneFuzzinessPrefixTest.java | 142 +++ .../lucene/LuceneIndexIdFilteringTest.java | 437 +++++++ .../rdf4j/sail/lucene/MultiParamTest.java | 467 +++++++ .../rdf4j/sail/lucene/TypeSpecTest.java | 491 +++++++ .../lucene/examples/LuceneSailExample.java | 149 +++ .../impl/AbstractGenericLuceneTest.java | 849 +++++++++++++ .../lucene/impl/LuceneIndexLocationTest.java | 112 ++ .../sail/lucene/impl/LuceneIndexTest.java | 573 +++++++++ .../sail/lucene/impl/LuceneSailTest.java | 28 + .../src/test/resources/logback-test.xml | 12 + core/sail/pom.xml | 1 + 23 files changed, 5124 insertions(+) create mode 100644 core/sail/lucene-v9/pom.xml create mode 100644 core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneDocument.java create mode 100644 core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneDocumentDistance.java create mode 100644 core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneDocumentResult.java create mode 100644 
core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneDocumentScore.java create mode 100644 core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneIndex.java create mode 100644 core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneIndexNIOFS.java create mode 100644 core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneQuery.java create mode 100644 core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/ReaderMonitor.java create mode 100644 core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/config/LuceneSailConfig.java create mode 100644 core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/config/LuceneSailFactory.java create mode 100644 core/sail/lucene-v9/src/main/resources/META-INF/services/org.eclipse.rdf4j.sail.config.SailFactory create mode 100644 core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/LuceneFuzzinessPrefixTest.java create mode 100644 core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/LuceneIndexIdFilteringTest.java create mode 100644 core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/MultiParamTest.java create mode 100644 core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/TypeSpecTest.java create mode 100644 core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/examples/LuceneSailExample.java create mode 100644 core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/impl/AbstractGenericLuceneTest.java create mode 100644 core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneIndexLocationTest.java create mode 100644 core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneIndexTest.java create mode 100644 core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneSailTest.java create mode 100644 core/sail/lucene-v9/src/test/resources/logback-test.xml diff --git a/core/sail/lucene-v9/pom.xml 
b/core/sail/lucene-v9/pom.xml new file mode 100644 index 00000000000..66bcd71be1a --- /dev/null +++ b/core/sail/lucene-v9/pom.xml @@ -0,0 +1,85 @@ + + + 4.0.0 + + org.eclipse.rdf4j + rdf4j-sail + 5.0.2-SNAPSHOT + + rdf4j-sail-lucene-v9 + RDF4J: Lucene Sail Index + StackableSail implementation offering full-text search on literals, based on Apache Lucene. + + 9.11.1 + + + + ${project.groupId} + rdf4j-sail-lucene-api + ${project.version} + + + org.apache.lucene + lucene-core + ${lucene.version} + + + org.apache.lucene + lucene-queries + ${lucene.version} + + + org.apache.lucene + lucene-highlighter + ${lucene.version} + + + org.apache.lucene + lucene-analysis-common + ${lucene.version} + + + org.apache.lucene + lucene-queryparser + ${lucene.version} + + + org.apache.lucene + lucene-spatial-extras + ${lucene.version} + + + org.apache.lucene + lucene-backward-codecs + ${lucene.version} + runtime + + + ${project.groupId} + rdf4j-queryalgebra-geosparql + ${project.version} + test + + + ${project.groupId} + rdf4j-rio-rdfxml + ${project.version} + test + + + ${project.groupId} + rdf4j-sail-memory + ${project.version} + test + + + org.locationtech.jts + jts-core + + + org.junit.vintage + junit-vintage-engine + test + + + diff --git a/core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneDocument.java b/core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneDocument.java new file mode 100644 index 00000000000..ff30f5b6e24 --- /dev/null +++ b/core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneDocument.java @@ -0,0 +1,199 @@ +/******************************************************************************* + * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others. + * + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lucene.impl; + +import java.io.IOException; +import java.text.ParseException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.LatLonPoint; +import org.apache.lucene.document.LatLonShape; +import org.apache.lucene.geo.Line; +import org.apache.lucene.geo.Polygon; +import org.apache.lucene.geo.Rectangle; +import org.apache.lucene.geo.SimpleWKTShapeParser; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.sandbox.document.LatLonBoundingBox; +import org.apache.lucene.spatial.SpatialStrategy; +import org.eclipse.rdf4j.sail.lucene.LuceneSail; +import org.eclipse.rdf4j.sail.lucene.SearchDocument; +import org.eclipse.rdf4j.sail.lucene.SearchFields; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Function; + +public class LuceneDocument implements SearchDocument { + + private final Document doc; + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private static final String POINT_FIELD_PREFIX = "_pt_"; + private static final String GEO_FIELD_PREFIX = "_geo_"; + + private final Function geoStrategyMapper; + + public LuceneDocument(Function geoStrategyMapper) { + this(new Document(), geoStrategyMapper); + } + + public LuceneDocument(Document doc, Function geoStrategyMapper) { + this.doc = doc; + this.geoStrategyMapper = geoStrategyMapper; + } + + public LuceneDocument(String id, String resourceId, String context, + Function 
geoStrategyMapper) { + this(geoStrategyMapper); + setId(id); + setResource(resourceId); + setContext(context); + } + + private void setId(String id) { + LuceneIndex.addIDField(id, doc); + } + + private void setContext(String context) { + LuceneIndex.addContextField(context, doc); + } + + private void setResource(String resourceId) { + LuceneIndex.addResourceField(resourceId, doc); + } + + public Document getDocument() { + return doc; + } + + @Override + public String getId() { + return doc.get(SearchFields.ID_FIELD_NAME); + } + + @Override + public String getResource() { + return doc.get(SearchFields.URI_FIELD_NAME); + } + + @Override + public String getContext() { + return doc.get(SearchFields.CONTEXT_FIELD_NAME); + } + + @Override + public Set getPropertyNames() { + List fields = doc.getFields(); + Set names = new HashSet<>(); + for (IndexableField field : fields) { + String name = field.name(); + if (SearchFields.isPropertyField(name)) { + names.add(name); + } + } + return names; + } + + @Override + public void addProperty(String name) { + // don't need to do anything + } + + /** + * Stores and indexes a property in a Document. We don't have to recalculate the concatenated text: just add another + * TEXT field and Lucene will take care of this. Additional advantage: Lucene may be able to handle the invididual + * strings in a way that may affect e.g. phrase and proximity searches (concatenation basically means loss of + * information). NOTE: The TEXT_FIELD_NAME has to be stored, see in LuceneSail + * + * @see LuceneSail + */ + @Override + public void addProperty(String name, String text) { + LuceneIndex.addPredicateField(name, text, doc); + LuceneIndex.addTextField(text, doc); + } + + /** + * Checks whether a field occurs with a specified value in a Document. 
+ */ + @Override + public boolean hasProperty(String fieldName, String value) { + String[] fields = doc.getValues(fieldName); + if (fields != null) { + for (String field : fields) { + if (value.equals(field)) { + return true; + } + } + } + + return false; + } + + @Override + public List getProperty(String name) { + return Arrays.asList(doc.getValues(name)); + } + + private void indexShape(Object shape, String field) { + + if (shape instanceof Object[]) { // case of GEOMETRYCOLLECTION + Object[] geometries = (Object[]) shape; + + for (int i = 0; i < geometries.length; i++) { + indexShape(geometries[i], field); + } + } else { + if (shape instanceof Polygon) { // WKT:POLYGON + for (Field f : LatLonShape.createIndexableFields(GEO_FIELD_PREFIX + field, (Polygon) shape)) { + doc.add(f); + } + } else if (shape instanceof Line) { // WKT:LINESTRING + for (Field f : LatLonShape.createIndexableFields(GEO_FIELD_PREFIX + field, (Line) shape)) { + doc.add(f); + } + } else if (shape instanceof double[]) { // WKT:POINT + double[] point = (double[]) shape; + + for (Field f : LatLonShape.createIndexableFields(GEO_FIELD_PREFIX + field, point[1], + point[0])) { + doc.add(f); + } + doc.add(new LatLonPoint(POINT_FIELD_PREFIX + field, point[1], point[0])); + } else if (shape instanceof Rectangle) { // WKT:ENVELOPE / RECTANGLE + Rectangle box = (Rectangle) shape; + doc.add(new LatLonBoundingBox(GEO_FIELD_PREFIX + field, box.minLat, box.minLon, box.maxLat, + box.maxLon)); + } else { + throw new IllegalArgumentException("Geometry for shape " + shape.toString() + " is not supported"); + } + } + } + + @Override + public void addGeoProperty(String field, String value) { + LuceneIndex.addStoredOnlyPredicateField(field, value, doc); + try { + String wkt = value; + Object shape = SimpleWKTShapeParser.parse(wkt); + indexShape(shape, field); + } catch (ParseException e) { + logger.warn("error while processing geo property", e); + } catch (IOException e) { + logger.warn("error while parsing wkt 
geometry", e); + } + } +} diff --git a/core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneDocumentDistance.java b/core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneDocumentDistance.java new file mode 100644 index 00000000000..1849d7a9b61 --- /dev/null +++ b/core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneDocumentDistance.java @@ -0,0 +1,67 @@ +/******************************************************************************* + * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lucene.impl; + +import java.text.ParseException; +import java.util.List; +import java.util.Set; + +import org.apache.lucene.search.ScoreDoc; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.sail.lucene.DocumentDistance; +import org.eclipse.rdf4j.sail.lucene.SearchFields; +import org.eclipse.rdf4j.sail.lucene.util.GeoUnits; +import org.locationtech.spatial4j.shape.Point; +import org.locationtech.spatial4j.shape.Shape; + +import com.google.common.collect.Sets; + +public class LuceneDocumentDistance extends LuceneDocumentResult implements DocumentDistance { + + private final String geoProperty; + + private final IRI units; + + private final Point origin; + + private static Set requiredFields(String geoProperty, boolean includeContext) { + Set fields = Sets.newHashSet(SearchFields.URI_FIELD_NAME, geoProperty); + if (includeContext) { + fields.add(SearchFields.CONTEXT_FIELD_NAME); + } + return fields; + } + + public LuceneDocumentDistance(ScoreDoc doc, String geoProperty, 
IRI units, Point origin, boolean includeContext, + LuceneIndex index) { + super(doc, index, requiredFields(geoProperty, includeContext)); + this.geoProperty = geoProperty; + this.units = units; + this.origin = origin; + } + + @Override + public double getDistance() { + List wkts = getDocument().getProperty(geoProperty); + double min = Double.POSITIVE_INFINITY; + for (String wkt : wkts) { + Shape shape; + try { + shape = index.getSpatialContext(geoProperty).readShapeFromWkt(wkt); + double dist = index.getSpatialContext(geoProperty).calcDistance(shape.getCenter(), origin); + min = Math.min(dist, min); + } catch (ParseException e) { + // ignore + } + } + return GeoUnits.fromDegrees(min, units); + } +} diff --git a/core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneDocumentResult.java b/core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneDocumentResult.java new file mode 100644 index 00000000000..76ee809bc8d --- /dev/null +++ b/core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneDocumentResult.java @@ -0,0 +1,44 @@ +/******************************************************************************* + * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lucene.impl; + +import java.util.Set; + +import org.apache.lucene.document.Document; +import org.apache.lucene.search.ScoreDoc; +import org.eclipse.rdf4j.sail.lucene.DocumentResult; +import org.eclipse.rdf4j.sail.lucene.SearchDocument; + +public class LuceneDocumentResult implements DocumentResult { + + protected final ScoreDoc scoreDoc; + + protected final LuceneIndex index; + + private final Set fields; + + private LuceneDocument fullDoc; + + public LuceneDocumentResult(ScoreDoc doc, LuceneIndex index, Set fields) { + this.scoreDoc = doc; + this.index = index; + this.fields = fields; + } + + @Override + public SearchDocument getDocument() { + if (fullDoc == null) { + Document doc = index.getDocument(scoreDoc.doc, fields); + fullDoc = new LuceneDocument(doc, index.getSpatialStrategyMapper()); + } + return fullDoc; + } +} diff --git a/core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneDocumentScore.java b/core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneDocumentScore.java new file mode 100644 index 00000000000..ec69beb9f11 --- /dev/null +++ b/core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneDocumentScore.java @@ -0,0 +1,55 @@ +/******************************************************************************* + * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lucene.impl; + +import java.util.Collections; +import java.util.List; +import java.util.Set; + +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.highlight.Highlighter; +import org.eclipse.rdf4j.sail.lucene.DocumentScore; +import org.eclipse.rdf4j.sail.lucene.SearchFields; + +import com.google.common.collect.Iterables; + +public class LuceneDocumentScore extends LuceneDocumentResult implements DocumentScore { + + private final Highlighter highlighter; + + private static Set requiredFields(boolean all) { + return all ? null : Collections.singleton(SearchFields.URI_FIELD_NAME); + } + + public LuceneDocumentScore(ScoreDoc doc, Highlighter highlighter, LuceneIndex index) { + super(doc, index, requiredFields(highlighter != null)); + this.highlighter = highlighter; + } + + @Override + public float getScore() { + return scoreDoc.score; + } + + @Override + public boolean isHighlighted() { + return (highlighter != null); + } + + @Override + public Iterable getSnippets(final String field) { + List values = getDocument().getProperty(field); + if (values == null) { + return null; + } + return Iterables.transform(values, (String text) -> index.getSnippet(field, text, highlighter)); + } +} diff --git a/core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneIndex.java b/core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneIndex.java new file mode 100644 index 00000000000..dacb1c02cab --- /dev/null +++ b/core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneIndex.java @@ -0,0 +1,1126 @@ +/******************************************************************************* + * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others. + * + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lucene.impl; + +import static org.eclipse.rdf4j.sail.lucene.LuceneSail.FUZZY_PREFIX_LENGTH_KEY; + +import java.io.IOException; +import java.io.StringReader; +import java.lang.reflect.UndeclaredThrowableException; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Supplier; + +import org.apache.commons.lang3.math.NumberUtils; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.LatLonPoint; +import org.apache.lucene.document.LatLonShape; +import org.apache.lucene.document.ShapeField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.geo.Line; +import org.apache.lucene.geo.Polygon; +import org.apache.lucene.geo.Rectangle; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import 
org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.StoredFieldVisitor; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.queryparser.classic.QueryParser; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.BoostQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.highlight.Formatter; +import org.apache.lucene.search.highlight.Highlighter; +import org.apache.lucene.search.highlight.QueryScorer; +import org.apache.lucene.search.highlight.SimpleHTMLFormatter; +import org.apache.lucene.search.similarities.ClassicSimilarity; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.spatial.SpatialStrategy; +import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy; +import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; +import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTreeFactory; +import org.apache.lucene.spatial.query.SpatialOperation; +import org.apache.lucene.store.ByteBuffersDirectory; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.util.Bits; +import org.eclipse.rdf4j.common.iterator.EmptyIterator; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.vocabulary.GEOF; +import org.eclipse.rdf4j.query.MalformedQueryException; +import org.eclipse.rdf4j.query.algebra.Var; +import org.eclipse.rdf4j.sail.SailException; 
+import org.eclipse.rdf4j.sail.lucene.AbstractLuceneIndex; +import org.eclipse.rdf4j.sail.lucene.AbstractReaderMonitor; +import org.eclipse.rdf4j.sail.lucene.BulkUpdater; +import org.eclipse.rdf4j.sail.lucene.DocumentDistance; +import org.eclipse.rdf4j.sail.lucene.DocumentResult; +import org.eclipse.rdf4j.sail.lucene.DocumentScore; +import org.eclipse.rdf4j.sail.lucene.LuceneSail; +import org.eclipse.rdf4j.sail.lucene.QuerySpec; +import org.eclipse.rdf4j.sail.lucene.SearchDocument; +import org.eclipse.rdf4j.sail.lucene.SearchFields; +import org.eclipse.rdf4j.sail.lucene.SimpleBulkUpdater; +import org.eclipse.rdf4j.sail.lucene.util.GeoUnits; +import org.locationtech.spatial4j.context.SpatialContext; +import org.locationtech.spatial4j.context.SpatialContextFactory; +import org.locationtech.spatial4j.shape.Point; +import org.locationtech.spatial4j.shape.Shape; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Function; +import com.google.common.collect.Iterables; +import com.google.common.collect.Sets; + +/** + * A LuceneIndex is a one-stop-shop abstraction of a Lucene index. It takes care of proper synchronization of + * IndexReaders, IndexWriters and IndexSearchers in a way that is suitable for a LuceneSail. + * + * @see LuceneSail + */ +public class LuceneIndex extends AbstractLuceneIndex { + + static { + // do NOT set this to Integer.MAX_VALUE, because this breaks fuzzy + // queries + BooleanQuery.setMaxClauseCount(1024 * 1024); + } + + private static final String GEO_FIELD_PREFIX = "_geo_"; + private static final String POINT_FIELD_PREFIX = "_pt_"; + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + /** + * The Directory that holds the Lucene index files. + */ + private volatile Directory directory; + + /** + * The Analyzer used to tokenize strings and queries. 
+ */ + private volatile Analyzer analyzer; + + private volatile Analyzer queryAnalyzer; + + private volatile Similarity similarity; + + private volatile int fuzzyPrefixLength; + + /** + * The IndexWriter that can be used to alter the index' contents. Created lazily. + */ + private volatile IndexWriter indexWriter; + + /** + * This holds IndexReader and IndexSearcher. + */ + protected volatile ReaderMonitor currentMonitor; + + private volatile Function geoStrategyMapper; + + private final AtomicBoolean closed = new AtomicBoolean(false); + + public LuceneIndex() { + } + + /** + * Constructor for keeping backwards compatibility. + * + * @param directory + * @param analyzer + * @throws IOException + */ + public LuceneIndex(Directory directory, Analyzer analyzer) throws IOException { + this(directory, analyzer, new ClassicSimilarity()); + } + + /** + * Creates a new LuceneIndex. + * + * @param directory The Directory in which an index can be found and/or in which index files are written. + * @param analyzer The Analyzer that will be used for tokenizing strings to index and queries. + * @param similarity The Similarity that will be used for scoring. + * @throws IOException When the Directory could not be unlocked. 
+ */ + public LuceneIndex(Directory directory, Analyzer analyzer, Similarity similarity) throws IOException { + this.directory = directory; + this.analyzer = analyzer; + this.queryAnalyzer = analyzer; + this.similarity = similarity; + this.geoStrategyMapper = createSpatialStrategyMapper(Collections.emptyMap()); + + postInit(); + } + + @SuppressWarnings("unchecked") + @Override + public synchronized void initialize(Properties parameters) throws Exception { + super.initialize(parameters); + this.directory = createDirectory(parameters); + this.analyzer = createAnalyzer(parameters); + this.queryAnalyzer = createQueryAnalyzer(parameters); + this.similarity = createSimilarity(parameters); + // slightly hacky cast to cope with the fact that Properties is + // Map + // even though it is effectively Map + this.geoStrategyMapper = createSpatialStrategyMapper((Map) (Map) parameters); + + if (parameters.containsKey(FUZZY_PREFIX_LENGTH_KEY)) { + this.fuzzyPrefixLength = NumberUtils.toInt(parameters.getProperty(FUZZY_PREFIX_LENGTH_KEY), 0); + } + + postInit(); + } + + protected Directory createDirectory(Properties parameters) throws IOException { + Directory dir; + if (parameters.containsKey(LuceneSail.LUCENE_DIR_KEY)) { + dir = FSDirectory.open(Paths.get(parameters.getProperty(LuceneSail.LUCENE_DIR_KEY))); + } else if (parameters.containsKey(LuceneSail.LUCENE_RAMDIR_KEY) + && "true".equals(parameters.getProperty(LuceneSail.LUCENE_RAMDIR_KEY))) { + dir = new ByteBuffersDirectory(); + } else { + throw new IOException("No luceneIndex set, and no '" + LuceneSail.LUCENE_DIR_KEY + "' or '" + + LuceneSail.LUCENE_RAMDIR_KEY + "' parameter given. 
"); + } + return dir; + } + + protected Analyzer createAnalyzer(Properties parameters) throws Exception { + return createAnalyzerWithFallback(parameters, LuceneSail.ANALYZER_CLASS_KEY, StandardAnalyzer::new); + } + + protected Analyzer createQueryAnalyzer(Properties parameters) throws Exception { + return createAnalyzerWithFallback(parameters, LuceneSail.QUERY_ANALYZER_CLASS_KEY, StandardAnalyzer::new); + } + + private Analyzer createAnalyzerWithFallback(Properties parameters, String parameterKey, Supplier fallback) + throws Exception { + Analyzer a; + if (parameters.containsKey(parameterKey)) { + a = (Analyzer) Class.forName(parameters.getProperty(LuceneSail.ANALYZER_CLASS_KEY)) + .getDeclaredConstructor() + .newInstance(); + } else { + a = fallback.get(); + } + return a; + } + + protected Similarity createSimilarity(Properties parameters) throws Exception { + Similarity s; + if (parameters.containsKey(LuceneSail.SIMILARITY_CLASS_KEY)) { + s = (Similarity) Class.forName(parameters.getProperty(LuceneSail.SIMILARITY_CLASS_KEY)).newInstance(); + } else { + s = new ClassicSimilarity(); + } + return s; + } + + private void postInit() throws IOException { + // do some initialization for new indices + if (!DirectoryReader.indexExists(directory)) { + logger.debug("creating new Lucene index in directory {}", directory); + IndexWriterConfig indexWriterConfig = getIndexWriterConfig(); + indexWriterConfig.setOpenMode(OpenMode.CREATE); + IndexWriter writer = new IndexWriter(directory, indexWriterConfig); + writer.close(); + } + } + + protected Function createSpatialStrategyMapper(Map parameters) { + ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); + SpatialContext geoContext = SpatialContextFactory.makeSpatialContext(parameters, classLoader); + final SpatialPrefixTree spt = SpatialPrefixTreeFactory.makeSPT(parameters, classLoader, geoContext); + return (String field) -> new RecursivePrefixTreeStrategy(spt, GEO_FIELD_PREFIX + field); + } + + @Override + 
protected SpatialContext getSpatialContext(String property) { + return geoStrategyMapper.apply(property).getSpatialContext(); + } + + // //////////////////////////////// Setters and getters + + public Directory getDirectory() { + return directory; + } + + public Analyzer getAnalyzer() { + return analyzer; + } + + @VisibleForTesting + Analyzer getQueryAnalyzer() { + return analyzer; + } + + public Function getSpatialStrategyMapper() { + return geoStrategyMapper; + } + + // //////////////////////////////// Methods for controlled index access + // For quick'n'easy access to reader, the indexreader is returned directly + // result LuceneQueryIterators use the more elaborate + // ReaderMonitor directly to be able to close the reader when they + // are done. + + public synchronized IndexReader getIndexReader() throws IOException { + if (closed.get()) { + throw new SailException("Index has been closed"); + } + return getIndexSearcher().getIndexReader(); + } + + public synchronized IndexSearcher getIndexSearcher() throws IOException { + if (closed.get()) { + throw new SailException("Index has been closed"); + } + IndexSearcher indexSearcher = getCurrentMonitor().getIndexSearcher(); + indexSearcher.setSimilarity(similarity); + return indexSearcher; + } + + /** + * Current monitor holds instance of IndexReader and IndexSearcher It is used to keep track of readers + */ + @Override + public synchronized ReaderMonitor getCurrentMonitor() { + if (closed.get()) { + throw new SailException("Index has been closed"); + } + if (currentMonitor == null) { + currentMonitor = new ReaderMonitor(this, directory); + } + return currentMonitor; + } + + public synchronized IndexWriter getIndexWriter() throws IOException { + if (closed.get()) { + throw new SailException("Index has been closed"); + } + if (indexWriter == null || !indexWriter.isOpen()) { + IndexWriterConfig indexWriterConfig = getIndexWriterConfig(); + indexWriter = new IndexWriter(directory, indexWriterConfig); + } + return 
	/**
	 * Shuts the index down: closes the current reader monitor, any remaining old monitors, and finally the index
	 * writer. Idempotent — only the first call performs the shutdown.
	 *
	 * @throws IOException if closing any resource fails
	 */
	@Override
	public void shutDown() throws IOException {
		// try-finally setup ensures that closing of an instance is not skipped
		// when an earlier instance resulted in an IOException
		// FIXME: is there a more elegant way to ensure this?
		if (closed.compareAndSet(false, true)) {
			try {
				// Closing the current monitor releases the IndexReader and IndexSearcher it holds
				// (the monitor closes them once no query iterator is still reading from them)
				ReaderMonitor toCloseCurrentMonitor = currentMonitor;
				currentMonitor = null;
				if (toCloseCurrentMonitor != null) {
					toCloseCurrentMonitor.close();
				}
			} finally {
				List<Throwable> exceptions = new ArrayList<>();
				try {
					synchronized (oldmonitors) {
						if (!oldmonitors.isEmpty()) {
							logger.warn(
									"LuceneSail: On shutdown {} IndexReaders were not closed. This is due to non-closed Query Iterators, which must be closed!",
									oldmonitors.size());
						}
						for (AbstractReaderMonitor monitor : oldmonitors) {
							try {
								monitor.close();
							} catch (Throwable e) {
								// collect instead of rethrowing so the remaining monitors still get closed
								exceptions.add(e);
							}
						}
						oldmonitors.clear();
					}
				} finally {
					try {
						IndexWriter toCloseIndexWriter = indexWriter;
						indexWriter = null;
						if (toCloseIndexWriter != null) {
							toCloseIndexWriter.close();
						}
					} finally {
						// surface the first monitor-close failure only after every close was attempted
						if (!exceptions.isEmpty()) {
							throw new UndeclaredThrowableException(exceptions.get(0));
						}
					}
				}
			}
		}
	}
new LuceneDocument(document, geoStrategyMapper) : null; + } + + @Override + protected synchronized Iterable getDocuments(String resourceId) throws IOException { + List docs = getDocuments(new Term(SearchFields.URI_FIELD_NAME, resourceId)); + return Iterables.transform(docs, (Document doc) -> new LuceneDocument(doc, geoStrategyMapper)); + } + + @Override + protected synchronized SearchDocument newDocument(String id, String resourceId, String context) { + return new LuceneDocument(id, resourceId, context, geoStrategyMapper); + } + + @Override + protected synchronized SearchDocument copyDocument(SearchDocument doc) { + Document document = ((LuceneDocument) doc).getDocument(); + Document newDocument = new Document(); + + // add all existing fields (including id, uri, context, and text) + for (IndexableField oldField : document.getFields()) { + newDocument.add(oldField); + } + return new LuceneDocument(newDocument, geoStrategyMapper); + } + + @Override + protected synchronized void addDocument(SearchDocument doc) throws IOException { + getIndexWriter().addDocument(((LuceneDocument) doc).getDocument()); + } + + @Override + protected synchronized void updateDocument(SearchDocument doc) throws IOException { + getIndexWriter().updateDocument(idTerm(doc.getId()), ((LuceneDocument) doc).getDocument()); + } + + @Override + protected synchronized void deleteDocument(SearchDocument doc) throws IOException { + getIndexWriter().deleteDocuments(idTerm(doc.getId())); + } + + @Override + protected synchronized BulkUpdater newBulkUpdate() { + return new SimpleBulkUpdater(this); + } + + private Term idTerm(String id) { + return new Term(SearchFields.ID_FIELD_NAME, id); + } + + /** + * Returns a Document representing the specified document ID (combination of resource and context), or null when no + * such Document exists yet. 
	/**
	 * Returns the single live document matching {@code term} in this leaf reader, or {@code null} when none
	 * exists. ID terms are expected to be unique, so more than one live match is an error.
	 *
	 * @throws IllegalStateException when multiple documents carry the term
	 */
	private static Document getDocument(LeafReader reader, Term term) throws IOException {
		PostingsEnum docs = reader.postings(term);
		if (docs != null) {
			int docId = docs.nextDoc();
			// PostingsEnum may contain deleted documents, we have to cope for it
			while (docId != PostingsEnum.NO_MORE_DOCS) {

				// if document is deleted, skip and continue
				Bits liveDocs = reader.getLiveDocs();
				if (liveDocs != null && !liveDocs.get(docId)) {
					docId = docs.nextDoc();
					continue;
				}
				// NOTE(review): the next posting could itself be a *deleted* document, in which case this
				// still throws "Multiple Documents" — TODO confirm whether trailing deleted postings
				// should be tolerated here instead.
				if (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
					throw new IllegalStateException("Multiple Documents for term " + term.text());
				}
				return readDocument(reader, docId, null);
			}
		}
		// no such Document
		return null;
	}
Each + * document represent a set of statements with the specified Resource as a subject, which are stored in a specific + * context + */ + private List getDocuments(Term uriTerm) throws IOException { + List result = new ArrayList<>(); + + IndexReader reader = getIndexReader(); + List leaves = reader.leaves(); + int size = leaves.size(); + for (int i = 0; i < size; i++) { + LeafReader lreader = leaves.get(i).reader(); + addDocuments(lreader, uriTerm, result); + } + + return result; + } + + private static void addDocuments(LeafReader reader, Term term, Collection documents) throws IOException { + PostingsEnum docs = reader.postings(term); + if (docs != null) { + int docId; + while ((docId = docs.nextDoc()) != PostingsEnum.NO_MORE_DOCS) { + Bits liveDocs = reader.getLiveDocs(); + // Maybe some of the docs have been deleted! Check that too.. + if (liveDocs != null && !liveDocs.get(docId)) { + continue; + } + Document document = readDocument(reader, docId, null); + documents.add(document); + } + } + } + + /** + * Returns a Document representing the specified Resource and Context combination, or null when no such Document + * exists yet. 
+ * + * @param subject + * @param context + * @return document + * @throws IOException + */ + public synchronized Document getDocument(Resource subject, Resource context) throws IOException { + // fetch the Document representing this Resource + String resourceId = SearchFields.getResourceID(subject); + String contextId = SearchFields.getContextID(context); + Term idTerm = new Term(SearchFields.ID_FIELD_NAME, SearchFields.formIdString(resourceId, contextId)); + return getDocument(idTerm); + } + + /** + * Returns a list of Documents representing the specified Resource (empty when no such Document exists yet).Each + * document represent a set of statements with the specified Resource as a subject, which are stored in a specific + * context + * + * @param subject + * @return list of documents + * @throws IOException + */ + public synchronized List getDocuments(Resource subject) throws IOException { + String resourceId = SearchFields.getResourceID(subject); + Term uriTerm = new Term(SearchFields.URI_FIELD_NAME, resourceId); + return getDocuments(uriTerm); + } + + /** + * Stores and indexes an ID in a Document. + * + * @param id + * @param document + */ + public static void addIDField(String id, Document document) { + document.add(new StringField(SearchFields.ID_FIELD_NAME, id, Store.YES)); + } + + /** + * Add the "context" value to the doc + * + * @param context the context or null, if null-context + * @param document the document + */ + public static void addContextField(String context, Document document) { + if (context != null) { + document.add(new StringField(SearchFields.CONTEXT_FIELD_NAME, context, Store.YES)); + } + } + + /** + * Stores and indexes the resource ID in a Document. 
	/**
	 * Invalidates the readers and frees them where possible. Readers still held open by a
	 * {@link LuceneQueryConnection} are not closed immediately; they are parked in {@code oldmonitors} and closed
	 * later. Synchronized on oldmonitors because it manipulates them.
	 *
	 * @throws IOException on index access errors
	 */
	private void invalidateReaders() throws IOException {
		synchronized (oldmonitors) {
			// Move current monitor to old monitors and set null
			if (currentMonitor != null) {
				// we do NOT close it directly as it may be used by an open result iterator,
				// hence moving it to the list of oldmonitors where it is handled like the
				// other older monitors
				oldmonitors.add(currentMonitor);
			}
			currentMonitor = null;

			// close all monitors if possible
			for (Iterator<AbstractReaderMonitor> i = oldmonitors.iterator(); i.hasNext();) {
				AbstractReaderMonitor monitor = i.next();
				if (monitor.closeWhenPossible()) {
					i.remove();
				}
			}

			// check if all readers were closed
			if (oldmonitors.isEmpty()) {
				logger.debug("Deleting unused files from Lucene index");

				// clean up unused files (marked as 'deletable' in Luke Filewalker)
				getIndexWriter().deleteUnusedFiles();

				// logIndexStats();
			}
		}
	}
	/**
	 * Commits any changes done to the LuceneIndex since the last commit. The semantics is synchronous to
	 * SailConnection.commit(), i.e. the LuceneIndex should be committed/rolled back whenever the LuceneSailConnection
	 * is committed/rolled back.
	 */
	@Override
	public synchronized void commit() throws IOException {
		getIndexWriter().commit();
		// the old IndexReaders/Searchers are now outdated, so invalidate them
		invalidateReaders();
	}
+ * + * @param subject subject + * @param spec spec + * @return the parsed query + * @throws MalformedQueryException when the parsing breaks + * @throws IOException + */ + @Override + protected Iterable query(Resource subject, QuerySpec spec) + throws MalformedQueryException, IOException { + Query q; + try { + q = createQuery(spec.getQueryPatterns()); + } catch (ParseException e) { + throw new MalformedQueryException(e); + } + + if (q == null) { + return EmptyIterator::new; + } + + final Highlighter highlighter; + if (spec.isHighlight()) { + Formatter formatter = new SimpleHTMLFormatter(SearchFields.HIGHLIGHTER_PRE_TAG, + SearchFields.HIGHLIGHTER_POST_TAG); + highlighter = new Highlighter(formatter, new QueryScorer(q)); + } else { + highlighter = null; + } + + TopDocs docs; + if (subject != null) { + docs = search(subject, q); + } else { + docs = search(q); + } + return Iterables.transform(Arrays.asList(docs.scoreDocs), + (ScoreDoc doc) -> new LuceneDocumentScore(doc, highlighter, LuceneIndex.this)); + } + + /** + * create a query from the params + * + * @param queryPatterns the params + * @return boolean query for multiple params, query for single param, null for empty collection + * @throws ParseException query parsing exception + */ + private Query createQuery(Collection queryPatterns) throws ParseException { + Iterator it = queryPatterns.iterator(); + + if (!it.hasNext()) { + return null; + } + + QuerySpec.QueryParam first = it.next(); + + Query q = getQueryParser(first.getProperty()).parse(first.getQuery()); + if (!it.hasNext()) { + return q; + } + + BooleanQuery.Builder bld = new BooleanQuery.Builder(); + if (first.getBoost() != null) { + q = new BoostQuery(q, first.getBoost()); + } + bld.add(q, Occur.SHOULD); + do { + QuerySpec.QueryParam param = it.next(); + Query parsedQuery = getQueryParser(param.getProperty()).parse(param.getQuery()); + if (param.getBoost() != null) { + parsedQuery = new BoostQuery(parsedQuery, param.getBoost()); + } + 
	/**
	 * Evaluates a geo distance query: finds documents whose indexed point for {@code geoProperty} lies within
	 * {@code distance} (in {@code units}) of point {@code p}, optionally restricted to a context.
	 */
	@Override
	protected Iterable<? extends DocumentDistance> geoQuery(final IRI geoProperty, Point p, final IRI units,
			double distance, String distanceVar, Var contextVar) throws MalformedQueryException, IOException {
		double degs = GeoUnits.toDegrees(distance, units);
		final String geoField = SearchFields.getPropertyField(geoProperty);
		SpatialContext context = SpatialContext.GEO;
		// degrees radius is only used for the bounding circle handed to the result wrapper
		final Shape boundingCircle = context.getShapeFactory().circle(p, degs);

		// use LatLonPoint for distance query after indexing it with the same data structure
		// NOTE(review): `distance` is passed unconverted here, while LatLonPoint.newDistanceQuery expects the
		// radius in meters — if `units` can be anything other than meters this needs a conversion
		// (cf. GeoUnits) — TODO confirm.
		Query q = LatLonPoint.newDistanceQuery(POINT_FIELD_PREFIX + geoField, p.getY(), p.getX(), distance);
		if (contextVar != null) {
			q = addContextTerm(q, (Resource) contextVar.getValue());
		}

		TopDocs docs = search(q);
		// an unbound context variable means the result must still report the context field
		final boolean requireContext = (contextVar != null && !contextVar.hasValue());
		return Iterables.transform(Arrays.asList(docs.scoreDocs),
				(ScoreDoc doc) -> new LuceneDocumentDistance(doc, geoField, units, boundingCircle.getCenter(),
						requireContext,
						LuceneIndex.this));
	}
	/**
	 * Maps a spatial4j operation onto the {@link ShapeField.QueryRelation} used by LatLonShape queries.
	 *
	 * @param op the spatial operation to translate
	 * @return the corresponding query relation
	 * @throws IllegalArgumentException for unsupported operations
	 */
	private ShapeField.QueryRelation getRelation(SpatialOperation op) {
		switch (op.toString()) {
		case "Contains":
			// NOTE(review): "Contains" maps to INTERSECTS rather than CONTAINS — presumably a workaround
			// from when LatLonShape lacked a CONTAINS relation; Lucene 9 provides
			// ShapeField.QueryRelation.CONTAINS, so verify whether this mapping is still intended.
			return ShapeField.QueryRelation.INTERSECTS;
		case "Within":
			return ShapeField.QueryRelation.WITHIN;
		case "Disjoint":
			return ShapeField.QueryRelation.DISJOINT;
		default:
			throw new IllegalArgumentException("The geo function [" + op.toString() + "] is not supported");
		}
	}
LatLonShape.newPolygonQuery(geoField, relation, (Polygon) shape); + } else if (shape instanceof Polygon[]) { + q = LatLonShape.newPolygonQuery(geoField, relation, (Polygon[]) shape); + } else if (shape instanceof Line) { + q = LatLonShape.newLineQuery(geoField, relation, (Line) shape); + } else if (shape instanceof Line[]) { + q = LatLonShape.newLineQuery(geoField, relation, (Line[]) shape); + } else if (shape instanceof Rectangle) { + Rectangle box = (Rectangle) shape; + q = LatLonShape.newBoxQuery(geoField, relation, box.minLat, box.minLon, box.maxLat, box.maxLon); + } else if (shape instanceof Rectangle[]) { + Rectangle box = ((Rectangle[]) shape)[0]; + q = LatLonShape.newBoxQuery(geoField, relation, box.minLat, box.minLon, box.maxLat, box.maxLon); + } + return q; + } + + private SpatialOperation toSpatialOp(String relation) { + if (GEOF.SF_INTERSECTS.stringValue().equals(relation)) { + return SpatialOperation.Intersects; + } else if (GEOF.SF_DISJOINT.stringValue().equals(relation)) { + return SpatialOperation.IsDisjointTo; + } else if (GEOF.SF_EQUALS.stringValue().equals(relation)) { + return SpatialOperation.IsEqualTo; + } else if (GEOF.SF_OVERLAPS.stringValue().equals(relation)) { + return SpatialOperation.Overlaps; + } else if (GEOF.EH_COVERED_BY.stringValue().equals(relation)) { + return SpatialOperation.IsWithin; + } else if (GEOF.EH_COVERS.stringValue().equals(relation)) { + return SpatialOperation.Contains; + } else if (GEOF.SF_WITHIN.stringValue().equals(relation)) { + return SpatialOperation.IsWithin; + } else if (GEOF.EH_CONTAINS.stringValue().equals(relation)) { + return SpatialOperation.Contains; + } + return null; + } + + /** + * Returns the lucene hit with the given id of the respective lucene query + * + * @param docId the id of the document to return + * @param fieldsToLoad + * @return the requested hit, or null if it fails + */ + public synchronized Document getDocument(int docId, Set fieldsToLoad) { + try { + return 
readDocument(getIndexReader(), docId, fieldsToLoad); + } catch (CorruptIndexException e) { + logger.error("The index seems to be corrupted:", e); + return null; + } catch (IOException e) { + logger.error("Could not read from index:", e); + return null; + } + } + + public synchronized String getSnippet(String fieldName, String text, Highlighter highlighter) { + String snippet; + try { + TokenStream tokenStream = getAnalyzer().tokenStream(fieldName, new StringReader(text)); + snippet = highlighter.getBestFragments(tokenStream, text, 2, "..."); + } catch (Exception e) { + logger.error("Exception while getting snippet for field " + fieldName, e); + snippet = null; + } + return snippet; + } + + /** + * Evaluates the given query only for the given resource. + * + * @param resource + * @param query + * @return top documents + * @throws IOException + */ + public synchronized TopDocs search(Resource resource, Query query) throws IOException { + // rewrite the query + TermQuery idQuery = new TermQuery(new Term(SearchFields.URI_FIELD_NAME, SearchFields.getResourceID(resource))); + BooleanQuery.Builder combinedQuery = new BooleanQuery.Builder(); + combinedQuery.add(idQuery, Occur.MUST); + combinedQuery.add(query, Occur.MUST); + return search(combinedQuery.build()); + } + + /** + * Evaluates the given query and returns the results as a TopDocs instance. 
+ * + * @param query + * @return top documents + * @throws IOException + */ + public synchronized TopDocs search(Query query) throws IOException { + int nDocs; + if (maxDocs > 0) { + nDocs = maxDocs; + } else { + nDocs = Math.max(getIndexReader().numDocs(), 1); + } + return getIndexSearcher().search(query, nDocs); + } + + private QueryParser getQueryParser(IRI propertyURI) { + String fieldName; + // check out which query parser to use, based on the given property URI + if (propertyURI == null) { + // if we have no property given, we create a default query parser which has the TEXT_FIELD_NAME as the + // default field + fieldName = SearchFields.TEXT_FIELD_NAME; + } else { + // otherwise we create a query parser that has the given property as the default field + fieldName = SearchFields.getPropertyField(propertyURI); + } + + QueryParser queryParser = new QueryParser(fieldName, queryAnalyzer); + queryParser.setFuzzyPrefixLength(fuzzyPrefixLength); + return queryParser; + } + + /** + * @param contexts + * @throws IOException + */ + @Override + public synchronized void clearContexts(Resource... contexts) throws IOException { + logger.debug("deleting contexts: {}", Arrays.toString(contexts)); + // these resources have to be read from the underlying rdf store + // and their triples have to be added to the luceneindex after deletion + // of + // documents + + // remove all contexts passed + for (Resource context : contexts) { + // attention: context can be NULL! 
+ String contextString = SearchFields.getContextID(context); + Term contextTerm = new Term(SearchFields.CONTEXT_FIELD_NAME, contextString); + + // now delete all documents from the deleted context + getIndexWriter().deleteDocuments(contextTerm); + } + } + + /** + * + */ + @Override + public synchronized void clear() throws IOException { + if (closed.get()) { + throw new SailException("Index has been closed"); + } + // clear + // the old IndexReaders/Searchers are not outdated + invalidateReaders(); + if (indexWriter != null) { + indexWriter.close(); + } + + // crate new writer + IndexWriterConfig indexWriterConfig = getIndexWriterConfig(); + indexWriterConfig.setOpenMode(OpenMode.CREATE); + indexWriter = new IndexWriter(directory, indexWriterConfig); + indexWriter.close(); + indexWriter = null; + + } + + // + // Lucene helper methods + // + + /** + * Method produces {@link IndexWriterConfig} using settings. + * + * @return + */ + private IndexWriterConfig getIndexWriterConfig() { + IndexWriterConfig cnf = new IndexWriterConfig(analyzer); + cnf.setSimilarity(similarity); + return cnf; + } + + private static boolean isDeleted(IndexReader reader, int docId) { + if (reader.hasDeletions()) { + List leaves = reader.leaves(); + int size = leaves.size(); + for (int i = 0; i < size; i++) { + Bits liveDocs = leaves.get(i).reader().getLiveDocs(); + if (docId < liveDocs.length()) { + boolean isDeleted = !liveDocs.get(docId); + if (isDeleted) { + return true; + } + } + } + return false; + } else { + return false; + } + } + + private static Document readDocument(IndexReader reader, int docId, Set fieldsToLoad) throws IOException { + DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(fieldsToLoad); + reader.document(docId, visitor); + return visitor.getDocument(); + } + + static class DocumentStoredFieldVisitor extends StoredFieldVisitor { + + private final Set fieldsToLoad; + + private final Document document = new Document(); + + DocumentStoredFieldVisitor(Set 
fieldsToLoad) { + this.fieldsToLoad = fieldsToLoad; + } + + @Override + public Status needsField(FieldInfo fieldInfo) throws IOException { + return (fieldsToLoad == null || fieldsToLoad.contains(fieldInfo.name)) ? Status.YES : Status.NO; + } + + @Override + public void stringField(FieldInfo fieldInfo, String stringValue) throws IOException { + String name = fieldInfo.name; + if (SearchFields.ID_FIELD_NAME.equals(name)) { + addIDField(stringValue, document); + } else if (SearchFields.CONTEXT_FIELD_NAME.equals(name)) { + addContextField(stringValue, document); + } else if (SearchFields.URI_FIELD_NAME.equals(name)) { + addResourceField(stringValue, document); + } else if (SearchFields.TEXT_FIELD_NAME.equals(name)) { + addTextField(stringValue, document); + } else { + addPredicateField(name, stringValue, document); + } + } + + Document getDocument() { + return document; + } + } +} diff --git a/core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneIndexNIOFS.java b/core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneIndexNIOFS.java new file mode 100644 index 00000000000..01b387bead3 --- /dev/null +++ b/core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneIndexNIOFS.java @@ -0,0 +1,38 @@ +/******************************************************************************* + * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lucene.impl; + +import java.io.IOException; +import java.nio.file.Paths; +import java.util.Properties; + +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.NIOFSDirectory; +import org.eclipse.rdf4j.sail.lucene.LuceneSail; + +/** + * LuceneIndex which uses a NIOFSDirectory instead of MMapDirectory to avoid the JVM crash (see + * http:// + * stackoverflow.com/questions/8224843/jvm-crashes-on-lucene-datainput- readvint). + * + * @author andriy.nikolov + */ +public class LuceneIndexNIOFS extends LuceneIndex { + + @Override + protected Directory createDirectory(Properties parameters) throws IOException { + if (parameters.containsKey(LuceneSail.LUCENE_DIR_KEY)) { + return new NIOFSDirectory(Paths.get(parameters.getProperty(LuceneSail.LUCENE_DIR_KEY))); + } else { + return super.createDirectory(parameters); + } + } +} diff --git a/core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneQuery.java b/core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneQuery.java new file mode 100644 index 00000000000..bc49a6549dd --- /dev/null +++ b/core/sail/lucene-v9/src/main/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneQuery.java @@ -0,0 +1,75 @@ +/******************************************************************************* + * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lucene.impl; + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.highlight.Formatter; +import org.apache.lucene.search.highlight.Highlighter; +import org.apache.lucene.search.highlight.QueryScorer; +import org.apache.lucene.search.highlight.SimpleHTMLFormatter; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.sail.lucene.DocumentScore; +import org.eclipse.rdf4j.sail.lucene.SearchFields; +import org.eclipse.rdf4j.sail.lucene.SearchQuery; + +import com.google.common.base.Function; +import com.google.common.collect.Iterables; + +/** + * To be removed, no longer used. + */ +@Deprecated +public class LuceneQuery implements SearchQuery { + + private final Query query; + + private final LuceneIndex index; + + private Highlighter highlighter; + + @Deprecated + public LuceneQuery(Query q, LuceneIndex index) { + this.query = q; + this.index = index; + } + + @Override + @Deprecated + public Iterable query(Resource resource) throws IOException { + TopDocs docs; + if (resource != null) { + docs = index.search(resource, query); + } else { + docs = index.search(query); + } + return Iterables.transform(Arrays.asList(docs.scoreDocs), new Function<>() { + + @Override + public DocumentScore apply(ScoreDoc doc) { + return new LuceneDocumentScore(doc, highlighter, index); + } + }); + } + + @Override + @Deprecated + public void highlight(IRI property) { + Formatter formatter = new SimpleHTMLFormatter(SearchFields.HIGHLIGHTER_PRE_TAG, + SearchFields.HIGHLIGHTER_POST_TAG); + highlighter = new Highlighter(formatter, new QueryScorer(query)); + } +} diff --git 
/*******************************************************************************
 * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Distribution License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 *******************************************************************************/
package org.eclipse.rdf4j.sail.lucene.impl;

import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.eclipse.rdf4j.sail.lucene.AbstractReaderMonitor;

/**
 * ReaderMonitor holds an IndexReader and IndexSearcher. When the ReaderMonitor is closed, it does not close the
 * IndexReader and IndexSearcher as long as someone is still reading from them; the reading counter inherited from
 * {@link AbstractReaderMonitor} remembers how many readers are active.
 *
 * @author Tomasz Trela, DFKI Gmbh
 */
public class ReaderMonitor extends AbstractReaderMonitor {

	/**
	 * The IndexSearcher that can be used to query the current index' contents.
	 */
	private IndexSearcher indexSearcher;

	// If opening the reader failed, the IOException is stored here and re-thrown on access
	// instead of propagating out of the constructor.
	private IOException indexSearcherCreateException;

	/**
	 * If an exception occurs while creating the indexReader, it will be thrown from {@link #getIndexSearcher()}
	 * rather than from this constructor.
	 *
	 * @param index     the owning index, passed to the superclass for reader accounting
	 * @param directory initializes the IndexReader
	 */
	public ReaderMonitor(final LuceneIndex index, Directory directory) {
		super(index);
		try {
			IndexReader indexReader = DirectoryReader.open(directory);
			indexSearcher = new IndexSearcher(indexReader);
		} catch (IOException e) {
			indexSearcherCreateException = e;
		}
	}

	/**
	 * Closes the underlying IndexReader (and with it the searcher) once no one is reading anymore.
	 *
	 * @throws IOException if closing the reader fails
	 */
	@Override
	protected void handleClose() throws IOException {
		try {
			if (indexSearcher != null) {
				indexSearcher.getIndexReader().close();
			}
		} finally {
			// Always drop the reference so a failed close cannot be retried on a half-closed searcher.
			indexSearcher = null;
		}
	}

	// //////////////////////////////Methods for controlled index access

	/**
	 * Returns the searcher created in the constructor.
	 *
	 * @throws IOException the exception raised when the reader was opened, if opening failed
	 */
	protected IndexSearcher getIndexSearcher() throws IOException {
		if (indexSearcherCreateException != null) {
			throw indexSearcherCreateException;
		}
		return indexSearcher;
	}

}
/*******************************************************************************
 * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Distribution License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 *******************************************************************************/
package org.eclipse.rdf4j.sail.lucene.impl.config;

import org.eclipse.rdf4j.sail.config.SailImplConfig;
import org.eclipse.rdf4j.sail.lucene.config.AbstractLuceneSailConfig;

/**
 * Configuration bean for a LuceneSail backed by this module's Lucene index implementation. All behavior is inherited
 * from {@link AbstractLuceneSailConfig}; this class only pins the sail type to {@link LuceneSailFactory#SAIL_TYPE}.
 */
public class LuceneSailConfig extends AbstractLuceneSailConfig {

	/** Creates an empty configuration; index directory and delegate can be set later. */
	public LuceneSailConfig() {
		super(LuceneSailFactory.SAIL_TYPE);
	}

	/** Creates a configuration wrapping the given delegate sail configuration. */
	public LuceneSailConfig(SailImplConfig delegate) {
		super(LuceneSailFactory.SAIL_TYPE, delegate);
	}

	/** Creates a configuration storing the Lucene index in the given directory. */
	public LuceneSailConfig(String luceneDir) {
		super(LuceneSailFactory.SAIL_TYPE, luceneDir);
	}

	/** Creates a configuration with both an index directory and a delegate sail configuration. */
	public LuceneSailConfig(String luceneDir, SailImplConfig delegate) {
		super(LuceneSailFactory.SAIL_TYPE, luceneDir, delegate);
	}
}
/*******************************************************************************
 * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Distribution License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/org/documents/edl-v10.php.
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lucene.impl.config; + +import org.eclipse.rdf4j.sail.Sail; +import org.eclipse.rdf4j.sail.config.SailConfigException; +import org.eclipse.rdf4j.sail.config.SailFactory; +import org.eclipse.rdf4j.sail.config.SailImplConfig; +import org.eclipse.rdf4j.sail.lucene.LuceneSail; +import org.eclipse.rdf4j.sail.lucene.config.AbstractLuceneSailConfig; +import org.eclipse.rdf4j.sail.lucene.impl.LuceneIndex; + +/** + * A {@link SailFactory} that creates {@link LuceneSail}s based on RDF configuration data. + */ +public class LuceneSailFactory implements SailFactory { + + /** + * The type of repositories that are created by this factory. + * + * @see SailFactory#getSailType() + */ + public static final String SAIL_TYPE = "openrdf:LuceneSail"; + + /** + * Returns the Sail's type: openrdf:LuceneSail. + */ + @Override + public String getSailType() { + return SAIL_TYPE; + } + + @Override + public SailImplConfig getConfig() { + return new LuceneSailConfig(); + } + + @Override + public Sail getSail(SailImplConfig config) throws SailConfigException { + if (!SAIL_TYPE.equals(config.getType())) { + throw new SailConfigException("Invalid Sail type: " + config.getType()); + } + + LuceneSail luceneSail = new LuceneSail(); + luceneSail.setParameter(LuceneSail.INDEX_CLASS_KEY, LuceneIndex.class.getName()); + + if (config instanceof AbstractLuceneSailConfig) { + AbstractLuceneSailConfig luceneConfig = (AbstractLuceneSailConfig) config; + luceneSail.setParameter(LuceneSail.LUCENE_DIR_KEY, luceneConfig.getIndexDir()); + for (String key : luceneConfig.getParameterNames()) { + luceneSail.setParameter(key, luceneConfig.getParameter(key)); + } + } + + return luceneSail; + } +} diff --git a/core/sail/lucene-v9/src/main/resources/META-INF/services/org.eclipse.rdf4j.sail.config.SailFactory 
b/core/sail/lucene-v9/src/main/resources/META-INF/services/org.eclipse.rdf4j.sail.config.SailFactory new file mode 100644 index 00000000000..ba0b5f2dcaa --- /dev/null +++ b/core/sail/lucene-v9/src/main/resources/META-INF/services/org.eclipse.rdf4j.sail.config.SailFactory @@ -0,0 +1 @@ +org.eclipse.rdf4j.sail.lucene.impl.config.LuceneSailFactory diff --git a/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/LuceneFuzzinessPrefixTest.java b/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/LuceneFuzzinessPrefixTest.java new file mode 100644 index 00000000000..753c8e73d32 --- /dev/null +++ b/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/LuceneFuzzinessPrefixTest.java @@ -0,0 +1,142 @@ +/******************************************************************************* + * Copyright (c) 2022 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lucene; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.eclipse.rdf4j.query.QueryLanguage.SPARQL; +import static org.eclipse.rdf4j.sail.lucene.LuceneSail.FUZZY_PREFIX_LENGTH_KEY; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; + +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.TupleQuery; +import org.eclipse.rdf4j.query.TupleQueryResult; +import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; +import org.eclipse.rdf4j.sail.memory.MemoryStore; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +public class LuceneFuzzinessPrefixTest { + private static final ValueFactory VF = SimpleValueFactory.getInstance(); + private static final String NAMESPACE = "http://example.org/"; + private static final String PREFIXES = joinLines( + "PREFIX search: ", + "PREFIX rdfs: ", + "PREFIX ex: <" + NAMESPACE + ">"); + + private static IRI iri(String name) { + return VF.createIRI(NAMESPACE + name); + } + + private static String joinLines(String... 
lines) { + return String.join(" \n", lines); + } + + private LuceneSail sail; + private MemoryStore memoryStore; + private SailRepository repository; + @TempDir + private File dataDir; + + @BeforeEach + public void setup() { + memoryStore = new MemoryStore(); + sail = new LuceneSail(); + sail.setParameter(LuceneSail.LUCENE_DIR_KEY, "lucene-index"); + sail.setParameter(LuceneSail.INDEX_CLASS_KEY, LuceneSail.DEFAULT_INDEX_CLASS); + } + + private void initSail() { + sail.setBaseSail(memoryStore); + repository = new SailRepository(sail); + repository.setDataDir(dataDir); + repository.init(); + + add( + VF.createStatement(iri("element1"), iri("text"), VF.createLiteral("eclipse")), + VF.createStatement(iri("element2"), iri("text"), VF.createLiteral("foundation")), + VF.createStatement(iri("element3"), iri("text"), VF.createLiteral("ide")) + ); + } + + private void add(Statement... statements) { + try (SailRepositoryConnection connection = repository.getConnection()) { + for (Statement stmt : statements) { + connection.add(stmt); + } + } + } + + @Test + public void testFuzzinessPrefixLength_default() { + // Arrange + initSail(); + + // Act + List results = executeQuery(); + + // Assert + assertThat(results).containsExactlyInAnyOrder("element1"); + } + + @Test + public void testFuzzinessPrefixLength_custom() { + // Arrange + sail.setParameter(FUZZY_PREFIX_LENGTH_KEY, "1"); + initSail(); + + // Act + List results = executeQuery(); + + // Assert + assertThat(results).containsExactlyInAnyOrder("element1"); + } + + @Test + public void testFuzzinessPrefixLength_custom_shouldExcludeResult() { + // Arrange + sail.setParameter(FUZZY_PREFIX_LENGTH_KEY, "2"); + initSail(); + + // Act + List results = executeQuery(); + + // Assert + assertThat(results).isEmpty(); + } + + private List executeQuery() { + List results = new ArrayList<>(); + try (SailRepositoryConnection connection = repository.getConnection()) { + TupleQuery query = connection.prepareTupleQuery(SPARQL, PREFIXES + "\n" + 
joinLines( + "SELECT ?result {", + " ?result search:matches ?match .", + " ?match search:query 'eXlipse~1' .", + "}")); + + try (TupleQueryResult result = query.evaluate()) { + for (BindingSet set : result) { + String element = set.getValue("result").stringValue().substring(NAMESPACE.length()); + results.add(element); + } + } + } + return results; + } +} diff --git a/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/LuceneIndexIdFilteringTest.java b/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/LuceneIndexIdFilteringTest.java new file mode 100644 index 00000000000..ce8a2d5bc0e --- /dev/null +++ b/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/LuceneIndexIdFilteringTest.java @@ -0,0 +1,437 @@ +/******************************************************************************* + * Copyright (c) 2022 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lucene; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.fail; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; + +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.QueryLanguage; +import org.eclipse.rdf4j.query.TupleQuery; +import org.eclipse.rdf4j.query.TupleQueryResult; +import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; +import org.eclipse.rdf4j.sail.evaluation.TupleFunctionEvaluationMode; +import org.eclipse.rdf4j.sail.memory.MemoryStore; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.collect.Lists; + +public class LuceneIndexIdFilteringTest { + + private static final Logger LOG = LoggerFactory.getLogger(LuceneIndexIdFilteringTest.class); + private static final ValueFactory VF = SimpleValueFactory.getInstance(); + private static final String NAMESPACE = "http://example.org/"; + private static final String PREFIXES = joinLines( + "PREFIX search: ", + "PREFIX rdfs: ", + "PREFIX ex: <" + NAMESPACE + ">"); + + private static IRI iri(String name) { + return VF.createIRI(NAMESPACE + name); + } + + private static String joinLines(String... 
lines) { + return String.join(" \n", lines); + } + + LuceneSail sailType1, sailType2, sailType3; + SailRepository repository; + + @BeforeEach + public void setup(@TempDir File dataDir) { + // sails schema + // sailType1(LuceneSail) -> sailType2(LuceneSail) -> sailType3(LuceneSail) -> memoryStore(MemoryStore) + + MemoryStore memoryStore = new MemoryStore(); + + // sail with the ex:text3 filter + sailType3 = new LuceneSail(); + sailType3.setParameter(LuceneSail.INDEXEDFIELDS, "index.1=" + NAMESPACE + "text3"); + sailType3.setParameter(LuceneSail.LUCENE_DIR_KEY, "lucene-index3"); + sailType3.setParameter(LuceneSail.INDEX_CLASS_KEY, LuceneSail.DEFAULT_INDEX_CLASS); + sailType3.setEvaluationMode(TupleFunctionEvaluationMode.TRIPLE_SOURCE); + sailType3.setBaseSail(memoryStore); + + // sail with the ex:text2 filter + sailType2 = new LuceneSail(); + sailType2.setParameter(LuceneSail.INDEXEDFIELDS, "index.1=" + NAMESPACE + "text2"); + sailType2.setParameter(LuceneSail.LUCENE_DIR_KEY, "lucene-index2"); + sailType2.setParameter(LuceneSail.INDEX_CLASS_KEY, LuceneSail.DEFAULT_INDEX_CLASS); + sailType2.setEvaluationMode(TupleFunctionEvaluationMode.NATIVE); + sailType2.setBaseSail(sailType3); + + // sail with the ex:text1 filter + sailType1 = new LuceneSail(); + sailType1.setParameter(LuceneSail.INDEXEDFIELDS, "index.1=" + NAMESPACE + "text1"); + sailType1.setParameter(LuceneSail.LUCENE_DIR_KEY, "lucene-index1"); + sailType1.setParameter(LuceneSail.INDEX_CLASS_KEY, LuceneSail.DEFAULT_INDEX_CLASS); + sailType1.setEvaluationMode(TupleFunctionEvaluationMode.NATIVE); + sailType1.setBaseSail(sailType2); + sailType1.setDataDir(dataDir); + } + + private void initSails() { + repository = new SailRepository(sailType1); + repository.init(); + + // add test elements + add( + VF.createStatement(iri("element1"), iri("text1"), VF.createLiteral("text")), + VF.createStatement(iri("element2"), iri("text1"), VF.createLiteral("text")), + VF.createStatement(iri("element2"), iri("text2"), 
VF.createLiteral("text")), + VF.createStatement(iri("element3"), iri("text3"), VF.createLiteral("text")) + ); + } + + private void add(Statement... statements) { + try (SailRepositoryConnection connection = repository.getConnection()) { + for (Statement stmt : statements) { + connection.add(stmt); + } + } + } + + private void assertSearchQuery(String queryStr, String... exceptedElements) { + // using a list for duplicates + List exceptedDocSet = Lists.newArrayList(exceptedElements); + + try (SailRepositoryConnection connection = repository.getConnection()) { + // fire a query with the subject pre-specified + TupleQuery query = connection.prepareTupleQuery(QueryLanguage.SPARQL, PREFIXES + "\n" + queryStr); + try (TupleQueryResult result = query.evaluate()) { + while (result.hasNext()) { + BindingSet set = result.next(); + String element = set.getValue("result").stringValue().substring(NAMESPACE.length()); + if (!exceptedDocSet.remove(element)) { + LOG.error("Docs: " + exceptedDocSet); + LOG.error("Remaining:"); + while (result.hasNext()) { + set = result.next(); + LOG.error("- {}", set.getValue("result").stringValue().substring(NAMESPACE.length())); + } + fail("The element '" + element + "' was in the index, but wasn't excepted"); + } + } + } + + if (!exceptedDocSet.isEmpty()) { + fail("Unexpected docs: " + exceptedDocSet); + } + } + } + + @Test + public void noConfigTest() { + // no config + initSails(); + + assertSearchQuery(joinLines( + "SELECT ?result {", + " ?result search:matches ?match .", + " ?match search:query 'text' .", + "}" + ), "element1", "element2"); + } + + @Test + public void idConfigTest() { + sailType1.setParameter(LuceneSail.INDEX_ID, NAMESPACE + "lucene1"); + sailType2.setParameter(LuceneSail.INDEX_ID, NAMESPACE + "lucene2"); + sailType3.setParameter(LuceneSail.INDEX_ID, NAMESPACE + "lucene3"); + initSails(); + + // try query on index 1 + assertSearchQuery(joinLines( + "SELECT ?result {", + " ?result search:matches ?match .", + " ?match 
search:indexid ex:lucene1 .", + " ?match search:query 'text' .", + "}" + ), "element1", "element2"); + + // try query on index 2 + assertSearchQuery(joinLines( + "SELECT ?result {", + " ?result search:matches ?match .", + " ?match search:indexid ex:lucene2 .", + " ?match search:query 'text' .", + "}" + ), "element2"); + + // try query on index 3 + assertSearchQuery(joinLines( + "SELECT ?result {", + " ?result search:matches ?match .", + " ?match search:indexid ex:lucene3 .", + " ?match search:query 'text' .", + "}" + ), "element3"); + + // try query on index 2 and 3 + assertSearchQuery(joinLines( + "SELECT ?result {", + " {", + " ?result search:matches ?match .", + " ?match search:indexid ex:lucene2 .", + " ?match search:query 'text' .", + " } UNION {", + " ?result search:matches ?match2 .", + " ?match2 search:indexid ex:lucene3 .", + " ?match2 search:query 'text' .", + " }", + "}" + ), "element2", "element3"); + + // try query on index 1 and 2 + assertSearchQuery(joinLines( + "SELECT ?result {", + " {", + " ?result search:matches ?match .", + " ?match search:indexid ex:lucene1 .", + " ?match search:query 'text' .", + " } UNION {", + " ?result search:matches ?match2 .", + " ?match2 search:indexid ex:lucene2 .", + " ?match2 search:query 'text' .", + " }", + "}" + ), "element1", "element2", "element2"); + + // try query on index 1, 2 and 3 + assertSearchQuery(joinLines( + "SELECT ?result {", + " {", + " ?result search:matches ?match .", + " ?match search:indexid ex:lucene2 .", + " ?match search:query 'text' .", + " } UNION {", + " ?result search:matches ?match2 .", + " ?match2 search:indexid ex:lucene3 .", + " ?match2 search:query 'text' .", + " } UNION {", + " ?result search:matches ?match3 .", + " ?match3 search:indexid ex:lucene1 .", + " ?match3 search:query 'text' .", + " }", + "}" + ), "element1", "element2", "element2", "element3"); + } + + private void assertSearchQuery(String queryStr, boolean union, QueryElement... 
exceptedElements) { + // using a list for duplicates + try (SailRepositoryConnection connection = repository.getConnection()) { + // fire a query with the subject pre-specified + TupleQuery query = connection.prepareTupleQuery(QueryLanguage.SPARQL, PREFIXES + "\n" + queryStr); + try (TupleQueryResult result = query.evaluate()) { + while (result.hasNext()) { + BindingSet set = result.next(); + Value elementValue = null; + for (QueryElement el : exceptedElements) { + // union lines shouldn't be containing multiple results + if (union && elementValue != null) { + assertNull(set.getValue(el.resultName), "union test returns multiple results"); + continue; + } + elementValue = set.getValue(el.resultName); + if (elementValue == null) { + continue; + } + + String element = elementValue.stringValue().substring(NAMESPACE.length()); + if (!el.elements.remove(element)) { + LOG.error("Docs: " + el.elements); + LOG.error("Remaining:"); + while (result.hasNext()) { + set = result.next(); + LOG.error("- {}", set); + } + fail("The element '" + element + "' was in the index " + + el.resultName + ", but wasn't excepted"); + } + } + assertNotNull(elementValue, "No element for the set: " + set); + } + } + + List missing = new ArrayList<>(); + + // check for missing elements + for (QueryElement el : exceptedElements) { + if (!el.elements.isEmpty()) { + missing.add(el); + } + } + + if (!missing.isEmpty()) { + fail("Unexpected docs: " + missing); + } + } + } + + private void assertUnionSearchQuery(String queryStr, QueryElement... exceptedElements) { + assertSearchQuery(queryStr, true, exceptedElements); + } + + private void assertJoinSearchQuery(String queryStr, QueryElement... 
exceptedElements) { + assertSearchQuery(queryStr, false, exceptedElements); + } + + @Test + public void idConfigUnionTest() { + sailType1.setParameter(LuceneSail.INDEX_ID, NAMESPACE + "lucene1"); + sailType2.setParameter(LuceneSail.INDEX_ID, NAMESPACE + "lucene2"); + sailType3.setParameter(LuceneSail.INDEX_ID, NAMESPACE + "lucene3"); + initSails(); + + // try query on index 2 and 3 + assertUnionSearchQuery(joinLines( + "SELECT ?result ?result2 {", + " {", + " ?result search:matches ?match .", + " ?match search:indexid ex:lucene2 .", + " ?match search:query 'text' .", + " } UNION {", + " ?result2 search:matches ?match2 .", + " ?match2 search:indexid ex:lucene3 .", + " ?match2 search:query 'text' .", + " }", + "}" + ), + new QueryElement("result", "element2"), + new QueryElement("result2", "element3")); + + // try query on index 1 and 2 + assertUnionSearchQuery(joinLines( + "SELECT ?result ?result2 {", + " {", + " ?result search:matches ?match .", + " ?match search:indexid ex:lucene1 .", + " ?match search:query 'text' .", + " } UNION {", + " ?result2 search:matches ?match2 .", + " ?match2 search:indexid ex:lucene2 .", + " ?match2 search:query 'text' .", + " }", + "}" + ), + new QueryElement("result", "element1", "element2"), + new QueryElement("result2", "element2")); + + // try query on index 1, 2 and 3 + assertUnionSearchQuery(joinLines( + "SELECT ?result ?result2 ?result3 {", + " {", + " ?result search:matches ?match .", + " ?match search:indexid ex:lucene1 .", + " ?match search:query 'text' .", + " } UNION {", + " ?result2 search:matches ?match2 .", + " ?match2 search:indexid ex:lucene2 .", + " ?match2 search:query 'text' .", + " } UNION {", + " ?result3 search:matches ?match3 .", + " ?match3 search:indexid ex:lucene3 .", + " ?match3 search:query 'text' .", + " }", + "}" + ), + new QueryElement("result", "element1", "element2"), + new QueryElement("result2", "element2"), + new QueryElement("result3", "element3")); + } + + @Test + public void idConfigJoinTest() { 
+ sailType1.setParameter(LuceneSail.INDEX_ID, NAMESPACE + "lucene1"); + sailType2.setParameter(LuceneSail.INDEX_ID, NAMESPACE + "lucene2"); + sailType3.setParameter(LuceneSail.INDEX_ID, NAMESPACE + "lucene3"); + initSails(); + add( + VF.createStatement(iri("element4"), iri("text2"), VF.createLiteral("text")), + VF.createStatement(iri("element5"), iri("text3"), VF.createLiteral("text")), + VF.createStatement(iri("element4"), iri("friend"), iri("element5")) + ); + + // try query on index 2 and 3 + assertJoinSearchQuery(joinLines( + "SELECT ?result1 ?result2 {", + " {", + " ?result1 search:matches ?match .", + " ?match search:indexid ex:lucene2 .", + " ?match search:query 'text' .", + "", + " ?result2 search:matches ?match2 .", + " ?match2 search:indexid ex:lucene3 .", + " ?match2 search:query 'text' .", + " }", + "}" + ), + // twice the elements because we are doing result1 x result2 + new QueryElement("result1", "element2", "element4", "element2", "element4"), + new QueryElement("result2", "element3", "element5", "element3", "element5")); + + // try query on index 2 and 3 with a join on the result + assertJoinSearchQuery(joinLines( + "SELECT ?result1 ?result2 {", + " {", + " ?result1 search:matches ?match .", + " ?match search:indexid ex:lucene2 .", + " ?match search:query 'text' .", + "", + " ?result2 search:matches ?match2 .", + " ?match2 search:indexid ex:lucene3 .", + " ?match2 search:query 'text' .", + "", + " ?result1 ex:friend ?result2 .", + " }", + "}" + ), + new QueryElement("result1", "element4"), + new QueryElement("result2", "element5")); + } + + static class QueryElement { + + List elements; + String resultName; + + /** + * a result element of a query, shouldn't be used twice + * + * @param resultName the result object to use + * @param exceptedElements the excepted element + */ + QueryElement(String resultName, String... 
exceptedElements) { + this.resultName = resultName; + this.elements = Lists.newArrayList(exceptedElements); + } + + @Override + public String toString() { + return elements + ": " + resultName; + } + } +} diff --git a/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/MultiParamTest.java b/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/MultiParamTest.java new file mode 100644 index 00000000000..8397cbe32b3 --- /dev/null +++ b/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/MultiParamTest.java @@ -0,0 +1,467 @@ +/******************************************************************************* + * Copyright (c) 2022 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lucene; + +import static org.eclipse.rdf4j.model.util.Values.literal; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; + +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.util.Values; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.TupleQueryResult; +import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; +import org.eclipse.rdf4j.sail.evaluation.TupleFunctionEvaluationMode; +import org.eclipse.rdf4j.sail.memory.MemoryStore; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; 
+import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +public class MultiParamTest { + private static final String NAMESPACE = "http://example.org/"; + private static final String PREFIXES = joinLines( + "PREFIX search: ", + "PREFIX rdfs: ", + "PREFIX ex: <" + NAMESPACE + ">"); + + private static IRI iri(String name) { + return Values.iri(NAMESPACE + name); + } + + private static String joinLines(String... lines) { + return String.join(" \n", lines); + } + + private static final IRI elem1 = iri("elem1"); + private static final IRI elem2 = iri("elem2"); + private static final IRI elem3 = iri("elem3"); + private static final IRI elem4 = iri("elem4"); + private static final IRI elem5 = iri("elem5"); + private static final IRI elem6 = iri("elem6"); + private static final IRI elem7 = iri("elem7"); + + private static final IRI p1 = iri("p1"); + private static final IRI p2 = iri("p2"); + private static final IRI p3 = iri("p3"); + + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + + LuceneSail luceneSail; + SailRepository repository; + SailRepositoryConnection conn; + + @Before + public void setup() throws IOException { + MemoryStore memoryStore = new MemoryStore(); + // sail with the ex:text1 filter + luceneSail = new LuceneSail(); + luceneSail.setParameter(LuceneSail.INDEX_CLASS_KEY, LuceneSail.DEFAULT_INDEX_CLASS); + luceneSail.setEvaluationMode(TupleFunctionEvaluationMode.NATIVE); + luceneSail.setBaseSail(memoryStore); + luceneSail.setDataDir(tmpFolder.newFolder()); + repository = new SailRepository(luceneSail); + repository.init(); + + // add test elements + conn = repository.getConnection(); + conn.begin(); + + conn.add(elem1, p1, literal("aaa")); + conn.add(elem1, p2, literal("bbb")); + conn.add(elem1, p3, literal("ccc")); + + conn.add(elem2, p1, literal("aaa")); + conn.add(elem2, p2, literal("ddd")); + conn.add(elem2, p3, literal("ccc")); + + conn.add(elem3, p1, literal("ddd")); + conn.add(elem3, p2, literal("bbb")); + 
conn.add(elem3, p3, literal("fff")); + + conn.add(elem4, p1, literal("ddd")); + conn.add(elem4, p2, literal("ggg")); + conn.add(elem4, p3, literal("ccc")); + + conn.add(elem5, p1, literal("hhh")); + conn.add(elem5, p2, literal("eee")); + conn.add(elem5, p3, literal("aaa")); + + conn.add(elem6, p1, literal("iii zzz yyy")); + conn.add(elem6, p2, literal("jjj zzz")); + conn.add(elem6, p3, literal("kkk")); + + conn.add(elem7, p1, literal("iii zzz")); + conn.add(elem7, p2, literal("jjj zzz yyy")); + conn.add(elem7, p3, literal("kkk")); + + conn.commit(); + } + + @After + public void complete() { + try { + conn.close(); + } finally { + repository.shutDown(); + } + } + + @Test + public void testPredicateSimple() { + try (TupleQueryResult result = conn.prepareTupleQuery(joinLines( + PREFIXES, + "SELECT * {", + " ?subj search:matches [", + " search:query \"aaa\"", + " ]", + "}" + )).evaluate()) { + Set values = new HashSet<>(Set.of( + elem1.toString(), + elem2.toString(), + elem5.toString() + )); + + while (result.hasNext()) { + Value next = result.next().getValue("subj"); + assertTrue("unknown value: " + next, values.remove(next.toString())); + } + assertTrue("missing value" + values, values.isEmpty()); + } + } + + @Test + public void testPredicateMulti() { + try (TupleQueryResult result = conn.prepareTupleQuery(joinLines( + PREFIXES, + "SELECT * {", + " ?subj search:matches [", + " search:query [ ", + " search:query \"aaa\"", + " ]", + " ]", + "}" + )).evaluate()) { + Set values = new HashSet<>(Set.of( + elem1.toString(), + elem2.toString(), + elem5.toString() + )); + + while (result.hasNext()) { + Value next = result.next().getValue("subj"); + assertTrue("unknown value: " + next, values.remove(next.toString())); + } + assertTrue("missing value" + values, values.isEmpty()); + } + } + + @Test + public void testMultiPredicate() { + try (TupleQueryResult result = conn.prepareTupleQuery(joinLines( + PREFIXES, + "SELECT * {", + " ?subj search:matches [", + " search:query [ ", 
+ " search:query \"aaa\" ;", + " search:property ex:p1", + " ]", + " ]", + "}" + )).evaluate()) { + Set values = new HashSet<>(Set.of( + elem1.toString(), + elem2.toString() + )); + + while (result.hasNext()) { + Value next = result.next().getValue("subj"); + assertTrue("unknown value: " + next, values.remove(next.toString())); + } + assertTrue("missing value" + values, values.isEmpty()); + } + } + + @Test + public void testMultiQuery() { + try (TupleQueryResult result = conn.prepareTupleQuery(joinLines( + PREFIXES, + "SELECT * {", + " ?subj search:matches [", + " search:query", + " [", + " search:query \"aaa\" ;", + " search:property ex:p1", + " ] , [", + " search:query \"bbb\" ;", + " search:property ex:p2", + " ]", + " ]", + "}" + )).evaluate()) { + Set values = new HashSet<>(Set.of( + elem1.toString(), + elem2.toString(), + elem3.toString() + )); + + while (result.hasNext()) { + BindingSet binding = result.next(); + Value next = binding.getValue("subj"); + assertTrue("unknown value: " + next, values.remove(next.toString())); + } + assertTrue("missing value" + values, values.isEmpty()); + } + } + + @Test + public void testMultiSnippetQuery() { + try (TupleQueryResult result = conn.prepareTupleQuery(joinLines( + PREFIXES, + "SELECT * {", + " ?subj search:matches [", + " search:query", + " [", + " search:query \"aaa\" ;", + " search:property ex:p1 ;", + " search:snippet ?sp1 ;", + " ] , [", + " search:query \"bbb\" ;", + " search:property ex:p2 ;", + " search:snippet ?sp2 ;", + " ]", + " ]", + "}" + )).evaluate()) { + Set values = new HashSet<>(Set.of( + elem1 + ":\"aaa\":\"bbb\"", + elem2 + ":\"aaa\":null", + elem3 + ":null:\"bbb\"" + )); + + while (result.hasNext()) { + BindingSet bindings = result.next(); + Value next = bindings.getValue("subj"); + Value snippet1 = bindings.getValue("sp1"); + Value snippet2 = bindings.getValue("sp2"); + String obj = next + ":" + snippet1 + ":" + snippet2; + assertTrue("unknown value: " + obj, values.remove(obj)); + } + 
assertTrue("missing value" + values, values.isEmpty()); + } + } + + @Test + public void testMultiOrderQuery() { + try (TupleQueryResult result = conn.prepareTupleQuery(joinLines( + PREFIXES, + "SELECT * {", + " ?subj search:matches [", + " search:query", + " [", + " search:query \"iii\" ;", + " search:property ex:p1 ;", + " search:boost 0.2 ;", + " ] , [", + " search:query \"jjj\" ;", + " search:property ex:p2 ;", + " search:boost 0.8 ;", + " ] ;", + " search:score ?score", + " ]", + "}" + )).evaluate()) { + String[] values = new String[] { + elem6.toString(), + elem7.toString() + }; + Iterator it = Arrays.stream(values).iterator(); + + while (result.hasNext()) { + if (!it.hasNext()) { + do { + System.out.println(result.next()); + } while (result.hasNext()); + fail("too many binding"); + } + BindingSet bindings = result.next(); + String exceptedValue = it.next(); + Value next = bindings.getValue("subj"); + assertEquals(exceptedValue, next.toString()); + } + if (it.hasNext()) { + do { + System.out.println(it.next()); + } while (it.hasNext()); + fail(); + } + } + try (TupleQueryResult result = conn.prepareTupleQuery(joinLines( + PREFIXES, + "SELECT * {", + " ?subj search:matches [", + " search:query", + " [", + " search:query \"iii\" ;", + " search:property ex:p1 ;", + " search:boost 0.8 ;", + " ] , [", + " search:query \"jjj\" ;", + " search:property ex:p2 ;", + " search:boost 0.2 ;", + " ] ;", + " search:score ?score", + " ]", + "}" + )).evaluate()) { + String[] values = new String[] { + elem7.toString(), + elem6.toString() + }; + Iterator it = Arrays.stream(values).iterator(); + + while (result.hasNext()) { + if (!it.hasNext()) { + do { + System.out.println(result.next()); + } while (result.hasNext()); + fail("too many binding"); + } + BindingSet bindings = result.next(); + String exceptedValue = it.next(); + Value next = bindings.getValue("subj"); + assertEquals(exceptedValue, next.toString()); + } + if (it.hasNext()) { + do { + System.out.println(it.next()); + } 
while (it.hasNext()); + fail(); + } + } + } + + @Test + public void testMultiOrderSnippetQuery() { + try (TupleQueryResult result = conn.prepareTupleQuery(joinLines( + PREFIXES, + "SELECT * {", + " ?subj search:matches [", + " search:query", + " [", + " search:query \"iii\" ;", + " search:property ex:p1 ;", + " search:boost 0.2 ;", + " search:snippet ?sp1 ;", + " ] , [", + " search:query \"jjj\" ;", + " search:property ex:p2 ;", + " search:boost 0.8 ;", + " search:snippet ?sp2 ;", + " ] ;", + " search:score ?score", + " ]", + "}" + )).evaluate()) { + String[] values = new String[] { + elem6 + ":iii zzz yyy:jjj zzz", + elem7 + ":iii zzz:jjj zzz yyy" + }; + Iterator it = Arrays.stream(values).iterator(); + + while (result.hasNext()) { + if (!it.hasNext()) { + do { + System.out.println(result.next()); + } while (result.hasNext()); + fail("too many binding"); + } + String exceptedValue = it.next(); + BindingSet bindings = result.next(); + Value snippetValue1 = bindings.getValue("sp1"); + String snippet1 = snippetValue1 == null ? "" : snippetValue1.stringValue(); + Value snippetValue2 = bindings.getValue("sp2"); + String snippet2 = snippetValue2 == null ? 
"" : snippetValue2.stringValue(); + Value next = bindings.getValue("subj"); + String actualValue = next + ":" + snippet1 + ":" + snippet2; + assertEquals(exceptedValue, actualValue); + } + if (it.hasNext()) { + do { + System.out.println(it.next()); + } while (it.hasNext()); + fail(); + } + } + try (TupleQueryResult result = conn.prepareTupleQuery(joinLines( + PREFIXES, + "SELECT * {", + " ?subj search:matches [", + " search:query", + " [", + " search:query \"iii\" ;", + " search:property ex:p1 ;", + " search:boost 0.8 ;", + " search:snippet ?sp1 ;", + " ] , [", + " search:query \"jjj\" ;", + " search:property ex:p2 ;", + " search:boost 0.2 ;", + " search:snippet ?sp2 ;", + " ] ;", + " search:score ?score", + " ]", + "}" + )).evaluate()) { + String[] values = new String[] { + elem7 + ":iii zzz:jjj zzz yyy", + elem6 + ":iii zzz yyy:jjj zzz" + }; + Iterator it = Arrays.stream(values).iterator(); + + while (result.hasNext()) { + if (!it.hasNext()) { + do { + System.out.println(result.next()); + } while (result.hasNext()); + fail("too many binding"); + } + BindingSet bindings = result.next(); + String exceptedValue = it.next(); + Value snippetValue1 = bindings.getValue("sp1"); + String snippet1 = snippetValue1 == null ? "" : snippetValue1.stringValue(); + Value snippetValue2 = bindings.getValue("sp2"); + String snippet2 = snippetValue2 == null ? 
"" : snippetValue2.stringValue(); + Value next = bindings.getValue("subj"); + String actualValue = next + ":" + snippet1 + ":" + snippet2; + assertEquals(exceptedValue, actualValue); + } + if (it.hasNext()) { + do { + System.out.println(it.next()); + } while (it.hasNext()); + fail(); + } + } + } +} diff --git a/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/TypeSpecTest.java b/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/TypeSpecTest.java new file mode 100644 index 00000000000..bf1f6328516 --- /dev/null +++ b/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/TypeSpecTest.java @@ -0,0 +1,491 @@ +/******************************************************************************* + * Copyright (c) 2022 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lucene; + +import static org.junit.jupiter.api.Assertions.fail; + +import java.io.File; +import java.util.Set; + +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.model.vocabulary.RDF; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.QueryLanguage; +import org.eclipse.rdf4j.query.TupleQuery; +import org.eclipse.rdf4j.query.TupleQueryResult; +import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; +import org.eclipse.rdf4j.sail.memory.MemoryStore; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.collect.Sets; + +public class TypeSpecTest { + + private static final Logger LOG = LoggerFactory.getLogger(TypeSpecTest.class); + private static final ValueFactory VF = SimpleValueFactory.getInstance(); + private static final String EX_NS = "http://example.org/"; + private static final String PREDICATE_TYPEOF = EX_NS + "typeof"; + private static final String PREDICATE_TEXT = EX_NS + "text"; + + private static Statement typeof(String subject) { + return VF.createStatement( + VF.createIRI(EX_NS + subject), + VF.createIRI(PREDICATE_TYPEOF), + VF.createIRI(EX_NS + "type1") + ); + } + + private static Statement typeRDF(String subject) { + return VF.createStatement( + VF.createIRI(EX_NS + subject), + RDF.TYPE, + VF.createIRI(EX_NS + "type2") + ); + } + + private static Statement literal(String subject, String value) { + return VF.createStatement( + VF.createIRI(EX_NS + subject), + VF.createIRI(PREDICATE_TEXT), + VF.createLiteral(value) + ); + } + + 
LuceneSail sail; + MemoryStore memoryStore; + SailRepository repository; + @TempDir + File dataDir; + + @BeforeEach + public void setup() { + memoryStore = new MemoryStore(); + // enable lock tracking + sail = new LuceneSail(); + sail.setParameter(LuceneSail.LUCENE_DIR_KEY, "lucene-index"); + sail.setParameter(LuceneSail.INDEX_CLASS_KEY, LuceneSail.DEFAULT_INDEX_CLASS); + } + + private void initSail() { + sail.setBaseSail(memoryStore); + repository = new SailRepository(sail); + repository.setDataDir(dataDir); + repository.init(); + } + + private void add(Statement... statements) { + try (SailRepositoryConnection connection = repository.getConnection()) { + connection.begin(); + for (Statement s : statements) { + connection.add(s); + } + connection.commit(); + } + } + + private void remove(Statement... statements) { + try (SailRepositoryConnection connection = repository.getConnection()) { + connection.begin(); + for (Statement s : statements) { + connection.remove(s); + } + connection.commit(); + } + } + + private void addRemove(Statement[] toAdd, Statement[] toRemove) { + try (SailRepositoryConnection connection = repository.getConnection()) { + connection.begin(); + for (Statement s : toAdd) { + connection.add(s); + } + for (Statement s : toRemove) { + connection.remove(s); + } + connection.commit(); + } + } + + /** + * assert the repository only contains the excepted documents + * + * @param exceptedDocuments the excepted documents + */ + private void assertQuery(String... exceptedDocuments) { + try (SailRepositoryConnection connection = repository.getConnection()) { + String queryStr = ""; + queryStr += "PREFIX search: "; + queryStr += "PREFIX rdfs: "; + queryStr += "SELECT DISTINCT ?result { "; + queryStr += " ?result search:matches ?match . "; + queryStr += " ?match search:query 'text' . 
}"; + + Set exceptedDocSet = Sets.newHashSet(exceptedDocuments); + + // fire a query with the subject pre-specified + TupleQuery query = connection.prepareTupleQuery(QueryLanguage.SPARQL, queryStr); + try (TupleQueryResult result = query.evaluate()) { + while (result.hasNext()) { + BindingSet set = result.next(); + String element = set.getValue("result").stringValue().substring(EX_NS.length()); + if (!exceptedDocSet.remove(element)) { + LOG.error("Docs: " + exceptedDocSet); + LOG.error("Remaining:"); + while (result.hasNext()) { + set = result.next(); + LOG.error("- {}", set.getValue("result").stringValue().substring(EX_NS.length())); + } + fail("The element '" + element + "' was in the index, but wasn't excepted"); + } + } + } + + if (!exceptedDocSet.isEmpty()) { + fail("Unexpected docs: " + exceptedDocSet); + } + } + } + + @Test + public void noConfigTest() { + // no config to add + initSail(); + + // initial data + add( + literal("aaa", "text aaa"), + literal("bbb", "text bbb"), + literal("ccc", "text ccc"), + typeof("bbb"), + typeof("eee") + ); + + assertQuery( + "aaa", "bbb", "ccc" + ); + + // test backtrace of add(aaa, typeof, type1) -> (aaa, text, "text aaa") add + add( + typeof("aaa") + ); + + assertQuery( + "aaa", "bbb", "ccc" + ); + + // test backtrace of remove(bbb, typeof, type1) -> (bbb, text, "text bbb") removed + remove( + typeof("bbb") + ); + + assertQuery( + "aaa", "bbb", "ccc" + ); + + // test add without calling sail (ddd, text, "text ddd") add + add( + typeof("ddd"), + literal("ddd", "text ddd") + ); + + assertQuery( + "aaa", "bbb", "ccc", "ddd" + ); + + // test add with calling sail (eee, text, "text eee") add + add( + literal("eee", "text eee") + ); + + assertQuery( + "aaa", "bbb", "ccc", "ddd", "eee" + ); + + // test adding and remove typeof in the same addRemove (eee, text, "text eee") shouldn't be added + addRemove( + // add + new Statement[] { + typeof("fff"), + literal("fff", "text fff") + }, + // remove + new Statement[] { + 
typeof("fff") + } + ); + + assertQuery( + "aaa", "bbb", "ccc", "ddd", "eee", "fff" + ); + + remove( + literal("aaa", "text aaa") + ); + + assertQuery( + "bbb", "ccc", "ddd", "eee", "fff" + ); + } + + @Test + public void typeTest() { + sail.setParameter(LuceneSail.INDEXEDTYPES, (PREDICATE_TYPEOF + "=" + EX_NS + "type1") + .replaceAll("[:]", "\\\\:")); + initSail(); + + // initial data + add( + literal("aaa", "text aaa"), + literal("bbb", "text bbb"), + literal("ccc", "text ccc"), + typeof("bbb"), + typeof("eee") + ); + + assertQuery( + "bbb" + ); + + // test backtrace of add(aaa, typeof, type1) -> (aaa, text, "text aaa") add + add( + typeof("aaa") + ); + + assertQuery( + "aaa", "bbb" + ); + + // test backtrace of remove(bbb, typeof, type1) -> (bbb, text, "text bbb") removed + remove( + typeof("bbb") + ); + + assertQuery( + "aaa" + ); + + // test add without calling sail (ddd, text, "text ddd") add + add( + typeof("ddd"), + literal("ddd", "text ddd") + ); + + assertQuery( + "aaa", "ddd" + ); + + // test add with calling sail (eee, text, "text eee") add + add( + literal("eee", "text eee") + ); + + assertQuery( + "aaa", "ddd", "eee" + ); + + // test adding and remove typeof in the same addRemove (eee, text, "text eee") shouldn't be added + addRemove( + // add + new Statement[] { + typeof("fff"), + literal("fff", "text fff") + }, + // remove + new Statement[] { + typeof("fff") + } + ); + + assertQuery( + "aaa", "ddd", "eee" + ); + + remove( + literal("aaa", "text aaa") + ); + + assertQuery( + "ddd", "eee" + ); + } + + @Test + public void typePartialModeTest() { + sail.setParameter(LuceneSail.INDEXEDTYPES, (PREDICATE_TYPEOF + "=" + EX_NS + "type1") + .replaceAll("[:]", "\\\\:")); + sail.setIndexBacktraceMode(TypeBacktraceMode.PARTIAL); + initSail(); + + // initial data + add( + literal("aaa", "text aaa"), + literal("bbb", "text bbb"), + literal("ccc", "text ccc"), + typeof("bbb"), + typeof("eee") + ); + + assertQuery( + "bbb" + ); + + // test backtrace of add(aaa, 
typeof, type1) -> (aaa, text, "text aaa") add + add( + typeof("aaa") + ); + + assertQuery( + "bbb" + ); + + // test backtrace of remove(bbb, typeof, type1) -> (bbb, text, "text bbb") removed + remove( + typeof("bbb") + ); + + assertQuery( + "bbb" + ); + + // test add without calling sail (ddd, text, "text ddd") add + add( + typeof("ddd"), + literal("ddd", "text ddd") + ); + + assertQuery( + "bbb", "ddd" + ); + + // test add with calling sail (eee, text, "text eee") add + add( + literal("eee", "text eee") + ); + + assertQuery( + "bbb", "ddd", "eee" + ); + + // test adding and remove typeof in the same addRemove (eee, text, "text eee") shouldn't be added + addRemove( + // add + new Statement[] { + typeof("fff"), + literal("fff", "text fff") + }, + // remove + new Statement[] { + typeof("fff") + } + ); + + assertQuery( + "bbb", "ddd", "eee" + ); + + remove( + literal("aaa", "text aaa") + ); + + assertQuery( + "bbb", "ddd", "eee" + ); + } + + @Test + public void typeRDFTest() { + sail.setParameter(LuceneSail.INDEXEDTYPES, ("a=" + EX_NS + "type2") + .replaceAll("[:]", "\\\\:")); + initSail(); + + // initial data + add( + literal("aaa", "text aaa"), + literal("bbb", "text bbb"), + literal("ccc", "text ccc"), + typeRDF("bbb"), + typeRDF("eee") + ); + + assertQuery( + "bbb" + ); + + // test backtrace of add(aaa, typeof, type1) -> (aaa, text, "text aaa") add + add( + typeRDF("aaa") + ); + + assertQuery( + "aaa", "bbb" + ); + + // test backtrace of remove(bbb, typeof, type1) -> (bbb, text, "text bbb") removed + remove( + typeRDF("bbb") + ); + + assertQuery( + "aaa" + ); + + // test add without calling sail (ddd, text, "text ddd") add + add( + typeRDF("ddd"), + literal("ddd", "text ddd") + ); + + assertQuery( + "aaa", "ddd" + ); + + // test add with calling sail (eee, text, "text eee") add + add( + literal("eee", "text eee") + ); + + assertQuery( + "aaa", "ddd", "eee" + ); + + // test adding and remove typeof in the same addRemove (eee, text, "text eee") shouldn't be added + 
addRemove( + // add + new Statement[] { + typeRDF("fff"), + literal("fff", "text fff") + }, + // remove + new Statement[] { + typeRDF("fff") + } + ); + + assertQuery( + "aaa", "ddd", "eee" + ); + + remove( + literal("aaa", "text aaa") + ); + + assertQuery( + "ddd", "eee" + ); + } +} diff --git a/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/examples/LuceneSailExample.java b/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/examples/LuceneSailExample.java new file mode 100644 index 00000000000..b0d86d053e0 --- /dev/null +++ b/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/examples/LuceneSailExample.java @@ -0,0 +1,149 @@ +/******************************************************************************* + * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lucene.examples; + +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.query.Binding; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.GraphQuery; +import org.eclipse.rdf4j.query.GraphQueryResult; +import org.eclipse.rdf4j.query.MalformedQueryException; +import org.eclipse.rdf4j.query.QueryEvaluationException; +import org.eclipse.rdf4j.query.QueryLanguage; +import org.eclipse.rdf4j.query.TupleQuery; +import org.eclipse.rdf4j.query.TupleQueryResult; +import org.eclipse.rdf4j.repository.RepositoryConnection; +import org.eclipse.rdf4j.repository.RepositoryException; +import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; +import org.eclipse.rdf4j.rio.RDFFormat; +import org.eclipse.rdf4j.sail.lucene.LuceneSail; +import org.eclipse.rdf4j.sail.lucene.LuceneSailSchema; +import org.eclipse.rdf4j.sail.lucene.impl.LuceneIndex; +import org.eclipse.rdf4j.sail.memory.MemoryStore; + +/** + * Example code showing how to use the LuceneSail + * + * @author sauermann + */ +public class LuceneSailExample { + + /** + * Create a lucene sail and use it + * + * @param args + */ + public static void main(String[] args) throws Exception { + createSimple(); + } + + /** + * Create a LuceneSail and add some triples to it, ask a query. 
+ */ + public static void createSimple() throws Exception { + // create a sesame memory sail + MemoryStore memoryStore = new MemoryStore(); + + // create a lucenesail to wrap the memorystore + LuceneSail lucenesail = new LuceneSail(); + lucenesail.setParameter(LuceneSail.INDEX_CLASS_KEY, LuceneIndex.class.getName()); + // set this parameter to let the lucene index store its data in ram + lucenesail.setParameter(LuceneSail.LUCENE_RAMDIR_KEY, "true"); + // set this parameter to store the lucene index on disk + // lucenesail.setParameter(LuceneSail.LUCENE_DIR_KEY, + // "./data/mydirectory"); + + // wrap memorystore in a lucenesail + lucenesail.setBaseSail(memoryStore); + + // create a Repository to access the sails + SailRepository repository = new SailRepository(lucenesail); + + try ( // add some test data, the FOAF ont + SailRepositoryConnection connection = repository.getConnection()) { + connection.begin(); + connection.add(LuceneSailExample.class.getResourceAsStream("/org/openrdf/sail/lucene/examples/foaf.rdfs"), + "", RDFFormat.RDFXML); + connection.commit(); + + // search for resources that mention "person" + String queryString = "PREFIX search: <" + LuceneSailSchema.NAMESPACE + "> \n" + + "PREFIX rdf: \n" + "SELECT * WHERE { \n" + + "?subject search:matches ?match . \n" + "?match search:query \"person\" ; \n" + + " search:property ?property ; \n" + " search:score ?score ; \n" + + " search:snippet ?snippet . \n" + "?subject rdf:type ?type . \n" + "} LIMIT 3 \n" + + "BINDINGS ?type { \n" + " () \n" + "}"; + tupleQuery(queryString, connection); + + // search for property "name" with domain "person" + queryString = "PREFIX search: <" + LuceneSailSchema.NAMESPACE + "> \n" + + "PREFIX rdfs: \n" + "SELECT * WHERE { \n" + + "?subject rdfs:domain ?domain . \n" + "?subject search:matches ?match . \n" + + "?match search:query \"chat\" ; \n" + " search:score ?score . \n" + + "?domain search:matches ?match2 . 
\n" + "?match2 search:query \"person\" ; \n" + + " search:score ?score2 . \n" + "} LIMIT 5"; + tupleQuery(queryString, connection); + + // search in subquery and filter results + queryString = "PREFIX search: <" + LuceneSailSchema.NAMESPACE + "> \n" + "SELECT * WHERE { \n" + + "{ SELECT * WHERE { \n" + " ?subject search:matches ?match . \n" + + " ?match search:query \"person\" ; \n" + " search:property ?property ; \n" + + " search:score ?score ; \n" + " search:snippet ?snippet . \n" + "} } \n" + + "FILTER(CONTAINS(STR(?subject), \"Person\")) \n" + "} \n" + ""; + tupleQuery(queryString, connection); + + // search for property "homepage" with domain foaf:Person + queryString = "PREFIX search: <" + LuceneSailSchema.NAMESPACE + "> \n" + + "PREFIX foaf: \n" + + "PREFIX rdfs: \n" + + "CONSTRUCT { ?x rdfs:domain foaf:Person } \n" + "WHERE { \n" + "?x rdfs:domain foaf:Person . \n" + + "?x search:matches ?match . \n" + "?match search:query \"homepage\" ; \n" + + " search:property ?property ; \n" + " search:score ?score ; \n" + + " search:snippet ?snippet . 
\n" + "} LIMIT 3 \n"; + graphQuery(queryString, connection); + } finally { + repository.shutDown(); + } + } + + private static void tupleQuery(String queryString, RepositoryConnection connection) + throws QueryEvaluationException, RepositoryException, MalformedQueryException { + System.out.println("Running query: \n" + queryString); + TupleQuery query = connection.prepareTupleQuery(QueryLanguage.SPARQL, queryString); + try (TupleQueryResult result = query.evaluate()) { + // print the results + System.out.println("Query results:"); + while (result.hasNext()) { + BindingSet bindings = result.next(); + System.out.println("found match: "); + for (Binding binding : bindings) { + System.out.println("\t" + binding.getName() + ": " + binding.getValue()); + } + } + } + } + + private static void graphQuery(String queryString, RepositoryConnection connection) + throws RepositoryException, MalformedQueryException, QueryEvaluationException { + System.out.println("Running query: \n" + queryString); + GraphQuery query = connection.prepareGraphQuery(QueryLanguage.SPARQL, queryString); + try (GraphQueryResult result = query.evaluate()) { + // print the results + while (result.hasNext()) { + Statement stmt = result.next(); + System.out.println("found match: " + stmt.getSubject().stringValue() + "\t" + + stmt.getPredicate().stringValue() + "\t" + stmt.getObject().stringValue()); + } + } + + } +} diff --git a/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/impl/AbstractGenericLuceneTest.java b/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/impl/AbstractGenericLuceneTest.java new file mode 100644 index 00000000000..1e09c30169e --- /dev/null +++ b/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/impl/AbstractGenericLuceneTest.java @@ -0,0 +1,849 @@ +/******************************************************************************* + * Copyright (c) 2017 Eclipse RDF4J contributors. + * + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lucene.impl; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.MATCHES; +import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.PROPERTY; +import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.QUERY; +import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.SCORE; +import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.SNIPPET; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +import java.io.IOException; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import org.eclipse.rdf4j.common.concurrent.locks.Properties; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.model.vocabulary.FOAF; +import 
org.eclipse.rdf4j.model.vocabulary.RDFS; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.GraphQuery; +import org.eclipse.rdf4j.query.GraphQueryResult; +import org.eclipse.rdf4j.query.MalformedQueryException; +import org.eclipse.rdf4j.query.QueryEvaluationException; +import org.eclipse.rdf4j.query.QueryLanguage; +import org.eclipse.rdf4j.query.TupleQuery; +import org.eclipse.rdf4j.query.TupleQueryResult; +import org.eclipse.rdf4j.repository.Repository; +import org.eclipse.rdf4j.repository.RepositoryConnection; +import org.eclipse.rdf4j.repository.RepositoryException; +import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.eclipse.rdf4j.rio.RDFFormat; +import org.eclipse.rdf4j.sail.lucene.LuceneSail; +import org.eclipse.rdf4j.sail.lucene.LuceneSailSchema; +import org.eclipse.rdf4j.sail.memory.MemoryStore; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@Timeout(value = 10, unit = TimeUnit.MINUTES) +public abstract class AbstractGenericLuceneTest { + protected static final ValueFactory vf = SimpleValueFactory.getInstance(); + + public static final String QUERY_STRING; + + public static final IRI SUBJECT_1 = vf.createIRI("urn:subject1"); + + public static final IRI SUBJECT_2 = vf.createIRI("urn:subject2"); + + public static final IRI SUBJECT_3 = vf.createIRI("urn:subject3"); + + public static final IRI SUBJECT_4 = vf.createIRI("urn:subject4"); + + public static final IRI SUBJECT_5 = vf.createIRI("urn:subject5"); + + public static final IRI CONTEXT_1 = vf.createIRI("urn:context1"); + + public static final IRI CONTEXT_2 = vf.createIRI("urn:context2"); + + public static final IRI CONTEXT_3 = vf.createIRI("urn:context3"); + + public static final IRI PREDICATE_1 = vf.createIRI("urn:predicate1"); + + public static final IRI PREDICATE_2 = 
vf.createIRI("urn:predicate2"); + + public static final IRI PREDICATE_3 = vf.createIRI("urn:predicate3"); + + static final Logger LOG = LoggerFactory.getLogger(AbstractGenericLuceneTest.class); + + private final Random random = new Random(43252333); + + protected LuceneSail sail; + + protected Repository repository; + + protected RepositoryConnection connection; + + static { + StringBuilder buffer = new StringBuilder(); + buffer.append("SELECT ?Subject ?Score "); + buffer.append("WHERE { ?Subject <" + MATCHES + "> [ "); + buffer.append(" <" + QUERY + "> ?Query; "); + buffer.append(" <" + SCORE + "> ?Score ].} "); + QUERY_STRING = buffer.toString(); + } + + protected abstract void configure(LuceneSail sail) throws IOException; + + @BeforeEach + public void setUp() throws Exception { + // set logging, uncomment this to get better logging for debugging + // org.apache.log4j.BasicConfigurator.configure(); + + // setup a LuceneSail + MemoryStore memoryStore = new MemoryStore(); + // enable lock tracking + Properties.setLockTrackingEnabled(true); + sail = new LuceneSail(); + configure(sail); + sail.setBaseSail(memoryStore); + + // create a Repository wrapping the LuceneSail + repository = new SailRepository(sail); + + // add some statements to it + connection = repository.getConnection(); + connection.begin(); + connection.add(SUBJECT_1, PREDICATE_1, vf.createLiteral("one")); + connection.add(SUBJECT_1, PREDICATE_1, vf.createLiteral("five")); + connection.add(SUBJECT_1, PREDICATE_2, vf.createLiteral("two")); + connection.add(SUBJECT_2, PREDICATE_1, vf.createLiteral("one")); + connection.add(SUBJECT_2, PREDICATE_2, vf.createLiteral("three")); + connection.add(SUBJECT_3, PREDICATE_1, vf.createLiteral("four")); + connection.add(SUBJECT_3, PREDICATE_2, vf.createLiteral("one")); + connection.add(SUBJECT_3, PREDICATE_3, SUBJECT_1); + connection.add(SUBJECT_3, PREDICATE_3, SUBJECT_2); + connection.commit(); + } + + @AfterEach + public void tearDown() throws RepositoryException 
{ + try { + if (connection != null) { + connection.close(); + } + } finally { + if (repository != null) { + repository.shutDown(); + } + } + Properties.setLockTrackingEnabled(false); + } + + @Test + public void testComplexQueryTwo() throws MalformedQueryException, RepositoryException, QueryEvaluationException { + // prepare the query + StringBuilder buffer = new StringBuilder(); + buffer.append("SELECT ?Resource ?Matching ?Score "); + buffer.append("WHERE { ?Resource <" + PREDICATE_3 + "> ?Matching .\n"); + buffer.append(" ?Matching <" + MATCHES + "> [ \n "); + buffer.append(" <" + QUERY + "> \"two\"; \n"); + buffer.append(" <" + SCORE + "> ?Score ]. }"); + String q = buffer.toString(); + + // fire a query for all subjects with a given term + TupleQuery query = connection.prepareTupleQuery(q); + + // check the results + try (TupleQueryResult result = query.evaluate()) { + // check the results + assertTrue(result.hasNext()); + BindingSet bindings = result.next(); + assertEquals(SUBJECT_3, (IRI) bindings.getValue("Resource")); + assertEquals(SUBJECT_1, (IRI) bindings.getValue("Matching")); + assertNotNull(bindings.getValue("Score")); + + assertFalse(result.hasNext()); + } + } + + private void evaluate(String[] queries, ArrayList>> expectedResults) + throws MalformedQueryException, RepositoryException, QueryEvaluationException { + for (int queryID = 0; queryID < queries.length; queryID++) { + String sparql = queries[queryID]; + List> expectedResultSet = expectedResults.get(queryID); + + // fire the query + TupleQuery query = connection.prepareTupleQuery(sparql); + int actualResults; + Set matched; + // check the results + try (TupleQueryResult tqr = query.evaluate()) { + // check the results + actualResults = 0; + matched = new HashSet<>(); + while (tqr.hasNext()) { + BindingSet bs = tqr.next(); + actualResults++; + + boolean matches; + for (int resultSetID = 0; resultSetID < expectedResultSet.size(); resultSetID++) { + // ignore results that matched before + if 
(matched.contains(resultSetID)) { + continue; + } + + // assume it matches + matches = true; + + // get the result we compare with now + Map expectedResult = new HashMap<>(expectedResultSet.get(resultSetID)); + + // get all var names + Collection vars = new ArrayList<>(expectedResult.keySet()); + + // check if all actual results are expected + for (String var : vars) { + String expectedVal = expectedResult.get(var); + Value actualVal = bs.getValue(var); + + if (expectedVal == null) { + // don't care about the actual value, as long as there is + // one + if (actualVal == null) { + matches = false; + break; + } + } else { + // compare the values + if ((actualVal == null) || (expectedVal.compareTo(actualVal.stringValue()) != 0)) { + matches = false; + break; + } + } + + // remove the matched result so that we do not match it twice + expectedResult.remove(var); + } + + // check if expected results were existing + if (!expectedResult.isEmpty()) { + matches = false; + } + + if (matches) { + matched.add(resultSetID); + break; + } + } + } + } + + // the number of matched expected results must be equal to the number + // of actual results + assertEquals(expectedResultSet.size(), matched.size(), + "How many expected results were retrieved for query #" + queryID + "?"); + assertEquals(expectedResultSet.size(), actualResults, + "How many actual results were retrieved for query #" + queryID + "?"); + } + } + + @Test + public void testPredicateLuceneQueries() + throws MalformedQueryException, RepositoryException, QueryEvaluationException { + // prepare the query + String[] queries = new String[] { + "SELECT ?Resource ?Score ?Snippet \n" + + "WHERE { " + + " ?Resource <" + MATCHES + "> [ \n" + + " <" + QUERY + "> \"one\"; \n" + + " <" + SCORE + "> ?Score; \n" + + " <" + SNIPPET + "> ?Snippet ] }\n", + "SELECT ?Resource ?Score ?Snippet \n" + + "WHERE { " + + " ?Resource <" + MATCHES + "> [ \n" + + " <" + QUERY + "> \"five\"; \n" + + " <" + SCORE + "> ?Score; \n" + + " <" + SNIPPET 
+ "> ?Snippet ] }\n" }; + + ArrayList>> allResults = new ArrayList<>(); + + // create a new result set + ArrayList> resultSet = new ArrayList<>(); + + // one possible result + Map result1 = new HashMap<>(); + result1.put("Resource", SUBJECT_1.stringValue()); + result1.put("Score", null); // null means: ignore the value + result1.put("Snippet", "one"); + resultSet.add(result1); + + // another possible result + Map result2 = new HashMap<>(); + result2.put("Resource", SUBJECT_2.stringValue()); + result2.put("Score", null); // null means: ignore the value + result2.put("Snippet", "one"); + resultSet.add(result2); + + // another possible result + Map result3 = new HashMap<>(); + result3.put("Resource", SUBJECT_3.stringValue()); + result3.put("Score", null); // null means: ignore the value + result3.put("Snippet", "one"); + resultSet.add(result3); + + // add the results of for the first query + allResults.add(resultSet); + + // recreate a result set + resultSet = new ArrayList<>(); + + // one possible result + Map result = new HashMap<>(); + result.put("Resource", SUBJECT_1.stringValue()); + result.put("Score", null); // null means: ignore the value + result.put("Snippet", "five"); + resultSet.add(result); + + // add the results of for the first query + allResults.add(resultSet); + + evaluate(queries, allResults); + } + + @Test + public void testSnippetQueries() throws MalformedQueryException, RepositoryException, QueryEvaluationException { + // prepare the query + // search for the term "one", but only in predicate 1 + StringBuilder buffer = new StringBuilder(); + buffer.append("SELECT ?Resource ?Score \n"); + buffer.append("WHERE { \n"); + buffer.append(" ?Resource <" + MATCHES + "> [\n "); + buffer.append(" <" + QUERY + "> \"one\";\n"); + buffer.append(" <" + PROPERTY + "> <" + PREDICATE_1 + ">;\n "); + buffer.append(" <" + SCORE + "> ?Score ]. 
} "); + String q = buffer.toString(); + + // fire the query + TupleQuery query = connection.prepareTupleQuery(q); + try (TupleQueryResult result = query.evaluate()) { + + // check the results + BindingSet bindings; + + // the first result is subject 1 and has a score + int results = 0; + Set expectedSubject = new HashSet<>(); + expectedSubject.add(SUBJECT_1); + expectedSubject.add(SUBJECT_2); + while (result.hasNext()) { + results++; + bindings = result.next(); + + // the resource should be among the set of expected subjects, if so, + // remove it from the set + assertTrue(expectedSubject.remove(bindings.getValue("Resource"))); + + // there should be a score + assertNotNull(bindings.getValue("Score")); + } + + // there should have been only 2 results + assertEquals(2, results); + + result.close(); + } + } + + /** + * Test if the snippets do not accidentially come from the "text" field while we actually expect them to come from + * the predicate field. + */ + @Test + public void testSnippetLimitedToPredicate() + throws MalformedQueryException, RepositoryException, QueryEvaluationException { + try (RepositoryConnection localConnection = repository.getConnection()) { + localConnection.begin(); + // we use the string 'charly' as test-case. 
the snippets should contain + // "come" and "unicorn" + // and 'poor' should not be returned if we limit on predicate1 + // and watch http://www.youtube.com/watch?v=Q5im0Ssyyus like 25mio others + localConnection.add(SUBJECT_1, PREDICATE_1, vf.createLiteral("come charly lets go to candy mountain")); + localConnection.add(SUBJECT_1, PREDICATE_1, vf.createLiteral("but the unicorn charly said to goaway")); + localConnection.add(SUBJECT_1, PREDICATE_2, vf.createLiteral("there was poor charly without a kidney")); + localConnection.commit(); + } + + // prepare the query + // search for the term "charly", but only in predicate 1 + StringBuilder buffer = new StringBuilder(); + buffer.append("SELECT ?Resource ?Snippet ?Score \n"); + buffer.append("WHERE { \n"); + buffer.append(" ?Resource <" + MATCHES + "> [\n "); + buffer.append(" <" + QUERY + "> \"charly\";\n"); + buffer.append(" <" + PROPERTY + "> <" + PREDICATE_1 + ">;\n "); + buffer.append(" <" + SNIPPET + "> ?Snippet; "); + buffer.append(" <" + SCORE + "> ?Score ]. 
} "); + String q = buffer.toString(); + + // fire the query + TupleQuery query = connection.prepareTupleQuery(q); + try (TupleQueryResult result = query.evaluate()) { + + // check the results + BindingSet bindings; + + // the first result is subject 1 and has a score + int results = 0; + Set expectedSnippetPart = new HashSet<>(); + expectedSnippetPart.add("come"); + expectedSnippetPart.add("unicorn"); + String notexpected = "poor"; + while (result.hasNext()) { + results++; + bindings = result.next(); + + // the resource should be among the set of expected subjects, if so, + // remove it from the set + String snippet = ((Literal) bindings.getValue("Snippet")).stringValue(); + boolean foundexpected = false; + for (Iterator i = expectedSnippetPart.iterator(); i.hasNext();) { + String expected = i.next(); + if (snippet.contains(expected)) { + foundexpected = true; + i.remove(); + } + } + if (snippet.contains(notexpected)) { + fail("snippet '" + snippet + "' contained value '" + notexpected + "' from predicate " + + PREDICATE_2); + } + if (!foundexpected) { + fail("did not find any of the expected strings " + expectedSnippetPart + " in the snippet " + + snippet); + } + + // there should be a score + assertNotNull(bindings.getValue("Score")); + } + + // we found all + assertTrue(expectedSnippetPart.isEmpty(), "These were expected but not found: " + expectedSnippetPart); + + assertEquals(2, results, "there should have been 2 results"); + } + } + + @Test + public void testCharlyTerm() { + + try (RepositoryConnection localConnection = repository.getConnection()) { + localConnection.begin(); + // we use the string 'charly' as test-case. 
the snippets should contain + // "come" and "unicorn" + // and 'poor' should not be returned if we limit on predicate1 + // and watch http://www.youtube.com/watch?v=Q5im0Ssyyus like 25mio others + localConnection.add(SUBJECT_1, PREDICATE_1, vf.createLiteral("come charly lets go to candy mountain")); + localConnection.add(SUBJECT_1, PREDICATE_1, vf.createLiteral("but the unicorn charly said to goaway")); + localConnection.add(SUBJECT_1, PREDICATE_2, vf.createLiteral("there was poor charly without a kidney")); + localConnection.commit(); + } + // search for the term "charly" in all predicates + StringBuilder buffer = new StringBuilder(); + buffer.append("SELECT ?Resource ?Snippet ?Score \n"); + buffer.append("WHERE { \n"); + buffer.append(" ?Resource <" + MATCHES + "> [\n "); + buffer.append(" <" + QUERY + "> \"charly\";\n"); + buffer.append(" <" + SNIPPET + "> ?Snippet; "); + buffer.append(" <" + SCORE + "> ?Score ]. } "); + String q = buffer.toString(); + + // fire the query + TupleQuery query = connection.prepareTupleQuery(q); + try (TupleQueryResult result = query.evaluate()) { + + // check the results + BindingSet bindings; + + // the first result is subject 1 and has a score + int results = 0; + Set expectedSnippetPart = new HashSet<>(); + expectedSnippetPart.add("come"); + expectedSnippetPart.add("unicorn"); + expectedSnippetPart.add("poor"); + + while (result.hasNext()) { + results++; + bindings = result.next(); + + // the resource should be among the set of expected subjects, if so, + // remove it from the set + String snippet = ((Literal) bindings.getValue("Snippet")).stringValue(); + boolean foundexpected = false; + for (Iterator i = expectedSnippetPart.iterator(); i.hasNext();) { + String expected = i.next(); + if (snippet.contains(expected)) { + foundexpected = true; + i.remove(); + } + } + if (!foundexpected) { + fail("did not find any of the expected strings " + expectedSnippetPart + " in the snippet " + + snippet); + } + + // there should be a score + 
assertNotNull(bindings.getValue("Score")); + } + + // we found all + assertTrue(expectedSnippetPart.isEmpty(), "These were expected but not found: " + expectedSnippetPart); + + assertEquals(3, results, "there should have been 3 results"); + } + } + + @Test + public void testGraphQuery() throws QueryEvaluationException, MalformedQueryException, RepositoryException { + IRI score = vf.createIRI(LuceneSailSchema.NAMESPACE + "score"); + StringBuilder query = new StringBuilder(); + + // here we would expect two links from SUBJECT3 to SUBJECT1 and SUBJECT2 + // and one link from SUBJECT3 to its score + query.append("PREFIX lucenesail: <" + LuceneSailSchema.NAMESPACE + "> \n"); + query.append("CONSTRUCT { \n"); + query.append(" ?r <" + PREDICATE_3 + "> ?r2 ; \n"); + query.append(" <" + score + "> ?s . }\n"); + query.append("WHERE {\n"); + query.append(" ?r lucenesail:matches ?match. ?match lucenesail:query \"four\"; \n"); + query.append(" lucenesail:score ?s . \n"); + query.append(" ?r <" + PREDICATE_3 + "> ?r2 } \n"); + + int r = 0; + int n = 0; + GraphQuery gq = connection.prepareGraphQuery(query.toString()); + try (GraphQueryResult result = gq.evaluate()) { + while (result.hasNext()) { + Statement statement = result.next(); + n++; + + if (statement.getSubject().equals(SUBJECT_3) && statement.getPredicate().equals(PREDICATE_3) + && statement.getObject().equals(SUBJECT_1)) { + r |= 1; + continue; + } + if (statement.getSubject().equals(SUBJECT_3) && statement.getPredicate().equals(PREDICATE_3) + && statement.getObject().equals(SUBJECT_2)) { + r |= 2; + continue; + } + if (statement.getSubject().equals(SUBJECT_3) && statement.getPredicate().equals(score)) { + r |= 4; + continue; + } + } + + assertEquals(3, n); + assertEquals(7, r); + + } + + } + + @Test + public void testQueryWithSpecifiedSubject() + throws RepositoryException, MalformedQueryException, QueryEvaluationException { + // fire a query with the subject pre-specified + TupleQuery query = 
connection.prepareTupleQuery(QUERY_STRING); + query.setBinding("Subject", SUBJECT_1); + query.setBinding("Query", vf.createLiteral("one")); + // check that this subject and only this subject is returned + try (TupleQueryResult result = query.evaluate()) { + // check that this subject and only this subject is returned + assertTrue(result.hasNext()); + BindingSet bindings = result.next(); + assertEquals(SUBJECT_1, (IRI) bindings.getValue("Subject")); + assertNotNull(bindings.getValue("Score")); + assertFalse(result.hasNext()); + } + } + + @Test + public void testUnionQuery() throws RepositoryException, MalformedQueryException, QueryEvaluationException { + String queryStr = ""; + queryStr += "PREFIX search: "; + queryStr += "PREFIX rdfs: "; + queryStr += "SELECT DISTINCT ?result { "; + queryStr += "{ ?result search:matches ?match1 . "; + queryStr += " ?match1 search:query 'one' ; "; + queryStr += " search:property . }"; + queryStr += " UNION "; + queryStr += "{ ?result search:matches ?match2 . "; + queryStr += " ?match2 search:query 'one' ; "; + queryStr += " search:property . } "; + queryStr += "} "; + + // fire a query with the subject pre-specified + TupleQuery query = connection.prepareTupleQuery(QueryLanguage.SPARQL, queryStr); + try (TupleQueryResult result = query.evaluate()) { + assertThat(result.hasNext()).isTrue(); + } + } + + /** + * Tests adding data to two contexts (graphs). 
+ * + */ + @Test + public void testContextHandling() { + connection.add(SUBJECT_4, PREDICATE_1, vf.createLiteral("sfourponecone"), CONTEXT_1); + connection.add(SUBJECT_4, PREDICATE_2, vf.createLiteral("sfourptwocone"), CONTEXT_1); + connection.add(SUBJECT_5, PREDICATE_1, vf.createLiteral("sfiveponecone"), CONTEXT_1); + connection.add(SUBJECT_5, PREDICATE_1, vf.createLiteral("sfiveponectwo"), CONTEXT_2); + connection.add(SUBJECT_5, PREDICATE_2, vf.createLiteral("sfiveptwoctwo"), CONTEXT_2); + connection.commit(); + // connection.close(); + // connection = repository.getConnection(); + // connection.setAutoCommit(false); + // test querying + assertQueryResult("sfourponecone", PREDICATE_1, SUBJECT_4); + assertQueryResult("sfourptwocone", PREDICATE_2, SUBJECT_4); + assertQueryResult("sfiveponecone", PREDICATE_1, SUBJECT_5); + assertQueryResult("sfiveponectwo", PREDICATE_1, SUBJECT_5); + assertQueryResult("sfiveptwoctwo", PREDICATE_2, SUBJECT_5); + // blind test to see if this method works: + assertNoQueryResult("johannesgrenzfurthner"); + // remove a context + connection.clear(CONTEXT_1); + connection.commit(); + assertNoQueryResult("sfourponecone"); + assertNoQueryResult("sfourptwocone"); + assertNoQueryResult("sfiveponecone"); + assertQueryResult("sfiveponectwo", PREDICATE_1, SUBJECT_5); + assertQueryResult("sfiveptwoctwo", PREDICATE_2, SUBJECT_5); + } + + /** + * we experienced problems with the NULL context and lucenesail in August 2008 + * + */ + @Test + public void testNullContextHandling() { + connection.add(SUBJECT_4, PREDICATE_1, vf.createLiteral("sfourponecone")); + connection.add(SUBJECT_4, PREDICATE_2, vf.createLiteral("sfourptwocone")); + connection.add(SUBJECT_5, PREDICATE_1, vf.createLiteral("sfiveponecone")); + connection.add(SUBJECT_5, PREDICATE_1, vf.createLiteral("sfiveponectwo"), CONTEXT_2); + connection.add(SUBJECT_5, PREDICATE_2, vf.createLiteral("sfiveptwoctwo"), CONTEXT_2); + connection.commit(); + // connection.close(); + // connection = 
repository.getConnection(); + // connection.setAutoCommit(false); + // test querying + assertQueryResult("sfourponecone", PREDICATE_1, SUBJECT_4); + assertQueryResult("sfourptwocone", PREDICATE_2, SUBJECT_4); + assertQueryResult("sfiveponecone", PREDICATE_1, SUBJECT_5); + assertQueryResult("sfiveponectwo", PREDICATE_1, SUBJECT_5); + assertQueryResult("sfiveptwoctwo", PREDICATE_2, SUBJECT_5); + // blind test to see if this method works: + assertNoQueryResult("johannesgrenzfurthner"); + // remove a context + connection.clear((Resource) null); + connection.commit(); + assertNoQueryResult("sfourponecone"); + assertNoQueryResult("sfourptwocone"); + assertNoQueryResult("sfiveponecone"); + assertQueryResult("sfiveponectwo", PREDICATE_1, SUBJECT_5); + assertQueryResult("sfiveptwoctwo", PREDICATE_2, SUBJECT_5); + } + + @Test + public void testFuzzyQuery() throws MalformedQueryException, RepositoryException, QueryEvaluationException { + // prepare the query + // search for the term "one" with 80% fuzzyness + StringBuilder buffer = new StringBuilder(); + buffer.append("SELECT ?Resource ?Score \n"); + buffer.append("WHERE { \n"); + buffer.append(" ?Resource <" + MATCHES + "> [\n "); + buffer.append(" <" + QUERY + "> \"one~0.8\";\n"); + buffer.append(" <" + SCORE + "> ?Score ]. 
} "); + String q = buffer.toString(); + + // fire the query + TupleQuery query = connection.prepareTupleQuery(q); + // check the results + try (TupleQueryResult result = query.evaluate()) { + // check the results + BindingSet bindings; + + // the first result is subject 1 and has a score + int results = 0; + Set expectedSubject = new HashSet<>(); + expectedSubject.add(SUBJECT_1); + expectedSubject.add(SUBJECT_2); + expectedSubject.add(SUBJECT_3); + while (result.hasNext()) { + results++; + bindings = result.next(); + + // the resource should be among the set of expected subjects, if so, + // remove it from the set + assertTrue(expectedSubject.remove((IRI) bindings.getValue("Resource"))); + + // there should be a score + assertNotNull(bindings.getValue("Score")); + } + + // there should have been 3 results + assertEquals(3, results); + } + } + + /** + * Checks if reindexing does not corrupt the new index and if complex query still is evaluated properly. + * + */ + @Test + public void testReindexing() { + sail.reindex(); + testComplexQueryTwo(); + } + + @Test + public void testPropertyVar() throws MalformedQueryException, RepositoryException, QueryEvaluationException { + StringBuilder buffer = new StringBuilder(); + buffer.append("SELECT ?Resource ?Property \n"); + buffer.append("WHERE { \n"); + buffer.append(" ?Resource <" + MATCHES + "> [\n "); + buffer.append(" <" + QUERY + "> \"one\";\n"); + buffer.append(" <" + PROPERTY + "> ?Property ]. 
} "); + String q = buffer.toString(); + + // fire the query + TupleQuery query = connection.prepareTupleQuery(q); + try (TupleQueryResult result = query.evaluate()) { + int results = 0; + Map expectedSubject = new HashMap<>(); + expectedSubject.put(SUBJECT_1, PREDICATE_1); + expectedSubject.put(SUBJECT_2, PREDICATE_1); + expectedSubject.put(SUBJECT_3, PREDICATE_2); + while (result.hasNext()) { + results++; + BindingSet bindings = result.next(); + + // the resource should be among the set of expected subjects, if so, + // remove it from the set + Value subject = bindings.getValue("Resource"); + IRI expectedProperty = expectedSubject.remove(subject); + assertEquals(expectedProperty, bindings.getValue("Property"), "For subject " + subject); + } + + // there should have been 3 results + assertEquals(3, results); + } + } + + @Test + public void testMultithreadedAdd() throws InterruptedException { + int numThreads = 3; + final CountDownLatch startLatch = new CountDownLatch(1); + final CountDownLatch endLatch = new CountDownLatch(numThreads); + final Set exceptions = ConcurrentHashMap.newKeySet(); + for (int i = 0; i < numThreads; i++) { + new Thread(new Runnable() { + + private final long iterationCount = 10 + Math.round(random.nextDouble() * 100); + + @Override + public void run() { + try (RepositoryConnection con = repository.getConnection()) { + startLatch.await(); + for (long i = 0; i < iterationCount; i++) { + con.add(vf.createIRI("ex:" + i), vf.createIRI("ex:prop" + i % 3), vf.createLiteral(i)); + } + } catch (Throwable e) { + exceptions.add(e); + throw new AssertionError(e); + } finally { + endLatch.countDown(); + } + } + }).start(); + } + startLatch.countDown(); + endLatch.await(); + for (Throwable e : exceptions) { + e.printStackTrace(System.err); + } + assertEquals(0, exceptions.size(), "Exceptions occurred during testMultithreadedAdd, see stacktraces above"); + } + + @Test + public void testIndexWriterState() { + final String brokenTrig = "{ broken }"; + 
RepositoryConnection conn = repository.getConnection(); + try (StringReader sr = new StringReader(brokenTrig)) { + conn.add(sr, "http://example.org/", RDFFormat.TRIG); + } catch (Exception e) { + // expected parse exception + LOG.debug("Parse exception: {}", e.getMessage()); + } + conn.close(); + conn = repository.getConnection(); + conn.clear(); // make sure this can be executed multiple times + conn.add(FOAF.PERSON, RDFS.LABEL, SimpleValueFactory.getInstance().createLiteral("abc")); + conn.close(); + } + + protected void assertQueryResult(String literal, IRI predicate, Resource resultUri) { + // fire a query for all subjects with a given term + String queryString = "SELECT ?Resource " + "WHERE { ?Resource <" + MATCHES + "> [ " + " <" + QUERY + "> \"" + + literal + "\" ]. } "; + TupleQuery query = connection.prepareTupleQuery(queryString); + try (TupleQueryResult result = query.evaluate()) { + // check the result + assertTrue(result.hasNext(), + "query for literal '" + literal + " did not return any results, expected was " + resultUri); + BindingSet bindings = result.next(); + assertEquals(resultUri, bindings.getValue("Resource"), + "query for literal '" + literal + " did not return the expected resource"); + assertFalse(result.hasNext()); + } + } + + protected void assertNoQueryResult(String literal) { + // fire a query for all subjects with a given term + String queryString = "SELECT ?Resource " + "WHERE { ?Resource <" + MATCHES + "> [ " + " <" + QUERY + "> \"" + + literal + "\" ]. 
} "; + TupleQuery query = connection.prepareTupleQuery(queryString); + try (TupleQueryResult result = query.evaluate()) { + // check the result + assertFalse(result.hasNext(), + "query for literal '" + literal + " did return results, which was not expected."); + } + } + +} diff --git a/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneIndexLocationTest.java b/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneIndexLocationTest.java new file mode 100644 index 00000000000..46fa5974035 --- /dev/null +++ b/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneIndexLocationTest.java @@ -0,0 +1,112 @@ +/******************************************************************************* + * Copyright (c) 2021 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lucene.impl; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.File; +import java.nio.file.Path; +import java.util.stream.IntStream; + +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.repository.RepositoryConnection; +import org.eclipse.rdf4j.repository.RepositoryException; +import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; +import org.eclipse.rdf4j.sail.Sail; +import org.eclipse.rdf4j.sail.lucene.LuceneSail; +import org.eclipse.rdf4j.sail.memory.MemoryStore; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This unit test reproduces issue #41 + * + * @author Jacek Grzebyta + */ +public class LuceneIndexLocationTest { + + private final Logger log = LoggerFactory.getLogger(getClass()); + + private final String luceneIndexPath = "sail-index"; + + Sail sail; + + SailRepository repository; + + RepositoryConnection connection; + + private final ValueFactory vf = SimpleValueFactory.getInstance(); + + /** + * Set up memory storage located within temporary folder + * + */ + @BeforeEach + public void setUp(@TempDir File dataDir) { + sail = new MemoryStore(); + + LuceneSail lucene = new LuceneSail(); + lucene.setBaseSail(sail); + lucene.setParameter(LuceneSail.LUCENE_DIR_KEY, luceneIndexPath); + lucene.setParameter(LuceneSail.INDEX_CLASS_KEY, LuceneSail.DEFAULT_INDEX_CLASS); + + repository = new SailRepository(lucene); + repository.setDataDir(dataDir); + + try ( // create temporary transaction to 
load data + SailRepositoryConnection cnx = repository.getConnection()) { + cnx.begin(); + + IntStream.rangeClosed(0, 50) + .forEach(i -> cnx.add(vf.createStatement(vf.createIRI("urn:subject" + i), + vf.createIRI("urn:predicate:" + i), vf.createLiteral("Value" + i)))); + cnx.commit(); + } + connection = repository.getConnection(); + } + + @AfterEach + public void tearDown() throws RepositoryException { + try { + if (connection != null) { + connection.close(); + } + } finally { + if (repository != null) { + repository.shutDown(); + } + } + } + + /** + * Check Lucene index location + * + */ + @Test + public void IndexLocationTest() { + File dataDir = repository.getDataDir(); + Path lucenePath = repository.getDataDir().toPath().resolve(luceneIndexPath); + + log.info("Lucene index location: {}", lucenePath); + assertEquals(dataDir.getAbsolutePath() + File.separator + luceneIndexPath, + lucenePath.toAbsolutePath().toString()); + + assertTrue(lucenePath.toFile().exists()); + assertTrue(lucenePath.toFile().isDirectory()); + } +} diff --git a/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneIndexTest.java b/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneIndexTest.java new file mode 100644 index 00000000000..c539c7845cf --- /dev/null +++ b/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneIndexTest.java @@ -0,0 +1,573 @@ +/******************************************************************************* + * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lucene.impl; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; +import java.util.function.Function; + +import org.apache.lucene.analysis.en.EnglishAnalyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.MultiTerms; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.queryparser.classic.QueryParser; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TotalHitCountCollector; +import org.apache.lucene.store.ByteBuffersDirectory; +import org.eclipse.rdf4j.common.concurrent.locks.Properties; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.Model; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.base.CoreDatatype; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.model.impl.TreeModel; +import org.eclipse.rdf4j.model.vocabulary.GEO; +import 
org.eclipse.rdf4j.model.vocabulary.GEOF; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.TupleQueryResult; +import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; +import org.eclipse.rdf4j.repository.util.Repositories; +import org.eclipse.rdf4j.sail.evaluation.TupleFunctionEvaluationMode; +import org.eclipse.rdf4j.sail.lucene.LuceneSail; +import org.eclipse.rdf4j.sail.lucene.SearchFields; +import org.eclipse.rdf4j.sail.memory.MemoryStore; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class LuceneIndexTest { + + private static final ValueFactory vf = SimpleValueFactory.getInstance(); + + public static final IRI CONTEXT_1 = vf.createIRI("urn:context1"); + + public static final IRI CONTEXT_2 = vf.createIRI("urn:context2"); + + public static final IRI CONTEXT_3 = vf.createIRI("urn:context3"); + + // create some objects that we will use throughout this test + IRI subject = vf.createIRI("urn:subj"); + + IRI subject2 = vf.createIRI("urn:subj2"); + + IRI predicate1 = vf.createIRI("urn:pred1"); + + IRI predicate2 = vf.createIRI("urn:pred2"); + + Literal object1 = vf.createLiteral("object1"); + + Literal object2 = vf.createLiteral("object2"); + + Literal object3 = vf.createLiteral("cats"); + + Literal object4 = vf.createLiteral("dogs"); + + Literal object5 = vf.createLiteral("chicken"); + + Statement statement11 = vf.createStatement(subject, predicate1, object1); + + Statement statement12 = vf.createStatement(subject, predicate2, object2); + + Statement statement21 = vf.createStatement(subject2, predicate1, object3); + + Statement statement22 = vf.createStatement(subject2, predicate2, object4); + + Statement statement23 = vf.createStatement(subject2, predicate2, object5); + + Statement statementContext111 = vf.createStatement(subject, predicate1, object1, CONTEXT_1); + + Statement 
statementContext121 = vf.createStatement(subject, predicate2, object2, CONTEXT_1); + + Statement statementContext211 = vf.createStatement(subject2, predicate1, object3, CONTEXT_1); + + Statement statementContext222 = vf.createStatement(subject2, predicate2, object4, CONTEXT_2); + + Statement statementContext232 = vf.createStatement(subject2, predicate2, object5, CONTEXT_2); + + // add a statement to an index + ByteBuffersDirectory directory; + + StandardAnalyzer analyzer; + + LuceneIndex index; + + @BeforeEach + public void setUp() throws Exception { + directory = new ByteBuffersDirectory(); + analyzer = new StandardAnalyzer(); + index = new LuceneIndex(directory, analyzer); + } + + @AfterEach + public void tearDown() throws Exception { + index.shutDown(); + Properties.setLockTrackingEnabled(false); + } + + @Test + public void testAddStatement() throws IOException, ParseException { + // add a statement to an index + index.begin(); + index.addStatement(statement11); + index.commit(); + + // check that it arrived properly + DirectoryReader reader = DirectoryReader.open(directory); + assertEquals(1, reader.numDocs()); + + Term term = new Term(SearchFields.URI_FIELD_NAME, subject.toString()); + PostingsEnum docs = termDocs(reader, term); + assertTrue(next(docs)); + + int documentNr = docs.docID(); + Document document = reader.document(documentNr); + assertEquals(subject.toString(), document.get(SearchFields.URI_FIELD_NAME)); + assertEquals(object1.getLabel(), document.get(predicate1.toString())); + + assertFalse(next(docs)); + reader.close(); + + // add another statement + index.begin(); + index.addStatement(statement12); + index.commit(); + + // See if everything remains consistent. 
We must create a new IndexReader + // in order to be able to see the updates + reader = DirectoryReader.open(directory); + assertEquals(1, reader.numDocs()); // #docs should *not* have increased + + docs = termDocs(reader, term); + assertTrue(next(docs)); + + documentNr = docs.docID(); + document = reader.document(documentNr); + assertEquals(subject.toString(), document.get(SearchFields.URI_FIELD_NAME)); + assertEquals(object1.getLabel(), document.get(predicate1.toString())); + assertEquals(object2.getLabel(), document.get(predicate2.toString())); + + assertFalse(next(docs)); + + // see if we can query for these literals + IndexSearcher searcher = new IndexSearcher(reader); + QueryParser parser = new QueryParser(SearchFields.TEXT_FIELD_NAME, analyzer); + + Query query = parser.parse(object1.getLabel()); + System.out.println("query=" + query); + TotalHitCountCollector results = new TotalHitCountCollector(); + searcher.search(query, results); + assertEquals(1, results.getTotalHits()); + + query = parser.parse(object2.getLabel()); + results = new TotalHitCountCollector(); + searcher.search(query, results); + assertEquals(1, results.getTotalHits()); + + reader.close(); + + // remove the first statement + index.begin(); + index.removeStatement(statement11); + index.commit(); + + // check that that statement is actually removed and that the other still + // exists + reader = DirectoryReader.open(directory); + assertEquals(1, reader.numDocs()); + + docs = termDocs(reader, term); + assertTrue(next(docs)); + + documentNr = docs.docID(); + document = reader.document(documentNr); + assertEquals(subject.toString(), document.get(SearchFields.URI_FIELD_NAME)); + assertNull(document.get(predicate1.toString())); + assertEquals(object2.getLabel(), document.get(predicate2.toString())); + + assertFalse(next(docs)); + + reader.close(); + + // remove the other statement + index.begin(); + index.removeStatement(statement12); + index.commit(); + + // check that there are no documents 
left (i.e. the last Document was + // removed completely, rather than its remaining triple removed) + reader = DirectoryReader.open(directory); + assertEquals(0, reader.numDocs()); + reader.close(); + } + + /** + * NB: this is a convenient but very slow way of getting termDocs. It is sufficient for testing purposes. + * + * @throws IOException + */ + private static PostingsEnum termDocs(IndexReader reader, Term term) throws IOException { + return MultiTerms.getTermPostingsEnum(reader, term.field(), term.bytes()); + } + + private static boolean next(PostingsEnum docs) throws IOException { + return (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS); + } + + @Test + public void testAddMultiple() throws Exception { + // add a statement to an index + HashSet added = new HashSet<>(); + HashSet removed = new HashSet<>(); + added.add(statement11); + added.add(statement12); + added.add(statement21); + added.add(statement22); + index.begin(); + index.addRemoveStatements(added, removed); + index.commit(); + + try ( // check that it arrived properly + DirectoryReader reader = DirectoryReader.open(directory)) { + assertEquals(2, reader.numDocs()); + } + + // check the documents + Document document = index.getDocuments(subject).iterator().next(); + assertEquals(subject.toString(), document.get(SearchFields.URI_FIELD_NAME)); + assertStatement(statement11, document); + assertStatement(statement12, document); + + document = index.getDocuments(subject2).iterator().next(); + assertEquals(subject2.toString(), document.get(SearchFields.URI_FIELD_NAME)); + assertStatement(statement21, document); + assertStatement(statement22, document); + + // check if the text field stores all added string values + Set texts = new HashSet<>(); + texts.add("cats"); + texts.add("dogs"); + // FIXME + // assertTexts(texts, document); + + // add/remove one + added.clear(); + removed.clear(); + added.add(statement23); + removed.add(statement22); + index.begin(); + index.addRemoveStatements(added, removed); + 
index.commit(); + + // check doc 2 + document = index.getDocuments(subject2).iterator().next(); + assertEquals(subject2.toString(), document.get(SearchFields.URI_FIELD_NAME)); + assertStatement(statement21, document); + assertStatement(statement23, document); + assertNoStatement(statement22, document); + + // check if the text field stores all added and no deleted string values + texts.remove("dogs"); + texts.add("chicken"); + // FIXME + // assertTexts(texts, document); + + // TODO: check deletion of the rest + + } + + /** + * Contexts can only be tested in combination with a sail, as the triples have to be retrieved from the sail + * + * @throws Exception + */ + @Test + public void testContexts() throws Exception { + // add a sail + MemoryStore memoryStore = new MemoryStore(); + // enable lock tracking + org.eclipse.rdf4j.common.concurrent.locks.Properties.setLockTrackingEnabled(true); + LuceneSail sail = new LuceneSail(); + sail.setBaseSail(memoryStore); + sail.setLuceneIndex(index); + + // create a Repository wrapping the LuceneSail + SailRepository repository = new SailRepository(sail); + + try ( // now add the statements through the repo + // add statements with context + SailRepositoryConnection connection = repository.getConnection()) { + connection.begin(); + connection.add(statementContext111, statementContext111.getContext()); + connection.add(statementContext121, statementContext121.getContext()); + connection.add(statementContext211, statementContext211.getContext()); + connection.add(statementContext222, statementContext222.getContext()); + connection.add(statementContext232, statementContext232.getContext()); + connection.commit(); + + // check if they are there + assertStatement(statementContext111); + assertStatement(statementContext121); + assertStatement(statementContext211); + assertStatement(statementContext222); + assertStatement(statementContext232); + + // delete context 1 + connection.begin(); + connection.clear(new Resource[] { CONTEXT_1 
}); + connection.commit(); + assertNoStatement(statementContext111); + assertNoStatement(statementContext121); + assertNoStatement(statementContext211); + assertStatement(statementContext222); + assertStatement(statementContext232); + } finally { +// close repo + repository.shutDown(); + } + } + + /** + * Contexts can only be tested in combination with a sail, as the triples have to be retrieved from the sail + * + * @throws Exception + */ + @Test + public void testContextsRemoveContext2() throws Exception { + // add a sail + MemoryStore memoryStore = new MemoryStore(); + // enable lock tracking + org.eclipse.rdf4j.common.concurrent.locks.Properties.setLockTrackingEnabled(true); + LuceneSail sail = new LuceneSail(); + sail.setBaseSail(memoryStore); + sail.setLuceneIndex(index); + + // create a Repository wrapping the LuceneSail + SailRepository repository = new SailRepository(sail); + + try ( // now add the statements through the repo + // add statements with context + SailRepositoryConnection connection = repository.getConnection()) { + connection.begin(); + connection.add(statementContext111, statementContext111.getContext()); + connection.add(statementContext121, statementContext121.getContext()); + connection.add(statementContext211, statementContext211.getContext()); + connection.add(statementContext222, statementContext222.getContext()); + connection.add(statementContext232, statementContext232.getContext()); + connection.commit(); + + // check if they are there + assertStatement(statementContext111); + assertStatement(statementContext121); + assertStatement(statementContext211); + assertStatement(statementContext222); + assertStatement(statementContext232); + + // delete context 2 + connection.begin(); + connection.clear(new Resource[] { CONTEXT_2 }); + connection.commit(); + assertStatement(statementContext111); + assertStatement(statementContext121); + assertStatement(statementContext211); + assertNoStatement(statementContext222); + 
assertNoStatement(statementContext232); + } finally { +// close repo + repository.shutDown(); + } + } + + @Test + public void testRejectedDatatypes() { + IRI STRING = vf.createIRI("http://www.w3.org/2001/XMLSchema#string"); + IRI FLOAT = vf.createIRI("http://www.w3.org/2001/XMLSchema#float"); + Literal literal1 = vf.createLiteral("hi there"); + Literal literal2 = vf.createLiteral("hi there, too", STRING); + Literal literal3 = vf.createLiteral("1.0"); + Literal literal4 = vf.createLiteral("1.0", FLOAT); + assertEquals(true, index.accept(literal1), "Is the first literal accepted?"); + assertEquals(true, index.accept(literal2), "Is the second literal accepted?"); + assertEquals(true, index.accept(literal3), "Is the third literal accepted?"); + assertEquals(false, index.accept(literal4), "Is the fourth literal accepted?"); + } + + @Test + public void testInstantiatesCustomQueryAnalyzer() throws Exception { + LuceneIndex index = new LuceneIndex(); + java.util.Properties props = new java.util.Properties(); + props.put(LuceneSail.QUERY_ANALYZER_CLASS_KEY, EnglishAnalyzer.class.getName()); + props.put(LuceneSail.ANALYZER_CLASS_KEY, EnglishAnalyzer.class.getName()); + props.put(LuceneSail.LUCENE_RAMDIR_KEY, "true"); + index.initialize(props); + + assertTrue(index.getAnalyzer() instanceof EnglishAnalyzer); + assertTrue(index.getQueryAnalyzer() instanceof EnglishAnalyzer); + } + + private void assertStatement(Statement statement) throws Exception { + Document document = index.getDocument(statement.getSubject(), statement.getContext()); + if (document == null) { + fail("Missing document " + statement.getSubject()); + } + assertStatement(statement, document); + } + + private void assertNoStatement(Statement statement) throws Exception { + Document document = index.getDocument(statement.getSubject(), statement.getContext()); + if (document == null) { + return; + } + assertNoStatement(statement, document); + } + + /** + * @param statement112 + * @param document + */ + private 
void assertStatement(Statement statement, Document document) { + IndexableField[] fields = document.getFields(SearchFields.getPropertyField(statement.getPredicate())); + assertNotNull(fields, "field " + statement.getPredicate() + " not found in document " + document); + for (IndexableField f : fields) { + if (((Literal) statement.getObject()).getLabel().equals(f.stringValue())) { + return; + } + } + fail("Statement not found in document " + statement); + } + + /** + * @param statement112 + * @param document + */ + private void assertNoStatement(Statement statement, Document document) { + IndexableField[] fields = document.getFields(SearchFields.getPropertyField(statement.getPredicate())); + if (fields == null) { + return; + } + for (IndexableField f : fields) { + if (((Literal) statement.getObject()).getLabel().equals(f.stringValue())) { + fail("Statement should not be found in document " + statement); + } + } + + } + + /* + * private void assertTexts(Set texts, Document document) { Set toFind = new HashSet(texts); + * Set found = new HashSet(); for(Field field : document.getFields(LuceneIndex.TEXT_FIELD_NAME)) { + * // is the field value expected and not yet been found? 
if(toFind.remove(field.stringValue())) { // add it to the + * found set // (it was already remove from the toFind list in the if clause) found.add(field.stringValue()); } else + * { assertEquals( "Was the text value '" + field.stringValue() + "' expected to exist?", false, true); } } + * for(String notFound : toFind) { assertEquals("Was the expected text value '" + notFound + "' found?", true, + * false); } } + */ + + @Test + public void geoSparqlQueryTest() { + final String prefix = "http://www.example.org/#"; + final String prefixes = "PREFIX ex: <" + prefix + ">\n" + + "PREFIX geof: <" + GEOF.NAMESPACE + ">\n" + + "PREFIX geo: <" + CoreDatatype.GEO.NAMESPACE + ">\n" + + "PREFIX uom: \n"; + Model data = new TreeModel(); + + IRI cp = vf.createIRI(prefix + "cp"); + IRI bm = vf.createIRI(prefix + "bm"); + IRI nkv = vf.createIRI(prefix + "nkv"); + + data.add(cp, GEO.AS_WKT, vf.createLiteral("Point(4.38436 45.44917)", CoreDatatype.GEO.WKT_LITERAL)); + data.add(bm, GEO.AS_WKT, vf.createLiteral("Point(4.38311 45.45423)", CoreDatatype.GEO.WKT_LITERAL)); + data.add(nkv, GEO.AS_WKT, vf.createLiteral("Point(4.87306 45.77903)", CoreDatatype.GEO.WKT_LITERAL)); + data.add(vf.createIRI(prefix + "arp"), GEO.AS_WKT, + vf.createLiteral("Point(2.89271 42.69848)", CoreDatatype.GEO.WKT_LITERAL)); + + String polyVill = "POLYGON((4.864712 45.784405, 4.883165 45.787756, 4.889946 45.785781, 4.904881 45.767403, 4.900761 45.765487, 4.872093 45.770995, 4.86454 45.770457, 4.858789 45.770277, 4.859905 45.784644, 4.864712 45.784405))"; + String polySain = "POLYGON((4.380627 45.463983, 4.400539 45.462177, 4.428349 45.436286, 4.399509 45.411346, 4.374447 45.426528, 4.370499 45.450618, 4.380627 45.463983))"; + + SailRepository m1 = new SailRepository(new MemoryStore()); + LuceneSail lc = new LuceneSail(); + lc.setBaseSail(new MemoryStore()); + lc.setParameter(LuceneSail.WKT_FIELDS, GEO.AS_WKT.toString()); + lc.setLuceneIndex(index); + lc.setEvaluationMode(TupleFunctionEvaluationMode.NATIVE); + 
SailRepository m2 = new SailRepository(lc); + + // add test data + Repositories.consume(m1, conn -> conn.add(data)); + Repositories.consume(m2, conn -> conn.add(data)); + + lc.reindex(); + + Function> toval = (res) -> { + Set list = new HashSet<>(); + while (res.hasNext()) { + BindingSet next = res.next(); + list.add(next.getValue("v")); + } + return list; + }; + + // test queries + + String q0 = prefixes + + "SELECT * {\n" + + " ?v geo:asWKT ?loc .\n" + + " FILTER(geof:distance(\"Point(4.386914 45.440637)\"^^geo:wktLiteral, ?loc, uom:metre) < 10000) \n" + + "}\n"; + Set q0ex = Set.of(bm, cp); + + String q1 = prefixes + + "SELECT * {\n" + + " ?v geo:asWKT ?loc .\n" + + " FILTER(geof:ehContains(\"" + polySain + "\"^^geo:wktLiteral, ?loc)) \n" + + "}\n"; + Set q1ex = Set.of(bm, cp); + + String q2 = prefixes + + "SELECT * {\n" + + " ?v geo:asWKT ?loc .\n" + + " FILTER(geof:ehContains(\"" + polyVill + "\"^^geo:wktLiteral, ?loc)) \n" + + "}\n"; + Set q2ex = Set.of(nkv); + + Set nlcq0 = Repositories.tupleQuery(m1, q0, toval); + Set nlcq1 = Repositories.tupleQuery(m1, q1, toval); + Set nlcq2 = Repositories.tupleQuery(m1, q2, toval); + + assertEquals(q0ex, nlcq0); + assertEquals(q1ex, nlcq1); + assertEquals(q2ex, nlcq2); + + assertEquals(nlcq0, Repositories.tupleQuery(m2, q0, toval)); + assertEquals(nlcq1, Repositories.tupleQuery(m2, q1, toval)); + assertEquals(nlcq2, Repositories.tupleQuery(m2, q2, toval)); + } +} diff --git a/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneSailTest.java b/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneSailTest.java new file mode 100644 index 00000000000..da4d3d11f6d --- /dev/null +++ b/core/sail/lucene-v9/src/test/java/org/eclipse/rdf4j/sail/lucene/impl/LuceneSailTest.java @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others. + * + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lucene.impl; + +import java.io.IOException; + +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.store.ByteBuffersDirectory; +import org.eclipse.rdf4j.sail.lucene.LuceneSail; + +public class LuceneSailTest extends AbstractGenericLuceneTest { + + private LuceneIndex index; + + @Override + protected void configure(LuceneSail sail) throws IOException { + index = new LuceneIndex(new ByteBuffersDirectory(), new StandardAnalyzer()); + sail.setLuceneIndex(index); + } +} diff --git a/core/sail/lucene-v9/src/test/resources/logback-test.xml b/core/sail/lucene-v9/src/test/resources/logback-test.xml new file mode 100644 index 00000000000..64b3764879e --- /dev/null +++ b/core/sail/lucene-v9/src/test/resources/logback-test.xml @@ -0,0 +1,12 @@ + + + + + %d{HH:mm:ss.SSS} [%thread] %-5level %msg%n + + + + + + + diff --git a/core/sail/pom.xml b/core/sail/pom.xml index 8ec1ee27a6a..dbb9114887d 100644 --- a/core/sail/pom.xml +++ b/core/sail/pom.xml @@ -21,6 +21,7 @@ lmdb lucene-api lucene + lucene-v9 solr elasticsearch elasticsearch-store