Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a limit for graph phrase query expansion #34061

Merged
merged 6 commits into from
Oct 19, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,15 @@
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.automaton.RegExp;
import org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings;
import org.apache.lucene.util.QueryBuilder;
import org.elasticsearch.common.Booleans;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.mapper.AllFieldMapper;
Expand All @@ -58,9 +64,11 @@
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Collections;

import static java.util.Collections.unmodifiableMap;
import static org.elasticsearch.common.lucene.search.Queries.fixNegativeQueryIfNeeded;

Expand All @@ -72,6 +80,7 @@
* as well as the query on the name.
*/
public class MapperQueryParser extends AnalyzingQueryParser {
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(MapperQueryParser.class));

public static final Map<String, FieldQueryExtension> FIELD_QUERY_EXTENSIONS;

Expand Down Expand Up @@ -828,6 +837,7 @@ public Query parse(String query) throws ParseException {
* Checks if graph analysis should be enabled for the field depending
* on the provided {@link Analyzer}
*/
@Override
protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field,
String queryText, boolean quoted, int phraseSlop) {
assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST;
Expand All @@ -849,4 +859,131 @@ protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator
throw new RuntimeException("Error analyzing query text", e);
}
}

/**
 * Builds the span query for a graph phrase by delegating to
 * {@link MapperQueryParser#analyzeGraphPhraseWithLimit}, using this parser's
 * {@code createSpanQuery} as the per-path span query factory and the result of
 * {@link MapperQueryParser#shouldApplyGraphPhraseLimit()} as the hard-limit flag.
 */
@Override
protected SpanQuery analyzeGraphPhrase(TokenStream source, String field, int phraseSlop) throws IOException {
    final boolean enforceHardLimit = shouldApplyGraphPhraseLimit();
    return analyzeGraphPhraseWithLimit(source, field, phraseSlop, this::createSpanQuery, enforceHardLimit);
}

/**
 * A BiFunction-like functional interface whose {@link #apply} method can throw an {@link IOException}.
 *
 * @param <T> the type of the first argument
 * @param <U> the type of the second argument
 * @param <R> the type of the result
 */
@FunctionalInterface
public interface CheckedBiFunction<T, U, R> {

    /**
     * Applies this function to the given arguments.
     *
     * @param t the first function argument
     * @param u the second function argument
     * @return the function result
     * @throws IOException declared so that implementations may propagate I/O failures
     */
    R apply(T t, U u) throws IOException;
}

/**
* Checks the value of the JVM option <code>es.query.write.apply_graph_phrase_limit</code> to determine
* if the analysis of graph phrase should be limited to {@link BooleanQuery#getMaxClauseCount()}.
* The JVM option can only be set to <code>true</code> (false is the default value), any other value
* will throw an {@link IllegalArgumentException}.
*/
public static boolean shouldApplyGraphPhraseLimit() {
String value = System.getProperty("es.query.apply_graph_phrase_limit");
if (value == null) {
return false;
} else if ("true".equals(value) == false) {
throw new IllegalArgumentException("[" + value + "] is not a valid value for the JVM option:" +
"[es.query.apply_graph_phrase_limit]. Set it to [true] to activate the limit.");
colings86 marked this conversation as resolved.
Show resolved Hide resolved
} else {
return true;
}
}

/**
 * Variant of {@link QueryBuilder#analyzeGraphPhrase(TokenStream, String, int)} that adds
 * a limit (see {@link BooleanQuery#getMaxClauseCount()}) to the number of {@link SpanQuery}
 * that this method can create. It is static so that several query builders can share it from
 * their own <code>analyzeGraphPhrase</code> overrides.
 *
 * @param source        the graph token stream to analyze
 * @param field         the field the phrase is searched on
 * @param phraseSlop    the slop passed to the resulting {@link SpanNearQuery}
 * @param spanQueryFunc builds the span query for one finite path of the graph; may return <code>null</code>,
 *                      in which case that path is ignored
 * @param isHardLimit   if <code>true</code> reaching the limit throws {@link BooleanQuery.TooManyClauses},
 *                      otherwise a deprecation warning is logged and the expansion continues
 * @return <code>null</code> if no clause was produced, the single clause if there is only one,
 *         otherwise an in-order {@link SpanNearQuery} over all the clauses
 * @throws IOException if the token stream analysis or <code>spanQueryFunc</code> fails
 */
public static SpanQuery analyzeGraphPhraseWithLimit(TokenStream source, String field, int phraseSlop,
                                                    CheckedBiFunction<TokenStream, String, SpanQuery> spanQueryFunc,
                                                    boolean isHardLimit) throws IOException {
    GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(source);
    List<SpanQuery> clauses = new ArrayList<>();
    int[] articulationPoints = graph.articulationPoints();
    int lastState = 0;
    int maxBooleanClause = BooleanQuery.getMaxClauseCount();
    // One extra iteration handles the segment after the last articulation point (end == -1).
    for (int i = 0; i <= articulationPoints.length; i++) {
        int start = lastState;
        int end = -1;
        if (i < articulationPoints.length) {
            end = articulationPoints[i];
        }
        lastState = end;
        final SpanQuery queryPos;
        if (graph.hasSidePath(start)) {
            List<SpanQuery> queries = new ArrayList<>();
            Iterator<TokenStream> it = graph.getFiniteStrings(start, end);
            while (it.hasNext()) {
                TokenStream ts = it.next();
                SpanQuery q = spanQueryFunc.apply(ts, field);
                if (q != null) {
                    // With a hard limit the check throws as soon as the limit is reached. With a soft
                    // limit the warning is emitted only once per position, when the limit is first
                    // crossed, so that a very large expansion does not log one warning per path.
                    if (isHardLimit || queries.size() == maxBooleanClause) {
                        checkGraphPhraseClauseLimit(queries.size(), maxBooleanClause, isHardLimit, field);
                    }
                    queries.add(q);
                }
            }
            if (queries.size() > 0) {
                queryPos = new SpanOrQuery(queries.toArray(new SpanQuery[0]));
            } else {
                queryPos = null;
            }
        } else {
            Term[] terms = graph.getTerms(field, start);
            assert terms.length > 0;
            checkGraphPhraseClauseLimit(terms.length, maxBooleanClause, isHardLimit, field);
            if (terms.length == 1) {
                queryPos = new SpanTermQuery(terms[0]);
            } else {
                SpanTermQuery[] orClauses = new SpanTermQuery[terms.length];
                for (int idx = 0; idx < terms.length; idx++) {
                    orClauses[idx] = new SpanTermQuery(terms[idx]);
                }
                queryPos = new SpanOrQuery(orClauses);
            }
        }
        if (queryPos != null) {
            checkGraphPhraseClauseLimit(clauses.size(), maxBooleanClause, isHardLimit, field);
            clauses.add(queryPos);
        }
    }
    if (clauses.isEmpty()) {
        return null;
    } else if (clauses.size() == 1) {
        return clauses.get(0);
    } else {
        return new SpanNearQuery(clauses.toArray(new SpanQuery[0]), phraseSlop, true);
    }
}

/**
 * Enforces the clause limit for graph phrase expansion: does nothing while <code>count</code> is below
 * <code>maxBooleanClause</code>; otherwise throws {@link BooleanQuery.TooManyClauses} when the limit is
 * hard, or logs a deprecation warning when it is not.
 */
private static void checkGraphPhraseClauseLimit(int count, int maxBooleanClause, boolean isHardLimit, String field) {
    if (count < maxBooleanClause) {
        return;
    }
    if (isHardLimit) {
        throw new BooleanQuery.TooManyClauses();
    }
    DEPRECATION_LOGGER.deprecated("Phrase query on field:[" + field + "] reached the max boolean" +
        " clause limit [" + maxBooleanClause + "] after expansion. This query will throw an error in" +
        " the next major version.");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MapperQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
import org.elasticsearch.index.mapper.MappedFieldType;
Expand All @@ -41,6 +43,9 @@
import java.util.List;
import java.util.ArrayList;

import static org.apache.lucene.queryparser.classic.MapperQueryParser.analyzeGraphPhraseWithLimit;
import static org.apache.lucene.queryparser.classic.MapperQueryParser.shouldApplyGraphPhraseLimit;

/**
* Wrapper class for Lucene's SimpleQueryParser that allows us to redefine
* different types of queries.
Expand Down Expand Up @@ -173,6 +178,7 @@ public Query newPrefixQuery(String text) {
* Checks if graph analysis should be enabled for the field depending
* on the provided {@link Analyzer}
*/
@Override
protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field,
String queryText, boolean quoted, int phraseSlop) {
assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST;
Expand All @@ -195,6 +201,14 @@ protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator
}
}

/**
 * Delegates graph phrase analysis to {@link MapperQueryParser#analyzeGraphPhraseWithLimit},
 * handing it this parser's {@code createSpanQuery} and the hard-limit flag read through
 * {@link MapperQueryParser#shouldApplyGraphPhraseLimit()}.
 */
@Override
protected SpanQuery analyzeGraphPhrase(TokenStream source, String field, int phraseSlop) throws IOException {
    final boolean applyHardLimit = shouldApplyGraphPhraseLimit();
    return analyzeGraphPhraseWithLimit(source, field, phraseSlop, this::createSpanQuery, applyHardLimit);
}

private static Query wrapWithBoost(Query query, float boost) {
if (boost != AbstractQueryBuilder.DEFAULT_BOOST) {
return new BoostQuery(query, boost);
Expand Down
12 changes: 12 additions & 0 deletions core/src/main/java/org/elasticsearch/index/search/MatchQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.ExtendedCommonTermsQuery;
import org.apache.lucene.queryparser.classic.MapperQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
Expand Down Expand Up @@ -58,6 +59,9 @@

import java.io.IOException;

import static org.apache.lucene.queryparser.classic.MapperQueryParser.analyzeGraphPhraseWithLimit;
import static org.apache.lucene.queryparser.classic.MapperQueryParser.shouldApplyGraphPhraseLimit;

public class MatchQuery {

public enum Type implements Writeable {
Expand Down Expand Up @@ -349,6 +353,14 @@ protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator
}
}

/**
 * Analyzes a graph phrase through {@link MapperQueryParser#analyzeGraphPhraseWithLimit}.
 * The span queries are created by this builder's {@code createSpanQuery}; whether the
 * limit is a hard one is decided by {@link MapperQueryParser#shouldApplyGraphPhraseLimit()}.
 */
@Override
protected SpanQuery analyzeGraphPhrase(TokenStream source, String field, int phraseSlop) throws IOException {
    final boolean limitIsHard = shouldApplyGraphPhraseLimit();
    return analyzeGraphPhraseWithLimit(source, field, phraseSlop, this::createSpanQuery, limitIsHard);
}

public Query createPhrasePrefixQuery(String field, String queryText, int phraseSlop, int maxExpansions) {
final Query query = createFieldQuery(getAnalyzer(), Occur.MUST, field, queryText, true, phraseSlop);
return toMultiPhrasePrefix(query, phraseSlop, maxExpansions);
Expand Down
4 changes: 4 additions & 0 deletions core/src/main/java/org/elasticsearch/search/SearchModule.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package org.elasticsearch.search;

import org.apache.lucene.queryparser.classic.MapperQueryParser;
import org.apache.lucene.search.BooleanQuery;
import org.elasticsearch.common.NamedRegistry;
import org.elasticsearch.common.geo.ShapesAvailability;
Expand Down Expand Up @@ -258,6 +259,7 @@

import static java.util.Collections.unmodifiableMap;
import static java.util.Objects.requireNonNull;
import static org.apache.lucene.queryparser.classic.MapperQueryParser.shouldApplyGraphPhraseLimit;

/**
* Sets up things that can be done at search time like queries, aggregations, and suggesters.
Expand All @@ -282,6 +284,8 @@ public class SearchModule {
public SearchModule(Settings settings, boolean transportClient, List<SearchPlugin> plugins) {
this.settings = settings;
this.transportClient = transportClient;
// checks if the system property es.query.apply_graph_phrase_limit is set to a valid value
shouldApplyGraphPhraseLimit();
registerSuggesters(plugins);
highlighters = setupHighlighters(settings, plugins);
registerScoreFunctions(plugins);
Expand Down
Loading