Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FVH BaseFragmentsBuilder does not properly support colored pre/post tags #13934

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -405,12 +405,10 @@ protected String getPostTag(int num) {
}

/** Selects the pre tag to use for the given number from the supplied tag array. */
protected String getPreTag(String[] preTags, int num) {
// wrap num into the bounds of preTags so tags are reused cyclically
int n = num % preTags.length;
return preTags[n];
// NOTE(review): indexing with num directly throws ArrayIndexOutOfBoundsException when
// num >= preTags.length; the modulo form above wraps around instead — confirm callers
// never pass a number at or beyond the tag count.
return preTags[num];
}

/** Selects the post tag to use for the given number from the supplied tag array. */
protected String getPostTag(String[] postTags, int num) {
// wrap num into the bounds of postTags so tags are reused cyclically
int n = num % postTags.length;
return postTags[n];
// NOTE(review): indexing with num directly throws ArrayIndexOutOfBoundsException when
// num >= postTags.length; the modulo form above wraps around instead — confirm callers
// never pass a number at or beyond the tag count.
return postTags[num];
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,26 +57,33 @@ public class FieldQuery {
// fieldMatch==false, Map<null,setOfTermsInQueries>
Map<String, Set<String>> termSetMap = new HashMap<>();

int termOrPhraseNumber; // used for colored tag support
// index of the original query term or phrase in the list of expanded terms or phrases
final Map<String, Integer> queryIndexHighlights = new HashMap<>();
int previousIndex = 0;

// The maximum number of different matching terms accumulated from any one MultiTermQuery
private static final int MAX_MTQ_TERMS = 1024;

/**
 * Builds the highlighting structures for {@code query}: records the positions of its top-level
 * clauses, flattens and expands it, and adds every expanded query to its root
 * {@link QueryPhraseMap}.
 *
 * @param reader may be {@code null}, in which case no {@link IndexSearcher} is created
 * @throws IOException declared by this constructor
 */
public FieldQuery(Query query, IndexReader reader, boolean phraseHighlight, boolean fieldMatch)
throws IOException {
this.fieldMatch = fieldMatch;
Set<Query> flatQueries = new LinkedHashSet<>();
IndexSearcher searcher;
if (reader == null) {
searcher = null;
} else {
searcher = new IndexSearcher(reader);
}
final Set<Query> flatQueries = new LinkedHashSet<>();
final IndexSearcher searcher = reader == null ? null : new IndexSearcher(reader);

buildQueryIndexHighlights(query);
flatten(query, searcher, flatQueries, 1f);
saveTerms(flatQueries, searcher);
Collection<Query> expandQueries = expand(flatQueries);

for (Query flatQuery : expandQueries) {
// Resolve the clause index by the query's string form; when it is not a recorded top-level
// clause, fall back to the most recently resolved index.
int queryIndex;
if (this.queryIndexHighlights.containsKey(flatQuery.toString())) {
queryIndex = this.queryIndexHighlights.get(flatQuery.toString());
previousIndex = queryIndex;
} else {
queryIndex = previousIndex;
}

QueryPhraseMap rootMap = getRootMap(flatQuery);
// NOTE(review): queryIndex is not passed here; add(...) derives its own index from
// queryIndexHighlights and previousIndex — confirm the two computations are meant to differ.
rootMap.add(flatQuery, reader);
float boost = 1f;
Expand All @@ -88,12 +95,21 @@ public FieldQuery(Query query, IndexReader reader, boolean phraseHighlight, bool
if (!phraseHighlight && flatQuery instanceof PhraseQuery) {
PhraseQuery pq = (PhraseQuery) flatQuery;
if (pq.getTerms().length > 1) {
for (Term term : pq.getTerms()) rootMap.addTerm(term, boost);
for (Term term : pq.getTerms()) rootMap.addTerm(term, boost, queryIndex);
}
}
}
}

/**
 * Records the position of every top-level clause of a {@link BooleanQuery} in
 * {@code queryIndexHighlights}, keyed by the clause query's string form. Queries of any other
 * type leave the map untouched.
 */
private void buildQueryIndexHighlights(Query query) {
  if (!(query instanceof BooleanQuery booleanQuery)) {
    return;
  }
  int position = 0;
  for (BooleanClause clause : booleanQuery.clauses()) {
    // a later clause with the same string form overwrites the earlier position
    queryIndexHighlights.put(clause.query().toString(), position);
    position++;
  }
}

/**
* For backwards compatibility you can initialize FieldQuery without an IndexReader, which is only
* required to support MultiTermQuery
Expand Down Expand Up @@ -372,10 +388,6 @@ private QueryPhraseMap getRootMap(String fieldName) {
return rootMaps.get(fieldMatch ? fieldName : null);
}

/** Returns the current term-or-phrase number and then advances the counter by one. */
int nextTermOrPhraseNumber() {
  final int current = termOrPhraseNumber;
  termOrPhraseNumber = current + 1;
  return current;
}

/** Internal structure of a query for highlighting: represents a nested query structure */
public static class QueryPhraseMap {

Expand All @@ -390,9 +402,9 @@ public QueryPhraseMap(FieldQuery fieldQuery) {
this.fieldQuery = fieldQuery;
}

/**
 * Looks up (or creates) the sub-map for the term's text and marks it terminal with the given
 * boost; the three-argument form also forwards {@code queryIndex} to {@code markTerminal}.
 */
void addTerm(Term term, float boost) {
void addTerm(Term term, float boost, int queryIndex) {
QueryPhraseMap map = getOrNewMap(subMap, term.text());
map.markTerminal(boost);
map.markTerminal(boost, queryIndex);
}

private QueryPhraseMap getOrNewMap(Map<String, QueryPhraseMap> subMap, String term) {
Expand All @@ -405,14 +417,20 @@ private QueryPhraseMap getOrNewMap(Map<String, QueryPhraseMap> subMap, String te
}

/**
 * Adds an already-flattened query to this map. {@link BoostQuery} wrappers are stripped first;
 * a {@link TermQuery} is then added as a single term and a {@link PhraseQuery} is added by
 * descending through sub-maps and marking the last one terminal.
 *
 * @throws RuntimeException if the unwrapped query is neither a TermQuery nor a PhraseQuery
 */
void add(Query query, IndexReader reader) {
// NOTE(review): queryIndexHighlights is only filled for a BooleanQuery; when it is empty,
// highlightsLength - 1 is -1, so this default resolves to -1 and is passed on as the
// term-or-phrase number — confirm a negative index is handled where tags are selected.
int highlightsLength = fieldQuery.queryIndexHighlights.size();
int queryIndex = Math.min(fieldQuery.previousIndex + 1, highlightsLength - 1);
// the lookup uses the string form of the query before any BoostQuery wrapper is removed
if (fieldQuery.queryIndexHighlights.containsKey(query.toString())) {
queryIndex = fieldQuery.queryIndexHighlights.get(query.toString());
}

float boost = 1f;
// unwrap nested BoostQuery instances; the innermost boost wins
while (query instanceof BoostQuery) {
BoostQuery bq = (BoostQuery) query;
query = bq.getQuery();
boost = bq.getBoost();
}
if (query instanceof TermQuery) {
addTerm(((TermQuery) query).getTerm(), boost);
addTerm(((TermQuery) query).getTerm(), boost, queryIndex);
} else if (query instanceof PhraseQuery) {
PhraseQuery pq = (PhraseQuery) query;
Term[] terms = pq.getTerms();
Expand All @@ -422,7 +440,7 @@ void add(Query query, IndexReader reader) {
qpm = getOrNewMap(map, term.text());
map = qpm.subMap;
}
qpm.markTerminal(pq.getSlop(), boost);
qpm.markTerminal(pq.getSlop(), boost, queryIndex);
} else
throw new RuntimeException("query \"" + query.toString() + "\" must be flatten first.");
}
Expand All @@ -431,15 +449,15 @@ public QueryPhraseMap getTermMap(String term) {
return subMap.get(term);
}

/** Marks this map terminal with a slop of 0 by delegating to the slop-taking overload. */
private void markTerminal(float boost) {
markTerminal(0, boost);
private void markTerminal(float boost, int queryIndex) {
markTerminal(0, boost, queryIndex);
}

/** Flags this map as terminal and stores the slop, the boost, and the term-or-phrase number. */
private void markTerminal(int slop, float boost) {
private void markTerminal(int slop, float boost, int queryIndex) {
this.terminal = true;
this.slop = slop;
this.boost = boost;
// number drawn from the owning FieldQuery's incrementing counter
this.termOrPhraseNumber = fieldQuery.nextTermOrPhraseNumber();
// number taken directly from the caller-supplied queryIndex
this.termOrPhraseNumber = queryIndex;
}

public boolean isTerminal() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
Expand All @@ -36,6 +38,7 @@
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap;
import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.util.BytesRef;

public class TestFieldQuery extends AbstractTestCase {
Expand Down Expand Up @@ -953,4 +956,20 @@ public void testFlattenConstantScoreQuery() throws Exception {
fq.flatten(query, searcher, flatQueries, 1f);
assertCollectionQueries(flatQueries, tq(boost, "A"));
}

/**
 * Builds a query with three SHOULD clauses (a parsed boolean query for "A B" and two phrase
 * queries for "C B" with slop 0 and slop 2) and checks that {@code previousIndex} equals 2
 * once the {@link FieldQuery} has been constructed.
 */
public void testTermOrPhraseNumberShouldBeSameAsOriginalQuerySize() throws IOException {
  final String fieldName = "field";
  final QueryParser parser = new QueryParser(fieldName, new MockAnalyzer(random()));

  // three optional top-level clauses, added in this order
  final BooleanQuery threeClauseQuery =
      new BooleanQuery.Builder()
          .add(parser.createBooleanQuery(fieldName, "A B"), Occur.SHOULD)
          .add(parser.createPhraseQuery(fieldName, "C B", 0), Occur.SHOULD)
          .add(parser.createPhraseQuery(fieldName, "C B", 2), Occur.SHOULD)
          .build();

  final FieldQuery underTest = new FieldQuery(threeClauseQuery, true, true);

  assertEquals(2, underTest.previousIndex);
}
}
Loading