Skip to content

Commit

Permalink
Refactor fuzziness interface on query builders (opensearch-project#5433)
Browse files Browse the repository at this point in the history
* Refactor Object to Fuzziness type for all query builders

Signed-off-by: noCharger <[email protected]>

* Revise on bwc

Signed-off-by: noCharger <[email protected]>

* Update change log

Signed-off-by: noCharger <[email protected]>

Signed-off-by: noCharger <[email protected]>
Co-authored-by: Daniel (dB.) Doubrovkine <[email protected]>
(cherry picked from commit d3f6dfa)
  • Loading branch information
noCharger committed Dec 15, 2022
1 parent 4f26823 commit 1e6b12a
Show file tree
Hide file tree
Showing 13 changed files with 131 additions and 26 deletions.
43 changes: 43 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,49 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Pre conditions check before updating weighted routing metadata ([#4955](https://github.com/opensearch-project/OpenSearch/pull/4955))

### Deprecated
<<<<<<< HEAD
=======

### Removed
- Remove deprecated code to add node name into log pattern of log4j property file ([#4568](https://github.com/opensearch-project/OpenSearch/pull/4568))
- Unused object and import within TransportClusterAllocationExplainAction ([#4639](https://github.com/opensearch-project/OpenSearch/pull/4639))
- Remove LegacyESVersion.V_7_0_* and V_7_1_* Constants ([#2768](https://https://github.com/opensearch-project/OpenSearch/pull/2768))
- Remove LegacyESVersion.V_7_2_ and V_7_3_ Constants ([#4702](https://github.com/opensearch-project/OpenSearch/pull/4702))
- Always auto release the flood stage block ([#4703](https://github.com/opensearch-project/OpenSearch/pull/4703))
- Remove LegacyESVersion.V_7_4_ and V_7_5_ Constants ([#4704](https://github.com/opensearch-project/OpenSearch/pull/4704))
- Remove Legacy Version support from Snapshot/Restore Service ([#4728](https://github.com/opensearch-project/OpenSearch/pull/4728))
- Remove deprecated serialization logic from pipeline aggs ([#4847](https://github.com/opensearch-project/OpenSearch/pull/4847))
- Remove unused private methods ([#4926](https://github.com/opensearch-project/OpenSearch/pull/4926))
- Remove LegacyESVersion.V_7_8_ and V_7_9_ Constants ([#4855](https://github.com/opensearch-project/OpenSearch/pull/4855))
- Remove LegacyESVersion.V_7_6_ and V_7_7_ Constants ([#4837](https://github.com/opensearch-project/OpenSearch/pull/4837))
- Remove LegacyESVersion.V_7_10_ Constants ([#5018](https://github.com/opensearch-project/OpenSearch/pull/5018))
- Remove Version.V_1_ Constants ([#5021](https://github.com/opensearch-project/OpenSearch/pull/5021))

### Fixed
- Fix 'org.apache.hc.core5.http.ParseException: Invalid protocol version' under JDK 16+ ([#4827](https://github.com/opensearch-project/OpenSearch/pull/4827))
- Fixed compression support for h2c protocol ([#4944](https://github.com/opensearch-project/OpenSearch/pull/4944))
- Reject bulk requests with invalid actions ([#5299](https://github.com/opensearch-project/OpenSearch/issues/5299))
- Support OpenSSL Provider with default Netty allocator ([#5460](https://github.com/opensearch-project/OpenSearch/pull/5460))

### Security

## [Unreleased 2.x]
### Added
- Prevent deletion of snapshots that are backing searchable snapshot indexes ([#5069](https://github.com/opensearch-project/OpenSearch/pull/5069))
- Add max_shard_size parameter for shrink API ([#5229](https://github.com/opensearch-project/OpenSearch/pull/5229))

### Dependencies
- Bumps `bcpg-fips` from 1.0.5.1 to 1.0.7.1
- Bumps `azure-storage-blob` from 12.16.1 to 12.20.0 ([#4995](https://github.com/opensearch-project/OpenSearch/pull/4995))
- Bumps `commons-compress` from 1.21 to 1.22 ([#5104](https://github.com/opensearch-project/OpenSearch/pull/5104))
- Bump `opencensus-contrib-http-util` from 0.18.0 to 0.31.1 ([#3633](https://github.com/opensearch-project/OpenSearch/pull/3633))
- Bump `geoip2` from 3.0.1 to 3.0.2 ([#5103](https://github.com/opensearch-project/OpenSearch/pull/5103))
- Bump gradle-extra-configurations-plugin from 7.0.0 to 8.0.0 ([#4808](https://github.com/opensearch-project/OpenSearch/pull/4808))
### Changed
### Deprecated
- Refactor fuzziness interface on query builders ([#5433](https://github.com/opensearch-project/OpenSearch/pull/5433))

>>>>>>> d3f6dfab897 (Refactor fuzziness interface on query builders (#5433))
### Removed
### Fixed
- Fix 1.x compatibility bug with stored Tasks ([#5412](https://github.com/opensearch-project/OpenSearch/pull/5412))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.opensearch.action.index.IndexRequestBuilder;
import org.opensearch.action.search.SearchResponse;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.unit.Fuzziness;
import org.opensearch.common.util.set.Sets;
import org.opensearch.common.xcontent.XContentBuilder;
import org.opensearch.common.xcontent.XContentFactory;
Expand Down Expand Up @@ -1024,7 +1025,7 @@ public void testFuzzyFieldLevelBoosting() throws InterruptedException, Execution

SearchResponse searchResponse = client().prepareSearch(idx)
.setExplain(true)
.setQuery(multiMatchQuery("foo").field("title", 100).field("body").fuzziness(0))
.setQuery(multiMatchQuery("foo").field("title", 100).field("body").fuzziness(Fuzziness.ZERO))
.get();
SearchHit[] hits = searchResponse.getHits().getHits();
assertNotEquals("both documents should be on different shards", hits[0].getShard().getShardId(), hits[1].getShard().getShardId());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import org.opensearch.common.regex.Regex;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.time.DateFormatter;
import org.opensearch.common.unit.Fuzziness;
import org.opensearch.common.xcontent.XContentBuilder;
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.common.xcontent.XContentType;
Expand Down Expand Up @@ -762,21 +763,21 @@ public void testMatchQueryFuzzy() throws Exception {
client().prepareIndex("test").setId("2").setSource("text", "Unity")
);

SearchResponse searchResponse = client().prepareSearch().setQuery(matchQuery("text", "uniy").fuzziness("0")).get();
SearchResponse searchResponse = client().prepareSearch().setQuery(matchQuery("text", "uniy").fuzziness(Fuzziness.ZERO)).get();
assertHitCount(searchResponse, 0L);

searchResponse = client().prepareSearch().setQuery(matchQuery("text", "uniy").fuzziness("1")).get();
searchResponse = client().prepareSearch().setQuery(matchQuery("text", "uniy").fuzziness(Fuzziness.ONE)).get();
assertHitCount(searchResponse, 2L);
assertSearchHits(searchResponse, "1", "2");

searchResponse = client().prepareSearch().setQuery(matchQuery("text", "uniy").fuzziness("AUTO")).get();
searchResponse = client().prepareSearch().setQuery(matchQuery("text", "uniy").fuzziness(Fuzziness.AUTO)).get();
assertHitCount(searchResponse, 2L);
assertSearchHits(searchResponse, "1", "2");

searchResponse = client().prepareSearch().setQuery(matchQuery("text", "uniy").fuzziness("AUTO:5,7")).get();
searchResponse = client().prepareSearch().setQuery(matchQuery("text", "uniy").fuzziness(Fuzziness.customAuto(5, 7))).get();
assertHitCount(searchResponse, 0L);

searchResponse = client().prepareSearch().setQuery(matchQuery("text", "unify").fuzziness("AUTO:5,7")).get();
searchResponse = client().prepareSearch().setQuery(matchQuery("text", "unify").fuzziness(Fuzziness.customAuto(5, 7))).get();
assertHitCount(searchResponse, 1L);
assertSearchHits(searchResponse, "2");
}
Expand Down
10 changes: 10 additions & 0 deletions server/src/main/java/org/opensearch/common/unit/Fuzziness.java
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,16 @@ public static Fuzziness build(Object fuzziness) {
return new Fuzziness(string);
}

/***
* Creates a {@link Fuzziness} instance from lowDistance and highDistance.
* where the edit distance is 0 for strings shorter than lowDistance,
* 1 for strings where its length between lowDistance and highDistance (inclusive),
* and 2 for strings longer than highDistance.
*/
public static Fuzziness customAuto(int lowDistance, int highDistance) {
return new Fuzziness("AUTO", lowDistance, highDistance);
}

private static Fuzziness parseCustomAuto(final String string) {
assert string.toUpperCase(Locale.ROOT).startsWith(AUTO.asString() + ":");
String[] fuzzinessLimit = string.substring(AUTO.asString().length() + 1).split(",");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,12 +175,19 @@ public String minimumShouldMatch() {
return this.minimumShouldMatch;
}

@Deprecated
/** Sets the fuzziness used when evaluated to a fuzzy query type. Defaults to "AUTO". */
public MatchBoolPrefixQueryBuilder fuzziness(Object fuzziness) {
this.fuzziness = Fuzziness.build(fuzziness);
return this;
}

/** Sets the fuzziness used when evaluated to a fuzzy query type. Defaults to "AUTO". */
public MatchBoolPrefixQueryBuilder fuzziness(Fuzziness fuzziness) {
this.fuzziness = fuzziness;
return this;
}

/** Gets the fuzziness used when evaluated to a fuzzy query type. */
public Fuzziness fuzziness() {
return this.fuzziness;
Expand Down Expand Up @@ -348,19 +355,16 @@ public static MatchBoolPrefixQueryBuilder fromXContent(XContentParser parser) th
}
}

MatchBoolPrefixQueryBuilder queryBuilder = new MatchBoolPrefixQueryBuilder(fieldName, value);
queryBuilder.analyzer(analyzer);
queryBuilder.operator(operator);
queryBuilder.minimumShouldMatch(minimumShouldMatch);
queryBuilder.boost(boost);
queryBuilder.queryName(queryName);
if (fuzziness != null) {
queryBuilder.fuzziness(fuzziness);
}
queryBuilder.prefixLength(prefixLength);
queryBuilder.maxExpansions(maxExpansion);
queryBuilder.fuzzyTranspositions(fuzzyTranspositions);
queryBuilder.fuzzyRewrite(fuzzyRewrite);
MatchBoolPrefixQueryBuilder queryBuilder = new MatchBoolPrefixQueryBuilder(fieldName, value).analyzer(analyzer)
.operator(operator)
.minimumShouldMatch(minimumShouldMatch)
.boost(boost)
.queryName(queryName)
.fuzziness(fuzziness)
.prefixLength(prefixLength)
.maxExpansions(maxExpansion)
.fuzzyTranspositions(fuzzyTranspositions)
.fuzzyRewrite(fuzzyRewrite);
return queryBuilder;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,12 +208,19 @@ public String analyzer() {
return this.analyzer;
}

@Deprecated
/** Sets the fuzziness used when evaluated to a fuzzy query type. Defaults to "AUTO". */
public MatchQueryBuilder fuzziness(Object fuzziness) {
this.fuzziness = Fuzziness.build(fuzziness);
return this;
}

/** Sets the fuzziness used when evaluated to a fuzzy query type. Defaults to "AUTO". */
public MatchQueryBuilder fuzziness(Fuzziness fuzziness) {
this.fuzziness = fuzziness;
return this;
}

/** Gets the fuzziness used when evaluated to a fuzzy query type. */
public Fuzziness fuzziness() {
return this.fuzziness;
Expand Down Expand Up @@ -565,9 +572,7 @@ public static MatchQueryBuilder fromXContent(XContentParser parser) throws IOExc
matchQuery.operator(operator);
matchQuery.analyzer(analyzer);
matchQuery.minimumShouldMatch(minimumShouldMatch);
if (fuzziness != null) {
matchQuery.fuzziness(fuzziness);
}
matchQuery.fuzziness(fuzziness);
matchQuery.fuzzyRewrite(fuzzyRewrite);
matchQuery.prefixLength(prefixLength);
matchQuery.fuzzyTranspositions(fuzzyTranspositions);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,7 @@ public int slop() {
return slop;
}

@Deprecated
/**
* Sets the fuzziness used when evaluated to a fuzzy query type. Defaults to "AUTO".
*/
Expand All @@ -414,6 +415,14 @@ public MultiMatchQueryBuilder fuzziness(Object fuzziness) {
return this;
}

/**
* Sets the fuzziness used when evaluated to a fuzzy query type. Defaults to "AUTO".
*/
public MultiMatchQueryBuilder fuzziness(Fuzziness fuzziness) {
this.fuzziness = fuzziness;
return this;
}

public Fuzziness fuzziness() {
return fuzziness;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
public static final int DEFAULT_FUZZY_PREFIX_LENGTH = FuzzyQuery.defaultPrefixLength;
public static final int DEFAULT_FUZZY_MAX_EXPANSIONS = FuzzyQuery.defaultMaxExpansions;
public static final int DEFAULT_PHRASE_SLOP = 0;
/** Default maximum edit distance. Defaults to AUTO. */
public static final Fuzziness DEFAULT_FUZZINESS = Fuzziness.AUTO;
public static final Operator DEFAULT_OPERATOR = Operator.OR;
public static final MultiMatchQueryBuilder.Type DEFAULT_TYPE = MultiMatchQueryBuilder.Type.BEST_FIELDS;
Expand Down Expand Up @@ -416,7 +417,7 @@ public boolean enablePositionIncrements() {
* Set the edit distance for fuzzy queries. Default is "AUTO".
*/
public QueryStringQueryBuilder fuzziness(Fuzziness fuzziness) {
this.fuzziness = fuzziness == null ? DEFAULT_FUZZINESS : fuzziness;
this.fuzziness = fuzziness;
return this;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,12 @@ public void testSerializationCustomAuto() throws IOException {
assertNotSame(original, deserializedFuzziness);
assertEquals(original, deserializedFuzziness);
assertEquals(original.asString(), deserializedFuzziness.asString());

original = Fuzziness.customAuto(4, 7);
deserializedFuzziness = doSerializeRoundtrip(original);
assertNotSame(original, deserializedFuzziness);
assertEquals(original, deserializedFuzziness);
assertEquals(original.asString(), deserializedFuzziness.asString());
}

private static Fuzziness doSerializeRoundtrip(Fuzziness in) throws IOException {
Expand Down Expand Up @@ -205,5 +211,11 @@ public void testAsDistanceString() {
assertEquals(1, fuzziness.asDistance("abcdef"));
assertEquals(2, fuzziness.asDistance("abcdefg"));

fuzziness = Fuzziness.customAuto(5, 7);
assertEquals(0, fuzziness.asDistance(""));
assertEquals(0, fuzziness.asDistance("abcd"));
assertEquals(1, fuzziness.asDistance("abcde"));
assertEquals(1, fuzziness.asDistance("abcdef"));
assertEquals(2, fuzziness.asDistance("abcdefg"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,11 @@ public void testIllegalValues() {
}
}

public void testDefaultFuzziness() {
MatchBoolPrefixQueryBuilder matchBoolPrefixQueryBuilder = new MatchBoolPrefixQueryBuilder(TEXT_FIELD_NAME, "text").fuzziness(null);
assertNull(matchBoolPrefixQueryBuilder.fuzziness());
}

public void testFromSimpleJson() throws IOException {
final String simple = "{" + "\"match_bool_prefix\": {" + "\"fieldName\": \"fieldValue\"" + "}" + "}";
final String expected = "{"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,11 @@ public void testFuzzinessOnNonStringField() throws Exception {
query.toQuery(context); // no exception
}

public void testDefaultFuzziness() {
MatchQueryBuilder matchQueryBuilder = new MatchQueryBuilder("text", TEXT_FIELD_NAME).fuzziness(null);
assertNull(matchQueryBuilder.fuzziness());
}

public void testExactOnUnsupportedField() throws Exception {
MatchQueryBuilder query = new MatchQueryBuilder(GEO_POINT_FIELD_NAME, "2,3");
QueryShardContext context = createShardContext();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,11 @@ public void testFuzzinessNotAllowedTypes() throws IOException {
}
}

public void testDefaultFuzziness() {
MultiMatchQueryBuilder multiMatchQueryBuilder = new MultiMatchQueryBuilder("text", TEXT_FIELD_NAME).fuzziness(null);
assertNull(multiMatchQueryBuilder.fuzziness());
}

public void testQueryParameterArrayException() {
String json = "{\n"
+ " \"multi_match\" : {\n"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -593,7 +593,7 @@ public void testToQueryWildcardWithIndexedPrefixes() throws Exception {
assertThat(query, equalTo(expectedQuery));
}

public void testToQueryWilcardQueryWithSynonyms() throws Exception {
public void testToQueryWildcardQueryWithSynonyms() throws Exception {
for (Operator op : Operator.values()) {
BooleanClause.Occur defaultOp = op.toBooleanClauseOccur();
QueryStringQueryParser queryParser = new QueryStringQueryParser(createShardContext(), TEXT_FIELD_NAME);
Expand Down Expand Up @@ -803,7 +803,7 @@ public void testToQueryRegExpQueryMaxDeterminizedStatesParsing() throws Exceptio
assertThat(e.getMessage(), containsString("would require more than 10 effort"));
}

public void testToQueryFuzzyQueryAutoFuziness() throws Exception {
public void testToQueryFuzzyQueryAutoFuzziness() throws Exception {
for (int i = 0; i < 3; i++) {
final int len;
final int expectedEdits;
Expand All @@ -828,7 +828,6 @@ public void testToQueryFuzzyQueryAutoFuziness() throws Exception {
String queryString = new String(bytes);
for (int j = 0; j < 2; j++) {
Query query = queryStringQuery(queryString + (j == 0 ? "~" : "~auto")).defaultField(TEXT_FIELD_NAME)
.fuzziness(Fuzziness.AUTO)
.toQuery(createShardContext());
assertThat(query, instanceOf(FuzzyQuery.class));
FuzzyQuery fuzzyQuery = (FuzzyQuery) query;
Expand Down Expand Up @@ -868,6 +867,11 @@ public void testFuzzyNumeric() throws Exception {
query.toQuery(context); // no exception
}

public void testDefaultFuzziness() {
QueryStringQueryBuilder queryStringQueryBuilder = new QueryStringQueryBuilder(TEXT_FIELD_NAME).fuzziness(null);
assertNull(queryStringQueryBuilder.fuzziness());
}

public void testPrefixNumeric() throws Exception {
QueryStringQueryBuilder query = queryStringQuery("12*").defaultField(INT_FIELD_NAME);
QueryShardContext context = createShardContext();
Expand Down

0 comments on commit 1e6b12a

Please sign in to comment.