From 324b08754fe91f1e86ec482d97e1a5d73987dd7f Mon Sep 17 00:00:00 2001
From: kkewwei
Date: Sat, 6 Jul 2024 16:50:13 +0800
Subject: [PATCH 1/6] support rangeQuery and regexpQuery in constant_keyword field type

Signed-off-by: kkewwei
---
 .../index/mapper/ConstantFieldType.java       |  2 +-
 .../mapper/ConstantKeywordFieldMapper.java    | 99 +++++++++++++++++++
 .../mapper/ConstantKeywordFieldTypeTests.java | 54 +++++++++++
 3 files changed, 154 insertions(+), 1 deletion(-)

diff --git a/server/src/main/java/org/opensearch/index/mapper/ConstantFieldType.java b/server/src/main/java/org/opensearch/index/mapper/ConstantFieldType.java
index a28a6369b1aa4..8d4920c554a9c 100644
--- a/server/src/main/java/org/opensearch/index/mapper/ConstantFieldType.java
+++ b/server/src/main/java/org/opensearch/index/mapper/ConstantFieldType.java
@@ -76,7 +76,7 @@ public final boolean isAggregatable() {
      */
     protected abstract boolean matches(String pattern, boolean caseInsensitive, QueryShardContext context);

-    private static String valueToString(Object value) {
+    protected static String valueToString(Object value) {
         return value instanceof BytesRef ? ((BytesRef) value).utf8ToString() : value.toString();
     }

diff --git a/server/src/main/java/org/opensearch/index/mapper/ConstantKeywordFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/ConstantKeywordFieldMapper.java
index 2edd817f61f61..02c2214c18e72 100644
--- a/server/src/main/java/org/opensearch/index/mapper/ConstantKeywordFieldMapper.java
+++ b/server/src/main/java/org/opensearch/index/mapper/ConstantKeywordFieldMapper.java
@@ -9,10 +9,21 @@
 package org.opensearch.index.mapper;

 import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MatchNoDocsQuery;
+import org.apache.lucene.search.MultiTermQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.RegexpQuery;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.ByteRunAutomaton;
+import org.apache.lucene.util.automaton.RegExp;
 import org.opensearch.OpenSearchParseException;
+import org.opensearch.common.Nullable;
 import org.opensearch.common.annotation.PublicApi;
+import org.opensearch.common.geo.ShapeRelation;
+import org.opensearch.common.lucene.BytesRefs;
 import org.opensearch.common.regex.Regex;
+import org.opensearch.common.time.DateMathParser;
 import org.opensearch.index.fielddata.IndexFieldData;
 import org.opensearch.index.fielddata.plain.ConstantIndexFieldData;
 import org.opensearch.index.query.QueryShardContext;
@@ -20,6 +31,7 @@
 import org.opensearch.search.lookup.SearchLookup;

 import java.io.IOException;
+import java.time.ZoneId;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
@@ -122,6 +134,93 @@ public Query existsQuery(QueryShardContext context) {
             return new MatchAllDocsQuery();
         }

+        /**
+         * Builds a range query against the single constant value of this field. The result is
+         * {@link MatchAllDocsQuery} when the value lies inside the range and {@link MatchNoDocsQuery} otherwise.
+         * <p>
+         * Bounds are compared as UTF-8 bytes (unsigned, byte-wise) instead of with {@link String#compareTo},
+         * which orders by UTF-16 code unit and therefore sorts supplementary characters differently.
+         *
+         * @param lowerTerm    lower bound, or {@code null} for an unbounded lower end
+         * @param upperTerm    upper bound, or {@code null} for an unbounded upper end
+         * @param includeLower whether a value equal to {@code lowerTerm} is inside the range
+         * @param includeUpper whether a value equal to {@code upperTerm} is inside the range
+         * @param relation     not used
+         * @param timeZone     not used
+         * @param parser       not used
+         * @param context      not used
+         */
+        @Override
+        public Query rangeQuery(
+            Object lowerTerm,
+            Object upperTerm,
+            boolean includeLower,
+            boolean includeUpper,
+            ShapeRelation relation,
+            ZoneId timeZone,
+            DateMathParser parser,
+            QueryShardContext context
+        ) {
+            final BytesRef constant = BytesRefs.toBytesRef(value);
+            final BytesRef lower = lowerTerm == null ? null : BytesRefs.toBytesRef(lowerTerm);
+            final BytesRef upper = upperTerm == null ? null : BytesRefs.toBytesRef(upperTerm);
+
+            // An inverted range (lower bound above upper bound) can never match.
+            if (lower != null && upper != null && lower.compareTo(upper) > 0) {
+                return new MatchNoDocsQuery();
+            }
+
+            if (lower != null) {
+                final int cmp = lower.compareTo(constant);
+                // The constant is below the lower bound, or equal to it while the bound is exclusive.
+                if (cmp > 0 || (cmp == 0 && includeLower == false)) {
+                    return new MatchNoDocsQuery();
+                }
+            }
+
+            if (upper != null) {
+                final int cmp = upper.compareTo(constant);
+                // The constant is above the upper bound, or equal to it while the bound is exclusive.
+                if (cmp < 0 || (cmp == 0 && includeUpper == false)) {
+                    return new MatchNoDocsQuery();
+                }
+            }
+            return new MatchAllDocsQuery();
+        }
+
+        /**
+         * Builds a regexp query against the single constant value of this field: every document matches when
+         * the pattern accepts the constant value, otherwise none does.
+         *
+         * @param value                 regular expression; note that it shadows the field's constant value
+         * @param syntaxFlags           syntax flags forwarded to {@link RegExp}
+         * @param matchFlags            match flags forwarded to {@link RegExp}
+         * @param maxDeterminizedStates limit forwarded to {@code RegExp.toAutomaton}
+         * @param method                not used
+         * @param context               not used
+         */
+        @Override
+        public Query regexpQuery(
+            String value,
+            int syntaxFlags,
+            int matchFlags,
+            int maxDeterminizedStates,
+            @Nullable MultiTermQuery.RewriteMethod method,
+            QueryShardContext context
+        ) {
+            Automaton automaton = new RegExp(value, syntaxFlags, matchFlags).toAutomaton(
+                RegexpQuery.DEFAULT_PROVIDER,
+                maxDeterminizedStates
+            );
+            ByteRunAutomaton byteRunAutomaton = new ByteRunAutomaton(automaton);
+            BytesRef valueBytes = BytesRefs.toBytesRef(this.value);
+            if (byteRunAutomaton.run(valueBytes.bytes, valueBytes.offset, valueBytes.length)) {
+                return new MatchAllDocsQuery();
+            } else {
+                return new MatchNoDocsQuery();
+            }
+        }
+
         @Override
         public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, Supplier<SearchLookup> searchLookup) {
             return new ConstantIndexFieldData.Builder(fullyQualifiedIndexName, name(), CoreValuesSourceType.BYTES);
diff --git a/server/src/test/java/org/opensearch/index/mapper/ConstantKeywordFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/ConstantKeywordFieldTypeTests.java
index 235811539a299..266d79fb8e8b8 100644
--- a/server/src/test/java/org/opensearch/index/mapper/ConstantKeywordFieldTypeTests.java
+++ b/server/src/test/java/org/opensearch/index/mapper/ConstantKeywordFieldTypeTests.java
@@ -10,6 +10,8 @@

 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.MatchNoDocsQuery;
+import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.Query;
 import org.opensearch.Version;
 import org.opensearch.cluster.metadata.IndexMetadata;
 import org.opensearch.common.regex.Regex;
@@ -61,6 +63,58 @@ public void testExistsQuery() {
         assertEquals(new MatchAllDocsQuery(), ft.existsQuery(createContext()));
     }

+    public void testRangeQuery() {
+        Query actual = ft.rangeQuery("default", null, true, false, null, null, null, MOCK_QSC);
+        assertEquals(new MatchAllDocsQuery(), actual);
+
+        actual = ft.rangeQuery("default", null, false, false, null, null, null, MOCK_QSC);
+        assertEquals(new MatchNoDocsQuery(), actual);
+
+        actual = ft.rangeQuery(null, "default", true, true, null, null, null, MOCK_QSC);
+        assertEquals(new MatchAllDocsQuery(), actual);
+
+        actual = ft.rangeQuery(null, "default", false, false, null, null, null, MOCK_QSC);
+        assertEquals(new MatchNoDocsQuery(), actual);
+
+        actual = ft.rangeQuery("default", "default", false, true, null, null, null, MOCK_QSC);
+        assertEquals(new MatchNoDocsQuery(), actual);
+
+        actual = ft.rangeQuery("default", "default", true, false, null, null, null, MOCK_QSC);
+        assertEquals(new MatchNoDocsQuery(), actual);
+
+        actual = ft.rangeQuery(null, null, false, false, null, null, null, MOCK_QSC);
+        assertEquals(new MatchAllDocsQuery(), actual);
+
+        actual = ft.rangeQuery("default", "default", true, true, null, null, null, MOCK_QSC);
+        assertEquals(new MatchAllDocsQuery(), actual);
+
+        actual = ft.rangeQuery("defaul", "default1", true, true, null, null, null, MOCK_QSC);
+        assertEquals(new MatchAllDocsQuery(), actual);
+    }
+
+    public void testRegexpQuery() {
+        final ConstantKeywordFieldMapper.ConstantKeywordFieldType ft = new ConstantKeywordFieldMapper.ConstantKeywordFieldType(
+            "field",
+            "d3efault"
+        );
+        // test .*
+        Query query = ft.regexpQuery("d.*", 0, 0, 10, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC);
+        assertEquals(new MatchAllDocsQuery(), query);
+        // test \d and ? where the pattern matches
+        query = ft.regexpQuery("d\\defau[a-z]?t", 0, 0, 10, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC);
+        assertEquals(new MatchAllDocsQuery(), query);
+
+        // test \d and ? where the pattern does not match
+        query = ft.regexpQuery("d\\defa[a-z]?t", 0, 0, 10, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC);
+        assertEquals(new MatchNoDocsQuery(), query);
+        // test [a-z]{m,n} where the repetition count matches
+        query = ft.regexpQuery("d3efa[a-z]{3,3}", 0, 0, 10, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC);
+        assertEquals(new MatchAllDocsQuery(), query);
+        // test [a-z]{m,n} where the repetition count does not match
+        query = ft.regexpQuery("d3efa[a-z]{4,4}", 0, 0, 10, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC);
+        assertEquals(new MatchNoDocsQuery(), query);
+    }
+
     private QueryShardContext createContext() {
         IndexMetadata indexMetadata = IndexMetadata.builder("index")
             .settings(Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT))

From 795ed61ef751b4fac3fcf72ad2cc4062ed699efa Mon Sep 17 00:00:00 2001
From: kkewwei
Date: Thu, 1 Aug 2024 21:37:45 +0800
Subject: [PATCH 2/6] add change log

Signed-off-by: kkewwei
---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c1846bd5e7cfd..b64fd7c5e663c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Add basic aggregation support for derived fields ([#14618](https://github.com/opensearch-project/OpenSearch/pull/14618))
 - Add ThreadContextPermission for markAsSystemContext and allow core to perform the method ([#15016](https://github.com/opensearch-project/OpenSearch/pull/15016))
 - Add ThreadContextPermission for stashAndMergeHeaders and stashWithOrigin ([#15039](https://github.com/opensearch-project/OpenSearch/pull/15039))
+- Add `rangeQuery` and `regexpQuery` for `constant_keyword` field type ([#14711](https://github.com/opensearch-project/OpenSearch/pull/14711))

 ### Dependencies
 - Bump `org.apache.commons:commons-lang3` from 3.14.0 to 3.15.0
([#14861](https://github.com/opensearch-project/OpenSearch/pull/14861)) From 8d0742ffda30dbc22b4d2cbfe9d142e708025fa1 Mon Sep 17 00:00:00 2001 From: kkewwei Date: Fri, 2 Aug 2024 00:23:27 +0800 Subject: [PATCH 3/6] add rest api spec tests Signed-off-by: kkewwei add rest api spec tests Signed-off-by: kkewwei --- .../test/index/110_constant_keyword.yml | 270 +++++++++++++++++- .../index/mapper/ConstantFieldType.java | 2 +- 2 files changed, 261 insertions(+), 11 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/index/110_constant_keyword.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/index/110_constant_keyword.yml index f4f8b3752bec8..0cd0c2917f699 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/index/110_constant_keyword.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/index/110_constant_keyword.yml @@ -1,17 +1,18 @@ --- # The test setup includes: -# - Create index with constant_keyword field type +# - Create two indices with constant_keyword field type +# - Index documents # - Check mapping -# - Index two example documents -# - Search -# - Delete Index when connection is teardown +# - Verify document count +# - Search rangeQuery and regexpQuery +# - Delete indices when connection is teardown "Mappings and Supported queries": - skip: - version: " - 2.15.99" - reason: "fixed in 2.16.0" + version: " - 2.16.99" + reason: "fixed in 2.17.0" - # Create index with constant_keyword field type + # Create indices with constant_keyword field type - do: indices.create: index: test @@ -22,7 +23,17 @@ type: "constant_keyword" value: "1" - # Index document + - do: + indices.create: + index: test1 + body: + mappings: + properties: + genre: + type: "constant_keyword" + value: "d3efault" + + # Index documents to test integer and string are both ok. - do: index: index: test @@ -39,9 +50,18 @@ "genre": 1 } + # Index documents to test query. 
+ - do: + index: + index: test1 + id: 1 + body: { + "genre": "d3efault" + } + - do: indices.refresh: - index: test + index: [test, test1] # Check mapping - do: @@ -54,6 +74,7 @@ # Verify Document Count - do: search: + index: test body: { query: { match_all: {} @@ -64,7 +85,236 @@ - match: { hits.hits.0._source.genre: "1" } - match: { hits.hits.1._source.genre: 1 } + - do: + search: + index: test1 + body: { + query: { + match_all: {} + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.genre: "d3efault" } + + # Test rangeQuery + - do: + search: + index: test1 + body: { + query: { + range: { + genre: { + gte: "d3efault" + } + } + } + } + + - length: { hits.hits: 1 } + + - do: + search: + index: test1 + body: { + query: { + range: { + genre: { + from: "d3efault", + "include_lower": "false" + } + } + } + } + + - length: { hits.hits: 0 } + + - do: + search: + index: test1 + body: { + query: { + range: { + genre: { + lte: "d3efault" + } + } + } + } + + - length: { hits.hits: 1 } + + - do: + search: + index: test1 + body: { + query: { + range: { + genre: { + to: "d3efault", + include_upper: "false" + } + } + } + } + + - length: { hits.hits: 0 } + + - do: + search: + index: test1 + body: { + query: { + range: { + genre: { + from: "d3efault", + to: "d3efault", + include_lower: "false", + include_upper: "true" + } + } + } + } + + - length: { hits.hits: 0 } + + - do: + search: + index: test1 + body: { + query: { + range: { + genre: { + from: "d3efault", + to: "d3efault", + include_lower: "true", + include_upper: "false" + } + } + } + } + + - length: { hits.hits: 0 } + + - do: + search: + index: test1 + body: { + query: { + range: { + genre: { + from: null, + to: null + } + } + } + } + + - length: { hits.hits: 1 } + + - do: + search: + index: test1 + body: { + query: { + range: { + genre: { + from: "d3efault", + to: "d3efault", + include_lower: "true", + include_upper: "true" + } + } + } + } + + - length: { hits.hits: 1 } + + - do: + search: + index: test1 + 
body: { + query: { + range: { + genre: { + from: "d3efaul", + to: "d3efault1", + include_lower: "true", + include_upper: "true" + } + } + } + } + + - length: { hits.hits: 1 } + + # Test regexpQuery + - do: + search: + index: test1 + body: { + query: { + regexp: { + "genre":"d.*" + } + } + } + + - length: { hits.hits: 1 } + + - do: + search: + index: test1 + body: { + query: { + regexp: { + "genre":"d\\defau[a-z]?t" + } + } + } + + - length: { hits.hits: 1 } + + - do: + search: + index: test1 + body: { + query: { + regexp: { + "genre":"d\\defa[a-z]?t" + } + } + } + + - length: { hits.hits: 0 } + + - do: + search: + index: test1 + body: { + query: { + regexp: { + "genre":"d3efa[a-z]{3,3}" + } + } + } + + - length: { hits.hits: 1 } + + - do: + search: + index: test1 + body: { + query: { + regexp: { + "genre":"d3efa[a-z]{4,4}" + } + } + } + + - length: { hits.hits: 0 } + # Delete Index when connection is teardown - do: indices.delete: - index: test + index: test,test1 diff --git a/server/src/main/java/org/opensearch/index/mapper/ConstantFieldType.java b/server/src/main/java/org/opensearch/index/mapper/ConstantFieldType.java index 8d4920c554a9c..cc581651e5295 100644 --- a/server/src/main/java/org/opensearch/index/mapper/ConstantFieldType.java +++ b/server/src/main/java/org/opensearch/index/mapper/ConstantFieldType.java @@ -76,7 +76,7 @@ public final boolean isAggregatable() { */ protected abstract boolean matches(String pattern, boolean caseInsensitive, QueryShardContext context); - protected static String valueToString(Object value) { + static String valueToString(Object value) { return value instanceof BytesRef ? 
((BytesRef) value).utf8ToString() : value.toString(); } From 9e6e85a0a44af474d9b4e00f547af73d14402a37 Mon Sep 17 00:00:00 2001 From: kkewwei Date: Fri, 2 Aug 2024 02:44:02 +0800 Subject: [PATCH 4/6] fix rest api spec tests Signed-off-by: kkewwei --- .../test/index/110_constant_keyword.yml | 35 ++++++++++++++----- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/index/110_constant_keyword.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/index/110_constant_keyword.yml index 0cd0c2917f699..38d447b6427af 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/index/110_constant_keyword.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/index/110_constant_keyword.yml @@ -6,11 +6,10 @@ # - Verify document count # - Search rangeQuery and regexpQuery # - Delete indices when connection is teardown - -"Mappings and Supported queries": +setup: - skip: - version: " - 2.16.99" - reason: "fixed in 2.17.0" + version: " - 2.15.99" + reason: "fixed in 2.16.0" # Create indices with constant_keyword field type - do: @@ -63,6 +62,23 @@ indices.refresh: index: [test, test1] +--- +# Delete Index when connection is teardown +teardown: + - skip: + version: " - 2.15.99" + reason: "fixed in 2.16.0" + + - do: + indices.delete: + index: test,test1 + +--- +"Mappings": + - skip: + version: " - 2.15.99" + reason: "fixed in 2.16.0" + # Check mapping - do: indices.get_mapping: @@ -97,6 +113,12 @@ - length: { hits.hits: 1 } - match: { hits.hits.0._source.genre: "d3efault" } +--- +"Queries": + - skip: + version: " - 2.16.99" + reason: "fixed in 2.17.0" + # Test rangeQuery - do: search: @@ -313,8 +335,3 @@ } - length: { hits.hits: 0 } - - # Delete Index when connection is teardown - - do: - indices.delete: - index: test,test1 From 2e326719e4bf173ebaef4c55810e3ca8a1eba6be Mon Sep 17 00:00:00 2001 From: kkewwei Date: Fri, 2 Aug 2024 16:25:14 +0800 Subject: [PATCH 5/6] fix rest api spec tests Signed-off-by: 
kkewwei --- .../test/index/110_constant_keyword.yml | 105 +++++++++--------- 1 file changed, 50 insertions(+), 55 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/index/110_constant_keyword.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/index/110_constant_keyword.yml index 38d447b6427af..b5a87862f562c 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/index/110_constant_keyword.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/index/110_constant_keyword.yml @@ -1,12 +1,8 @@ +# The test setup includes two parts: +# part1: test mapping and indexing +# part2: test query --- -# The test setup includes: -# - Create two indices with constant_keyword field type -# - Index documents -# - Check mapping -# - Verify document count -# - Search rangeQuery and regexpQuery -# - Delete indices when connection is teardown -setup: +"Mappings and Indexing": - skip: version: " - 2.15.99" reason: "fixed in 2.16.0" @@ -22,16 +18,6 @@ setup: type: "constant_keyword" value: "1" - - do: - indices.create: - index: test1 - body: - mappings: - properties: - genre: - type: "constant_keyword" - value: "d3efault" - # Index documents to test integer and string are both ok. - do: index: @@ -49,35 +35,10 @@ setup: "genre": 1 } - # Index documents to test query. 
- - do: - index: - index: test1 - id: 1 - body: { - "genre": "d3efault" - } - + # Refresh - do: indices.refresh: - index: [test, test1] - ---- -# Delete Index when connection is teardown -teardown: - - skip: - version: " - 2.15.99" - reason: "fixed in 2.16.0" - - - do: - indices.delete: - index: test,test1 - ---- -"Mappings": - - skip: - version: " - 2.15.99" - reason: "fixed in 2.16.0" + index: test # Check mapping - do: @@ -101,17 +62,10 @@ teardown: - match: { hits.hits.0._source.genre: "1" } - match: { hits.hits.1._source.genre: 1 } + # Delete Index when connection is teardown - do: - search: - index: test1 - body: { - query: { - match_all: {} - } - } - - - length: { hits.hits: 1 } - - match: { hits.hits.0._source.genre: "d3efault" } + indices.delete: + index: test --- "Queries": @@ -119,6 +73,30 @@ teardown: version: " - 2.16.99" reason: "fixed in 2.17.0" + - do: + indices.create: + index: test1 + body: + mappings: + properties: + genre: + type: "constant_keyword" + value: "d3efault" + + # Index documents to test query. 
+ - do: + index: + index: test1 + id: 1 + body: { + "genre": "d3efault" + } + + # Refresh + - do: + indices.refresh: + index: test1 + # Test rangeQuery - do: search: @@ -335,3 +313,20 @@ teardown: } - length: { hits.hits: 0 } + + - do: + search: + index: test1 + body: { + query: { + match_all: {} + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.genre: "d3efault" } + + # Delete Index when connection is teardown + - do: + indices.delete: + index: test1 From 0ebfcb3805f63d28fa987312f6fe6be4adec054c Mon Sep 17 00:00:00 2001 From: kkewwei Date: Fri, 2 Aug 2024 16:28:16 +0800 Subject: [PATCH 6/6] fix rest api spec tests Signed-off-by: kkewwei --- .../rest-api-spec/test/index/110_constant_keyword.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/index/110_constant_keyword.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/index/110_constant_keyword.yml index b5a87862f562c..1c50187534026 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/index/110_constant_keyword.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/index/110_constant_keyword.yml @@ -70,8 +70,8 @@ --- "Queries": - skip: - version: " - 2.16.99" - reason: "fixed in 2.17.0" + version: " - 2.99.99" + reason: "rangeQuery and regexpQuery are supported in 3.0.0 in main branch" - do: indices.create: