From dd5aad4bf507ae89b6104689a2aae5da24bbebb1 Mon Sep 17 00:00:00 2001 From: Varun Jain Date: Wed, 4 Sep 2024 12:57:46 -0700 Subject: [PATCH] Removing code to cut search results of hybrid search in the priority queue (#867) * Removing code to cut results in the priority queue Signed-off-by: Varun Jain * throw exception when from is not equal to 0 Signed-off-by: Varun Jain * Adding TODO check Signed-off-by: Varun Jain * Move pagination validation of from condition to query phase searcher Signed-off-by: Varun Jain * Adding integ test Signed-off-by: Varun Jain --------- Signed-off-by: Varun Jain (cherry picked from commit b8e2b3558d95d62d8628471375a869d803ece05e) --- .../collector/HybridTopScoreDocCollector.java | 5 --- .../query/HybridQueryPhaseSearcher.java | 4 ++ .../query/HybridQueryAggregationsIT.java | 24 +++++++---- .../neuralsearch/query/HybridQueryIT.java | 40 +++++++++++++++++++ .../query/HybridQueryPostFilterIT.java | 21 ++++++---- .../neuralsearch/query/HybridQuerySortIT.java | 27 ++++++++----- .../MetricAggregationsWithHybridQueryIT.java | 3 +- .../neuralsearch/BaseNeuralSearchIT.java | 7 ++-- 8 files changed, 98 insertions(+), 33 deletions(-) diff --git a/src/main/java/org/opensearch/neuralsearch/search/collector/HybridTopScoreDocCollector.java b/src/main/java/org/opensearch/neuralsearch/search/collector/HybridTopScoreDocCollector.java index 01a4cdfff..4e72b55bf 100644 --- a/src/main/java/org/opensearch/neuralsearch/search/collector/HybridTopScoreDocCollector.java +++ b/src/main/java/org/opensearch/neuralsearch/search/collector/HybridTopScoreDocCollector.java @@ -172,11 +172,6 @@ private TopDocs topDocsPerQuery(int start, int howMany, PriorityQueue int size = howMany - start; ScoreDoc[] results = new ScoreDoc[size]; - // pq's pop() returns the 'least' element in the queue, therefore need - // to discard the first ones, until we reach the requested range. - for (int i = pq.size() - start - size; i > 0; i--) { - pq.pop(); - } // Get the requested results from pq. populateResults(results, size, pq); diff --git a/src/main/java/org/opensearch/neuralsearch/search/query/HybridQueryPhaseSearcher.java b/src/main/java/org/opensearch/neuralsearch/search/query/HybridQueryPhaseSearcher.java index 53248f88c..8c7390406 100644 --- a/src/main/java/org/opensearch/neuralsearch/search/query/HybridQueryPhaseSearcher.java +++ b/src/main/java/org/opensearch/neuralsearch/search/query/HybridQueryPhaseSearcher.java @@ -60,6 +60,10 @@ public boolean searchWith( validateQuery(searchContext, query); return super.searchWith(searchContext, searcher, query, collectors, hasFilterCollector, hasTimeout); } else { + // TODO remove this check after following issue https://github.com/opensearch-project/neural-search/issues/280 gets resolved. + if (searchContext.from() != 0) { + throw new IllegalArgumentException("In the current OpenSearch version pagination is not supported with hybrid query"); + } Query hybridQuery = extractHybridQuery(searchContext, query); QueryPhaseSearcher queryPhaseSearcher = getQueryPhaseSearcher(searchContext); return queryPhaseSearcher.searchWith(searchContext, searcher, hybridQuery, collectors, hasFilterCollector, hasTimeout); diff --git a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryAggregationsIT.java b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryAggregationsIT.java index 9df106156..3bb566aef 100644 --- a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryAggregationsIT.java +++ b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryAggregationsIT.java @@ -215,7 +215,8 @@ private void testPostFilterWithSimpleHybridQuery(boolean isSingleShard, boolean rangeFilterQuery, null, false, - null + null, + 0 ); assertHitResultsFromQuery(1, searchResponseAsMap); @@ -230,7 +231,8 @@ private void testPostFilterWithSimpleHybridQuery(boolean isSingleShard, boolean null, null, false, - null + null, + 0 ); assertHitResultsFromQuery(2, searchResponseAsMap); } else if (!isSingleShard && hasPostFilterQuery) { @@ -244,7 +246,8 @@ private void testPostFilterWithSimpleHybridQuery(boolean isSingleShard, boolean rangeFilterQuery, null, false, - null + null, + 0 ); assertHitResultsFromQuery(2, searchResponseAsMap); } else { @@ -258,7 +261,8 @@ private void testPostFilterWithSimpleHybridQuery(boolean isSingleShard, boolean null, null, false, - null + null, + 0 ); assertHitResultsFromQuery(3, searchResponseAsMap); } @@ -319,7 +323,8 @@ private void testPostFilterWithComplexHybridQuery(boolean isSingleShard, boolean rangeFilterQuery, null, false, - null + null, + 0 ); assertHitResultsFromQuery(1, searchResponseAsMap); @@ -334,7 +339,8 @@ private void testPostFilterWithComplexHybridQuery(boolean isSingleShard, boolean null, null, false, - null + null, + 0 ); assertHitResultsFromQuery(2, searchResponseAsMap); } else if (!isSingleShard && hasPostFilterQuery) { @@ -348,7 +354,8 @@ private void testPostFilterWithComplexHybridQuery(boolean isSingleShard, boolean rangeFilterQuery, null, false, - null + null, + 0 ); assertHitResultsFromQuery(4, searchResponseAsMap); } else { @@ -362,7 +369,8 @@ private void testPostFilterWithComplexHybridQuery(boolean isSingleShard, boolean null, null, false, - null + null, + 0 ); assertHitResultsFromQuery(3, searchResponseAsMap); } diff --git a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryIT.java b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryIT.java index a650087b4..610e08dd0 100644 --- a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryIT.java +++ b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryIT.java @@ -793,6 +793,46 @@ public void testConcurrentSearchWithMultipleSlices_whenMultipleShardsIndex_thenS } } + // TODO remove this test after following issue https://github.com/opensearch-project/neural-search/issues/280 gets resolved. + @SneakyThrows + public void testHybridQuery_whenFromIsSetInSearchRequest_thenFail() { + try { + initializeIndexIfNotExist(TEST_MULTI_DOC_INDEX_NAME_ONE_SHARD); + createSearchPipelineWithResultsPostProcessor(SEARCH_PIPELINE); + MatchQueryBuilder matchQueryBuilder = QueryBuilders.matchQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3); + HybridQueryBuilder hybridQueryBuilderOnlyTerm = new HybridQueryBuilder(); + hybridQueryBuilderOnlyTerm.add(matchQueryBuilder); + + ResponseException exceptionNoNestedTypes = expectThrows( + ResponseException.class, + () -> search( + TEST_MULTI_DOC_INDEX_NAME_ONE_SHARD, + hybridQueryBuilderOnlyTerm, + null, + 10, + Map.of("search_pipeline", SEARCH_PIPELINE), + null, + null, + null, + false, + null, + 10 + ) + + ); + + org.hamcrest.MatcherAssert.assertThat( + exceptionNoNestedTypes.getMessage(), + allOf( + containsString("In the current OpenSearch version pagination is not supported with hybrid query"), + containsString("illegal_argument_exception") + ) + ); + } finally { + wipeOfTestResources(TEST_MULTI_DOC_INDEX_NAME_ONE_SHARD, null, null, SEARCH_PIPELINE); + } + } + @SneakyThrows private void initializeIndexIfNotExist(String indexName) throws IOException { if (TEST_BASIC_INDEX_NAME.equals(indexName) && !indexExists(TEST_BASIC_INDEX_NAME)) { diff --git a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryPostFilterIT.java b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryPostFilterIT.java index ea951b65f..82b8ef6ec 100644 --- a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryPostFilterIT.java +++ b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryPostFilterIT.java @@ -177,7 +177,8 @@ private void testPostFilterRangeQuery(String indexName) { postFilterQuery, null, false, - null + null, + 0 ); assertHybridQueryResults(searchResponseAsMap, 1, 0, GTE_OF_RANGE_IN_POST_FILTER_QUERY, LTE_OF_RANGE_IN_POST_FILTER_QUERY); } @@ -262,7 +263,8 @@ private void testPostFilterBoolQuery(String indexName) { postFilterQuery, null, false, - null + null, + 0 ); assertHybridQueryResults(searchResponseAsMap, 2, 1, GTE_OF_RANGE_IN_POST_FILTER_QUERY, LTE_OF_RANGE_IN_POST_FILTER_QUERY); // Case 2 A Query with a combination of hybrid query (Match Query, Term Query, Range Query), aggregation (Average stock price @@ -278,7 +280,8 @@ private void testPostFilterBoolQuery(String indexName) { postFilterQuery, null, false, - null + null, + 0 ); assertHybridQueryResults(searchResponseAsMap, 2, 1, GTE_OF_RANGE_IN_POST_FILTER_QUERY, LTE_OF_RANGE_IN_POST_FILTER_QUERY); Map aggregations = getAggregations(searchResponseAsMap); @@ -303,7 +306,8 @@ private void testPostFilterBoolQuery(String indexName) { postFilterQuery, null, false, - null + null, + 0 ); assertHybridQueryResults(searchResponseAsMap, 0, 0, GTE_OF_RANGE_IN_POST_FILTER_QUERY, LTE_OF_RANGE_IN_POST_FILTER_QUERY); // Case 4 A Query with a combination of hybrid query (Match Query, Range Query) and a post filter query (Bool Query with a should @@ -324,7 +328,8 @@ private void testPostFilterBoolQuery(String indexName) { postFilterQuery, null, false, - null + null, + 0 ); assertHybridQueryResults(searchResponseAsMap, 0, 0, GTE_OF_RANGE_IN_POST_FILTER_QUERY, LTE_OF_RANGE_IN_POST_FILTER_QUERY); } @@ -382,7 +387,8 @@ private void testPostFilterMatchAllAndMatchNoneQueries(String indexName) { postFilterQuery, null, false, - null + null, + 0 ); assertHybridQueryResults(searchResponseAsMap, 4, 3, GTE_OF_RANGE_IN_POST_FILTER_QUERY, LTE_OF_RANGE_IN_POST_FILTER_QUERY); @@ -399,7 +405,8 @@ private void testPostFilterMatchAllAndMatchNoneQueries(String indexName) { postFilterQuery, null, false, - null + null, + 0 ); assertHybridQueryResults(searchResponseAsMap, 0, 0, GTE_OF_RANGE_IN_POST_FILTER_QUERY, LTE_OF_RANGE_IN_POST_FILTER_QUERY); } diff --git a/src/test/java/org/opensearch/neuralsearch/query/HybridQuerySortIT.java b/src/test/java/org/opensearch/neuralsearch/query/HybridQuerySortIT.java index f3615b991..e6440cc61 100644 --- a/src/test/java/org/opensearch/neuralsearch/query/HybridQuerySortIT.java +++ b/src/test/java/org/opensearch/neuralsearch/query/HybridQuerySortIT.java @@ -139,7 +139,8 @@ private void testSingleFieldSort_whenMultipleSubQueries_thenSuccessful(String in null, createSortBuilders(fieldSortOrderMap, false), false, - null + null, + 0 ); List> nestedHits = validateHitsCountAndFetchNestedHits(searchResponseAsMap, 6, 6); assertStockValueWithSortOrderInHybridQueryResults(nestedHits, SortOrder.DESC, LARGEST_STOCK_VALUE_IN_QUERY_RESULT, true, true); @@ -168,7 +169,8 @@ private void testMultipleFieldSort_whenMultipleSubQueries_thenSuccessful(String null, createSortBuilders(fieldSortOrderMap, false), false, - null + null, + 0 ); List> nestedHits = validateHitsCountAndFetchNestedHits(searchResponseAsMap, 6, 6); assertStockValueWithSortOrderInHybridQueryResults(nestedHits, SortOrder.DESC, LARGEST_STOCK_VALUE_IN_QUERY_RESULT, true, false); @@ -200,7 +202,8 @@ public void testSingleFieldSort_whenTrackScoresIsEnabled_thenFail() { null, createSortBuilders(fieldSortOrderMap, false), true, - null + null, + 0 ) ); } finally { @@ -234,7 +237,8 @@ public void testSingleFieldSort_whenSortCriteriaIsByScoreAndField_thenFail() { null, createSortBuilders(fieldSortOrderMap, false), true, - null + null, + 0 ) ); } finally { @@ -312,7 +316,8 @@ private void testSearchAfter_whenSingleFieldSort_thenSuccessful(String indexName null, createSortBuilders(fieldSortOrderMap, false), false, - searchAfter + searchAfter, + 0 ); List> nestedHits = validateHitsCountAndFetchNestedHits(searchResponseAsMap, 3, 6); assertStockValueWithSortOrderInHybridQueryResults( @@ -348,7 +353,8 @@ private void testSearchAfter_whenMultipleFieldSort_thenSuccessful(String indexNa null, createSortBuilders(fieldSortOrderMap, false), false, - searchAfter + searchAfter, + 0 ); List> nestedHits = validateHitsCountAndFetchNestedHits(searchResponseAsMap, 5, 6); assertStockValueWithSortOrderInHybridQueryResults( @@ -381,7 +387,8 @@ private void testScoreSort_whenSingleFieldSort_thenSuccessful(String indexName) null, createSortBuilders(fieldSortOrderMap, false), false, - null + null, + 0 ); List> nestedHits = validateHitsCountAndFetchNestedHits(searchResponseAsMap, 6, 6); assertScoreWithSortOrderInHybridQueryResults(nestedHits, SortOrder.DESC, 1.0); @@ -415,7 +422,8 @@ public void testSort_whenSortFieldsSizeNotEqualToSearchAfterSize_thenFail() { null, createSortBuilders(fieldSortOrderMap, false), true, - searchAfter + searchAfter, + 0 ) ); } finally { @@ -450,7 +458,8 @@ public void testSearchAfter_whenAfterFieldIsNotPassed_thenFail() { null, createSortBuilders(fieldSortOrderMap, false), true, - searchAfter + searchAfter, + 0 ) ); } finally { diff --git a/src/test/java/org/opensearch/neuralsearch/query/aggregation/MetricAggregationsWithHybridQueryIT.java b/src/test/java/org/opensearch/neuralsearch/query/aggregation/MetricAggregationsWithHybridQueryIT.java index 08efd0811..c969e6ee8 100644 --- a/src/test/java/org/opensearch/neuralsearch/query/aggregation/MetricAggregationsWithHybridQueryIT.java +++ b/src/test/java/org/opensearch/neuralsearch/query/aggregation/MetricAggregationsWithHybridQueryIT.java @@ -424,7 +424,8 @@ private void testSumAggsAndRangePostFilter() throws IOException { rangeFilterQuery, null, false, - null + null, + 0 ); Map aggregations = getAggregations(searchResponseAsMap); diff --git a/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java b/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java index dd1dac432..da8088fb8 100644 --- a/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java +++ b/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java @@ -514,7 +514,7 @@ protected Map search( Map requestParams, List aggs ) { - return search(index, queryBuilder, rescorer, resultSize, requestParams, aggs, null, null, false, null); + return search(index, queryBuilder, rescorer, resultSize, requestParams, aggs, null, null, false, null, 0); } @SneakyThrows @@ -528,10 +528,11 @@ protected Map search( QueryBuilder postFilterBuilder, List> sortBuilders, boolean trackScores, - List searchAfter + List searchAfter, + int from ) { XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - + builder.field("from", from); if (queryBuilder != null) { builder.field("query"); queryBuilder.toXContent(builder, ToXContent.EMPTY_PARAMS);