Skip to content

Commit

Permalink
lift clauses in LogicalAst (quickwit-oss#2449)
Browse files Browse the repository at this point in the history
(a OR b) OR (c OR d) can be simplified to (a OR b OR c OR d)
(a AND b) AND (c AND d) can be simplified to (a AND b AND c AND d)

This directly affects how queries are executed

remove unused SumWithCoordsCombiner
the number of fields is unused and private
  • Loading branch information
PSeitz authored and philippemnoel committed Aug 31, 2024
1 parent a3243d8 commit faa150a
Show file tree
Hide file tree
Showing 7 changed files with 55 additions and 42 deletions.
6 changes: 3 additions & 3 deletions query-grammar/src/occur.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ use std::fmt::Write;
#[derive(Debug, Clone, Hash, Copy, Eq, PartialEq)]
pub enum Occur {
/// For a given document to be considered for scoring,
/// at least one of the terms with the Should or the Must
/// at least one of the queries with the Should or the Must
/// Occur constraint must be within the document.
Should,
/// Document without the term are excluded from the search.
/// Document without the queries are excluded from the search.
Must,
/// Document that contain the term are excluded from the
/// Document that contain the query are excluded from the
/// search.
MustNot,
}
Expand Down
4 changes: 2 additions & 2 deletions src/query/boolean_query/boolean_query.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use super::boolean_weight::BooleanWeight;
use crate::query::{EnableScoring, Occur, Query, SumWithCoordsCombiner, TermQuery, Weight};
use crate::query::{EnableScoring, Occur, Query, SumCombiner, TermQuery, Weight};
use crate::schema::{IndexRecordOption, Term};

/// The boolean query returns a set of documents
Expand Down Expand Up @@ -169,7 +169,7 @@ impl Query for BooleanQuery {
sub_weights,
self.minimum_number_should_match,
enable_scoring.is_scoring_enabled(),
Box::new(SumWithCoordsCombiner::default),
Box::new(SumCombiner::default),
)))
}

Expand Down
10 changes: 3 additions & 7 deletions src/query/boolean_query/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,10 @@ mod tests {
use super::*;
use crate::collector::tests::TEST_COLLECTOR_WITH_SCORE;
use crate::collector::TopDocs;
use crate::query::score_combiner::SumWithCoordsCombiner;
use crate::query::term_query::TermScorer;
use crate::query::{
EnableScoring, Intersection, Occur, Query, QueryParser, RequiredOptionalScorer, Scorer,
TermQuery,
SumCombiner, TermQuery,
};
use crate::schema::*;
use crate::{assert_nearly_equals, DocAddress, DocId, Index, IndexWriter, Score};
Expand Down Expand Up @@ -90,11 +89,8 @@ mod tests {
let query = query_parser.parse_query("+a b")?;
let weight = query.weight(EnableScoring::enabled_from_searcher(&searcher))?;
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
assert!(scorer.is::<RequiredOptionalScorer<
Box<dyn Scorer>,
Box<dyn Scorer>,
SumWithCoordsCombiner,
>>());
assert!(scorer
.is::<RequiredOptionalScorer<Box<dyn Scorer>, Box<dyn Scorer>, SumCombiner>>());
}
{
let query = query_parser.parse_query("+a b")?;
Expand Down
4 changes: 1 addition & 3 deletions src/query/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,7 @@ pub use self::query_parser::{QueryParser, QueryParserError};
pub use self::range_query::*;
pub use self::regex_query::RegexQuery;
pub use self::reqopt_scorer::RequiredOptionalScorer;
pub use self::score_combiner::{
DisjunctionMaxCombiner, ScoreCombiner, SumCombiner, SumWithCoordsCombiner,
};
pub use self::score_combiner::{DisjunctionMaxCombiner, ScoreCombiner, SumCombiner};
pub use self::scorer::Scorer;
pub use self::set_query::TermSetQuery;
pub use self::term_query::TermQuery;
Expand Down
28 changes: 28 additions & 0 deletions src/query/query_parser/logical_ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,34 @@ impl LogicalAst {
LogicalAst::Boost(Box::new(self), boost)
}
}

pub fn simplify(self) -> LogicalAst {
match self {
LogicalAst::Clause(clauses) => {
let mut new_clauses: Vec<(Occur, LogicalAst)> = Vec::new();

for (occur, sub_ast) in clauses {
let simplified_sub_ast = sub_ast.simplify();

// If clauses below have the same `Occur`, we can pull them up
match simplified_sub_ast {
LogicalAst::Clause(sub_clauses)
if (occur == Occur::Should || occur == Occur::Must)
&& sub_clauses.iter().all(|(o, _)| *o == occur) =>
{
for sub_clause in sub_clauses {
new_clauses.push(sub_clause);
}
}
_ => new_clauses.push((occur, simplified_sub_ast)),
}
}

LogicalAst::Clause(new_clauses)
}
LogicalAst::Leaf(_) | LogicalAst::Boost(_, _) => self,
}
}
}

fn occur_letter(occur: Occur) -> &'static str {
Expand Down
21 changes: 18 additions & 3 deletions src/query/query_parser/query_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ impl QueryParser {
if !err.is_empty() {
return Err(err.swap_remove(0));
}
Ok(ast)
Ok(ast.simplify())
}

/// Parse the user query into an AST.
Expand Down Expand Up @@ -1437,7 +1437,7 @@ mod test {
);
test_parse_query_to_logical_ast_helper(
"(+title:a +title:b) title:c",
r#"(+(+Term(field=0, type=Str, "a") +Term(field=0, type=Str, "b")) +Term(field=0, type=Str, "c"))"#,
r#"(+Term(field=0, type=Str, "a") +Term(field=0, type=Str, "b") +Term(field=0, type=Str, "c"))"#,
true,
);
}
Expand Down Expand Up @@ -1473,7 +1473,7 @@ mod test {
pub fn test_parse_query_to_ast_two_terms() {
test_parse_query_to_logical_ast_helper(
"title:a b",
r#"(Term(field=0, type=Str, "a") (Term(field=0, type=Str, "b") Term(field=1, type=Str, "b")))"#,
r#"(Term(field=0, type=Str, "a") Term(field=0, type=Str, "b") Term(field=1, type=Str, "b"))"#,
false,
);
test_parse_query_to_logical_ast_helper(
Expand Down Expand Up @@ -1705,6 +1705,21 @@ mod test {
);
}

#[test]
pub fn test_parse_query_negative() {
test_parse_query_to_logical_ast_helper(
"title:b -title:a",
r#"(+Term(field=0, type=Str, "b") -Term(field=0, type=Str, "a"))"#,
true,
);

test_parse_query_to_logical_ast_helper(
"title:b -(-title:a -title:c)",
r#"(+Term(field=0, type=Str, "b") -(-Term(field=0, type=Str, "a") -Term(field=0, type=Str, "c")))"#,
true,
);
}

#[test]
pub fn test_query_parser_hyphen() {
test_parse_query_to_logical_ast_helper(
Expand Down
24 changes: 0 additions & 24 deletions src/query/score_combiner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,30 +54,6 @@ impl ScoreCombiner for SumCombiner {
}
}

/// Sums the score of different scorers and keeps the count
/// of scorers which matched.
#[derive(Default, Clone, Copy)]
pub struct SumWithCoordsCombiner {
num_fields: usize,
score: Score,
}

impl ScoreCombiner for SumWithCoordsCombiner {
fn update<TScorer: Scorer>(&mut self, scorer: &mut TScorer) {
self.score += scorer.score();
self.num_fields += 1;
}

fn clear(&mut self) {
self.score = 0.0;
self.num_fields = 0;
}

fn score(&self) -> Score {
self.score
}
}

/// Take max score of different scorers
/// and optionally sum it with other matches multiplied by `tie_breaker`
#[derive(Default, Clone, Copy)]
Expand Down

0 comments on commit faa150a

Please sign in to comment.