From 1c38632d65ea231a789eaebcdb8253014a14ff74 Mon Sep 17 00:00:00 2001 From: maneeshpm Date: Mon, 29 Mar 2021 12:31:11 +0530 Subject: [PATCH] Add subquery_anchored in case of suggestions By forcing an ANCHOR_TERM at the beginning of all the documents, we can generate an OP_PHRASE type subquery that matches the terms of the query from the beginning of the documents. --- src/search.cpp | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index c9038eb44..449ca11aa 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -38,6 +38,8 @@ #include "xapian.h" #include +#include "constants.h" + #define MAX_MATCHES_TO_SORT 10000 namespace zim @@ -118,21 +120,34 @@ setup_queryParser(Xapian::QueryParser* queryParser, /* * subquery_phrase: selects documents that have the terms in the order of the query * within a specified window. + * subquery_anchored: selects documents that have the terms in the order of the + * query within a specified window and starts from the beginning of the document. * subquery_and: selects documents that have all the terms in the query. - * subquery_phrase by itself is quite exclusive. To include more "similar" docs, - * we combine it with subquery_and using OP_OR operator. If a perticular document - * has a weight of A in subquery_phrase and B in subquery_and, the net weight of - * that document becomes A+B. So the documents closer to the query gets a higher. + * + * subquery_phrase and subquery_anchored by themselves are quite exclusive. To + * include more "similar" docs, we combine them with subquery_and using OP_OR + * operator. If a particular document has a weight of A in subquery_and and B + * in subquery_phrase and C in subquery_anchored, the net weight of that document + * becomes A+B+C (normalised out of 100). So the documents closer to the query + * gets a higher relevance. */ Xapian::Query parse_query(Xapian::QueryParser* query_parser, std::string qs, int flags, std::string prefix, bool suggestion_mode) { Xapian::Query query, subquery_and; query = subquery_and = query_parser->parse_query(qs, flags, prefix); - if (suggestion_mode) { + if (suggestion_mode && !query.empty()) { + Xapian::Query subquery_phrase, subquery_anchored; query_parser->set_default_op(Xapian::Query::op::OP_PHRASE); - Xapian::Query subquery_phrase = query_parser->parse_query(qs); + + subquery_phrase = query_parser->parse_query(qs); subquery_phrase = Xapian::Query(Xapian::Query::OP_PHRASE, subquery_phrase.get_terms_begin(), subquery_phrase.get_terms_end(), subquery_phrase.get_length()); - query = Xapian::Query(Xapian::Query::OP_OR, subquery_phrase, subquery_and); + + qs = ANCHOR_TERM + qs; + subquery_anchored = query_parser->parse_query(qs); + subquery_anchored = Xapian::Query(Xapian::Query::OP_PHRASE, subquery_anchored.get_terms_begin(), subquery_anchored.get_terms_end(), subquery_anchored.get_length()); + + query = Xapian::Query(Xapian::Query::OP_OR, query, subquery_phrase); + query = Xapian::Query(Xapian::Query::OP_OR, query, subquery_anchored); } return query;