Skip to content

Commit

Permalink
Move query parsing in internalDataBase.
Browse files Browse the repository at this point in the history
Parsing the query can be made entirely in the database, so let's move
it there.
  • Loading branch information
mgautierfr committed Apr 6, 2021
1 parent f6d3ca2 commit 5ad6f89
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 70 deletions.
118 changes: 51 additions & 67 deletions src/search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,48 +74,24 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
}
return result;
}




/*
* subquery_phrase: selects documents that have the terms in the order of the query
* within a specified window.
* subquery_and: selects documents that have all the terms in the query.
* subquery_phrase by itself is quite exclusive. To include more "similar" docs,
* we combine it with subquery_and using OP_OR operator. If a particular document
* has a weight of A in subquery_phrase and B in subquery_and, the net weight of
* that document becomes A+B. So the documents closer to the query get a higher weight.
*/
// Parse `qs` with `query_parser` and return the resulting Xapian query.
// Outside suggestion mode the result is the plain parse of `qs` with `flags`
// and `prefix`; in suggestion mode that parse is OR-ed with a phrase query
// built from the same query string.
Xapian::Query parse_query(Xapian::QueryParser* query_parser, std::string qs, int flags, std::string prefix, bool suggestion_mode) {
  // Base query: parsed with the caller-supplied flags and default prefix.
  const Xapian::Query subquery_and = query_parser->parse_query(qs, flags, prefix);
  if (!suggestion_mode) {
    return subquery_and;
  }

  // Switch the parser's default operator to OP_PHRASE and parse the query
  // string again, this time without the caller's flags and prefix.
  // The default operator is not restored before returning.
  query_parser->set_default_op(Xapian::Query::op::OP_PHRASE);
  const Xapian::Query parsed_phrase = query_parser->parse_query(qs);

  // Rebuild an explicit OP_PHRASE query over the parsed terms, using the
  // parsed query's length as the phrase window.
  const Xapian::Query subquery_phrase(
      Xapian::Query::OP_PHRASE,
      parsed_phrase.get_terms_begin(),
      parsed_phrase.get_terms_end(),
      parsed_phrase.get_length());

  // Either sub-query may match a document.
  return Xapian::Query(Xapian::Query::OP_OR, subquery_phrase, subquery_and);
}

}

InternalDataBase::InternalDataBase(const std::vector<Archive>& archives, bool suggestionMode)
: m_hasNewSuggestionFormat(false)
{
bool first = true;
bool hasNewSuggestionFormat = false;
m_queryParser.set_database(m_database);
m_queryParser.set_default_op(Xapian::Query::op::OP_AND);
m_flags = Xapian::QueryParser::FLAG_DEFAULT;
if (suggestionMode) {
m_flags |= Xapian::QueryParser::FLAG_PARTIAL;
}
for(auto& archive: archives) {
auto impl = archive.getImpl();
FileImpl::FindxResult r;
if (suggestionMode) {
r = impl->findx('X', "title/xapian");
if (r.first) {
m_hasNewSuggestionFormat = true;
hasNewSuggestionFormat = true;
}
}
if (!r.first) {
Expand Down Expand Up @@ -178,7 +154,7 @@ InternalDataBase::InternalDataBase(const std::vector<Archive>& archives, bool su
Xapian::Stem stemmer = Xapian::Stem(languageLocale.getLanguage());
m_queryParser.set_stemmer(stemmer);
m_queryParser.set_stemming_strategy(
m_hasNewSuggestionFormat ? Xapian::QueryParser::STEM_SOME : Xapian::QueryParser::STEM_ALL);
hasNewSuggestionFormat ? Xapian::QueryParser::STEM_SOME : Xapian::QueryParser::STEM_ALL);
} catch (...) {
std::cout << "No steemming for language '" << languageLocale.getLanguage() << "'" << std::endl;
}
Expand All @@ -194,7 +170,10 @@ InternalDataBase::InternalDataBase(const std::vector<Archive>& archives, bool su
stopper->release();
m_queryParser.set_stopper(stopper);
}
m_prefixes = database.get_metadata("prefixes");
auto prefixes = database.get_metadata("prefixes");
if ( hasNewSuggestionFormat && prefixes.find("S") != std::string::npos ) {
m_prefix = "S";
}
} else {
std::map<std::string, int> valuesmap = read_valuesmap(database.get_metadata("valuesmap"));
if (m_valuesmap != valuesmap ) {
Expand Down Expand Up @@ -228,9 +207,43 @@ int InternalDataBase::valueSlot(const std::string& valueName) const
return m_valuesmap.at(valueName);
}

Xapian::QueryParser& InternalDataBase::getQueryParser()
/*
* subquery_phrase: selects documents that have the terms in the order of the query
* within a specified window.
* subquery_and: selects documents that have all the terms in the query.
* subquery_phrase by itself is quite exclusive. To include more "similar" docs,
* we combine it with subquery_and using OP_OR operator. If a particular document
* has a weight of A in subquery_phrase and B in subquery_and, the net weight of
* that document becomes A+B. So the documents closer to the query get a higher weight.
*/
// Build the Xapian query for `query` using this database's query parser,
// the parse flags (m_flags) and the search prefix (m_prefix).
// The text query is parsed first; in suggestion mode it is OR-ed with a
// phrase query, and when a geo query is requested and the database has a
// "geo.position" value, the result is restricted by (or replaced with) a
// distance-based query.
Xapian::Query InternalDataBase::parseQuery(const Query& query)
{
// NOTE(review): this statement looks like the body of the replaced
// getQueryParser() left in by the diff rendering, not part of parseQuery
// -- confirm against the real file.
return m_queryParser;
Xapian::Query xquery;

// Reset the default operator on every call: a previous suggestion-mode call
// leaves the shared parser set to OP_PHRASE (see below).
m_queryParser.set_default_op(Xapian::Query::op::OP_AND);
xquery = m_queryParser.parse_query(query.m_query, m_flags, m_prefix);

if (query.m_suggestionMode) {
// Parse the query string a second time with OP_PHRASE as default operator;
// this parse does not pass m_flags or m_prefix.
m_queryParser.set_default_op(Xapian::Query::op::OP_PHRASE);
Xapian::Query subquery_phrase = m_queryParser.parse_query(query.m_query);
// Force the OP_PHRASE window to be equal to the number of terms.
subquery_phrase = Xapian::Query(Xapian::Query::OP_PHRASE, subquery_phrase.get_terms_begin(), subquery_phrase.get_terms_end(), subquery_phrase.get_length());
// A document may match either the phrase query or the OP_AND query.
xquery = Xapian::Query(Xapian::Query::OP_OR, subquery_phrase, xquery);
}

// The geo part is only applied when the database declares a "geo.position" value.
if (query.m_geoquery && hasValue("geo.position")) {
Xapian::GreatCircleMetric metric;
Xapian::LatLongCoord centre(query.m_latitude, query.m_longitude);
Xapian::LatLongDistancePostingSource ps(valueSlot("geo.position"), centre, metric, query.m_distance);
Xapian::Query geoQuery(&ps);
if (query.m_query.empty()) {
// No text query: the geo query alone is the result.
xquery = geoQuery;
} else {
// Combine the text query with the geo query using OP_FILTER.
xquery = Xapian::Query(Xapian::Query::OP_FILTER, xquery, geoQuery);
}
}

return xquery;
}

Searcher::Searcher(const std::vector<Archive>& archives) :
Expand Down Expand Up @@ -340,40 +353,13 @@ Xapian::Enquire& Search::getEnquire() const
return *mp_enquire;
}

auto queryParser = mp_internalDb->getQueryParser();
std::string prefix = "";
unsigned flags = Xapian::QueryParser::FLAG_DEFAULT;
if (m_query.m_suggestionMode) {
if (m_query.m_verbose) {
std::cout << "Mark query as 'partial'" << std::endl;
}
flags |= Xapian::QueryParser::FLAG_PARTIAL;
if ( !mp_internalDb->m_hasNewSuggestionFormat
&& mp_internalDb->m_prefixes.find("S") != std::string::npos ) {
if (m_query.m_verbose) {
std::cout << "Searching in title namespace" << std::endl;
}
prefix = "S";
}
}
auto query = parse_query(queryParser, m_query.m_query, flags, prefix, m_query.m_suggestionMode);
if (m_query.m_verbose) {
std::cout << "Parsed query '" << m_query.m_query << "' to " << query.get_description() << std::endl;
}
delete queryParser;

auto enquire = std::unique_ptr<Xapian::Enquire>(new Xapian::Enquire(mp_internalDb->m_database));

if (m_query.m_geoquery && mp_internalDb->hasValue("geo.position")) {
Xapian::GreatCircleMetric metric;
Xapian::LatLongCoord centre(m_query.m_latitude, m_query.m_longitude);
Xapian::LatLongDistancePostingSource ps(mp_internalDb->valueSlot("geo.position"), centre, metric, m_query.m_distance);
if ( m_query.m_query.empty()) {
query = Xapian::Query(&ps);
} else {
query = Xapian::Query(Xapian::Query::OP_FILTER, query, Xapian::Query(&ps));
}
auto query = mp_internalDb->parseQuery(m_query);
if (m_query.m_verbose) {
std::cout << "Parsed query '" << m_query.m_query << "' to " << query.get_description() << std::endl;
}
enquire->set_query(query);

/*
* In suggestion mode, we are searching over a separate title index. Default BM25 is not
Expand All @@ -391,8 +377,6 @@ Xapian::Enquire& Search::getEnquire() const
}
}

enquire->set_query(query);


if (m_query.m_suggestionMode && mp_internalDb->hasValue("targetPath")) {
enquire->set_collapse_key(mp_internalDb->valueSlot("targetPath"));
Expand Down
6 changes: 3 additions & 3 deletions src/search_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,15 @@ class InternalDataBase {
bool hasValue(const std::string& valueName) const;
int valueSlot(const std::string& valueName) const;

Xapian::QueryParser& getQueryParser();
Xapian::Query parseQuery(const Query& query);

public: // data
Xapian::Database m_database; // The (main) database we will search on (wrapping other xapian databases).
std::vector<Xapian::Database> m_xapianDatabases; // The real databases.
std::vector<Archive> m_archives; // The archives we are searching on.
std::map<std::string, int> m_valuesmap; // The valuesmap associated with the database.
std::string m_prefixes; // The prefix stored in the database.
bool m_hasNewSuggestionFormat; // If the database has new suggestion format.
std::string m_prefix; // The prefix to search on.
unsigned m_flags; // Flags to use to parse the query.
Xapian::QueryParser m_queryParser;
};

Expand Down

0 comments on commit 5ad6f89

Please sign in to comment.