Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce Search/Searcher Caching to Internal Server #620

Merged
merged 3 commits into from
Mar 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 40 additions & 19 deletions src/server/internalServer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,6 @@ extern "C" {

#include <zim/uuid.h>
#include <zim/error.h>
#include <zim/search.h>
#include <zim/suggestion.h>
#include <zim/entry.h>
#include <zim/item.h>

Expand All @@ -80,6 +78,7 @@ extern "C" {

#define MAX_SEARCH_LEN 140
#define KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE 100
#define DEFAULT_CACHE_SIZE 2

namespace kiwix {

Expand All @@ -96,6 +95,18 @@ inline std::string normalizeRootUrl(std::string rootUrl)
return rootUrl.empty() ? rootUrl : "/" + rootUrl;
}

// Returns the value of env var `name` if found, otherwise returns defaultVal
unsigned int getCacheLength(const char* name, unsigned int defaultVal) {
try {
const char* envString = std::getenv(name);
if (envString == nullptr) {
throw std::runtime_error("Environment variable not set");
}
return extractFromString<unsigned int>(envString);
} catch (...) {}

return defaultVal;
}
} // unnamed namespace

static IdNameMapper defaultNameMapper;
Expand Down Expand Up @@ -134,7 +145,10 @@ InternalServer::InternalServer(Library* library,
m_ipConnectionLimit(ipConnectionLimit),
mp_daemon(nullptr),
mp_library(library),
mp_nameMapper(nameMapper ? nameMapper : &defaultNameMapper)
mp_nameMapper(nameMapper ? nameMapper : &defaultNameMapper),
searcherCache(getCacheLength("SEARCHER_CACHE_SIZE", std::max((unsigned int) (mp_library->getBookCount(true, true)*0.1), 1U))),
searchCache(getCacheLength("SEARCH_CACHE_SIZE", DEFAULT_CACHE_SIZE)),
suggestionSearcherCache(getCacheLength("SUGGESTION_SEARCHER_CACHE_SIZE", std::max((unsigned int) (mp_library->getBookCount(true, true)*0.1), 1U)))
{}

bool InternalServer::start() {
Expand Down Expand Up @@ -339,14 +353,15 @@ std::unique_ptr<Response> InternalServer::build_homepage(const RequestContext& r
* Archive and Zim handlers begin
**/

// TODO: retrieve searcher from caching mechanism
SuggestionsList_t getSuggestions(const zim::Archive* const archive,
const std::string& queryString, int start, int suggestionCount)
SuggestionsList_t getSuggestions(SuggestionSearcherCache& cache, const zim::Archive* const archive,
const std::string& bookId, const std::string& queryString, int start, int suggestionCount)
{
SuggestionsList_t suggestions;
auto searcher = zim::SuggestionSearcher(*archive);
std::shared_ptr<zim::SuggestionSearcher> searcher;
searcher = cache.getOrPut(bookId, [=](){ return make_shared<zim::SuggestionSearcher>(*archive); });

if (archive->hasTitleIndex()) {
auto search = searcher.suggest(queryString);
auto search = searcher->suggest(queryString);
auto srs = search.getResults(start, suggestionCount);

for (auto it : srs) {
Expand All @@ -359,7 +374,7 @@ SuggestionsList_t getSuggestions(const zim::Archive* const archive,
std::vector<std::string> variants = getTitleVariants(queryString);
int currCount = 0;
for (auto it = variants.begin(); it != variants.end() && currCount < suggestionCount; it++) {
auto search = searcher.suggest(queryString);
auto search = searcher->suggest(queryString);
auto srs = search.getResults(0, suggestionCount);
for (auto it : srs) {
SuggestionItem suggestion(it.getTitle(), kiwix::normalize(it.getTitle()),
Expand All @@ -379,11 +394,11 @@ std::unique_ptr<Response> InternalServer::handle_suggest(const RequestContext& r
printf("** running handle_suggest\n");
}

std::string bookName;
std::string bookName, bookId;
std::shared_ptr<zim::Archive> archive;
try {
bookName = request.get_argument("content");
const std::string bookId = mp_nameMapper->getIdForName(bookName);
bookId = mp_nameMapper->getIdForName(bookName);
archive = mp_library->getArchiveById(bookId);
} catch (const std::out_of_range&) {
// error handled by the archive == nullptr check below
Expand All @@ -410,7 +425,8 @@ std::unique_ptr<Response> InternalServer::handle_suggest(const RequestContext& r
bool first = true;

/* Get the suggestions */
SuggestionsList_t suggestions = getSuggestions(archive.get(), queryString, start, count);
SuggestionsList_t suggestions = getSuggestions(suggestionSearcherCache, archive.get(),
bookId, queryString, start, count);
for(auto& suggestion:suggestions) {
MustacheData result;
result.set("label", suggestion.getTitle());
Expand Down Expand Up @@ -488,11 +504,11 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
} catch(const std::out_of_range&) {}
catch(const std::invalid_argument&) {}

std::string bookName;
std::string bookName, bookId;
std::shared_ptr<zim::Archive> archive;
try {
bookName = request.get_argument("content");
const std::string bookId = mp_nameMapper->getIdForName(bookName);
bookId = mp_nameMapper->getIdForName(bookName);
archive = mp_library->getArchiveById(bookId);
} catch (const std::out_of_range&) {}

Expand All @@ -509,7 +525,7 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re

std::shared_ptr<zim::Searcher> searcher;
if (archive) {
searcher = std::make_shared<zim::Searcher>(*archive);
searcher = searcherCache.getOrPut(bookId, [=](){ return std::make_shared<zim::Searcher>(*archive);});
} else {
for (auto& bookId: mp_library->filter(kiwix::Filter().local(true).valid(true))) {
auto currentArchive = mp_library->getArchiveById(bookId);
Expand Down Expand Up @@ -540,6 +556,7 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
}

/* Get the results */
std::string queryString;
try {
zim::Query query;
if (patternString.empty()) {
Expand All @@ -549,20 +566,24 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
}

query.setQuery("");
queryString = "GEO:" + to_string(latitude) + to_string(longitude) + to_string(distance);
query.setGeorange(latitude, longitude, distance);
} else {
// Execute Ft search
if (m_verbose.load()) {
cout << "Performing query `" << patternString << "'" << endl;
}

std::string queryString = removeAccents(patternString);
queryString = "FT:" + removeAccents(patternString);
query.setQuery(queryString);
}
queryString = bookId + queryString;
mgautierfr marked this conversation as resolved.
Show resolved Hide resolved

std::shared_ptr<zim::Search> search;
search = searchCache.getOrPut(queryString, [=](){ return make_shared<zim::Search>(searcher->search(query));});

zim::Search search = searcher->search(query);
SearchRenderer renderer(search.getResults(start, pageLength), mp_nameMapper, mp_library, start,
search.getEstimatedMatches());
SearchRenderer renderer(search->getResults(start, pageLength), mp_nameMapper, mp_library, start,
search->getEstimatedMatches());
renderer.setSearchPattern(patternString);
renderer.setSearchContent(bookName);
renderer.setProtocolPrefix(m_root + "/");
Expand Down
12 changes: 12 additions & 0 deletions src/server/internalServer.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ extern "C" {
#include "library.h"
#include "name_mapper.h"

#include <zim/search.h>
#include <zim/suggestion.h>

#include <mustache.hpp>

#include <atomic>
Expand All @@ -36,9 +39,14 @@ extern "C" {
#include "server/request_context.h"
#include "server/response.h"

#include "tools/concurrent_cache.h"

namespace kiwix {

typedef kainjow::mustache::data MustacheData;
typedef ConcurrentCache<string, std::shared_ptr<zim::Searcher>> SearcherCache;
typedef ConcurrentCache<string, std::shared_ptr<zim::Search>> SearchCache;
typedef ConcurrentCache<string, std::shared_ptr<zim::SuggestionSearcher>> SuggestionSearcherCache;

class Entry;
class OPDSDumper;
Expand Down Expand Up @@ -115,6 +123,10 @@ class InternalServer {
Library* mp_library;
NameMapper* mp_nameMapper;

SearcherCache searcherCache;
SearchCache searchCache;
SuggestionSearcherCache suggestionSearcherCache;

std::string m_server_id;
std::string m_library_id;

Expand Down
95 changes: 95 additions & 0 deletions src/tools/concurrent_cache.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*
* Copyright (C) 2021 Matthieu Gautier <[email protected]>
* Copyright (C) 2020 Veloman Yunkan
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/

#ifndef ZIM_CONCURRENT_CACHE_H
#define ZIM_CONCURRENT_CACHE_H

#include "lrucache.h"

#include <future>
#include <mutex>

namespace kiwix
{

/**
ConcurrentCache implements a concurrent thread-safe cache

Compared to kiwix::lru_cache, each access operation is slightly more expensive.
However, different slots of the cache can be safely accessed concurrently
with minimal blocking. Concurrent access to the same element is also
safe, and, in case of a cache miss, will block until that element becomes
available.
*/
template <typename Key, typename Value>
class ConcurrentCache
{
private: // types
typedef std::shared_future<Value> ValuePlaceholder;
typedef lru_cache<Key, ValuePlaceholder> Impl;

public: // types
explicit ConcurrentCache(size_t maxEntries)
: impl_(maxEntries)
{}

// Gets the entry corresponding to the given key. If the entry is not in the
// cache, it is obtained by calling f() (without any arguments) and the
// result is put into the cache.
//
// The cache as a whole is locked only for the duration of accessing
// the respective slot. If, in the case of the a cache miss, the generation
// of the missing element takes a long time, only attempts to access that
// element will block - the rest of the cache remains open to concurrent
// access.
template<class F>
Value getOrPut(const Key& key, F f)
{
std::promise<Value> valuePromise;
std::unique_lock<std::mutex> l(lock_);
const auto x = impl_.getOrPut(key, valuePromise.get_future().share());
l.unlock();
if ( x.miss() ) {
try {
valuePromise.set_value(f());
} catch (std::exception& e) {
drop(key);
throw;
}
}

return x.value().get();
}

bool drop(const Key& key)
{
std::unique_lock<std::mutex> l(lock_);
return impl_.drop(key);
}

private: // data
Impl impl_;
std::mutex lock_;
};

} // namespace kiwix

#endif // ZIM_CONCURRENT_CACHE_H

Loading