Skip to content

Commit

Permalink
Introduce lower bound range cache for select0/1.
Browse files Browse the repository at this point in the history
This CL introduces a lower-bound range cache for select0/1 in
SimpleSuccinctBitVectorIndex.

The first step of select0/1 is a binary search for the chunk to which the
bit index belongs.  This CL improves the performance of that step by
narrowing down the search range using a precomputed range cache.

BUG=
TEST=unittest
REF_BUG=21859420
REF_CL=96650187
  • Loading branch information
Noriyuki Takahashi authored and yukawa committed Dec 20, 2015
1 parent 3591f5e commit cac1465
Show file tree
Hide file tree
Showing 12 changed files with 351 additions and 92 deletions.
20 changes: 18 additions & 2 deletions src/dictionary/system/system_dictionary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,20 @@ namespace {

const int kMinTokenArrayBlobSize = 4;

// Cache sizes used when opening the key trie and the value trie in
// SystemDictionary::OpenDictionaryFile().  For each trie the five values are
// passed to Open() in the order Lb0, Lb1, Select0, Select1, Termvec.
//
// TODO(noriyukit): The following parameters may not be well optimized. In our
// experiments, Select1 is computational burden, so increasing cache size for
// lb1/select1 may improve performance.

// Cache sizes passed to key_trie_.Open().
const size_t kKeyTrieLb0CacheSize = 1 * 1024;
const size_t kKeyTrieLb1CacheSize = 1 * 1024;
const size_t kKeyTrieSelect0CacheSize = 4 * 1024;
const size_t kKeyTrieSelect1CacheSize = 4 * 1024;
const size_t kKeyTrieTermvecCacheSize = 1 * 1024;

// Cache sizes passed to value_trie_.Open().
const size_t kValueTrieLb0CacheSize = 1 * 1024;
const size_t kValueTrieLb1CacheSize = 1 * 1024;
const size_t kValueTrieSelect0CacheSize = 1 * 1024;
const size_t kValueTrieSelect1CacheSize = 16 * 1024;
const size_t kValueTrieTermvecCacheSize = 4 * 1024;

// Expansion table format:
// "<Character to expand>[<Expanded character 1><Expanded character 2>...]"
Expand Down Expand Up @@ -506,8 +516,11 @@ bool SystemDictionary::OpenDictionaryFile(bool enable_reverse_lookup_index) {
const uint8 *key_image = reinterpret_cast<const uint8 *>(
dictionary_file_->GetSection(codec_->GetSectionNameForKey(), &len));
if (!key_trie_.Open(key_image,
kKeyTrieLb0CacheSize,
kKeyTrieLb1CacheSize,
kKeyTrieSelect0CacheSize,
kKeyTrieSelect1CacheSize)) {
kKeyTrieSelect1CacheSize,
kKeyTrieTermvecCacheSize)) {
LOG(ERROR) << "cannot open key trie";
return false;
}
Expand All @@ -517,8 +530,11 @@ bool SystemDictionary::OpenDictionaryFile(bool enable_reverse_lookup_index) {
const uint8 *value_image = reinterpret_cast<const uint8 *>(
dictionary_file_->GetSection(codec_->GetSectionNameForValue(), &len));
if (!value_trie_.Open(value_image,
kValueTrieLb0CacheSize,
kValueTrieLb1CacheSize,
kValueTrieSelect0CacheSize,
kValueTrieSelect1CacheSize)) {
kValueTrieSelect1CacheSize,
kValueTrieTermvecCacheSize)) {
LOG(ERROR) << "can not open value trie";
return false;
}
Expand Down
2 changes: 1 addition & 1 deletion src/mozc_version_template.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MAJOR=2
MINOR=17
BUILD=2261
BUILD=2262
REVISION=102
# NACL_DICTIONARY_VERSION is the target version of the system dictionary to be
# downloaded by NaCl Mozc.
Expand Down
9 changes: 7 additions & 2 deletions src/storage/louds/bit_vector_based_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,16 @@
namespace mozc {
namespace storage {
namespace louds {

namespace {

// Lower bound range cache sizes passed to index_.Init() in
// BitVectorBasedArray::Open().
const size_t kLb0CacheSize = 1024;
// Select1 is not used, so cache is unnecessary.
const size_t kLb1CacheSize = 0;

// Reads a 32-bit integer stored at |data| and returns it as int.
// NOTE(review): the value is loaded by dereferencing |data| reinterpreted as
// a const int32 pointer, so this presumably relies on |data| being suitably
// aligned for int32 and on the image using the host byte order -- confirm
// against the callers and the image format.
inline int ReadInt32(const uint8 *data) {
return *reinterpret_cast<const int32*>(data);
}

} // namespace

void BitVectorBasedArray::Open(const uint8 *image) {
Expand All @@ -48,7 +53,7 @@ void BitVectorBasedArray::Open(const uint8 *image) {
// Check 0 padding.
CHECK_EQ(ReadInt32(image + 12), 0);

index_.Init(image + 16, index_length);
index_.Init(image + 16, index_length, kLb0CacheSize, kLb1CacheSize);
base_length_ = base_length;
step_length_ = step_length;
data_ = reinterpret_cast<const char*>(image + 16 + index_length);
Expand Down
3 changes: 2 additions & 1 deletion src/storage/louds/louds.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,9 @@ Louds::Louds() : select0_cache_size_(0), select1_cache_size_(0) {}
Louds::~Louds() {}

void Louds::Init(const uint8 *image, int length,
size_t bitvec_lb0_cache_size, size_t bitvec_lb1_cache_size,
size_t select0_cache_size, size_t select1_cache_size) {
index_.Init(image, length);
index_.Init(image, length, bitvec_lb0_cache_size, bitvec_lb1_cache_size);

// Cap the cache sizes.
if (select0_cache_size > index_.GetNum0Bits()) {
Expand Down
11 changes: 6 additions & 5 deletions src/storage/louds/louds.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,16 +93,17 @@ class Louds {
~Louds();

// Initializes this LOUDS from bit array. To improve the performance of
// downward traversal (i.e., from root to leaves), set |select0_cache_size| to
// a larger value. On the other hand, to improve the performance of upward
// traversal (i.e., from leaves to the root), set |select1_cache_size| to a
// larger value.
// downward traversal (i.e., from root to leaves), set |bitvec_lb0_cache_size|
// and |select0_cache_size| to larger values. On the other hand, to improve
// the performance of upward traversal (i.e., from leaves to the root), set
// |bitvec_lb1_cache_size| and |select1_cache_size| to larger values.
void Init(const uint8 *image, int length,
size_t bitvec_lb0_cache_size, size_t bitvec_lb1_cache_size,
size_t select0_cache_size, size_t select1_cache_size);

// Initializes this LOUDS from bit array without cache.
void Init(const uint8 *image, int length) {
Init(image, length, 0, 0);
Init(image, length, 0, 0, 0, 0);
}

// Explicitly clears the internal bit array.
Expand Down
49 changes: 39 additions & 10 deletions src/storage/louds/louds_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,19 @@ vector<uint8> MakeSequence(StringPiece s) {
EXPECT_FALSE(louds.IsValidNode(tmp)); \
} while (false)

using CacheSizeParam = std::pair<size_t, size_t>;
// Test parameter bundling the four cache sizes that the parameterized LOUDS
// test forwards to Louds::Init(): two bit vector lower bound cache sizes
// followed by the select0/select1 cache sizes.
struct CacheSizeParam {
  size_t bitvec_lb0_cache_size;
  size_t bitvec_lb1_cache_size;
  size_t select0_cache_size;
  size_t select1_cache_size;

  // Arguments are given in the same order as the fields above.
  CacheSizeParam(size_t lb0_size, size_t lb1_size,
                 size_t select0_size, size_t select1_size)
      : bitvec_lb0_cache_size(lb0_size),
        bitvec_lb1_cache_size(lb1_size),
        select0_cache_size(select0_size),
        select1_cache_size(select1_size) {}
};

class LoudsTest : public ::testing::TestWithParam<CacheSizeParam> {};

TEST_P(LoudsTest, Basic) {
Expand All @@ -95,7 +107,11 @@ TEST_P(LoudsTest, Basic) {
// Test with the trie illustrated in louds.h.
const vector<uint8> kSeq = MakeSequence("10 110 0 110 0 0");
Louds louds;
louds.Init(kSeq.data(), kSeq.size(), param.first, param.second);
louds.Init(kSeq.data(), kSeq.size(),
param.bitvec_lb0_cache_size,
param.bitvec_lb1_cache_size,
param.select0_cache_size,
param.select1_cache_size);

// root -> 2 -> 3 -> 4 -> 5
{
Expand Down Expand Up @@ -138,14 +154,27 @@ TEST_P(LoudsTest, Basic) {
}
}

INSTANTIATE_TEST_CASE_P(GenLoudsTest, LoudsTest,
::testing::Values(CacheSizeParam(0, 0),
CacheSizeParam(0, 1),
CacheSizeParam(1, 0),
CacheSizeParam(1, 1),
CacheSizeParam(2, 2),
CacheSizeParam(8, 8),
CacheSizeParam(1024, 1024)));
// Runs LoudsTest with every combination of 0 and 1 for the four cache sizes
// (lb0, lb1, select0, select1), followed by three configurations in which all
// four sizes are equal (2, 8 and 1024).
INSTANTIATE_TEST_CASE_P(
GenLoudsTest, LoudsTest,
::testing::Values(CacheSizeParam(0, 0, 0, 0),
CacheSizeParam(0, 0, 0, 1),
CacheSizeParam(0, 0, 1, 0),
CacheSizeParam(0, 0, 1, 1),
CacheSizeParam(0, 1, 0, 0),
CacheSizeParam(0, 1, 0, 1),
CacheSizeParam(0, 1, 1, 0),
CacheSizeParam(0, 1, 1, 1),
CacheSizeParam(1, 0, 0, 0),
CacheSizeParam(1, 0, 0, 1),
CacheSizeParam(1, 0, 1, 0),
CacheSizeParam(1, 0, 1, 1),
CacheSizeParam(1, 1, 0, 0),
CacheSizeParam(1, 1, 0, 1),
CacheSizeParam(1, 1, 1, 0),
CacheSizeParam(1, 1, 1, 1),
CacheSizeParam(2, 2, 2, 2),
CacheSizeParam(8, 8, 8, 8),
CacheSizeParam(1024, 1024, 1024, 1024)));

} // namespace
} // namespace louds
Expand Down
10 changes: 8 additions & 2 deletions src/storage/louds/louds_trie.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,11 @@ inline int32 ReadInt32(const uint8 *data) {
} // namespace

bool LoudsTrie::Open(const uint8 *image,
size_t louds_lb0_cache_size,
size_t louds_lb1_cache_size,
size_t louds_select0_cache_size,
size_t louds_select1_cache_size) {
size_t louds_select1_cache_size,
size_t termvec_lb1_cache_size) {
// Reads a binary image data, which is compatible with rx.
// The format is as follows:
// [trie size: little endian 4byte int]
Expand Down Expand Up @@ -82,8 +85,11 @@ bool LoudsTrie::Open(const uint8 *image,
const uint8 *edge_character = terminal_image + terminal_size;

louds_.Init(louds_image, louds_size,
louds_lb0_cache_size, louds_lb1_cache_size,
louds_select0_cache_size, louds_select1_cache_size);
terminal_bit_vector_.Init(terminal_image, terminal_size);
terminal_bit_vector_.Init(terminal_image, terminal_size,
0, // Select0 is not carried out.
termvec_lb1_cache_size);
edge_character_ = reinterpret_cast<const char*>(edge_character);

return true;
Expand Down
14 changes: 9 additions & 5 deletions src/storage/louds/louds_trie.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,21 @@ class LoudsTrie {
LoudsTrie() : edge_character_(nullptr) {}
~LoudsTrie() {}

// Opens the binary image and constructs the data structure. The two cache
// sizes are passed to the underlying LOUDS. See louds.h for more information
// of cache size. This class doesn't own the "data", so it is caller's
// Opens the binary image and constructs the data structure. The first four
// cache sizes are passed to the underlying LOUDS. See louds.h for more
// information of cache size. The last one is passed to the underlying
// terminal bit vector. This class doesn't own the "data", so it is caller's
// responsibility to keep the data alive until Close is invoked. See .cc file
// for the detailed format of the binary image.
bool Open(const uint8 *data,
size_t louds_lb0_cache_size,
size_t louds_lb1_cache_size,
size_t louds_select0_cache_size,
size_t louds_select1_cache_size);
size_t louds_select1_cache_size,
size_t termvec_lb1_cache_size);

bool Open(const uint8 *data) {
return Open(data, 0, 0);
return Open(data, 0, 0, 0, 0, 0);
}

// Destructs the internal data structure explicitly (the destructor will do
Expand Down
Loading

0 comments on commit cac1465

Please sign in to comment.