Skip to content

Commit

Permalink
Implement rule based zero query suggestion.
Browse files Browse the repository at this point in the history
BUG=none
TEST=unittest
  • Loading branch information
Toshiyuki Hanaoka authored and yukawa committed May 2, 2015
1 parent 8e4dedc commit 988392a
Show file tree
Hide file tree
Showing 8 changed files with 362 additions and 110 deletions.
8 changes: 8 additions & 0 deletions src/data/zero_query/zero_query.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Rules for triggering zero query suggestion/prediction.
# File format:
# trigger<TAB>candidate_1,candidate_2,...,candidate_n
# ...
# Note that '#' is the special character that starts a comment line, so it
# cannot be placed at the beginning of a line.

@ gmail.com
2 changes: 1 addition & 1 deletion src/mozc_version_template.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MAJOR=2
MINOR=17
BUILD=2079
BUILD=2080
REVISION=102
# NACL_DICTIONARY_VERSION is the target version of the system dictionary to be
# downloaded by NaCl Mozc.
Expand Down
130 changes: 99 additions & 31 deletions src/prediction/dictionary_predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
#include "dictionary/pos_matcher.h"
#include "prediction/predictor_interface.h"
#include "prediction/suggestion_filter.h"
#include "prediction/zero_query_data.h"
#include "prediction/zero_query_number_data.h"
#include "session/commands.pb.h"

Expand Down Expand Up @@ -98,22 +99,22 @@ void GetNumberSuffixArray(const string &history_input,
int default_num = -1;
int suffix_num = -1;

for (int i = 0; ZeroQueryNum[i]; ++i) {
if (default_str == ZeroQueryNum[i][0]) {
for (int i = 0; i < kZeroQueryNum_size; ++i) {
if (default_str == kZeroQueryNum_data[i][0]) {
default_num = i;
} else if (history_input == ZeroQueryNum[i][0]) {
} else if (history_input == kZeroQueryNum_data[i][0]) {
suffix_num = i;
}
}
DCHECK_GE(default_num, 0);

if (suffix_num != -1) {
for (int j = 1; ZeroQueryNum[suffix_num][j]; ++j) {
suffixes->push_back(ZeroQueryNum[suffix_num][j]);
for (int j = 1; kZeroQueryNum_data[suffix_num][j]; ++j) {
suffixes->push_back(kZeroQueryNum_data[suffix_num][j]);
}
}
for (int j = 1; ZeroQueryNum[default_num][j]; ++j) {
suffixes->push_back(ZeroQueryNum[default_num][j]);
for (int j = 1; kZeroQueryNum_data[default_num][j]; ++j) {
suffixes->push_back(kZeroQueryNum_data[default_num][j]);
}
}

Expand Down Expand Up @@ -163,6 +164,11 @@ bool IsTypingCorrectionEnabled() {
FLAGS_enable_typing_correction;
}

// Ordering predicate for zero query rules.  A rule is an array of C strings;
// only the first string of each rule (index 0) takes part in the comparison,
// which is a plain byte-wise strcmp() "less than".
struct ZeroQueryRuleCompare {
  bool operator()(const char **lhs, const char **rhs) const {
    const char *const lhs_key = lhs[0];
    const char *const rhs_key = rhs[0];
    const int order = strcmp(lhs_key, rhs_key);
    return order < 0;
  }
};
} // namespace

class DictionaryPredictor::PredictiveLookupCallback :
Expand Down Expand Up @@ -1617,6 +1623,88 @@ void DictionaryPredictor::GetPredictiveResultsUsingTypingCorrection(
}
}

// Returns true if we add zero query result.
bool DictionaryPredictor::AggregateNumberZeroQueryPrediction(
const Segments &segments, vector<Result> *results) const {
string number_key;
if (!GetNumberHistory(segments, &number_key)) {
return false;
}

// Use number suffixes and do not add normal zero query.
vector<string> suffixes;
GetNumberSuffixArray(number_key, &suffixes);
DCHECK_GT(suffixes.size(), 0);
int cost = 0;

for (size_t i = 0; i < suffixes.size(); ++i) {
const auto &suffix = suffixes[i];
// Increment cost to show the candidates in order.
const int kSuffixPenalty = 10;

results->push_back(Result());
Result *result = &results->back();
result->SetTypesAndTokenAttributes(SUFFIX, Token::NONE);
result->key = suffix;
result->value = suffix;
result->wcost = cost;
result->lid = counter_suffix_word_id_;
result->rid = counter_suffix_word_id_;

cost += kSuffixPenalty;
}
return true;
}

// Returns true if we add zero query result.
bool DictionaryPredictor::AggregateZeroQueryPrediction(
const Segments &segments, vector<Result> *results) const {
const size_t history_size = segments.history_segments_size();
if (history_size <= 0) {
return false;
}

const Segment &last_segment = segments.history_segment(history_size - 1);
DCHECK_GT(last_segment.candidates_size(), 0);
const string &history_value = last_segment.candidate(0).value;

const char *key_item[] = {history_value.c_str(), 0};
const char **key = key_item;
// kZeroQueryData_data is a 2-dimensional string array and
// sorted by the first string.
// For each string array, the first item is a key for zero query prediction,
// the rest items are candidates, and the last item is 0.
const char ***result_rule =
lower_bound(
kZeroQueryData_data, kZeroQueryData_data + kZeroQueryData_size,
key, ZeroQueryRuleCompare());
if (result_rule == (kZeroQueryData_data + kZeroQueryData_size) ||
history_value != (*result_rule)[0]) {
return false;
}

int cost = 0;
for (int i = 1; (*result_rule)[i]; ++i) {
string candidate = (*result_rule)[i];

// Increment cost to show the candidates in order.
const int kPenalty = 10;

results->push_back(Result());
Result *result = &results->back();

result->SetTypesAndTokenAttributes(SUFFIX, Token::NONE);
result->key = candidate;
result->value = candidate;
result->wcost = cost;
result->lid = 0; // EOS
result->rid = 0; // EOS

cost += kPenalty;
}
return true;
}

void DictionaryPredictor::AggregateSuffixPrediction(
PredictionTypes types,
const ConversionRequest &request,
Expand All @@ -1630,30 +1718,10 @@ void DictionaryPredictor::AggregateSuffixPrediction(

const bool is_zero_query = segments.conversion_segment(0).key().empty();
if (is_zero_query) {
string number_key;
if (GetNumberHistory(segments, &number_key)) {
// Use number suffixes and do not add normal zero query.
vector<string> suffixes;
GetNumberSuffixArray(number_key, &suffixes);
DCHECK_GT(suffixes.size(), 0);
int cost = 0;

for (vector<string>::const_iterator it = suffixes.begin();
it != suffixes.end(); ++it) {
// Increment cost to show the candidates in order.
const int kSuffixPenalty = 10;

results->push_back(Result());
Result *result = &results->back();
result->SetTypesAndTokenAttributes(SUFFIX, Token::NONE);
result->key = *it;
result->value = *it;
result->wcost = cost;
result->lid = counter_suffix_word_id_;
result->rid = counter_suffix_word_id_;

cost += kSuffixPenalty;
}
if (AggregateNumberZeroQueryPrediction(segments, results)) {
return;
}
if (AggregateZeroQueryPrediction(segments, results)) {
return;
}
// Fall through
Expand Down
7 changes: 7 additions & 0 deletions src/prediction/dictionary_predictor.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,12 @@ class DictionaryPredictor : public PredictorInterface {
const Segments &segments,
vector<Result> *results) const;

// Adds number suffix results to |results| for zero query prediction.
// Returns true if we add zero query result.
bool AggregateNumberZeroQueryPrediction(const Segments &segments,
vector<Result> *results) const;

// Adds rule based zero query results to |results|, looked up by the value of
// the last history segment.
// Returns true if we add zero query result.
bool AggregateZeroQueryPrediction(const Segments &segments,
                                  vector<Result> *results) const;

void ApplyPenaltyForKeyExpansion(const Segments &segments,
vector<Result> *results) const;

Expand All @@ -200,6 +206,7 @@ class DictionaryPredictor : public PredictorInterface {
FRIEND_TEST(DictionaryPredictorTest, AggregateSuffixPrediction);
FRIEND_TEST(DictionaryPredictorTest, ZeroQuerySuggestionAfterNumbers);
FRIEND_TEST(DictionaryPredictorTest, TriggerNumberZeroQuerySuggestion);
FRIEND_TEST(DictionaryPredictorTest, TriggerZeroQuerySuggestion);
FRIEND_TEST(DictionaryPredictorTest, GetHistoryKeyAndValue);
FRIEND_TEST(DictionaryPredictorTest, RealtimeConversionStartingWithAlphabets);
FRIEND_TEST(DictionaryPredictorTest, IsAggressiveSuggestion);
Expand Down
49 changes: 48 additions & 1 deletion src/prediction/dictionary_predictor_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2097,7 +2097,54 @@ TEST_F(DictionaryPredictorTest, TriggerNumberZeroQuerySuggestion) {
break;
}
}
EXPECT_EQ(test_case.expected_result, found);
EXPECT_EQ(test_case.expected_result, found) << test_case.history_value;
}
}

// Checks rule based zero query suggestion through AggregateSuffixPrediction:
// with history value "@" a result "gmail.com" with lid 0 (EOS) must be found,
// while with history value "!" no result "?" with lid 0 may be found.
TEST_F(DictionaryPredictorTest, TriggerZeroQuerySuggestion) {
  scoped_ptr<MockDataAndPredictor> data_and_predictor(
      CreateDictionaryPredictorWithMockData());
  const DictionaryPredictor *predictor =
      data_and_predictor->dictionary_predictor();
  const ConversionRequest conversion_request;

  const struct TestCase {
    const char *history_key;
    const char *history_value;
    const char *find_value;
    bool expected_result;
  } kTestCases[] = {
    { "@", "@", "gmail.com", true },
    { "!", "!", "?", false },
  };

  for (const TestCase &test_case : kTestCases) {
    // Empty conversion key, i.e. a zero query request, preceded by the
    // history under test.
    Segments segments;
    MakeSegmentsForSuggestion("", &segments);
    PrependHistorySegments(
        test_case.history_key, test_case.history_value, &segments);

    vector<DictionaryPredictor::Result> results;
    predictor->AggregateSuffixPrediction(
        DictionaryPredictor::SUFFIX,
        conversion_request, segments, &results);
    EXPECT_FALSE(results.empty());

    // Scan until the expected value is seen; every result visited on the way
    // must be a SUFFIX result.
    bool found = false;
    for (const DictionaryPredictor::Result &result : results) {
      EXPECT_EQ(result.types, DictionaryPredictor::SUFFIX);
      const bool value_matches = (result.value == test_case.find_value);
      if (value_matches && result.lid == 0 /* EOS */) {
        found = true;
        break;
      }
    }
    EXPECT_EQ(test_case.expected_result, found) << test_case.history_value;
  }
}

Expand Down
Loading

0 comments on commit 988392a

Please sign in to comment.