Skip to content

Commit

Permalink
Change all the rest to use Segments iterators
Browse files Browse the repository at this point in the history
Some index-based code that is not simple to rewrite is left unchanged.

This patch should have no behavior changes.

PiperOrigin-RevId: 610149703
  • Loading branch information
kojiishi authored and hiroyuki-komatsu committed Feb 26, 2024
1 parent 8cb32c2 commit 3be4ef6
Show file tree
Hide file tree
Showing 11 changed files with 91 additions and 108 deletions.
9 changes: 6 additions & 3 deletions src/prediction/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ mozc_cc_library(
"//base:config_file_stream",
"//base:hash",
"//base:japanese_util",
"//base:logging",
"//base:thread",
"//base:util",
"//base:vlog",
Expand All @@ -98,6 +97,8 @@ mozc_cc_library(
"//usage_stats",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/hash",
"@com_google_absl//absl/log",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/time",
],
Expand Down Expand Up @@ -235,7 +236,6 @@ mozc_cc_library(
":single_kanji_prediction_aggregator",
":zero_query_dict",
"//base:japanese_util",
"//base:logging",
"//base:number_util",
"//base:util",
"//base:vlog",
Expand All @@ -255,6 +255,8 @@ mozc_cc_library(
"//request:conversion_request",
"//request:request_util",
"//transliteration",
"@com_google_absl//absl/log",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/types:span",
],
Expand Down Expand Up @@ -406,7 +408,6 @@ mozc_cc_library(
hdrs = ["predictor.h"],
deps = [
":predictor_interface",
"//base:logging",
"//base:util",
"//converter:converter_interface",
"//converter:segments",
Expand All @@ -415,6 +416,8 @@ mozc_cc_library(
"//request:conversion_request",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/log",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/strings",
],
alwayslink = 1,
Expand Down
12 changes: 6 additions & 6 deletions src/prediction/dictionary_prediction_aggregator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,14 @@
#include <utility>
#include <vector>

#include "ngram/neg_log_prob.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
#include "base/japanese_util.h"
#include "base/logging.h"
#include "base/number_util.h"
#include "base/strings/unicode.h"
#include "base/util.h"
Expand Down Expand Up @@ -833,7 +835,7 @@ size_t DictionaryPredictionAggregator::GetRealtimeCandidateMaxSize(
size = mixed_conversion ? max_size : default_size;
break;
case ConversionRequest::SUGGESTION:
// Fewer candidatats are needed basically.
// Fewer candidates are needed basically.
// But in mixed_conversion mode we should behave as in conversion mode.
size = mixed_conversion ? default_size : 1;
break;
Expand Down Expand Up @@ -891,8 +893,7 @@ bool DictionaryPredictionAggregator::PushBackTopConversionResult(
// TODO(noriyukit): This duplicates code in converter/nbest_generator.cc;
// we should refactor it once we find a better design.
bool inner_segment_boundary_success = true;
for (size_t i = 0; i < tmp_segments.conversion_segments_size(); ++i) {
const Segment &segment = tmp_segments.conversion_segment(i);
for (const Segment &segment : tmp_segments.conversion_segments()) {
const Segment::Candidate &candidate = segment.candidate(0);
result->value.append(candidate.value);
result->key.append(candidate.key);
Expand Down Expand Up @@ -1037,8 +1038,7 @@ DictionaryPredictionAggregator::GenerateQueryForHandwriting(
return std::nullopt;
}
HandwritingQueryInfo info;
for (size_t i = 0; i < tmp_segments.conversion_segments_size(); ++i) {
const Segment &segment = tmp_segments.conversion_segment(i);
for (const Segment &segment : tmp_segments.conversion_segments()) {
if (segment.candidates_size() == 0) {
LOG(WARNING) << "Reverse conversion failed";
return std::nullopt;
Expand Down
11 changes: 6 additions & 5 deletions src/prediction/predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,12 @@
#include <utility>

#include "absl/container/flat_hash_map.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "base/logging.h"
#include "base/util.h"
#include "converter/converter_interface.h"
#include "converter/segments.h"
#include "prediction/predictor_interface.h"
#include "protocol/commands.pb.h"
Expand Down Expand Up @@ -313,11 +315,10 @@ ConversionRequest MobilePredictor::GetRequestForPredict(
namespace {
// Fills empty lid and rid of candidates with the candidates of the same value.
void MaybeFillFallbackPos(Segments *segments) {
for (size_t si = 0; si < segments->conversion_segments_size(); ++si) {
for (Segment &segment : segments->conversion_segments()) {
absl::flat_hash_map<absl::string_view, Segment::Candidate *> posless_cands;
Segment *seg = segments->mutable_conversion_segment(si);
for (size_t ci = 0; ci < seg->candidates_size(); ++ci) {
Segment::Candidate *cand = seg->mutable_candidate(ci);
for (size_t ci = 0; ci < segment.candidates_size(); ++ci) {
Segment::Candidate *cand = segment.mutable_candidate(ci);
// Candidates with empty POS come before candidates with filled POS.
if (cand->lid == 0 || cand->rid == 0) {
posless_cands[cand->value] = cand;
Expand Down
40 changes: 14 additions & 26 deletions src/prediction/user_history_predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@

#include "absl/container/flat_hash_set.h"
#include "absl/hash/hash.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/strings/ascii.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
Expand All @@ -58,14 +60,12 @@
#include "base/container/trie.h"
#include "base/hash.h"
#include "base/japanese_util.h"
#include "base/logging.h"
#include "base/protobuf/message.h"
#include "base/thread.h"
#include "base/util.h"
#include "base/vlog.h"
#include "composer/composer.h"
#include "converter/segments.h"
#include "dictionary/dictionary_interface.h"
#include "dictionary/pos_matcher.h"
#include "dictionary/suppression_dictionary.h"
#include "engine/modules.h"
Expand All @@ -81,9 +81,6 @@
namespace mozc::prediction {
namespace {

using ::mozc::dictionary::DictionaryInterface;
using ::mozc::dictionary::PosMatcher;
using ::mozc::dictionary::SuppressionDictionary;
using ::mozc::usage_stats::UsageStats;

// Finds suggestion candidates from the most recent 3000 history in LRU.
Expand Down Expand Up @@ -1211,17 +1208,17 @@ bool UserHistoryPredictor::ShouldPredict(RequestType request_type,

const UserHistoryPredictor::Entry *UserHistoryPredictor::LookupPrevEntry(
const Segments &segments) const {
const size_t history_segments_size = segments.history_segments_size();
const Segments::Range<Segments::const_iterator> history_segments =
segments.history_segments();
const Entry *prev_entry = nullptr;
// When there are non-zero history segments, lookup an entry
// from the LRU dictionary, which is corresponding to the last
// history segment.
if (history_segments_size == 0) {
if (history_segments.empty()) {
return nullptr;
}

const Segment &history_segment =
segments.history_segment(history_segments_size - 1);
const Segment &history_segment = history_segments.back();

// Simply lookup the history_segment.
prev_entry = dic_->LookupWithoutInsert(SegmentFingerprint(history_segment));
Expand Down Expand Up @@ -1560,7 +1557,7 @@ bool UserHistoryPredictor::ShouldInsert(
return false;
}

// For mobile, we do not learn candidates that ends with puctuation.
// For mobile, we do not learn candidates that end with punctuation.
if (request_type == ZERO_QUERY_SUGGESTION && Util::CharsLen(value) > 1 &&
IsPunctuation(Util::Utf8SubString(value, Util::CharsLen(value) - 1, 1))) {
return false;
Expand Down Expand Up @@ -1720,18 +1717,14 @@ void UserHistoryPredictor::Finish(const ConversionRequest &request,
segments->conversion_segment(0).candidates_size() > 0 &&
IsPunctuation(segments->conversion_segment(0).candidate(0).value) &&
// Check if the previous value looks like a sentence.
segments->history_segments_size() > 0 &&
segments->history_segment(segments->history_segments_size() - 1)
.candidates_size() > 0 &&
!segments->history_segments().empty() &&
segments->history_segments().back().candidates_size() > 0 &&
IsSentenceLikeCandidate(
segments->history_segment(segments->history_segments_size() - 1)
.candidate(0))) {
segments->history_segments().back().candidate(0))) {
const Entry *entry = &(dic_->Head()->value);
DCHECK(entry);
const std::string &last_value =
segments->history_segment(segments->history_segments_size() - 1)
.candidate(0)
.value;
segments->history_segments().back().candidate(0).value;
// Check if the head value in LRU ends with the candidate value in history
// segments.
if (absl::EndsWith(entry->value(), last_value)) {
Expand All @@ -1746,11 +1739,8 @@ void UserHistoryPredictor::Finish(const ConversionRequest &request,
}
}

const size_t history_segments_size = segments->history_segments_size();

// Checks every segment is valid.
for (size_t i = history_segments_size; i < segments->segments_size(); ++i) {
const Segment &segment = segments->segment(i);
for (const Segment &segment : segments->conversion_segments()) {
if (segment.candidates_size() < 1) {
MOZC_VLOG(2) << "candidates size < 1";
return;
Expand Down Expand Up @@ -1778,8 +1768,7 @@ void UserHistoryPredictor::MakeLearningSegments(
const Segments &segments, SegmentsForLearning *learning_segments) const {
DCHECK(learning_segments);

for (size_t i = 0; i < segments.history_segments_size(); ++i) {
const Segment &segment = segments.history_segment(i);
for (const Segment &segment : segments.history_segments()) {
DCHECK_LE(1, segment.candidates_size());
auto &candidate = segment.candidate(0);
learning_segments->history_segments.push_back(
Expand All @@ -1788,8 +1777,7 @@ void UserHistoryPredictor::MakeLearningSegments(
}

std::string all_key, all_value;
for (size_t i = 0; i < segments.conversion_segments_size(); ++i) {
const Segment &segment = segments.conversion_segment(i);
for (const Segment &segment : segments.conversion_segments()) {
const Segment::Candidate &candidate = segment.candidate(0);
absl::StrAppend(&all_key, candidate.key);
absl::StrAppend(&all_value, candidate.value);
Expand Down
2 changes: 1 addition & 1 deletion src/rewriter/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -1049,14 +1049,14 @@ mozc_cc_library(
deps = [
":number_compound_util",
":rewriter_interface",
"//base:logging",
"//base:number_util",
"//base:util",
"//base:vlog",
"//base/container:serialized_string_array",
"//converter:segments",
"//data_manager:data_manager_interface",
"//dictionary:pos_matcher",
"@com_google_absl//absl/log",
"@com_google_absl//absl/strings",
],
)
Expand Down
43 changes: 19 additions & 24 deletions src/rewriter/focus_candidate_rewriter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@
#include <cstdint>
#include <string>

#include "absl/log/log.h"
#include "absl/strings/string_view.h"
#include "base/container/serialized_string_array.h"
#include "base/logging.h"
#include "base/number_util.h"
#include "base/util.h"
#include "base/vlog.h"
Expand Down Expand Up @@ -244,20 +244,19 @@ bool FocusCandidateRewriter::Focus(Segments *segments, size_t segment_index,
if (IsNumberCandidate(seg.candidate(candidate_index))) {
bool modified = false;
int distance = 0;
for (size_t i = segment_index + 1; i < segments->segments_size(); ++i) {
Segment *target_right_seg = segments->mutable_segment(i);
if (target_right_seg == nullptr ||
target_right_seg->candidates_size() <= 0) {
for (Segment &target_right_seg :
segments->all().drop(segment_index + 1)) {
if (target_right_seg.candidates_size() <= 0) {
LOG(WARNING) << "target right seg is not valid";
return false;
}
if (!IsValidSegment(*target_right_seg)) {
if (!IsValidSegment(target_right_seg)) {
continue;
}

// Make sure the first candidate of the segment is number.
if (IsNumberSegment(*target_right_seg) &&
RewriteNumber(target_right_seg, seg.candidate(candidate_index))) {
if (IsNumberSegment(target_right_seg) &&
RewriteNumber(&target_right_seg, seg.candidate(candidate_index))) {
modified = true;
distance = 0;
} else {
Expand All @@ -281,28 +280,25 @@ bool FocusCandidateRewriter::Focus(Segments *segments, size_t segment_index,
seg.candidate(candidate_index).content_key) {
int next_stat = CONNECTOR | NUMBER;
bool modified = false;
for (size_t i = segment_index + 1; i < segments->segments_size(); ++i) {
for (Segment &segment : segments->all().drop(segment_index + 1)) {
if (next_stat == (CONNECTOR | NUMBER)) {
if (IsConnectorSegment(segments->segment(i))) {
if (IsConnectorSegment(segment)) {
next_stat = NUMBER;
} else if (IsNumberSegment(segments->segment(i))) {
} else if (IsNumberSegment(segment)) {
next_stat = SUFFIX;
} else {
break;
}
} else if (next_stat == NUMBER &&
IsNumberSegment(segments->segment(i))) {
} else if (next_stat == NUMBER && IsNumberSegment(segment)) {
next_stat = SUFFIX;
} else if (next_stat == SUFFIX &&
segments->segment(i).candidates_size() > 0 &&
segments->segment(i).candidate(0).content_key ==
} else if (next_stat == SUFFIX && segment.candidates_size() > 0 &&
segment.candidate(0).content_key ==
seg.candidate(0).content_key) {
if (!IsValidSegment(segments->segment(i))) {
if (!IsValidSegment(segment)) {
continue;
}
modified |=
RewriteCandidate(segments->mutable_segment(i),
seg.candidate(candidate_index).content_value);
modified |= RewriteCandidate(
&segment, seg.candidate(candidate_index).content_value);
next_stat = CONNECTOR | NUMBER;
} else {
break;
Expand Down Expand Up @@ -333,9 +329,8 @@ bool FocusCandidateRewriter::RerankNumberCandidates(Segments *segments,
// compound style of the focused candidate.
bool modified = false;
int distance = 0;
for (size_t i = segment_index + 1; i < segments->segments_size(); ++i) {
Segment *seg = segments->mutable_segment(i);
const int index = FindMatchingCandidates(*seg, number_script_type, suffix);
for (Segment &seg : segments->all().drop(segment_index + 1)) {
const int index = FindMatchingCandidates(seg, number_script_type, suffix);
if (index == -1) {
// If there's no appropriate candidate having the same style, we increment
// the distance not to modify segments far from the focused one.
Expand All @@ -347,7 +342,7 @@ bool FocusCandidateRewriter::RerankNumberCandidates(Segments *segments,
// Move the target candidate to the top. We don't need to move if the
// target is already at top (i.e., the case where index == 0).
if (index > 0) {
seg->move_candidate(index, 0);
seg.move_candidate(index, 0);
modified = true;
distance = 0;
}
Expand Down
5 changes: 3 additions & 2 deletions src/session/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ mozc_cc_library(
deps = [
":session_converter_interface",
":session_usage_stats_util",
"//base:logging",
"//base:text_normalizer",
"//base:util",
"//base:vlog",
Expand All @@ -80,6 +79,8 @@ mozc_cc_library(
"//transliteration",
"//usage_stats",
"@com_google_absl//absl/flags:flag",
"@com_google_absl//absl/log",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/strings",
],
)
Expand All @@ -96,7 +97,6 @@ mozc_cc_test(
":request_test_util",
":session_converter",
":session_converter_interface",
"//base:logging",
"//base:util",
"//composer",
"//composer:table",
Expand All @@ -115,6 +115,7 @@ mozc_cc_test(
"//transliteration",
"//usage_stats",
"//usage_stats:usage_stats_testing_util",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/strings",
],
)
Expand Down
Loading

0 comments on commit 3be4ef6

Please sign in to comment.