Skip to content

Commit

Permalink
Support inner segment info.
Browse files Browse the repository at this point in the history
Candidates with inner segment info can be generated by mobile prediction.

PiperOrigin-RevId: 581895193
  • Loading branch information
Toshiyuki Hanaoka authored and hiroyuki-komatsu committed Nov 13, 2023
1 parent 96e76ef commit 3330302
Show file tree
Hide file tree
Showing 4 changed files with 149 additions and 8 deletions.
1 change: 1 addition & 0 deletions src/converter/segments.h
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ class Segment final {
absl::string_view GetContentValue() const;
absl::string_view GetFunctionalKey() const;
absl::string_view GetFunctionalValue() const;
// Returns the iterator's current index_.
// NOTE(review): presumably the zero-based position of the inner segment the
// iterator currently points at; confirm against Next().
size_t GetIndex() const { return index_; }

private:
const Candidate *candidate_;
Expand Down
56 changes: 48 additions & 8 deletions src/rewriter/user_segment_history_rewriter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -607,10 +607,6 @@ void UserSegmentHistoryRewriter::RememberNumberPreference(
void UserSegmentHistoryRewriter::RememberFirstCandidate(
const Segments &segments, size_t segment_index) {
const Segment &seg = segments.segment(segment_index);
if (seg.candidates_size() <= 1) {
return;
}

const Segment::Candidate &candidate = seg.candidate(0);

// http://b/issue?id=3156109
Expand Down Expand Up @@ -710,6 +706,49 @@ bool UserSegmentHistoryRewriter::IsAvailable(const ConversionRequest &request,
return true;
}

// Returns a copy of `segments` prepared for learning.
//
// A segment whose top candidate carries more than one inner segment boundary
// is expanded into one segment per inner segment. Each expanded segment keeps
// the original segment type and holds exactly one candidate built from the
// inner segment's key, value, content key and content value.
// Every other segment (including a segment with no candidates) is copied
// unchanged.
Segments UserSegmentHistoryRewriter::MakeLearningSegmentsForTesting(
    const Segments &segments) {
  Segments ret;
  for (size_t i = 0; i < segments.segments_size(); ++i) {
    const Segment &segment = segments.segment(i);
    // Guard before touching candidate(0): the caller filters out segments
    // without candidates only after this function has run, so an empty
    // segment must be passed through here instead of being dereferenced.
    if (segment.candidates_size() == 0) {
      Segment *seg = ret.add_segment();
      *seg = segment;
      continue;
    }

    const Segment::Candidate &candidate = segment.candidate(0);
    const size_t inner_segment_count = candidate.inner_segment_boundary.size();
    if (inner_segment_count <= 1) {
      // No inner segment info
      Segment *seg = ret.add_segment();
      *seg = segment;
      continue;
    }

    for (Segment::Candidate::InnerSegmentIterator iter(&candidate);
         !iter.Done(); iter.Next()) {
      const size_t index = iter.GetIndex();
      const absl::string_view key = iter.GetKey();

      Segment *seg = ret.add_segment();
      seg->set_segment_type(segment.segment_type());
      seg->set_key(key);
      seg->clear_candidates();

      Segment::Candidate *cand = seg->add_candidate();
      cand->attributes = candidate.attributes;
      cand->key = key;
      cand->content_key = iter.GetContentKey();
      cand->value = iter.GetValue();
      cand->content_value = iter.GetContentValue();
      // Fill IDs for the first and last inner segment only: the first one
      // takes the whole candidate's lid for both IDs, the last one takes its
      // rid for both IDs. Inner segments in between keep their default IDs.
      if (index == 0) {
        cand->lid = candidate.lid;
        cand->rid = candidate.lid;
      } else if (index == inner_segment_count - 1) {
        cand->lid = candidate.rid;
        cand->rid = candidate.rid;
      }
    }
  }
  return ret;
}

void UserSegmentHistoryRewriter::Finish(const ConversionRequest &request,
Segments *segments) {
if (request.request_type() != ConversionRequest::CONVERSION) {
Expand All @@ -725,9 +764,10 @@ void UserSegmentHistoryRewriter::Finish(const ConversionRequest &request,
return;
}

for (size_t i = segments->history_segments_size();
i < segments->segments_size(); ++i) {
const Segment &segment = segments->segment(i);
const Segments target_segments = MakeLearningSegmentsForTesting(*segments);
for (size_t i = target_segments.history_segments_size();
i < target_segments.segments_size(); ++i) {
const Segment &segment = target_segments.segment(i);
if (segment.candidates_size() <= 0 ||
segment.segment_type() != Segment::FIXED_VALUE ||
segment.candidate(0).attributes &
Expand All @@ -739,7 +779,7 @@ void UserSegmentHistoryRewriter::Finish(const ConversionRequest &request,
continue;
}
InsertTriggerKey(segment);
RememberFirstCandidate(*segments, i);
RememberFirstCandidate(target_segments, i);
}
// update usage stats here
usage_stats::UsageStats::SetInteger("UserSegmentHistoryEntrySize",
Expand Down
2 changes: 2 additions & 0 deletions src/rewriter/user_segment_history_rewriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ class UserSegmentHistoryRewriter : public RewriterInterface {
bool Reload() override;
void Clear() override;

// Returns a copy of `segments` in which every segment whose top candidate has
// more than one inner segment boundary is split into one segment per inner
// segment; other segments are copied as they are.
// Public so that tests can inspect the result; Finish() learns from it.
static Segments MakeLearningSegmentsForTesting(const Segments &segments);

private:
struct Score {
constexpr void Update(const Score other) {
Expand Down
98 changes: 98 additions & 0 deletions src/rewriter/user_segment_history_rewriter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

#include "rewriter/user_segment_history_rewriter.h"

#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
Expand Down Expand Up @@ -1661,5 +1662,102 @@ TEST_F(UserSegmentHistoryRewriterTest, AnnotationAfterLearning) {
}
}

// Verifies that a committed candidate carrying inner segment info is split
// into per-inner-segment learning segments, and that what is learned from one
// of those inner segments ("なかの" -> "中野") is applied to a later
// conversion of the same key.
TEST_F(UserSegmentHistoryRewriterTest, SupportInnerSegmentsOnLearning) {
  Segments segments;
  std::unique_ptr<UserSegmentHistoryRewriter> rewriter(
      CreateUserSegmentHistoryRewriter());

  // Step 1: commit a single-segment candidate that has three inner segments.
  {
    segments.Clear();
    InitSegments(&segments, 1, 2);
    constexpr absl::string_view kKey = "わたしのなまえはなかのです";
    constexpr absl::string_view kValue = "私の名前は中野です";
    segments.mutable_segment(0)->set_key(kKey);
    Segment::Candidate *candidate =
        segments.mutable_segment(0)->mutable_candidate(1);

    candidate->value = kValue;
    candidate->content_value = kValue;
    candidate->key = kKey;
    candidate->content_key = kKey;
    // The arguments are UTF-8 byte lengths of
    // (key, value, content key, content value).
    // "わたしの, 私の", "わたし, 私"
    candidate->PushBackInnerSegmentBoundary(12, 6, 9, 3);
    // "なまえは, 名前は", "なまえ, 名前"
    candidate->PushBackInnerSegmentBoundary(12, 9, 9, 6);
    // "なかのです, 中野です", "なかの, 中野"
    candidate->PushBackInnerSegmentBoundary(15, 12, 9, 6);
    candidate->lid = 10;
    candidate->rid = 20;

    segments.mutable_segment(0)->move_candidate(1, 0);
    segments.mutable_segment(0)->mutable_candidate(0)->attributes |=
        Segment::Candidate::RERANKED;
    segments.mutable_segment(0)->set_segment_type(Segment::FIXED_VALUE);

    {
      const Segments learning_segments =
          UserSegmentHistoryRewriter::MakeLearningSegmentsForTesting(segments);
      EXPECT_EQ(learning_segments.segments_size(), 3);

      // First inner segment: both IDs come from the original lid.
      EXPECT_EQ(learning_segments.segment(0).key(), "わたしの");
      EXPECT_EQ(learning_segments.segment(0).candidate(0).key, "わたしの");
      EXPECT_EQ(learning_segments.segment(0).candidate(0).value, "私の");
      EXPECT_EQ(learning_segments.segment(0).candidate(0).content_key,
                "わたし");
      EXPECT_EQ(learning_segments.segment(0).candidate(0).content_value,
                "私");
      EXPECT_EQ(learning_segments.segment(0).candidate(0).lid, 10);
      EXPECT_EQ(learning_segments.segment(0).candidate(0).rid, 10);
      EXPECT_EQ(learning_segments.segment(0).segment_type(),
                Segment::FIXED_VALUE);

      // Middle inner segment: IDs are not filled.
      EXPECT_EQ(learning_segments.segment(1).key(), "なまえは");
      EXPECT_EQ(learning_segments.segment(1).candidate(0).key, "なまえは");
      EXPECT_EQ(learning_segments.segment(1).candidate(0).value, "名前は");
      EXPECT_EQ(learning_segments.segment(1).candidate(0).content_key,
                "なまえ");
      EXPECT_EQ(learning_segments.segment(1).candidate(0).content_value,
                "名前");
      EXPECT_EQ(learning_segments.segment(1).candidate(0).lid, 0);
      EXPECT_EQ(learning_segments.segment(1).candidate(0).rid, 0);
      EXPECT_EQ(learning_segments.segment(1).segment_type(),
                Segment::FIXED_VALUE);

      // Last inner segment: both IDs come from the original rid.
      EXPECT_EQ(learning_segments.segment(2).key(), "なかのです");
      EXPECT_EQ(learning_segments.segment(2).candidate(0).key, "なかのです");
      EXPECT_EQ(learning_segments.segment(2).candidate(0).value, "中野です");
      EXPECT_EQ(learning_segments.segment(2).candidate(0).content_key,
                "なかの");
      EXPECT_EQ(learning_segments.segment(2).candidate(0).content_value,
                "中野");
      EXPECT_EQ(learning_segments.segment(2).candidate(0).lid, 20);
      EXPECT_EQ(learning_segments.segment(2).candidate(0).rid, 20);
      EXPECT_EQ(learning_segments.segment(2).segment_type(),
                Segment::FIXED_VALUE);
    }

    rewriter->Finish(request_, &segments);
  }

  // Step 2: convert "なかの" alone; the learned "中野" must be promoted over
  // the initially top-ranked "中埜".
  {
    segments.Clear();
    InitSegments(&segments, 1, 2);
    segments.mutable_segment(0)->set_key("なかの");
    Segment::Candidate *candidate =
        segments.mutable_segment(0)->mutable_candidate(0);
    candidate->value = "中埜";
    candidate->content_value = "中埜";
    candidate->key = "なかの";
    candidate->content_key = "なかの";

    candidate = segments.mutable_segment(0)->mutable_candidate(1);
    candidate->value = "中野";
    candidate->content_value = "中野";
    candidate->key = "なかの";
    candidate->content_key = "なかの";

    EXPECT_TRUE(rewriter->Rewrite(request_, &segments));
    EXPECT_EQ(segments.segment(0).candidate(0).value, "中野");
  }
}

} // namespace
} // namespace mozc

0 comments on commit 3330302

Please sign in to comment.