Skip to content

Commit

Permalink
Fix problem that NO_POS is not working in Android Gboard
Browse files Browse the repository at this point in the history
- The cause of the bug was that while user_pos_*.data is in the dictionary side, Android Gboard did not have an updated version of dictionary data. As dictionary is separate from engine, there is no guarantee that '品詞なし' user pos is available in the specific dictionary. To avoid such invalid behavior, I added special treatment for NO_POS in user_dictionary Load function. By using this implementation they will work as expected even if there is no corresponding version of dictionary.
- I fixed some potential source of bugs, including
  - non-tab separator in user_pos.def
  - additional half-space in user_pos.def
  - mistake in the start index of pos list.

PiperOrigin-RevId: 575072274
  • Loading branch information
ensan-hcl authored and coooooooozy committed Nov 25, 2023
1 parent beb0cd0 commit b0c57fb
Show file tree
Hide file tree
Showing 7 changed files with 71 additions and 12 deletions.
6 changes: 3 additions & 3 deletions src/data/rules/user_pos.def
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,18 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

品詞なし NO_POS * 名詞,サ変接続,*,*,*,*
品詞なし NO_POS * 名詞,サ変接続,*,*,*,*
名詞 NOUN * 名詞,一般,*,*,*,*
短縮よみ ABBREVIATION * 特殊,短縮よみ
サジェストのみ SUGGESTION_ONLY * 特殊,サジェストのみ
サジェストのみ SUGGESTION_ONLY * 特殊,サジェストのみ
固有名詞 PROPER_NOUN * 名詞,固有名詞,一般,*,*,*
人名 PERSONAL_NAME * 名詞,固有名詞,人名,一般,*,*
姓 FAMILY_NAME * 名詞,固有名詞,人名,姓,*,*
名 FIRST_NAME * 名詞,固有名詞,人名,名,*,*
組織 ORGANIZATION_NAME * 名詞,固有名詞,組織,*,*,*
地名 PLACE_NAME * 名詞,固有名詞,地域,一般,*,*
名詞サ変 SA_IRREGULAR_CONJUGATION_NOUN * 名詞,サ変接続,*,*,*,*
名詞形動 ADJECTIVE_VERBAL_NOUN * 名詞,形容動詞語幹,*,*,*,*
名詞形動 ADJECTIVE_VERBAL_NOUN * 名詞,形容動詞語幹,*,*,*,*
数 NUMBER * 名詞,数,アラビア数字
アルファベット ALPHABET * 記号,アルファベット,*,*,*,*
記号 SYMBOL * 記号,一般,*,*,*,*
Expand Down
19 changes: 18 additions & 1 deletion src/dictionary/user_dictionary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,26 @@ class UserDictionary::TokensIndex {
continue;
}

// "抑制単語"
if (entry.pos() == user_dictionary::UserDictionary::SUPPRESSION_WORD) {
// "抑制単語"
suppression_dictionary_->AddEntry(reading, entry.value());
} else if (entry.pos() == user_dictionary::UserDictionary::NO_POS) {
// In theory NO_POS works without this implementation, as it is
// covered in the UserPos::GetTokens function. However, that function
// is depending on the user_pos_*.data in the dictionary and there
// will not be corresponding POS tag. To avoid invalid behavior, this
// special treatment is added here.
// "品詞なし"
const absl::string_view comment =
absl::StripAsciiWhitespace(entry.comment());
UserPos::Token token = {};
strings::Assign(token.key, entry.key());
strings::Assign(token.value, entry.value());
// NO_POS has '名詞サ変' id as in user_pos.def
user_pos_->GetPosIds("名詞サ変", &token.id);
strings::Assign(token.comment, comment);
token.attributes = UserPos::Token::SHORTCUT;
user_pos_tokens_.push_back(std::move(token));
} else {
tokens.clear();
user_pos_->GetTokens(
Expand Down
4 changes: 2 additions & 2 deletions src/dictionary/user_dictionary_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -617,8 +617,8 @@ TEST_F(UserDictionaryTest, TestLookupWithShortCut) {

EXPECT_TRUE(TestLookupExactHelper(kExpected2, std::size(kExpected2), "key", 3,
*user_dic));
TestLookupPredictiveHelper(kExpectedPrediction,
std::size(kExpectedPrediction), "ke", *user_dic);
EXPECT_TRUE(TestLookupPredictiveHelper(kExpectedPrediction,
std::size(kExpectedPrediction), "ke", *user_dic));
EXPECT_TRUE(TestLookupPrefixHelper(kExpected2, std::size(kExpected2),
"keykey", 3, *user_dic));
}
Expand Down
5 changes: 1 addition & 4 deletions src/dictionary/user_dictionary_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -303,8 +303,6 @@ UserDictionaryCommandStatus::Status UserDictionaryUtil::ValidateDictionaryName(
namespace {
// The index of each element should be matched with the actual value of enum.
// See also user_dictionary_storage.proto for the definition of the enum.
// Note that the '0' is invalid in the definition, so the corresponding
// element is nullptr.
const char *kPosTypeStringTable[] = {
"品詞なし", "名詞", "短縮よみ", "サジェストのみ",
"固有名詞", "人名", "", "",
Expand Down Expand Up @@ -332,8 +330,7 @@ const char *UserDictionaryUtil::GetStringPosType(

user_dictionary::UserDictionary::PosType UserDictionaryUtil::ToPosType(
const char *string_pos_type) {
// Skip the element at 0.
for (int i = 1; i < std::size(kPosTypeStringTable); ++i) {
for (int i = 0; i < std::size(kPosTypeStringTable); ++i) {
if (strcmp(kPosTypeStringTable[i], string_pos_type) == 0) {
return static_cast<user_dictionary::UserDictionary::PosType>(i);
}
Expand Down
31 changes: 31 additions & 0 deletions src/dictionary/user_dictionary_util_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@

#include "dictionary/user_dictionary_util.h"

#include <string.h>

#include <cstdint>
#include <string>

Expand Down Expand Up @@ -332,6 +334,35 @@ TEST(UserDictionaryUtilTest, GetUserDictionaryIndexById) {
EXPECT_EQ(UserDictionaryUtil::GetUserDictionaryIndexById(storage, -1), -1);
}

TEST(UserDictionaryUtilTest, ToPosType) {
EXPECT_TRUE(UserDictionaryUtil::ToPosType("品詞なし") ==
UserDictionary::NO_POS);
EXPECT_TRUE(UserDictionaryUtil::ToPosType("サジェストのみ") ==
UserDictionary::SUGGESTION_ONLY);
EXPECT_TRUE(UserDictionaryUtil::ToPosType("動詞ワ行五段") ==
UserDictionary::WA_GROUP1_VERB);
EXPECT_TRUE(UserDictionaryUtil::ToPosType("抑制単語") ==
UserDictionary::SUPPRESSION_WORD);
}

TEST(UserDictionaryUtilTest, GetStringPosType) {
EXPECT_EQ(strcmp(UserDictionaryUtil::GetStringPosType(UserDictionary::NO_POS),
"品詞なし"),
0);
EXPECT_EQ(strcmp(UserDictionaryUtil::GetStringPosType(
UserDictionary::SUGGESTION_ONLY),
"サジェストのみ"),
0);
EXPECT_EQ(strcmp(UserDictionaryUtil::GetStringPosType(
UserDictionary::WA_GROUP1_VERB),
"動詞ワ行五段"),
0);
EXPECT_EQ(strcmp(UserDictionaryUtil::GetStringPosType(
UserDictionary::SUPPRESSION_WORD),
"抑制単語"),
0);
}

TEST(UserDictionaryUtilTest, CreateDictionary) {
user_dictionary::UserDictionaryStorage storage;
uint64_t dictionary_id;
Expand Down
13 changes: 12 additions & 1 deletion src/dictionary/user_pos_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

#include "dictionary/user_pos.h"

#include <algorithm>
#include <cstdint>
#include <memory>
#include <string>
Expand Down Expand Up @@ -65,7 +66,17 @@ TEST_F(UserPosTest, UserPosBasicTest) {
std::vector<std::string> pos_list;
user_pos_->GetPosList(&pos_list);
EXPECT_FALSE(pos_list.empty());

// test contains
EXPECT_TRUE(std::find(pos_list.begin(), pos_list.end(), "名詞サ変") !=
pos_list.end());
EXPECT_TRUE(std::find(pos_list.begin(), pos_list.end(), "サジェストのみ") !=
pos_list.end());
EXPECT_TRUE(std::find(pos_list.begin(), pos_list.end(), "短縮よみ") !=
pos_list.end());
EXPECT_TRUE(std::find(pos_list.begin(), pos_list.end(), "抑制単語") !=
pos_list.end());
EXPECT_TRUE(std::find(pos_list.begin(), pos_list.end(), "品詞なし") !=
pos_list.end());
uint16_t id = 0;
for (size_t i = 0; i < pos_list.size(); ++i) {
EXPECT_TRUE(user_pos_->IsValidPos(pos_list[i]));
Expand Down
5 changes: 4 additions & 1 deletion src/protocol/user_dictionary_storage.proto
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ option java_outer_classname = "ProtoUserDictionaryStorage";
option java_package = "org.mozc.android.inputmethod.japanese.protobuf";

message UserDictionary {
// LINT.IfChange
enum PosType {
// Use this when the POS tag is not provided or unknown.
NO_POS = 0; // "品詞なし"
Expand Down Expand Up @@ -89,7 +90,9 @@ message UserDictionary {

SUPPRESSION_WORD = 44; // "抑制単語"
}

// LINT.ThenChange(
// //dictionary/user_dictionary_util.cc
// )
// ID of this dictionary
optional uint64 id = 1 [default = 0, jstype = JS_STRING];

Expand Down

0 comments on commit b0c57fb

Please sign in to comment.