From b0c57fb72217795ae06f2b96844a9514adc3cc50 Mon Sep 17 00:00:00 2001 From: Keita Miwa Date: Fri, 20 Oct 2023 02:32:22 +0000 Subject: [PATCH] Fix problem that NO_POS is not working in Android Gboard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - The cause of the bug was that while user_pos_*.data is in the dictionary side, Android Gboard did not have an updated version of dictionary data. As dictionary is separate from engine, there is no guarantee that '品詞なし' user pos is available in the specific dictionary. To avoid such invalid behavior, I added special treatment for NO_POS in user_dictionary Load function. By using this implementation they will work as expected even if there is no corresponding version of dictionary. - I fixed some potential source of bugs, including - non-tab separator in user_pos.def - additional half-space in user_pos.def - mistake in the start index of pos list. PiperOrigin-RevId: 575072274 --- src/data/rules/user_pos.def | 6 ++-- src/dictionary/user_dictionary.cc | 19 ++++++++++++- src/dictionary/user_dictionary_test.cc | 4 +-- src/dictionary/user_dictionary_util.cc | 5 +--- src/dictionary/user_dictionary_util_test.cc | 31 +++++++++++++++++++++ src/dictionary/user_pos_test.cc | 13 ++++++++- src/protocol/user_dictionary_storage.proto | 5 +++- 7 files changed, 71 insertions(+), 12 deletions(-) diff --git a/src/data/rules/user_pos.def b/src/data/rules/user_pos.def index 2f3033f37f..c95d199e39 100644 --- a/src/data/rules/user_pos.def +++ b/src/data/rules/user_pos.def @@ -27,10 +27,10 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -品詞なし NO_POS * 名詞,サ変接続,*,*,*,* +品詞なし NO_POS * 名詞,サ変接続,*,*,*,* 名詞 NOUN * 名詞,一般,*,*,*,* 短縮よみ ABBREVIATION * 特殊,短縮よみ -サジェストのみ SUGGESTION_ONLY * 特殊,サジェストのみ +サジェストのみ SUGGESTION_ONLY * 特殊,サジェストのみ 固有名詞 PROPER_NOUN * 名詞,固有名詞,一般,*,*,* 人名 PERSONAL_NAME * 名詞,固有名詞,人名,一般,*,* 姓 FAMILY_NAME * 名詞,固有名詞,人名,姓,*,* @@ -38,7 +38,7 @@ 組織 ORGANIZATION_NAME * 名詞,固有名詞,組織,*,*,* 地名 PLACE_NAME * 名詞,固有名詞,地域,一般,*,* 名詞サ変 SA_IRREGULAR_CONJUGATION_NOUN * 名詞,サ変接続,*,*,*,* -名詞形動 ADJECTIVE_VERBAL_NOUN * 名詞,形容動詞語幹,*,*,*,* +名詞形動 ADJECTIVE_VERBAL_NOUN * 名詞,形容動詞語幹,*,*,*,* 数 NUMBER * 名詞,数,アラビア数字 アルファベット ALPHABET * 記号,アルファベット,*,*,*,* 記号 SYMBOL * 記号,一般,*,*,*,* diff --git a/src/dictionary/user_dictionary.cc b/src/dictionary/user_dictionary.cc index f43ac5b8bd..54fbf7035d 100644 --- a/src/dictionary/user_dictionary.cc +++ b/src/dictionary/user_dictionary.cc @@ -185,9 +185,26 @@ class UserDictionary::TokensIndex { continue; } - // "抑制単語" if (entry.pos() == user_dictionary::UserDictionary::SUPPRESSION_WORD) { + // "抑制単語" suppression_dictionary_->AddEntry(reading, entry.value()); + } else if (entry.pos() == user_dictionary::UserDictionary::NO_POS) { + // In theory NO_POS works without this implementation, as it is + // covered in the UserPos::GetTokens function. However, that function + // is depending on the user_pos_*.data in the dictionary and there + // will not be corresponding POS tag. To avoid invalid behavior, this + // special treatment is added here. + // "品詞なし" + const absl::string_view comment = + absl::StripAsciiWhitespace(entry.comment()); + UserPos::Token token = {}; + strings::Assign(token.key, entry.key()); + strings::Assign(token.value, entry.value()); + // NO_POS has '名詞サ変' id as in user_pos.def + user_pos_->GetPosIds("名詞サ変", &token.id); + strings::Assign(token.comment, comment); + token.attributes = UserPos::Token::SHORTCUT; + user_pos_tokens_.push_back(std::move(token)); } else { tokens.clear(); user_pos_->GetTokens( diff --git a/src/dictionary/user_dictionary_test.cc b/src/dictionary/user_dictionary_test.cc index 591e53efc3..3759c5310a 100644 --- a/src/dictionary/user_dictionary_test.cc +++ b/src/dictionary/user_dictionary_test.cc @@ -617,8 +617,8 @@ TEST_F(UserDictionaryTest, TestLookupWithShortCut) { EXPECT_TRUE(TestLookupExactHelper(kExpected2, std::size(kExpected2), "key", 3, *user_dic)); - TestLookupPredictiveHelper(kExpectedPrediction, - std::size(kExpectedPrediction), "ke", *user_dic); + EXPECT_TRUE(TestLookupPredictiveHelper(kExpectedPrediction, + std::size(kExpectedPrediction), "ke", *user_dic)); EXPECT_TRUE(TestLookupPrefixHelper(kExpected2, std::size(kExpected2), "keykey", 3, *user_dic)); } diff --git a/src/dictionary/user_dictionary_util.cc b/src/dictionary/user_dictionary_util.cc index fc6256df27..1fcaf63a74 100644 --- a/src/dictionary/user_dictionary_util.cc +++ b/src/dictionary/user_dictionary_util.cc @@ -303,8 +303,6 @@ UserDictionaryCommandStatus::Status UserDictionaryUtil::ValidateDictionaryName( namespace { // The index of each element should be matched with the actual value of enum. // See also user_dictionary_storage.proto for the definition of the enum. -// Note that the '0' is invalid in the definition, so the corresponding -// element is nullptr. const char *kPosTypeStringTable[] = { "品詞なし", "名詞", "短縮よみ", "サジェストのみ", "固有名詞", "人名", "姓", "名", @@ -332,8 +330,7 @@ const char *UserDictionaryUtil::GetStringPosType( user_dictionary::UserDictionary::PosType UserDictionaryUtil::ToPosType( const char *string_pos_type) { - // Skip the element at 0. - for (int i = 1; i < std::size(kPosTypeStringTable); ++i) { + for (int i = 0; i < std::size(kPosTypeStringTable); ++i) { if (strcmp(kPosTypeStringTable[i], string_pos_type) == 0) { return static_cast(i); } diff --git a/src/dictionary/user_dictionary_util_test.cc b/src/dictionary/user_dictionary_util_test.cc index 860772974a..6906a53d53 100644 --- a/src/dictionary/user_dictionary_util_test.cc +++ b/src/dictionary/user_dictionary_util_test.cc @@ -29,6 +29,8 @@ #include "dictionary/user_dictionary_util.h" +#include + #include #include @@ -332,6 +334,35 @@ TEST(UserDictionaryUtilTest, GetUserDictionaryIndexById) { EXPECT_EQ(UserDictionaryUtil::GetUserDictionaryIndexById(storage, -1), -1); } +TEST(UserDictionaryUtilTest, ToPosType) { + EXPECT_TRUE(UserDictionaryUtil::ToPosType("品詞なし") == + UserDictionary::NO_POS); + EXPECT_TRUE(UserDictionaryUtil::ToPosType("サジェストのみ") == + UserDictionary::SUGGESTION_ONLY); + EXPECT_TRUE(UserDictionaryUtil::ToPosType("動詞ワ行五段") == + UserDictionary::WA_GROUP1_VERB); + EXPECT_TRUE(UserDictionaryUtil::ToPosType("抑制単語") == + UserDictionary::SUPPRESSION_WORD); +} + +TEST(UserDictionaryUtilTest, GetStringPosType) { + EXPECT_EQ(strcmp(UserDictionaryUtil::GetStringPosType(UserDictionary::NO_POS), + "品詞なし"), + 0); + EXPECT_EQ(strcmp(UserDictionaryUtil::GetStringPosType( + UserDictionary::SUGGESTION_ONLY), + "サジェストのみ"), + 0); + EXPECT_EQ(strcmp(UserDictionaryUtil::GetStringPosType( + UserDictionary::WA_GROUP1_VERB), + "動詞ワ行五段"), + 0); + EXPECT_EQ(strcmp(UserDictionaryUtil::GetStringPosType( + UserDictionary::SUPPRESSION_WORD), + "抑制単語"), + 0); +} + TEST(UserDictionaryUtilTest, CreateDictionary) { user_dictionary::UserDictionaryStorage storage; uint64_t dictionary_id; diff --git a/src/dictionary/user_pos_test.cc b/src/dictionary/user_pos_test.cc index 45893f8572..557b5178c4 100644 --- a/src/dictionary/user_pos_test.cc +++ b/src/dictionary/user_pos_test.cc @@ -29,6 +29,7 @@ #include "dictionary/user_pos.h" +#include #include #include #include @@ -65,7 +66,17 @@ TEST_F(UserPosTest, UserPosBasicTest) { std::vector pos_list; user_pos_->GetPosList(&pos_list); EXPECT_FALSE(pos_list.empty()); - + // test contains + EXPECT_TRUE(std::find(pos_list.begin(), pos_list.end(), "名詞サ変") != + pos_list.end()); + EXPECT_TRUE(std::find(pos_list.begin(), pos_list.end(), "サジェストのみ") != + pos_list.end()); + EXPECT_TRUE(std::find(pos_list.begin(), pos_list.end(), "短縮よみ") != + pos_list.end()); + EXPECT_TRUE(std::find(pos_list.begin(), pos_list.end(), "抑制単語") != + pos_list.end()); + EXPECT_TRUE(std::find(pos_list.begin(), pos_list.end(), "品詞なし") != + pos_list.end()); uint16_t id = 0; for (size_t i = 0; i < pos_list.size(); ++i) { EXPECT_TRUE(user_pos_->IsValidPos(pos_list[i])); diff --git a/src/protocol/user_dictionary_storage.proto b/src/protocol/user_dictionary_storage.proto index ee200c9696..ec27faea41 100644 --- a/src/protocol/user_dictionary_storage.proto +++ b/src/protocol/user_dictionary_storage.proto @@ -35,6 +35,7 @@ option java_outer_classname = "ProtoUserDictionaryStorage"; option java_package = "org.mozc.android.inputmethod.japanese.protobuf"; message UserDictionary { + // LINT.IfChange enum PosType { // Use this when the POS tag is not provided or unknown. NO_POS = 0; // "品詞なし" @@ -89,7 +90,9 @@ message UserDictionary { SUPPRESSION_WORD = 44; // "抑制単語" } - + // LINT.ThenChange( + // //dictionary/user_dictionary_util.cc + // ) // ID of this dictionary optional uint64 id = 1 [default = 0, jstype = JS_STRING];