From 29a58b60f8fa982a2a6a071a49ab22d23f421dc4 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Tue, 21 May 2024 10:18:53 +0900 Subject: [PATCH 1/4] add wrapper class for morpheme with ToXContent --- src/main/ext/es-8.00-ge/lucene-aliases.kt | 8 ++++- src/main/ext/es-8.00-lt/lucene-aliases.kt | 8 ++++- src/main/ext/os-2.00-ge/lucene-aliases.kt | 8 ++++- .../ja/attributes/MorphemeAttributeImpl.kt | 31 ++++++++++++++++--- 4 files changed, 48 insertions(+), 7 deletions(-) diff --git a/src/main/ext/es-8.00-ge/lucene-aliases.kt b/src/main/ext/es-8.00-ge/lucene-aliases.kt index 76def36e..233d95b3 100644 --- a/src/main/ext/es-8.00-ge/lucene-aliases.kt +++ b/src/main/ext/es-8.00-ge/lucene-aliases.kt @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Works Applications Co., Ltd. + * Copyright (c) 2023-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,3 +23,9 @@ typealias TokenFilterFactory = org.apache.lucene.analysis.TokenFilterFactory typealias ResourceLoaderArgument = org.apache.lucene.util.ResourceLoader typealias ResourceLoaderAware = org.apache.lucene.util.ResourceLoaderAware + +typealias ToXContent = org.elasticsearch.xcontent.ToXContent + +typealias ToXContentParams = org.elasticsearch.xcontent.ToXContent.Params + +typealias XContentBuilder = org.elasticsearch.xcontent.XContentBuilder diff --git a/src/main/ext/es-8.00-lt/lucene-aliases.kt b/src/main/ext/es-8.00-lt/lucene-aliases.kt index 143e404c..76f2fc1a 100644 --- a/src/main/ext/es-8.00-lt/lucene-aliases.kt +++ b/src/main/ext/es-8.00-lt/lucene-aliases.kt @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Works Applications Co., Ltd. + * Copyright (c) 2023-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,3 +23,9 @@ typealias TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactor typealias ResourceLoaderArgument = org.apache.lucene.analysis.util.ResourceLoader typealias ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware + +typealias ToXContent = org.elasticsearch.xcontent.ToXContent + +typealias ToXContentParams = org.elasticsearch.xcontent.ToXContent.Params + +typealias XContentBuilder = org.elasticsearch.xcontent.XContentBuilder diff --git a/src/main/ext/os-2.00-ge/lucene-aliases.kt b/src/main/ext/os-2.00-ge/lucene-aliases.kt index 76def36e..9b41efb4 100644 --- a/src/main/ext/os-2.00-ge/lucene-aliases.kt +++ b/src/main/ext/os-2.00-ge/lucene-aliases.kt @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Works Applications Co., Ltd. + * Copyright (c) 2023-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,3 +23,9 @@ typealias TokenFilterFactory = org.apache.lucene.analysis.TokenFilterFactory typealias ResourceLoaderArgument = org.apache.lucene.util.ResourceLoader typealias ResourceLoaderAware = org.apache.lucene.util.ResourceLoaderAware + +typealias ToXContent = org.opensearch.core.xcontent.ToXContent + +typealias ToXContentParams = org.opensearch.core.xcontent.ToXContent.Params + +typealias XContentBuilder = org.opensearch.core.xcontent.XContentBuilder diff --git a/src/main/java/com/worksap/nlp/lucene/sudachi/ja/attributes/MorphemeAttributeImpl.kt b/src/main/java/com/worksap/nlp/lucene/sudachi/ja/attributes/MorphemeAttributeImpl.kt index 3e67ba18..fd63e2fd 100644 --- a/src/main/java/com/worksap/nlp/lucene/sudachi/ja/attributes/MorphemeAttributeImpl.kt +++ b/src/main/java/com/worksap/nlp/lucene/sudachi/ja/attributes/MorphemeAttributeImpl.kt @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Works Applications Co., Ltd. + * Copyright (c) 2022-2024 Works Applications Co., Ltd. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,13 +16,36 @@ package com.worksap.nlp.lucene.sudachi.ja.attributes +import com.worksap.nlp.lucene.aliases.ToXContent +import com.worksap.nlp.lucene.aliases.ToXContentParams +import com.worksap.nlp.lucene.aliases.XContentBuilder import com.worksap.nlp.lucene.sudachi.ja.reflect import com.worksap.nlp.sudachi.Morpheme import org.apache.lucene.util.AttributeImpl import org.apache.lucene.util.AttributeReflector class MorphemeAttributeImpl : AttributeImpl(), MorphemeAttribute { - private var morpheme: Morpheme? = null + private var morpheme: MorphemeWrapper? = null + + private class MorphemeWrapper(morpheme: Morpheme) : ToXContent { + private val morpheme = morpheme + + override fun toXContent(builder: XContentBuilder, params: ToXContentParams): XContentBuilder { + builder.value( + mapOf( + "surface" to morpheme.surface(), + "dictionaryForm" to morpheme.dictionaryForm(), + "normalizedForm" to morpheme.normalizedForm(), + "readingForm" to morpheme.readingForm(), + "partOfSpeech" to morpheme.partOfSpeech(), + )) + return builder + } + + fun unwrap(): Morpheme { + return morpheme + } + } override fun clear() { morpheme = null @@ -37,10 +60,10 @@ class MorphemeAttributeImpl : AttributeImpl(), MorphemeAttribute { } override fun getMorpheme(): Morpheme? { - return morpheme + return morpheme?.let { m -> m.unwrap() } } override fun setMorpheme(morpheme: Morpheme?) 
{ - this.morpheme = morpheme + this.morpheme = morpheme?.let { m -> MorphemeWrapper(m) } } } From e3b33b504288dd945f13f826324558a59f19ac88 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Tue, 21 May 2024 11:36:44 +0900 Subject: [PATCH 2/4] support lower versions --- .../com/worksap/nlp/tools/engines.groovy | 7 ++++-- src/main/ext/es-7.15-ge/xcontent-aliases.kt | 25 +++++++++++++++++++ src/main/ext/es-7.15-lt/xcontent-aliases.kt | 25 +++++++++++++++++++ src/main/ext/es-8.00-ge/lucene-aliases.kt | 8 +----- src/main/ext/es-8.00-lt/lucene-aliases.kt | 8 +----- src/main/ext/os-2.00-ge/lucene-aliases.kt | 8 +----- src/main/ext/os-2.07-ge/xcontent-aliases.kt | 25 +++++++++++++++++++ src/main/ext/os-2.07-lt/xcontent-aliases.kt | 25 +++++++++++++++++++ 8 files changed, 108 insertions(+), 23 deletions(-) create mode 100644 src/main/ext/es-7.15-ge/xcontent-aliases.kt create mode 100644 src/main/ext/es-7.15-lt/xcontent-aliases.kt create mode 100644 src/main/ext/os-2.07-ge/xcontent-aliases.kt create mode 100644 src/main/ext/os-2.07-lt/xcontent-aliases.kt diff --git a/buildSrc/src/main/groovy/com/worksap/nlp/tools/engines.groovy b/buildSrc/src/main/groovy/com/worksap/nlp/tools/engines.groovy index cce96482..4bdfb308 100644 --- a/buildSrc/src/main/groovy/com/worksap/nlp/tools/engines.groovy +++ b/buildSrc/src/main/groovy/com/worksap/nlp/tools/engines.groovy @@ -49,7 +49,8 @@ enum EsSupport implements EngineSupport { enum OsSupport implements EngineSupport { Os20("os-2.00"), - Os210("os-2.10") + Os27("os-2.07"), + Os210("os-2.10"), String tag @@ -59,8 +60,10 @@ enum OsSupport implements EngineSupport { static OsSupport supportVersion(Version version) { - if (version.ge(2, 0) && version.lt(2, 10)) { + if (version.ge(2, 0) && version.lt(2, 7)) { return Os20 + } else if (version.ge(2, 7) && version.lt(2, 10)) { + return Os27 } else if (version.ge(2, 10)) { return Os210 } diff --git a/src/main/ext/es-7.15-ge/xcontent-aliases.kt b/src/main/ext/es-7.15-ge/xcontent-aliases.kt new 
file mode 100644 index 00000000..036a5cb4 --- /dev/null +++ b/src/main/ext/es-7.15-ge/xcontent-aliases.kt @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2024 Works Applications Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@file:Suppress("PackageDirectoryMismatch") + +package com.worksap.nlp.lucene.aliases + +typealias ToXContent = org.elasticsearch.xcontent.ToXContent + +typealias ToXContentParams = org.elasticsearch.xcontent.ToXContent.Params + +typealias XContentBuilder = org.elasticsearch.xcontent.XContentBuilder diff --git a/src/main/ext/es-7.15-lt/xcontent-aliases.kt b/src/main/ext/es-7.15-lt/xcontent-aliases.kt new file mode 100644 index 00000000..5fdcb844 --- /dev/null +++ b/src/main/ext/es-7.15-lt/xcontent-aliases.kt @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2024 Works Applications Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +@file:Suppress("PackageDirectoryMismatch") + +package com.worksap.nlp.lucene.aliases + +typealias ToXContent = org.elasticsearch.common.xcontent.ToXContent + +typealias ToXContentParams = org.elasticsearch.common.xcontent.ToXContent.Params + +typealias XContentBuilder = org.elasticsearch.common.xcontent.XContentBuilder diff --git a/src/main/ext/es-8.00-ge/lucene-aliases.kt b/src/main/ext/es-8.00-ge/lucene-aliases.kt index 233d95b3..76def36e 100644 --- a/src/main/ext/es-8.00-ge/lucene-aliases.kt +++ b/src/main/ext/es-8.00-ge/lucene-aliases.kt @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024 Works Applications Co., Ltd. + * Copyright (c) 2023 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,9 +23,3 @@ typealias TokenFilterFactory = org.apache.lucene.analysis.TokenFilterFactory typealias ResourceLoaderArgument = org.apache.lucene.util.ResourceLoader typealias ResourceLoaderAware = org.apache.lucene.util.ResourceLoaderAware - -typealias ToXContent = org.elasticsearch.xcontent.ToXContent - -typealias ToXContentParams = org.elasticsearch.xcontent.ToXContent.Params - -typealias XContentBuilder = org.elasticsearch.xcontent.XContentBuilder diff --git a/src/main/ext/es-8.00-lt/lucene-aliases.kt b/src/main/ext/es-8.00-lt/lucene-aliases.kt index 76f2fc1a..143e404c 100644 --- a/src/main/ext/es-8.00-lt/lucene-aliases.kt +++ b/src/main/ext/es-8.00-lt/lucene-aliases.kt @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024 Works Applications Co., Ltd. + * Copyright (c) 2023 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,9 +23,3 @@ typealias TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactor typealias ResourceLoaderArgument = org.apache.lucene.analysis.util.ResourceLoader typealias ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware - -typealias ToXContent = org.elasticsearch.xcontent.ToXContent - -typealias ToXContentParams = org.elasticsearch.xcontent.ToXContent.Params - -typealias XContentBuilder = org.elasticsearch.xcontent.XContentBuilder diff --git a/src/main/ext/os-2.00-ge/lucene-aliases.kt b/src/main/ext/os-2.00-ge/lucene-aliases.kt index 9b41efb4..76def36e 100644 --- a/src/main/ext/os-2.00-ge/lucene-aliases.kt +++ b/src/main/ext/os-2.00-ge/lucene-aliases.kt @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024 Works Applications Co., Ltd. + * Copyright (c) 2023 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,9 +23,3 @@ typealias TokenFilterFactory = org.apache.lucene.analysis.TokenFilterFactory typealias ResourceLoaderArgument = org.apache.lucene.util.ResourceLoader typealias ResourceLoaderAware = org.apache.lucene.util.ResourceLoaderAware - -typealias ToXContent = org.opensearch.core.xcontent.ToXContent - -typealias ToXContentParams = org.opensearch.core.xcontent.ToXContent.Params - -typealias XContentBuilder = org.opensearch.core.xcontent.XContentBuilder diff --git a/src/main/ext/os-2.07-ge/xcontent-aliases.kt b/src/main/ext/os-2.07-ge/xcontent-aliases.kt new file mode 100644 index 00000000..ff7b7bac --- /dev/null +++ b/src/main/ext/os-2.07-ge/xcontent-aliases.kt @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2024 Works Applications Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@file:Suppress("PackageDirectoryMismatch") + +package com.worksap.nlp.lucene.aliases + +typealias ToXContent = org.opensearch.core.xcontent.ToXContent + +typealias ToXContentParams = org.opensearch.core.xcontent.ToXContent.Params + +typealias XContentBuilder = org.opensearch.core.xcontent.XContentBuilder diff --git a/src/main/ext/os-2.07-lt/xcontent-aliases.kt b/src/main/ext/os-2.07-lt/xcontent-aliases.kt new file mode 100644 index 00000000..dddb4bda --- /dev/null +++ b/src/main/ext/os-2.07-lt/xcontent-aliases.kt @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2024 Works Applications Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +@file:Suppress("PackageDirectoryMismatch") + +package com.worksap.nlp.lucene.aliases + +typealias ToXContent = org.opensearch.common.xcontent.ToXContent + +typealias ToXContentParams = org.opensearch.common.xcontent.ToXContent.Params + +typealias XContentBuilder = org.opensearch.common.xcontent.XContentBuilder From 54576a599aedd79b1c8b574a1e99026c239fff5d Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Tue, 21 May 2024 11:46:03 +0900 Subject: [PATCH 3/4] add tests --- .../attributes/MorphemeAttributeImplTest.kt | 73 +++++++++++++++++++ test-scripts/01-integration-test.py | 21 ++++++ 2 files changed, 94 insertions(+) create mode 100644 src/test/java/com/worksap/nlp/lucene/sudachi/ja/attributes/MorphemeAttributeImplTest.kt diff --git a/src/test/java/com/worksap/nlp/lucene/sudachi/ja/attributes/MorphemeAttributeImplTest.kt b/src/test/java/com/worksap/nlp/lucene/sudachi/ja/attributes/MorphemeAttributeImplTest.kt new file mode 100644 index 00000000..ab140827 --- /dev/null +++ b/src/test/java/com/worksap/nlp/lucene/sudachi/ja/attributes/MorphemeAttributeImplTest.kt @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2024 Works Applications Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.worksap.nlp.lucene.sudachi.ja.attributes + +import com.worksap.nlp.lucene.aliases.ToXContent +import com.worksap.nlp.sudachi.Config +import com.worksap.nlp.sudachi.DictionaryFactory +import com.worksap.nlp.sudachi.Morpheme +import com.worksap.nlp.test.TestDictionary +import kotlin.test.Test +import kotlin.test.assertEquals +import kotlin.test.assertNull +import kotlin.test.assertTrue +import org.junit.Before +import org.junit.Rule + +class MorphemeAttributeImplTest { + @JvmField @Rule var testDic = TestDictionary("system") + + private lateinit var config: Config + + fun getFirstMorpheme(text: String): Morpheme? { + val dict = DictionaryFactory().create(config) + val tok = dict.create() + val morphemes = tok.tokenize(text) + + return if (morphemes.size == 0) null else morphemes.get(0) + } + + @Before + fun setup() { + val configDir = testDic.root.toPath().resolve("config/sudachi") + config = Config.fromFile(configDir.resolve("sudachi.json")) + } + + @Test + fun setMorpheme() { + var morphemeAtt = MorphemeAttributeImpl() + assertNull(morphemeAtt.getMorpheme()) + + val morph = getFirstMorpheme("東京都")!! + morphemeAtt.setMorpheme(morph) + assertEquals(morph, morphemeAtt.getMorpheme()) + } + + @Test + fun reflectMorpheme() { + var morphemeAtt = MorphemeAttributeImpl() + val morph = getFirstMorpheme("東京都")!! 
+ morphemeAtt.setMorpheme(morph) + + morphemeAtt.reflectWith( + fun(attClass, key, value) { + assertEquals(MorphemeAttribute::class.java, attClass) + assertEquals("morpheme", key) + assertTrue(value is ToXContent) + }) + } +} diff --git a/test-scripts/01-integration-test.py b/test-scripts/01-integration-test.py index b310391b..383b8db8 100644 --- a/test-scripts/01-integration-test.py +++ b/test-scripts/01-integration-test.py @@ -97,6 +97,27 @@ def test_tokenize_using_sudachi_tokenizer(self): self.assertEqual(6, tokens[3]["end_offset"]) return + def test_explain_tokenizer_details(self): + body = {"tokenizer": "sudachi_tokenizer", + "text": "すだち", "explain": True} + resp = es_instance.analyze(body) + self.assertEqual(200, resp.status) + + morpheme = json.loads(resp.data)[ + "detail"]["tokenizer"]["tokens"][0]["morpheme"] + self.assertIn("surface", morpheme) + self.assertEqual("すだち", morpheme["surface"]) + self.assertIn("dictionaryForm", morpheme) + self.assertEqual("すだち", morpheme["dictionaryForm"]) + self.assertIn("normalizedForm", morpheme) + self.assertEqual("酢橘", morpheme["normalizedForm"]) + self.assertIn("readingForm", morpheme) + self.assertEqual("スダチ", morpheme["readingForm"]) + self.assertIn("partOfSpeech", morpheme) + self.assertEqual(["名詞", "普通名詞", "一般", "*", "*", "*"], + morpheme["partOfSpeech"]) + return + class TestICUFiltered(unittest.TestCase): # requires analysis-icu plugin installed From fede7048ad01b09783a5ec2349449635eec53010 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Tue, 21 May 2024 15:39:42 +0900 Subject: [PATCH 4/4] test xcontent serialization --- build.gradle | 2 + .../attributes/MorphemeAttributeImplTest.kt | 47 ++++++++++++++++--- 2 files changed, 43 insertions(+), 6 deletions(-) diff --git a/build.gradle b/build.gradle index 02503802..484df01b 100644 --- a/build.gradle +++ b/build.gradle @@ -3,6 +3,7 @@ import org.jetbrains.kotlin.gradle.dsl.JvmTarget plugins { id 'java-library' id 'org.jetbrains.kotlin.jvm' version '1.8.0' + id 
"org.jetbrains.kotlin.plugin.serialization" version "1.8.0" id 'com.diffplug.spotless' version '6.16.0' id 'org.sonarqube' version '4.0.0.2929' id("org.jetbrains.kotlinx.kover") version "0.7.0" @@ -44,6 +45,7 @@ dependencies { testImplementation('org.jetbrains.kotlin:kotlin-test-junit') { exclude(group: 'org.hamcrest') } + testImplementation('org.jetbrains.kotlinx:kotlinx-serialization-json:1.6.3') kover(project(':integration')) kover(project(':testlib')) } diff --git a/src/test/java/com/worksap/nlp/lucene/sudachi/ja/attributes/MorphemeAttributeImplTest.kt b/src/test/java/com/worksap/nlp/lucene/sudachi/ja/attributes/MorphemeAttributeImplTest.kt index ab140827..1247f9c7 100644 --- a/src/test/java/com/worksap/nlp/lucene/sudachi/ja/attributes/MorphemeAttributeImplTest.kt +++ b/src/test/java/com/worksap/nlp/lucene/sudachi/ja/attributes/MorphemeAttributeImplTest.kt @@ -17,14 +17,19 @@ package com.worksap.nlp.lucene.sudachi.ja.attributes import com.worksap.nlp.lucene.aliases.ToXContent +import com.worksap.nlp.lucene.aliases.XContentBuilder +import com.worksap.nlp.search.aliases.XContentType import com.worksap.nlp.sudachi.Config import com.worksap.nlp.sudachi.DictionaryFactory import com.worksap.nlp.sudachi.Morpheme import com.worksap.nlp.test.TestDictionary import kotlin.test.Test import kotlin.test.assertEquals +import kotlin.test.assertNotNull import kotlin.test.assertNull import kotlin.test.assertTrue +import kotlinx.serialization.Serializable +import kotlinx.serialization.json.Json import org.junit.Before import org.junit.Rule @@ -52,22 +57,52 @@ class MorphemeAttributeImplTest { var morphemeAtt = MorphemeAttributeImpl() assertNull(morphemeAtt.getMorpheme()) - val morph = getFirstMorpheme("東京都")!! - morphemeAtt.setMorpheme(morph) - assertEquals(morph, morphemeAtt.getMorpheme()) + val morpheme = getFirstMorpheme("東京都")!! 
+ morphemeAtt.setMorpheme(morpheme) + assertEquals(morpheme, morphemeAtt.getMorpheme()) + + morphemeAtt.setMorpheme(null) + assertNull(morphemeAtt.getMorpheme()) } @Test - fun reflectMorpheme() { + fun toXContent() { var morphemeAtt = MorphemeAttributeImpl() - val morph = getFirstMorpheme("東京都")!! - morphemeAtt.setMorpheme(morph) + val morpheme = getFirstMorpheme("東京都")!! + morphemeAtt.setMorpheme(morpheme) + val builder = XContentBuilder.builder(XContentType.JSON.xContent()) + builder.startObject() morphemeAtt.reflectWith( fun(attClass, key, value) { assertEquals(MorphemeAttribute::class.java, attClass) assertEquals("morpheme", key) assertTrue(value is ToXContent) + + builder.field(key, value) }) + builder.endObject() + builder.flush() + + val serialized = builder.getOutputStream().toString() + val deserialized = Json.decodeFromString<MorphemeHolder>(serialized) + + assertNotNull(deserialized.morpheme) + assertEquals(morpheme.surface(), deserialized.morpheme.surface) + assertEquals(morpheme.dictionaryForm(), deserialized.morpheme.dictionaryForm) + assertEquals(morpheme.normalizedForm(), deserialized.morpheme.normalizedForm) + assertEquals(morpheme.readingForm(), deserialized.morpheme.readingForm) + assertEquals(morpheme.partOfSpeech(), deserialized.morpheme.partOfSpeech) } } + +@Serializable data class MorphemeHolder(val morpheme: MorphemeAttributeHolder) + +@Serializable +data class MorphemeAttributeHolder( + val surface: String, + val dictionaryForm: String, + val normalizedForm: String, + val readingForm: String, + val partOfSpeech: List<String>, +)