def test_explain_tokenizer_details(self):
    """Verify the per-token attributes returned by an explain-mode analyze call.

    Sends an analyze request for the text "すだち" using
    ``sudachi_tokenizer`` with ``explain`` set to True, expects a 200
    status, and then checks that the first token listed under
    ``detail.tokenizer.tokens`` carries the expected dictionary form,
    normalized form, reading form and part-of-speech values.
    """
    request = {
        "tokenizer": "sudachi_tokenizer",
        "text": "すだち",
        "explain": True,
    }
    resp = es_instance.analyze(request)
    self.assertEqual(200, resp.status)

    payload = json.loads(resp.data)
    first_token = payload["detail"]["tokenizer"]["tokens"][0]

    # (attribute name, expected value) pairs, checked in this order so the
    # first mismatch reported is the same as with one assertion per line.
    expected_attributes = (
        ("dictionaryForm", "すだち"),
        ("normalizedForm", "酢橘"),
        ("readingForm", "スダチ"),
        ("partOfSpeech", ["名詞", "普通名詞", "一般", "*", "*", "*"]),
    )
    for attribute, expected in expected_attributes:
        # Presence is asserted first so a missing key is reported as a
        # test failure instead of surfacing as a KeyError.
        self.assertIn(attribute, first_token)
        self.assertEqual(expected, first_token[attribute])