From 8baaa7abc53c49d7b475256436935f2e30fe3a4c Mon Sep 17 00:00:00 2001
From: mh-northlander <mh.northlander+github@gmail.com>
Date: Fri, 5 Jul 2024 17:24:57 +0900
Subject: [PATCH 1/9] add missing docstrings

---
 python/py_src/sudachipy/errors.py | 6 ++++--
 python/src/build.rs               | 4 +++-
 python/src/lib.rs                 | 6 ++++--
 python/src/morpheme.rs            | 6 ++++--
 python/src/pos_matcher.rs         | 6 +++++-
 python/src/pretokenizer.rs        | 7 ++++---
 python/src/tokenizer.rs           | 7 ++++---
 7 files changed, 28 insertions(+), 14 deletions(-)
diff --git a/python/py_src/sudachipy/errors.py b/python/py_src/sudachipy/errors.py
index e75e21cd..c11a8205 100644
--- a/python/py_src/sudachipy/errors.py
+++ b/python/py_src/sudachipy/errors.py
@@ -1,4 +1,4 @@
-#   Copyright (c) 2023 Works Applications Co., Ltd.
+#   Copyright (c) 2023-2024 Works Applications Co., Ltd.
 #
 #   Licensed under the Apache License, Version 2.0 (the "License");
 #   you may not use this file except in compliance with the License.
@@ -13,4 +13,6 @@
 #   limitations under the License.
 
 class SudachiError(Exception):
-    pass
\ No newline at end of file
+    """Base class for all Sudachipy exceptions.
+    """
+    pass
diff --git a/python/src/build.rs b/python/src/build.rs
index a6005b26..59eb50c9 100644
--- a/python/src/build.rs
+++ b/python/src/build.rs
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2021 Works Applications Co., Ltd.
+ *  Copyright (c) 2021-2024 Works Applications Co., Ltd.
  *
  *  Licensed under the Apache License, Version 2.0 (the "License");
  *  you may not use this file except in compliance with the License.
@@ -58,6 +58,7 @@ fn create_file(p: &Path) -> std::io::Result<File> {
     OpenOptions::new().create_new(true).write(true).open(p)
 }
 
+/// Build system dictionary from matrix and lexicons.
 #[pyfunction]
 #[pyo3(text_signature = "(matrix, lex, output, description=None) -> list")]
 fn build_system_dic<'p>(
@@ -87,6 +88,7 @@ fn build_system_dic<'p>(
     to_stats(py, builder)
 }
 
+/// Build user dictionary from lexicons based on the given system dictionary.
 #[pyfunction]
 #[pyo3(text_signature = "(system, lex, output, description=None) -> list")]
 fn build_user_dic<'p>(
diff --git a/python/src/lib.rs b/python/src/lib.rs
index 68a9c91d..4887a737 100644
--- a/python/src/lib.rs
+++ b/python/src/lib.rs
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2021 Works Applications Co., Ltd.
+ *  Copyright (c) 2021-2024 Works Applications Co., Ltd.
  *
  *  Licensed under the Apache License, Version 2.0 (the "License");
  *  you may not use this file except in compliance with the License.
@@ -26,7 +26,9 @@ mod projection;
 mod tokenizer;
 mod word_info;
 
-/// module root
+/// SudachiPy raw module root.
+///
+/// Users should not use this directly.
 #[pymodule]
 fn sudachipy(_py: Python, m: &PyModule) -> PyResult<()> {
     m.add_class::<dictionary::PyDictionary>()?;
diff --git a/python/src/morpheme.rs b/python/src/morpheme.rs
index ad3929dd..47e020ee 100644
--- a/python/src/morpheme.rs
+++ b/python/src/morpheme.rs
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2021 Works Applications Co., Ltd.
+ *  Copyright (c) 2021-2024 Works Applications Co., Ltd.
  *
  *  Licensed under the Apache License, Version 2.0 (the "License");
  *  you may not use this file except in compliance with the License.
@@ -86,6 +86,7 @@ impl PyMorphemeListWrapper {
         }
     }
 }
+
 #[pymethods]
 impl PyMorphemeListWrapper {
     /// Returns an empty morpheme list with dictionary
@@ -197,7 +198,7 @@ impl PyMorphemeListWrapper {
     }
 }
 
-/// A morpheme (basic semantic unit of language).
+/// An iterator over the MorphemeList.
 #[pyclass(module = "sudachipy.morphemelist", name = "MorphemeIter")]
 pub struct PyMorphemeIter {
     list: Py<PyMorphemeListWrapper>,
@@ -241,6 +242,7 @@ impl<'py> Deref for MorphemeRef<'py> {
     }
 }
 
+/// A morpheme (basic semantic unit of language).
 #[pyclass(module = "sudachipy.morpheme", name = "Morpheme", frozen)]
 pub struct PyMorpheme {
     list: Py<PyMorphemeListWrapper>,
diff --git a/python/src/pos_matcher.rs b/python/src/pos_matcher.rs
index 7c6a884d..a849edf5 100644
--- a/python/src/pos_matcher.rs
+++ b/python/src/pos_matcher.rs
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2021 Works Applications Co., Ltd.
+ *  Copyright (c) 2021-2024 Works Applications Co., Ltd.
  *
  *  Licensed under the Apache License, Version 2.0 (the "License");
  *  you may not use this file except in compliance with the License.
@@ -26,6 +26,9 @@ use sudachi::pos::PosMatcher;
 use crate::dictionary::PyDicData;
 use crate::morpheme::PyMorpheme;
 
+/// A part-of-speech matcher which checks if a morpheme belongs to a set of part of speech.
+///
+/// Create using Dictionary.pos_matcher method.
 #[pyclass(name = "PosMatcher", module = "sudachipy")]
 pub struct PyPosMatcher {
     matcher: PosMatcher,
@@ -189,6 +192,7 @@ impl PyPosMatcher {
     }
 }
 
+/// An iterator over POS tuples in the PosPatcher
 #[pyclass(name = "PosMatcherIterator", module = "sudachipy")]
 pub struct PyPosIter {
     data: Vec<u16>,
diff --git a/python/src/pretokenizer.rs b/python/src/pretokenizer.rs
index 755f040b..385c6dcb 100644
--- a/python/src/pretokenizer.rs
+++ b/python/src/pretokenizer.rs
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2021 Works Applications Co., Ltd.
+ *  Copyright (c) 2021-2024 Works Applications Co., Ltd.
  *
  *  Licensed under the Apache License, Version 2.0 (the "License");
  *  you may not use this file except in compliance with the License.
@@ -76,9 +76,10 @@ impl PerThreadPreTokenizer {
     }
 }
 
-/// Binding for the Tokenizer, which handles threading for tokenization
+/// Binding for the Tokenizer, which handles threading for tokenization.
 ///
-/// We use ThreadLocal for storing actual tokenizers
+/// Create using Dictionary.pre_tokenizer method.
+/// We use ThreadLocal for storing actual tokenizers.
 #[pyclass(module = "sudachipy.pretokenizer", name = "SudachiPreTokenizer")]
 pub struct PyPretokenizer {
     dict: Arc<PyDicData>,
diff --git a/python/src/tokenizer.rs b/python/src/tokenizer.rs
index 558d02cb..a53ce166 100644
--- a/python/src/tokenizer.rs
+++ b/python/src/tokenizer.rs
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2021 Works Applications Co., Ltd.
+ *  Copyright (c) 2021-2024 Works Applications Co., Ltd.
  *
  *  Licensed under the Apache License, Version 2.0 (the "License");
  *  you may not use this file except in compliance with the License.
@@ -36,7 +36,6 @@ use crate::morpheme::{PyMorphemeListWrapper, PyProjector};
 /// B == middle mode
 ///
 /// C == long mode
-//
 #[pyclass(module = "sudachipy.tokenizer", name = "SplitMode", frozen)]
 #[derive(Clone, PartialEq, Eq, Copy, Debug)]
 #[repr(u8)]
@@ -68,6 +67,7 @@ impl From<Mode> for PySplitMode {
 
 #[pymethods]
 impl PySplitMode {
+    /// Parse SplitMode from a character.
     #[new]
     fn new(mode: Option<&str>) -> PyResult<PySplitMode> {
         let mode = match mode {
@@ -82,7 +82,7 @@ impl PySplitMode {
     }
 }
 
-/// Sudachi Tokenizer, Python version
+/// Sudachi Tokenizer
 #[pyclass(module = "sudachipy.tokenizer", name = "Tokenizer")]
 pub(crate) struct PyTokenizer {
     tokenizer: StatefulTokenizer<Arc<PyDicData>>,
@@ -182,6 +182,7 @@ impl PyTokenizer {
         Ok(out_list)
     }
 
+    /// SplitMode of the tokenizer.
     #[getter]
     fn mode(&self) -> PySplitMode {
         self.tokenizer.mode().into()

From 8b597e341c3b9c2b9340c08d6507bfa75e041ab8 Mon Sep 17 00:00:00 2001
From: mh-northlander <mh.northlander+github@gmail.com>
Date: Fri, 5 Jul 2024 17:52:19 +0900
Subject: [PATCH 2/9] copy docstring from new to class

---
 python/src/dictionary.rs | 22 ++++++++++++++++------
 python/src/tokenizer.rs  | 11 ++++++++---
 2 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/python/src/dictionary.rs b/python/src/dictionary.rs
index bc333c8e..1bada310 100644
--- a/python/src/dictionary.rs
+++ b/python/src/dictionary.rs
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2021-2023 Works Applications Co., Ltd.
+ *  Copyright (c) 2021-2024 Works Applications Co., Ltd.
  *
  *  Licensed under the Apache License, Version 2.0 (the "License");
  *  you may not use this file except in compliance with the License.
@@ -78,7 +78,17 @@ impl PyDicData {
     }
 }
 
-/// A sudachi dictionary
+/// A sudachi dictionary.
+///
+/// If both config.systemDict and dict_type are not given, `sudachidict_core` is used.
+/// If both config.systemDict and dict_type are given, dict_type is used.
+/// If dict is an absolute path to a file, it is used as a dictionary.
+///
+/// :param config_path: path to the configuration JSON file.
+/// :param resource_dir: path to the resource directory folder.
+/// :param dict: type of pre-packaged dictionary, referring to sudachidict_<dict_type> packages on PyPI: https://pypi.org/search/?q=sudachidict.
+///     Also, can be an _absolute_ path to a compiled dictionary file.
+/// :param dict_type: deprecated alias to dict.
 #[pyclass(module = "sudachipy.dictionary", name = "Dictionary")]
 #[derive(Clone)]
 pub struct PyDictionary {
@@ -92,13 +102,13 @@ impl PyDictionary {
     ///
     /// If both config.systemDict and dict_type are not given, `sudachidict_core` is used.
     /// If both config.systemDict and dict_type are given, dict_type is used.
-    /// If dict is an absolute path to a file, it is used as a dictionary
+    /// If dict is an absolute path to a file, it is used as a dictionary.
     ///
-    /// :param config_path: path to the configuration JSON file
-    /// :param resource_dir: path to the resource directory folder
+    /// :param config_path: path to the configuration JSON file.
+    /// :param resource_dir: path to the resource directory folder.
     /// :param dict: type of pre-packaged dictionary, referring to sudachidict_<dict_type> packages on PyPI: https://pypi.org/search/?q=sudachidict.
     ///     Also, can be an _absolute_ path to a compiled dictionary file.
-    /// :param dict_type: deprecated alias to dict
+    /// :param dict_type: deprecated alias to dict.
     #[new]
     #[pyo3(signature=(config_path = None, resource_dir = None, dict = None, dict_type = None, *, config = None))]
     fn new(
diff --git a/python/src/tokenizer.rs b/python/src/tokenizer.rs
index a53ce166..fe3b66d3 100644
--- a/python/src/tokenizer.rs
+++ b/python/src/tokenizer.rs
@@ -29,13 +29,13 @@ use crate::dictionary::{extract_mode, PyDicData};
 use crate::errors::SudachiError as SudachiPyErr;
 use crate::morpheme::{PyMorphemeListWrapper, PyProjector};
 
-/// Unit to split text
+/// Unit to split text.
 ///
 /// A == short mode
-///
 /// B == middle mode
-///
 /// C == long mode
+///
+/// :param mode: str to parse. One of [A,B,C] in capital or lower case.
 #[pyclass(module = "sudachipy.tokenizer", name = "SplitMode", frozen)]
 #[derive(Clone, PartialEq, Eq, Copy, Debug)]
 #[repr(u8)]
@@ -68,7 +68,10 @@ impl From<Mode> for PySplitMode {
 #[pymethods]
 impl PySplitMode {
     /// Parse SplitMode from a character.
+    ///
+    /// :param mode: str to parse. One of [A,B,C] in captital or lower case.
     #[new]
+    #[pyo3(signature=(mode=None, *))]
     fn new(mode: Option<&str>) -> PyResult<PySplitMode> {
         let mode = match mode {
             Some(m) => m,
@@ -83,6 +86,8 @@ impl PySplitMode {
 }
 
 /// Sudachi Tokenizer
+///
+/// Create using Dictionary.create method.
 #[pyclass(module = "sudachipy.tokenizer", name = "Tokenizer")]
 pub(crate) struct PyTokenizer {
     tokenizer: StatefulTokenizer<Arc<PyDicData>>,

From c1d37c7f0aab64bd64144664537fa46512aac6c5 Mon Sep 17 00:00:00 2001
From: mh-northlander <mh.northlander+github@gmail.com>
Date: Mon, 8 Jul 2024 11:04:08 +0900
Subject: [PATCH 3/9] update text_signature

---
 python/src/build.rs      |  4 ++--
 python/src/dictionary.rs | 21 ++++++++++++---------
 python/src/morpheme.rs   | 36 ++++++++++++++++++------------------
 python/src/tokenizer.rs  |  9 ++++++---
 4 files changed, 38 insertions(+), 32 deletions(-)

diff --git a/python/src/build.rs b/python/src/build.rs
index 59eb50c9..350f2fb3 100644
--- a/python/src/build.rs
+++ b/python/src/build.rs
@@ -60,7 +60,7 @@ fn create_file(p: &Path) -> std::io::Result<File> {
 
 /// Build system dictionary from matrix and lexicons.
 #[pyfunction]
-#[pyo3(text_signature = "(matrix, lex, output, description=None) -> list")]
+#[pyo3(text_signature="(matrix, lex, output, description=None) -> list[tuple[str, int, float]]")]
 fn build_system_dic<'p>(
     py: Python<'p>,
     matrix: &'p PyAny,
@@ -90,7 +90,7 @@ fn build_system_dic<'p>(
 
 /// Build user dictionary from lexicons based on the given system dictionary.
 #[pyfunction]
-#[pyo3(text_signature = "(system, lex, output, description=None) -> list")]
+#[pyo3(text_signature="(system, lex, output, description=None) -> list[tuple[str, int, float]]")]
 fn build_user_dic<'p>(
     py: Python<'p>,
     system: &'p PyAny,
diff --git a/python/src/dictionary.rs b/python/src/dictionary.rs
index 1bada310..e208492f 100644
--- a/python/src/dictionary.rs
+++ b/python/src/dictionary.rs
@@ -110,7 +110,10 @@ impl PyDictionary {
     ///     Also, can be an _absolute_ path to a compiled dictionary file.
     /// :param dict_type: deprecated alias to dict.
     #[new]
-    #[pyo3(signature=(config_path = None, resource_dir = None, dict = None, dict_type = None, *, config = None))]
+    #[pyo3(
+        text_signature="(config_path=None, resource_dir=None, dict=None, dict_type=None, *, config=None) -> Dictionary",
+        signature=(config_path=None, resource_dir=None, dict=None, dict_type=None, *, config=None)
+    )]
     fn new(
         py: Python,
         config_path: Option<&PyAny>,
@@ -230,8 +233,8 @@ impl PyDictionary {
     /// :param fields: load only a subset of fields.
     ///     See https://worksapplications.github.io/sudachi.rs/python/topics/subsetting.html
     #[pyo3(
-        text_signature = "($self, mode = 'C') -> sudachipy.Tokenizer",
-        signature = (mode = None, fields = None, *, projection = None)
+        text_signature="(self, /, mode=None, fields=None, *, projection=None) -> Tokenizer",
+        signature=(mode=None, fields=None, *, projection=None)
     )]
     fn create<'py>(
         &'py self,
@@ -272,7 +275,7 @@ impl PyDictionary {
     /// (None, None, None, None, None, '終止形‐一般') will match any word in 終止形‐一般 conjugation form.
     ///
     /// :param target: can be either a callable or list of POS partial tuples
-    #[pyo3(text_signature = "($self, target)")]
+    #[pyo3(text_signature="(self, /, target) -> PosMatcher")]
     fn pos_matcher<'py>(&'py self, py: Python<'py>, target: &PyAny) -> PyResult<PyPosMatcher> {
         PyPosMatcher::create(py, self.dictionary.as_ref().unwrap(), target)
     }
@@ -293,8 +296,8 @@ impl PyDictionary {
     /// :type mode: sudachipy.SplitMode
     /// :type fields: Set[str]
     #[pyo3(
-        text_signature = "($self, mode, fields, handler) -> tokenizers.PreTokenizer",
-        signature = (mode = None, fields = None, handler = None, *, projection = None)
+        text_signature="(self, /, mode=None, fields=None, handler=None, *, projection=None) -> tokenizers.PreTokenizer",
+        signature=(mode=None, fields=None, handler=None, *, projection=None)
     )]
     fn pre_tokenizer<'p>(
         &'p self,
@@ -349,7 +352,7 @@ impl PyDictionary {
     ///     See https://worksapplications.github.io/sudachi.rs/python/topics/out_param.html for details.
     /// :type surface: str
     /// :type out: sudachipy.MorphemeList
-    #[pyo3(text_signature = "($self, surface, out = None) -> sudachipy.MorphemeList")]
+    #[pyo3(text_signature="(self, /, surface, out=None) -> MorphemeList")]
     fn lookup<'p>(
         &'p self,
         py: Python<'p>,
@@ -377,13 +380,13 @@ impl PyDictionary {
     }
 
     /// Close this dictionary
-    #[pyo3(text_signature = "($self)")]
+    #[pyo3(text_signature="(self, /) -> ()")]
     fn close(&mut self) {
         self.dictionary = None;
     }
 
     /// Get POS Tuple by its id
-    #[pyo3(text_signature = "($self, pos_id: int)")]
+    #[pyo3(text_signature="(self, /, pos_id: int) -> tuple[str, str, str, str, str, str]")]
     fn pos_of<'p>(&'p self, py: Python<'p>, pos_id: usize) -> Option<&'p PyTuple> {
         let dic = self.dictionary.as_ref().unwrap();
         dic.pos.get(pos_id).map(|x| x.as_ref(py))
diff --git a/python/src/morpheme.rs b/python/src/morpheme.rs
index 47e020ee..f1aa204d 100644
--- a/python/src/morpheme.rs
+++ b/python/src/morpheme.rs
@@ -91,7 +91,7 @@ impl PyMorphemeListWrapper {
 impl PyMorphemeListWrapper {
     /// Returns an empty morpheme list with dictionary
     #[classmethod]
-    #[pyo3(text_signature = "(dict: sudachipy.Dictionary) -> sudachipy.MorphemeList")]
+    #[pyo3(text_signature="(dict: Dictionary) -> MorphemeList")]
     fn empty(_cls: &PyType, py: Python, dict: &PyDictionary) -> PyResult<Self> {
         let cat = PyModule::import(py, "builtins")?.getattr("DeprecationWarning")?;
         PyErr::warn(
@@ -110,13 +110,13 @@ impl PyMorphemeListWrapper {
     }
 
     /// Returns the total cost of the path
-    #[pyo3(text_signature = "($self) -> int")]
+    #[pyo3(text_signature="(self, /) -> int")]
     fn get_internal_cost(&self, py: Python) -> i32 {
         self.internal(py).get_internal_cost()
     }
 
     /// Returns the number of morpheme in this list.
-    #[pyo3(text_signature = "($self) -> int")]
+    #[pyo3(text_signature="(self, /) -> int")]
     fn size(&self, py: Python) -> usize {
         self.internal(py).len()
     }
@@ -279,21 +279,21 @@ impl PyMorpheme {
 #[pymethods]
 impl PyMorpheme {
     /// Returns the begin index of this in the input text
-    #[pyo3(text_signature = "($self) -> int")]
+    #[pyo3(text_signature="(self, /) -> int")]
     fn begin(&self, py: Python) -> usize {
         // call codepoint version
         self.morph(py).begin_c()
     }
 
     /// Returns the end index of this in the input text
-    #[pyo3(text_signature = "($self) -> int")]
+    #[pyo3(text_signature="(self, /) -> int")]
     fn end(&self, py: Python) -> usize {
         // call codepoint version
         self.morph(py).end_c()
     }
 
     /// Returns the substring of input text corresponding to the morpheme, or a projection if one is configured
-    #[pyo3(text_signature = "($self) -> str")]
+    #[pyo3(text_signature="(self, /) -> str")]
     fn surface<'py>(&'py self, py: Python<'py>) -> &'py PyString {
         let list = self.list(py);
         let morph = self.morph(py);
@@ -304,14 +304,14 @@ impl PyMorpheme {
     }
 
     /// Returns the substring of input text corresponding to the morpheme regardless the configured projection
-    #[pyo3(text_signature = "($self) -> str")]
+    #[pyo3(text_signature="(self, /) -> str")]
     fn raw_surface<'py>(&'py self, py: Python<'py>) -> &'py PyString {
         PyString::new(py, self.morph(py).surface().deref())
     }
 
     /// Returns the part of speech as a six-element tuple.
     /// Tuple elements are four POS levels, conjugation type and conjugation form.    
-    #[pyo3(text_signature = "($self)")]
+    #[pyo3(text_signature="(self, /) -> tuple[str, str, str, str, str, str]")]
     fn part_of_speech<'py>(&'py self, py: Python<'py>) -> Py<PyTuple> {
         let pos_id = self.part_of_speech_id(py);
         self.list(py)
@@ -322,25 +322,25 @@ impl PyMorpheme {
     }
 
     /// Returns the id of the part of speech in the dictionary
-    #[pyo3(text_signature = "($self) -> int")]
+    #[pyo3(text_signature="(self, /) -> int")]
     pub fn part_of_speech_id(&self, py: Python) -> u16 {
         self.morph(py).part_of_speech_id()
     }
 
     /// Returns the dictionary form
-    #[pyo3(text_signature = "($self) -> str")]
+    #[pyo3(text_signature="(self, /) -> str")]
     fn dictionary_form<'py>(&'py self, py: Python<'py>) -> PyObject {
         self.morph(py).get_word_info().dictionary_form().into_py(py)
     }
 
     /// Returns the normalized form
-    #[pyo3(text_signature = "($self) -> str")]
+    #[pyo3(text_signature="(self, /) -> str")]
     fn normalized_form<'py>(&'py self, py: Python<'py>) -> PyObject {
         self.morph(py).get_word_info().normalized_form().into_py(py)
     }
 
     /// Returns the reading form
-    #[pyo3(text_signature = "($self) -> str")]
+    #[pyo3(text_signature="(self, /) -> str")]
     fn reading_form<'py>(&'py self, py: Python<'py>) -> PyObject {
         self.morph(py).get_word_info().reading_form().into_py(py)
     }
@@ -358,7 +358,7 @@ impl PyMorpheme {
     /// :type out: Optional[sudachipy.MorphemeList]
     /// :type add_single: bool
     #[pyo3(
-        text_signature = "($self, mode, out = None, add_single = False) -> sudachipy.MorphemeList"
+        text_signature="(self, /, mode, out=None, add_single=False) -> MorphemeList"
     )]
     fn split<'py>(
         &'py self,
@@ -402,19 +402,19 @@ impl PyMorpheme {
     }
 
     /// Returns whether if this is out of vocabulary word
-    #[pyo3(text_signature = "($self) -> bool")]
+    #[pyo3(text_signature="(self, /) -> bool")]
     fn is_oov(&self, py: Python) -> bool {
         self.morph(py).is_oov()
     }
 
     /// Returns word id of this word in the dictionary
-    #[pyo3(text_signature = "($self) -> int")]
+    #[pyo3(text_signature="(self, /) -> int")]
     fn word_id(&self, py: Python) -> u32 {
         self.morph(py).word_id().as_raw()
     }
 
     /// Returns the dictionary id which this word belongs
-    #[pyo3(text_signature = "($self) -> int")]
+    #[pyo3(text_signature="(self, /) -> int")]
     fn dictionary_id(&self, py: Python) -> i32 {
         let word_id = self.morph(py).word_id();
         if word_id.is_oov() {
@@ -425,7 +425,7 @@ impl PyMorpheme {
     }
 
     /// Returns the list of synonym group ids
-    #[pyo3(text_signature = "($self) -> List[int]")]
+    #[pyo3(text_signature="(self, /) -> List[int]")]
     fn synonym_group_ids<'py>(&'py self, py: Python<'py>) -> &'py PyList {
         let mref = self.morph(py);
         let ids = mref.get_word_info().synonym_group_ids();
@@ -433,7 +433,7 @@ impl PyMorpheme {
     }
 
     /// Returns the word info
-    #[pyo3(text_signature = "($self) -> sudachipy.WordInfo")]
+    #[pyo3(text_signature="(self, /) -> WordInfo")]
     fn get_word_info(&self, py: Python) -> PyResult<PyWordInfo> {
         let cat = PyModule::import(py, "builtins")?.getattr("DeprecationWarning")?;
         PyErr::warn(py, cat, "Users should not touch the raw WordInfo.", 1)?;
diff --git a/python/src/tokenizer.rs b/python/src/tokenizer.rs
index fe3b66d3..16f2482a 100644
--- a/python/src/tokenizer.rs
+++ b/python/src/tokenizer.rs
@@ -71,7 +71,10 @@ impl PySplitMode {
     ///
     /// :param mode: str to parse. One of [A,B,C] in captital or lower case.
     #[new]
-    #[pyo3(signature=(mode=None, *))]
+    #[pyo3(
+        text_signature="(mode=None) -> SplitMode",
+        signature=(mode=None)
+    )]
     fn new(mode: Option<&str>) -> PyResult<PySplitMode> {
         let mode = match mode {
             Some(m) => m,
@@ -133,8 +136,8 @@ impl PyTokenizer {
     /// :type mode: sudachipy.SplitMode
     /// :type out: sudachipy.MorphemeList
     #[pyo3(
-        text_signature = "($self, text: str, mode = None, logger = None, out = None) -> sudachipy.MorphemeList",
-        signature = (text, mode = None, logger = None, out = None)
+        text_signature="(self, /, text: str, mode=None, logger=None, out=None) -> MorphemeList",
+        signature=(text, mode=None, logger=None, out=None)
     )]
     #[allow(unused_variables)]
     fn tokenize<'py>(

From dfc87edf656348474fef8b6aa46e8548e4895c5b Mon Sep 17 00:00:00 2001
From: mh-northlander <mh.northlander+github@gmail.com>
Date: Mon, 8 Jul 2024 11:05:04 +0900
Subject: [PATCH 4/9] add import of PosMatcher

---
 python/py_src/sudachipy/__init__.py | 1 +
 python/src/lib.rs                   | 1 +
 python/src/pos_matcher.rs           | 6 ++++--
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/python/py_src/sudachipy/__init__.py b/python/py_src/sudachipy/__init__.py
index bdf67f40..fb551538 100644
--- a/python/py_src/sudachipy/__init__.py
+++ b/python/py_src/sudachipy/__init__.py
@@ -5,6 +5,7 @@
     MorphemeList,
     Morpheme,
     WordInfo,
+    PosMatcher,
 )
 from .config import Config
 from . import errors
diff --git a/python/src/lib.rs b/python/src/lib.rs
index 4887a737..56a950c2 100644
--- a/python/src/lib.rs
+++ b/python/src/lib.rs
@@ -37,6 +37,7 @@ fn sudachipy(_py: Python, m: &PyModule) -> PyResult<()> {
     m.add_class::<morpheme::PyMorphemeListWrapper>()?;
     m.add_class::<morpheme::PyMorpheme>()?;
     m.add_class::<word_info::PyWordInfo>()?;
+    m.add_class::<pos_matcher::PyPosMatcher>()?;
     build::register_functions(m)?;
     Ok(())
 }
diff --git a/python/src/pos_matcher.rs b/python/src/pos_matcher.rs
index a849edf5..586c7d90 100644
--- a/python/src/pos_matcher.rs
+++ b/python/src/pos_matcher.rs
@@ -29,7 +29,9 @@ use crate::morpheme::PyMorpheme;
 /// A part-of-speech matcher which checks if a morpheme belongs to a set of part of speech.
 ///
 /// Create using Dictionary.pos_matcher method.
-#[pyclass(name = "PosMatcher", module = "sudachipy")]
+///
+/// Use `__call__(m: Morpheme) -> bool` to check if given morpheme matches the PosMatcher.
+#[pyclass(module = "sudachipy.pos_matcher", name = "PosMatcher")]
 pub struct PyPosMatcher {
     matcher: PosMatcher,
     dic: Arc<PyDicData>,
@@ -193,7 +195,7 @@ impl PyPosMatcher {
 }
 
 /// An iterator over POS tuples in the PosPatcher
-#[pyclass(name = "PosMatcherIterator", module = "sudachipy")]
+#[pyclass(module = "sudachipy.pos_matcher", name = "PosMatcherIterator")]
 pub struct PyPosIter {
     data: Vec<u16>,
     dic: Arc<PyDicData>,

From 8c35516a1f20fee8608401b1aea694063458c061 Mon Sep 17 00:00:00 2001
From: mh-northlander <mh.northlander+github@gmail.com>
Date: Mon, 8 Jul 2024 11:56:02 +0900
Subject: [PATCH 5/9] sync pyi and rs

---
 python/py_src/sudachipy/sudachipy.pyi | 104 ++++++++++++++++----------
 python/src/dictionary.rs              |  54 ++++++-------
 python/src/morpheme.rs                |  48 +++++++-----
 python/src/pos_matcher.rs             |  10 ++-
 python/src/tokenizer.rs               |  10 +--
 5 files changed, 136 insertions(+), 90 deletions(-)

diff --git a/python/py_src/sudachipy/sudachipy.pyi b/python/py_src/sudachipy/sudachipy.pyi
index 16c416f6..705b62af 100644
--- a/python/py_src/sudachipy/sudachipy.pyi
+++ b/python/py_src/sudachipy/sudachipy.pyi
@@ -1,6 +1,20 @@
+#   Copyright (c) 2024 Works Applications Co., Ltd.
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
 from typing import ClassVar, Iterator, List, Tuple, Union, Callable, Iterable, Optional, Literal, Set
 from .config import Config
 
+# Part Of Speech
 POS = Tuple[str, str, str, str, str, str]
 # POS element
 PE = Optional[str]
@@ -14,6 +28,8 @@ PartialPOS = Union[
     Tuple[()],
 ]
 
+# Fields that can be specified for partial dictionary loading.
+# See https://worksapplications.github.io/sudachi.rs/python/topics/subsetting.html.
 FieldSet = Optional[Set[Literal["surface", "pos", "normalized_form", "dictionary_form", "reading_form",
                                 "word_structure", "split_a", "split_b", "synonym_group_id"]]]
 
@@ -23,9 +39,7 @@ class SplitMode:
     Unit to split text.
 
     A == short mode
-
     B == middle mode
-
     C == long mode
     """
 
@@ -36,8 +50,9 @@ class SplitMode:
     @classmethod
     def __init__(cls, mode: str = "C") -> None:
         """
-        Creates a split mode from a string value
-        :param mode: string representation of the split mode
+        Creates a split mode from a string value.
+
+        :param mode: string representation of the split mode. One of [A,B,C] in capital or lower case.
         """
         ...
 
@@ -54,14 +69,15 @@ class Dictionary:
         Creates a sudachi dictionary.
 
         If both config.systemDict and dict are not given, `sudachidict_core` is used.
-        If both config.systemDict and dict are given, dict_type is used.
+        If both config.systemDict and dict are given, dict is used.
+        If dict is an absolute path to a file, it is used as a dictionary.
 
-        :param config_path: path to the configuration JSON file, config json as a string, or a [sudachipy.config.Config] object
-        :param config: alias to config_path, only one of them can be specified at the same time
-        :param resource_dir: path to the resource directory folder
+        :param config_path: path to the configuration JSON file, config json as a string, or a [sudachipy.Config] object.
+        :param config: alias to config_path, only one of them can be specified at the same time.
+        :param resource_dir: path to the resource directory folder.
         :param dict: type of pre-packaged system dictionary, referring to sudachidict_<dict> packages on PyPI: https://pypi.org/search/?q=sudachidict.
             Also, can be an _absolute_ path to a compiled dictionary file.
-        :param dict_type: deprecated alias to dict
+        :param dict_type: deprecated alias to dict.
         """
         ...
 
@@ -77,11 +93,11 @@ class Dictionary:
                *,
                projection: str = None) -> Tokenizer:
         """
-        Creates a Sudachi Tokenizer.
+        Creates a sudachi tokenizer.
 
         :param mode: sets the analysis mode for this Tokenizer
         :param fields: load only a subset of fields.
-            See https://worksapplications.github.io/sudachi.rs/python/topics/subsetting.html
+            See https://worksapplications.github.io/sudachi.rs/python/topics/subsetting.html.
         :param projection: Projection override for created Tokenizer. See Config.projection for values.
         """
         ...
@@ -91,21 +107,21 @@ class Dictionary:
         Creates a new POS matcher.
 
         If target is a function, then it must return whether a POS should match or not.
-        If target a list, it should contain partially specified POS.
-        By partially specified it means that it is possible to omit POS fields or
-        use None as a sentinel value that matches any POS.
+        If target is a list, it should contain partially specified POS.
+        "Partially specified" means that POS fields may be omitted, or None may be used as a sentinel value that matches any POS.
 
         For example, ('名詞',) will match any noun and
         (None, None, None, None, None, '終止形') will match any word in 終止形 conjugation form.
 
-        :param target: can be either a function or a list of POS tuples.
+        :param target: can be either a list of POS partial tuples or a callable which maps POS to bool.
         """
         ...
 
     def pre_tokenizer(self,
                       mode: Union[SplitMode, Literal["A", "B", "C"]] = "C",
                       fields: FieldSet = None,
-                      handler: Optional[Callable[[int, object, MorphemeList], list]] = None,
+                      handler: Optional[Callable[[
+                          int, object, MorphemeList], list]] = None,
                       *,
                       projection: str = None) -> object:
         """
@@ -113,10 +129,10 @@ class Dictionary:
         Requires package `tokenizers` to be installed.
 
         :param mode: Use this split mode (C by default)
-        :param fields: ask Sudachi to load only a subset of fields. See https://worksapplications.github.io/sudachi.rs/python/topics/subsetting.html
-        :param handler: custom callable to transform MorphemeList into list of tokens. See https://github.com/huggingface/tokenizers/blob/master/bindings/python/examples/custom_components.py
-            First two parameters are the index (int) and HuggingFace NormalizedString.
-            The handler must return a List[NormalizedString]. By default, just segment the tokens.
+        :param fields: ask Sudachi to load only a subset of fields. See https://worksapplications.github.io/sudachi.rs/python/topics/subsetting.html.
+        :param handler: a custom callable to transform MorphemeList into list of tokens. If None, simply use surface as token representations.
+            It should be a `function(index: int, original: NormalizedString, morphemes: MorphemeList) -> List[NormalizedString]`.
+            See https://github.com/huggingface/tokenizers/blob/master/bindings/python/examples/custom_components.py.
         :param projection: Projection override for created Tokenizer. See Config.projection for values.
         """
         ...
@@ -126,7 +142,7 @@ class Dictionary:
         Returns POS with the given id.
 
         :param pos_id: POS id
-        :return: POS tuple with the given id.
+        :return: POS tuple with the given id or None for non existing id.
         """
         ...
 
@@ -197,7 +213,8 @@ class Morpheme:
 
     def part_of_speech(self) -> POS:
         """
-        Returns the part of speech.
+        Returns the part of speech as a six-element tuple.
+        Tuple elements are four POS levels, conjugation type and conjugation form.
         """
         ...
 
@@ -217,8 +234,8 @@ class Morpheme:
         """
         Returns sub-morphemes in the provided split mode.
 
-        :param mode: mode of new split
-        :param out: write results to this MorhpemeList instead of creating new one
+        :param mode: mode of new split.
+        :param out: write results to this MorphemeList instead of creating new one.
             See https://worksapplications.github.io/sudachi.rs/python/topics/out_param.html for
             more information on output parameters.
             Returned MorphemeList will be invalidated if this MorphemeList is used as an output parameter.
@@ -230,6 +247,7 @@ class Morpheme:
     def surface(self) -> str:
         """
         Returns the substring of input text corresponding to the morpheme, or a projection if one is configured.
+
         See `Config.projection`.
         """
         ...
@@ -237,6 +255,7 @@ class Morpheme:
     def raw_surface(self) -> str:
         """
         Returns the substring of input text corresponding to the morpheme regardless the configured projection.
+
         See `Config.projection`.
         """
         ...
@@ -255,7 +274,7 @@ class Morpheme:
 
     def __len__(self) -> int:
         """
-        Returns morpheme length in codepoints
+        Returns morpheme length in codepoints.
         """
 
 
@@ -293,6 +312,11 @@ class MorphemeList:
 
 
 class Tokenizer:
+    """
+    A sudachi tokenizer
+
+    Create using Dictionary.create method.
+    """
     SplitMode: ClassVar[SplitMode] = ...
     @classmethod
     def __init__(cls) -> None: ...
@@ -303,13 +327,12 @@ class Tokenizer:
         """
         Break text into morphemes.
 
-        SudachiPy 0.5.* had logger parameter, it is accepted, but ignored.
-
-        :param text: text to analyze
+        :param text: text to analyze.
         :param mode: analysis mode.
             This parameter is deprecated.
             Pass the analysis mode at the Tokenizer creation time and create different tokenizers for different modes.
             If you need multi-level splitting, prefer using :py:meth:`Morpheme.split` method instead.
+        :param logger: Arg for v0.5.* compatibility. Ignored.
         :param out: tokenization results will be written into this MorphemeList, a new one will be created instead.
             See https://worksapplications.github.io/sudachi.rs/python/topics/out_param.html for details.
         """
@@ -342,41 +365,44 @@ class WordInfo:
 
 
 class PosMatcher:
+    """
+    A part-of-speech matcher which checks if a morpheme belongs to a set of parts of speech.
+
+    Create using Dictionary.pos_matcher method.
+    """
+
     def __iter__(self) -> Iterator[POS]: ...
     def __len__(self) -> int: ...
 
     def __call__(self, m: Morpheme) -> bool:
         """
-        Checks whether a morpheme has matching POS
-        :param m: morpheme
-        :return: if morpheme has matching POS
+        Checks whether a morpheme has matching POS.
+
+        :param m: morpheme.
+        :return: if morpheme has matching POS.
         """
         ...
 
     def __or__(self, other: PosMatcher) -> PosMatcher:
         """
-        Returns a POS matcher which matches a POS if any of two matchers would match it
-        :return: PosMatcher
+        Returns a POS matcher which matches a POS if any of two matchers would match it.
         """
         ...
 
     def __and__(self, other: PosMatcher) -> PosMatcher:
         """
-        Returns a POS matcher which matches a POS if both matchers would match it at the same time
-        :return: PosMatcher
+        Returns a POS matcher which matches a POS if both matchers would match it at the same time.
         """
         ...
 
     def __sub__(self, other: PosMatcher) -> PosMatcher:
         """
-        Returns a POS matcher which matches a POS if self would match the POS and other would not match the POS
-        :return: PosMatcher
+        Returns a POS matcher which matches a POS if self would match the POS and other would not match the POS.
         """
         ...
 
     def __invert__(self) -> PosMatcher:
         """
-        Returns a POS matcher which matches all POS tags except ones defined in the current POS matcher
-        :return: PosMatcher
+        Returns a POS matcher which matches all POS tags except ones defined in the current POS matcher.
         """
         ...
diff --git a/python/src/dictionary.rs b/python/src/dictionary.rs
index e208492f..5f1e8f65 100644
--- a/python/src/dictionary.rs
+++ b/python/src/dictionary.rs
@@ -80,11 +80,12 @@ impl PyDicData {
 
 /// A sudachi dictionary.
 ///
-/// If both config.systemDict and dict_type are not given, `sudachidict_core` is used.
-/// If both config.systemDict and dict_type are given, dict_type is used.
+/// If both config.systemDict and dict are not given, `sudachidict_core` is used.
+/// If both config.systemDict and dict are given, dict is used.
 /// If dict is an absolute path to a file, it is used as a dictionary.
 ///
-/// :param config_path: path to the configuration JSON file.
+/// :param config_path: path to the configuration JSON file, config json as a string, or a [sudachipy.Config] object.
+/// :param config: alias to config_path, only one of them can be specified at the same time.
 /// :param resource_dir: path to the resource directory folder.
 /// :param dict: type of pre-packaged dictionary, referring to sudachidict_<dict_type> packages on PyPI: https://pypi.org/search/?q=sudachidict.
 ///     Also, can be an _absolute_ path to a compiled dictionary file.
@@ -100,11 +101,12 @@ pub struct PyDictionary {
 impl PyDictionary {
     /// Creates a sudachi dictionary.
     ///
-    /// If both config.systemDict and dict_type are not given, `sudachidict_core` is used.
-    /// If both config.systemDict and dict_type are given, dict_type is used.
+    /// If both config.systemDict and dict are not given, `sudachidict_core` is used.
+    /// If both config.systemDict and dict are given, dict is used.
     /// If dict is an absolute path to a file, it is used as a dictionary.
     ///
-    /// :param config_path: path to the configuration JSON file.
+    /// :param config_path: path to the configuration JSON file, config json as a string, or a [sudachipy.Config] object.
+    /// :param config: alias to config_path, only one of them can be specified at the same time.
     /// :param resource_dir: path to the resource directory folder.
     /// :param dict: type of pre-packaged dictionary, referring to sudachidict_<dict_type> packages on PyPI: https://pypi.org/search/?q=sudachidict.
     ///     Also, can be an _absolute_ path to a compiled dictionary file.
@@ -229,11 +231,12 @@ impl PyDictionary {
 
     /// Creates a sudachi tokenizer.
     ///
-    /// :param mode: tokenizer's default split mode (C by default).
+    /// :param mode: sets the analysis mode for this Tokenizer
     /// :param fields: load only a subset of fields.
-    ///     See https://worksapplications.github.io/sudachi.rs/python/topics/subsetting.html
+    ///     See https://worksapplications.github.io/sudachi.rs/python/topics/subsetting.html.
+    /// :param projection: Projection override for created Tokenizer. See Config.projection for values.
     #[pyo3(
-        text_signature="(self, /, mode=None, fields=None, *, projection=None) -> Tokenizer",
+        text_signature="(self, /, mode=SplitMode.C, fields=None, *, projection=None) -> Tokenizer",
         signature=(mode=None, fields=None, *, projection=None)
     )]
     fn create<'py>(
@@ -267,14 +270,13 @@ impl PyDictionary {
     /// Creates a POS matcher object
     ///
     /// If target is a function, then it must return whether a POS should match or not.
-    /// If target a list, it should contain partially specified POS.
-    /// By partially specified it means that it is possible to omit POS fields or
-    /// use None as a sentinel value that matches any POS.
+    /// If target is a list, it should contain partially specified POS.
+    /// By partially specified it means that it is possible to omit POS fields or use None as a sentinel value that matches any POS.
     ///
     /// For example, ('名詞',) will match any noun and
     /// (None, None, None, None, None, '終止形‐一般') will match any word in 終止形‐一般 conjugation form.
     ///
-    /// :param target: can be either a callable or list of POS partial tuples
+    /// :param target: can be either a list of POS partial tuples or a callable which maps POS to bool.
     #[pyo3(text_signature="(self, /, target) -> PosMatcher")]
     fn pos_matcher<'py>(&'py self, py: Python<'py>, target: &PyAny) -> PyResult<PyPosMatcher> {
         PyPosMatcher::create(py, self.dictionary.as_ref().unwrap(), target)
@@ -285,15 +287,13 @@ impl PyDictionary {
     ///
     /// :param mode: Use this split mode (C by default)
     /// :param fields: ask Sudachi to load only a subset of fields.
-    ///     See https://worksapplications.github.io/sudachi.rs/python/topics/subsetting.html
-    /// :param handler: a custom callable to transform MorphemeList into list of tokens.
-    ///     It should be should be a `function(index: int, original: NormalizedString, morphemes: MorphemeList) -> List[NormalizedString]`.
-    ///     See https://github.com/huggingface/tokenizers/blob/master/bindings/python/examples/custom_components.py
-    ///     If nothing was passed, simply use surface as token representations.
-    /// :param projection: projection mode for a created PreTokenizer.
-    ///     See :class:`sudachipy.config.Config` object documentation for supported projections.
+    ///     See https://worksapplications.github.io/sudachi.rs/python/topics/subsetting.html.
+    /// :param handler: a custom callable to transform MorphemeList into list of tokens. If None, simply use surface as token representations.
+    ///     It should be a `function(index: int, original: NormalizedString, morphemes: MorphemeList) -> List[NormalizedString]`.
+    ///     See https://github.com/huggingface/tokenizers/blob/master/bindings/python/examples/custom_components.py.
+    /// :param projection: Projection override for created Tokenizer. See Config.projection for values.
     ///
-    /// :type mode: sudachipy.SplitMode
+    /// :type mode: SplitMode
     /// :type fields: Set[str]
     #[pyo3(
         text_signature="(self, /, mode=None, fields=None, handler=None, *, projection=None) -> tokenizers.PreTokenizer",
@@ -350,8 +350,9 @@ impl PyDictionary {
     /// :param surface: find all morphemes with the given surface
     /// :param out: if passed, reuse the given morpheme list instead of creating a new one.
     ///     See https://worksapplications.github.io/sudachi.rs/python/topics/out_param.html for details.
+    ///
     /// :type surface: str
-    /// :type out: sudachipy.MorphemeList
+    /// :type out: MorphemeList
     #[pyo3(text_signature="(self, /, surface, out=None) -> MorphemeList")]
     fn lookup<'p>(
         &'p self,
@@ -379,14 +380,17 @@ impl PyDictionary {
         Ok(l)
     }
 
-    /// Close this dictionary
+    /// Close this dictionary.
     #[pyo3(text_signature="(self, /) -> ()")]
     fn close(&mut self) {
         self.dictionary = None;
     }
 
-    /// Get POS Tuple by its id
-    #[pyo3(text_signature="(self, /, pos_id: int) -> tuple[str, str, str, str, str, str]")]
+    /// Returns POS with the given id.
+    ///
+    /// :param pos_id: POS id
+    /// :return: POS tuple with the given id or None for non existing id.
+    #[pyo3(text_signature="(self, /, pos_id: int) -> tuple[str, str, str, str, str, str] | None")]
     fn pos_of<'p>(&'p self, py: Python<'p>, pos_id: usize) -> Option<&'p PyTuple> {
         let dic = self.dictionary.as_ref().unwrap();
         dic.pos.get(pos_id).map(|x| x.as_ref(py))
diff --git a/python/src/morpheme.rs b/python/src/morpheme.rs
index f1aa204d..0a18f6c4 100644
--- a/python/src/morpheme.rs
+++ b/python/src/morpheme.rs
@@ -31,7 +31,10 @@ use crate::word_info::PyWordInfo;
 pub(crate) type PyMorphemeList = MorphemeList<Arc<PyDicData>>;
 pub(crate) type PyProjector = Option<Arc<dyn MorphemeProjection + Send + Sync>>;
 
-/// A list of morphemes
+/// A list of morphemes.
+///
+/// An object can not be instantiated manually.
+/// Use Tokenizer.tokenize("") to create an empty morpheme list.
 #[pyclass(module = "sudachipy.morphemelist", name = "MorphemeList")]
 pub struct PyMorphemeListWrapper {
     /// use `internal()` function instead
@@ -89,7 +92,7 @@ impl PyMorphemeListWrapper {
 
 #[pymethods]
 impl PyMorphemeListWrapper {
-    /// Returns an empty morpheme list with dictionary
+    /// Returns an empty morpheme list with dictionary.
     #[classmethod]
     #[pyo3(text_signature="(dict: Dictionary) -> MorphemeList")]
     fn empty(_cls: &PyType, py: Python, dict: &PyDictionary) -> PyResult<Self> {
@@ -109,7 +112,7 @@ impl PyMorphemeListWrapper {
         })
     }
 
-    /// Returns the total cost of the path
+    /// Returns the total cost of the path.
     #[pyo3(text_signature="(self, /) -> int")]
     fn get_internal_cost(&self, py: Python) -> i32 {
         self.internal(py).get_internal_cost()
@@ -278,21 +281,23 @@ impl PyMorpheme {
 
 #[pymethods]
 impl PyMorpheme {
-    /// Returns the begin index of this in the input text
+    /// Returns the begin index of this in the input text.
     #[pyo3(text_signature="(self, /) -> int")]
     fn begin(&self, py: Python) -> usize {
         // call codepoint version
         self.morph(py).begin_c()
     }
 
-    /// Returns the end index of this in the input text
+    /// Returns the end index of this in the input text.
     #[pyo3(text_signature="(self, /) -> int")]
     fn end(&self, py: Python) -> usize {
         // call codepoint version
         self.morph(py).end_c()
     }
 
-    /// Returns the substring of input text corresponding to the morpheme, or a projection if one is configured
+    /// Returns the substring of input text corresponding to the morpheme, or a projection if one is configured.
+    ///
+    /// See `Config.projection`.
     #[pyo3(text_signature="(self, /) -> str")]
     fn surface<'py>(&'py self, py: Python<'py>) -> &'py PyString {
         let list = self.list(py);
@@ -303,14 +308,16 @@ impl PyMorpheme {
         }
     }
 
-    /// Returns the substring of input text corresponding to the morpheme regardless the configured projection
+    /// Returns the substring of input text corresponding to the morpheme regardless the configured projection.
+    ///
+    /// See `Config.projection`.
     #[pyo3(text_signature="(self, /) -> str")]
     fn raw_surface<'py>(&'py self, py: Python<'py>) -> &'py PyString {
         PyString::new(py, self.morph(py).surface().deref())
     }
 
     /// Returns the part of speech as a six-element tuple.
-    /// Tuple elements are four POS levels, conjugation type and conjugation form.    
+    /// Tuple elements are four POS levels, conjugation type and conjugation form.
     #[pyo3(text_signature="(self, /) -> tuple[str, str, str, str, str, str]")]
     fn part_of_speech<'py>(&'py self, py: Python<'py>) -> Py<PyTuple> {
         let pos_id = self.part_of_speech_id(py);
@@ -321,25 +328,25 @@ impl PyMorpheme {
             .clone_ref(py)
     }
 
-    /// Returns the id of the part of speech in the dictionary
+    /// Returns the id of the part of speech in the dictionary.
     #[pyo3(text_signature="(self, /) -> int")]
     pub fn part_of_speech_id(&self, py: Python) -> u16 {
         self.morph(py).part_of_speech_id()
     }
 
-    /// Returns the dictionary form
+    /// Returns the dictionary form.
     #[pyo3(text_signature="(self, /) -> str")]
     fn dictionary_form<'py>(&'py self, py: Python<'py>) -> PyObject {
         self.morph(py).get_word_info().dictionary_form().into_py(py)
     }
 
-    /// Returns the normalized form
+    /// Returns the normalized form.
     #[pyo3(text_signature="(self, /) -> str")]
     fn normalized_form<'py>(&'py self, py: Python<'py>) -> PyObject {
         self.morph(py).get_word_info().normalized_form().into_py(py)
     }
 
-    /// Returns the reading form
+    /// Returns the reading form.
     #[pyo3(text_signature="(self, /) -> str")]
     fn reading_form<'py>(&'py self, py: Python<'py>) -> PyObject {
         self.morph(py).get_word_info().reading_form().into_py(py)
@@ -347,13 +354,14 @@ impl PyMorpheme {
 
     /// Returns sub-morphemes in the provided split mode.
     ///
-    /// :param mode: mode of new split
-    /// :param out: write results to this MorhpemeList instead of creating new one
+    /// :param mode: mode of new split.
+    /// :param out: write results to this MorhpemeList instead of creating new one.
     ///     See https://worksapplications.github.io/sudachi.rs/python/topics/out_param.html for
     ///     more information on output parameters.
     ///     Returned MorphemeList will be invalidated if this MorphemeList is used as an output parameter.
     /// :param add_single: return lists with the current morpheme if the split hasn't produced any elements.
     ///     When False is passed, empty lists are returned instead.
+    ///
     /// :type mode: sudachipy.SplitMode    
     /// :type out: Optional[sudachipy.MorphemeList]
     /// :type add_single: bool
@@ -401,19 +409,19 @@ impl PyMorpheme {
         Ok(out_cell)
     }
 
-    /// Returns whether if this is out of vocabulary word
+    /// Returns whether if this is out of vocabulary word.
     #[pyo3(text_signature="(self, /) -> bool")]
     fn is_oov(&self, py: Python) -> bool {
         self.morph(py).is_oov()
     }
 
-    /// Returns word id of this word in the dictionary
+    /// Returns word id of this word in the dictionary.
     #[pyo3(text_signature="(self, /) -> int")]
     fn word_id(&self, py: Python) -> u32 {
         self.morph(py).word_id().as_raw()
     }
 
-    /// Returns the dictionary id which this word belongs
+    /// Returns the dictionary id which this word belongs.
     #[pyo3(text_signature="(self, /) -> int")]
     fn dictionary_id(&self, py: Python) -> i32 {
         let word_id = self.morph(py).word_id();
@@ -424,7 +432,7 @@ impl PyMorpheme {
         }
     }
 
-    /// Returns the list of synonym group ids
+    /// Returns the list of synonym group ids.
     #[pyo3(text_signature="(self, /) -> List[int]")]
     fn synonym_group_ids<'py>(&'py self, py: Python<'py>) -> &'py PyList {
         let mref = self.morph(py);
@@ -432,7 +440,7 @@ impl PyMorpheme {
         PyList::new(py, ids)
     }
 
-    /// Returns the word info
+    /// Returns the word info.
     #[pyo3(text_signature="(self, /) -> WordInfo")]
     fn get_word_info(&self, py: Python) -> PyResult<PyWordInfo> {
         let cat = PyModule::import(py, "builtins")?.getattr("DeprecationWarning")?;
@@ -441,7 +449,7 @@ impl PyMorpheme {
         Ok(self.morph(py).get_word_info().clone().into())
     }
 
-    /// Returns morpheme length in codepoints    
+    /// Returns morpheme length in codepoints.
     pub fn __len__(&self, py: Python) -> usize {
         let m = self.morph(py);
         m.end_c() - m.begin_c()
diff --git a/python/src/pos_matcher.rs b/python/src/pos_matcher.rs
index 586c7d90..16d1fa56 100644
--- a/python/src/pos_matcher.rs
+++ b/python/src/pos_matcher.rs
@@ -30,7 +30,7 @@ use crate::morpheme::PyMorpheme;
 ///
 /// Create using Dictionary.pos_matcher method.
 ///
-/// Use `__call__(m: Morpheme) -> bool` to check if given morpheme matches the PosMatcher.
+/// Use `__call__(m: Morpheme) -> bool` to check whether a morpheme has matching POS.
 #[pyclass(module = "sudachipy.pos_matcher", name = "PosMatcher")]
 pub struct PyPosMatcher {
     matcher: PosMatcher,
@@ -123,6 +123,10 @@ impl PyPosMatcher {
 
 #[pymethods]
 impl PyPosMatcher {
+    /// Checks whether a morpheme has matching POS.
+    ///
+    /// :param m: morpheme.
+    /// :return: if morpheme has matching POS.
     pub fn __call__<'py>(&'py self, py: Python<'py>, m: &'py PyMorpheme) -> bool {
         let pos_id = m.part_of_speech_id(py);
         self.matcher.matches_id(pos_id)
@@ -140,6 +144,7 @@ impl PyPosMatcher {
         self.matcher.num_entries()
     }
 
+    /// Returns a POS matcher which matches a POS if any of two matchers would match it.
     pub fn __or__(&self, other: &Self) -> Self {
         assert_eq!(
             Arc::as_ptr(&self.dic),
@@ -153,6 +158,7 @@ impl PyPosMatcher {
         }
     }
 
+    /// Returns a POS matcher which matches a POS if both matchers would match it at the same time.
     pub fn __and__(&self, other: &Self) -> Self {
         assert_eq!(
             Arc::as_ptr(&self.dic),
@@ -166,6 +172,7 @@ impl PyPosMatcher {
         }
     }
 
+    /// Returns a POS matcher which matches a POS if self would match the POS and other would not match the POS.
     pub fn __sub__(&self, other: &Self) -> Self {
         assert_eq!(
             Arc::as_ptr(&self.dic),
@@ -179,6 +186,7 @@ impl PyPosMatcher {
         }
     }
 
+    /// Returns a POS matcher which matches all POS tags except ones defined in the current POS matcher.
     pub fn __invert__(&self) -> Self {
         let max_id = self.dic.pos.len();
         // map -> filter chain is needed to handle exactly u16::MAX POS entries
diff --git a/python/src/tokenizer.rs b/python/src/tokenizer.rs
index 16f2482a..8c7c1c84 100644
--- a/python/src/tokenizer.rs
+++ b/python/src/tokenizer.rs
@@ -35,7 +35,7 @@ use crate::morpheme::{PyMorphemeListWrapper, PyProjector};
 /// B == middle mode
 /// C == long mode
 ///
-/// :param mode: str to parse. One of [A,B,C] in captital or lower case.
+/// :param mode: string representation of the split mode. One of [A,B,C] in captital or lower case.
 #[pyclass(module = "sudachipy.tokenizer", name = "SplitMode", frozen)]
 #[derive(Clone, PartialEq, Eq, Copy, Debug)]
 #[repr(u8)]
@@ -88,7 +88,7 @@ impl PySplitMode {
     }
 }
 
-/// Sudachi Tokenizer
+/// A sudachi tokenizer
 ///
 /// Create using Dictionary.create method.
 #[pyclass(module = "sudachipy.tokenizer", name = "Tokenizer")]
@@ -123,15 +123,15 @@ impl PyTokenizer {
 
     /// Break text into morphemes.
     ///
-    /// SudachiPy 0.5.* had logger parameter, it is accepted, but ignored.
-    ///
-    /// :param text: text to analyze
+    /// :param text: text to analyze.
     /// :param mode: analysis mode.
     ///    This parameter is deprecated.
     ///    Pass the analysis mode at the Tokenizer creation time and create different tokenizers for different modes.
     ///    If you need multi-level splitting, prefer using :py:meth:`Morpheme.split` method instead.
+    /// :param logger: Arg for v0.5.* compatibility. Ignored.
     /// :param out: tokenization results will be written into this MorphemeList, a new one will be created instead.
     ///    See https://worksapplications.github.io/sudachi.rs/python/topics/out_param.html for details.
+    ///
     /// :type text: str
     /// :type mode: sudachipy.SplitMode
     /// :type out: sudachipy.MorphemeList

From 706a573311551542cc726486daabfb10bf2c5966 Mon Sep 17 00:00:00 2001
From: mh-northlander <mh.northlander+github@gmail.com>
Date: Mon, 8 Jul 2024 13:53:56 +0900
Subject: [PATCH 6/9] add type fields for rs

---
 python/src/build.rs       | 26 ++++++++++++++++++++++--
 python/src/dictionary.rs  | 36 ++++++++++++++++++++++++++-------
 python/src/morpheme.rs    | 42 +++++++++++++++++++--------------------
 python/src/pos_matcher.rs |  4 +++-
 python/src/tokenizer.rs   | 14 +++++++++----
 5 files changed, 86 insertions(+), 36 deletions(-)

diff --git a/python/src/build.rs b/python/src/build.rs
index 350f2fb3..2b2ce94f 100644
--- a/python/src/build.rs
+++ b/python/src/build.rs
@@ -59,8 +59,19 @@ fn create_file(p: &Path) -> std::io::Result<File> {
 }
 
 /// Build system dictionary from matrix and lexicons.
+///
+/// :param matrix: Path to the matrix file.
+/// :param lex: List of paths to lexicon files.
+/// :param output: Path to output built dictionary.
+/// :param description: A description text to embed in the dictionary.
+/// :return: A build report, list of (part, size, time).
+/// 
+/// :type matrix: pathlib.Path | str | bytes
+/// :type lex: list[pathlib.Path | str | bytes]
+/// :type output: pathlib.Path | str
+/// :type description: str
 #[pyfunction]
-#[pyo3(text_signature="(matrix, lex, output, description=None) -> list[tuple[str, int, float]]")]
+#[pyo3(text_signature = "(matrix, lex, output, description=None) -> list[tuple[str, int, float]]")]
 fn build_system_dic<'p>(
     py: Python<'p>,
     matrix: &'p PyAny,
@@ -89,8 +100,19 @@ fn build_system_dic<'p>(
 }
 
 /// Build user dictionary from lexicons based on the given system dictionary.
+///
+/// :param system: Path to the system dictionary.
+/// :param lex: List of paths to lexicon files.
+/// :param output: Path to output built dictionary.
+/// :param description: A description text to embed in the dictionary.
+/// :return: A build report, list of (part, size, time).
+/// 
+/// :type system: pathlib.Path | str
+/// :type lex: list[pathlib.Path | str | bytes]
+/// :type output: pathlib.Path | str
+/// :type description: str
 #[pyfunction]
-#[pyo3(text_signature="(system, lex, output, description=None) -> list[tuple[str, int, float]]")]
+#[pyo3(text_signature = "(system, lex, output, description=None) -> list[tuple[str, int, float]]")]
 fn build_user_dic<'p>(
     py: Python<'p>,
     system: &'p PyAny,
diff --git a/python/src/dictionary.rs b/python/src/dictionary.rs
index 5f1e8f65..2b5c849b 100644
--- a/python/src/dictionary.rs
+++ b/python/src/dictionary.rs
@@ -90,6 +90,12 @@ impl PyDicData {
 /// :param dict: type of pre-packaged dictionary, referring to sudachidict_<dict_type> packages on PyPI: https://pypi.org/search/?q=sudachidict.
 ///     Also, can be an _absolute_ path to a compiled dictionary file.
 /// :param dict_type: deprecated alias to dict.
+///
+/// :type config_path: Config | pathlib.Path | str | None
+/// :type config: Config | pathlib.Path | str | None
+/// :type resource_dir: pathlib.Path | str | None
+/// :type dict: pathlib.Path | str | None
+/// :type dict_type: pathlib.Path | str | None
 #[pyclass(module = "sudachipy.dictionary", name = "Dictionary")]
 #[derive(Clone)]
 pub struct PyDictionary {
@@ -111,6 +117,12 @@ impl PyDictionary {
     /// :param dict: type of pre-packaged dictionary, referring to sudachidict_<dict_type> packages on PyPI: https://pypi.org/search/?q=sudachidict.
     ///     Also, can be an _absolute_ path to a compiled dictionary file.
     /// :param dict_type: deprecated alias to dict.
+    ///
+    /// :type config_path: Config | pathlib.Path | str | None
+    /// :type config: Config | pathlib.Path | str | None
+    /// :type resource_dir: pathlib.Path | str | None
+    /// :type dict: pathlib.Path | str | None
+    /// :type dict_type: pathlib.Path | str | None
     #[new]
     #[pyo3(
         text_signature="(config_path=None, resource_dir=None, dict=None, dict_type=None, *, config=None) -> Dictionary",
@@ -235,6 +247,10 @@ impl PyDictionary {
     /// :param fields: load only a subset of fields.
     ///     See https://worksapplications.github.io/sudachi.rs/python/topics/subsetting.html.
     /// :param projection: Projection override for created Tokenizer. See Config.projection for values.
+    ///
+    /// :type mode: SplitMode | str | None
+    /// :type fields: set[str] | None
+    /// :type projection: str | None
     #[pyo3(
         text_signature="(self, /, mode=SplitMode.C, fields=None, *, projection=None) -> Tokenizer",
         signature=(mode=None, fields=None, *, projection=None)
@@ -277,7 +293,9 @@ impl PyDictionary {
     /// (None, None, None, None, None, '終止形‐一般') will match any word in 終止形‐一般 conjugation form.
     ///
     /// :param target: can be either a list of POS partial tuples or a callable which maps POS to bool.
-    #[pyo3(text_signature="(self, /, target) -> PosMatcher")]
+    ///
+    /// :type target: Iterable[PartialPOS] | Callable[[POS], bool]
+    #[pyo3(text_signature = "(self, /, target) -> PosMatcher")]
     fn pos_matcher<'py>(&'py self, py: Python<'py>, target: &PyAny) -> PyResult<PyPosMatcher> {
         PyPosMatcher::create(py, self.dictionary.as_ref().unwrap(), target)
     }
@@ -293,8 +311,10 @@ impl PyDictionary {
     ///     See https://github.com/huggingface/tokenizers/blob/master/bindings/python/examples/custom_components.py.
     /// :param projection: Projection override for created Tokenizer. See Config.projection for values.
     ///
-    /// :type mode: SplitMode
-    /// :type fields: Set[str]
+    /// :type mode: SplitMode | str | None
+    /// :type fields: set[str] | None
+    /// :type handler: Callable[[int, NormalizedString, MorphemeList], list[NormalizedString]] | None
+    /// :type projection: str | None
     #[pyo3(
         text_signature="(self, /, mode=None, fields=None, handler=None, *, projection=None) -> tokenizers.PreTokenizer",
         signature=(mode=None, fields=None, handler=None, *, projection=None)
@@ -352,8 +372,8 @@ impl PyDictionary {
     ///     See https://worksapplications.github.io/sudachi.rs/python/topics/out_param.html for details.
     ///
     /// :type surface: str
-    /// :type out: MorphemeList
-    #[pyo3(text_signature="(self, /, surface, out=None) -> MorphemeList")]
+    /// :type out: MorphemeList | None
+    #[pyo3(text_signature = "(self, /, surface, out=None) -> MorphemeList")]
     fn lookup<'p>(
         &'p self,
         py: Python<'p>,
@@ -381,7 +401,7 @@ impl PyDictionary {
     }
 
     /// Close this dictionary.
-    #[pyo3(text_signature="(self, /) -> ()")]
+    #[pyo3(text_signature = "(self, /) -> ()")]
     fn close(&mut self) {
         self.dictionary = None;
     }
@@ -390,7 +410,9 @@ impl PyDictionary {
     ///
     /// :param pos_id: POS id
     /// :return: POS tuple with the given id or None for non existing id.
-    #[pyo3(text_signature="(self, /, pos_id: int) -> tuple[str, str, str, str, str, str] | None")]
+    ///
+    /// :type pos_id: int
+    #[pyo3(text_signature = "(self, /, pos_id: int) -> tuple[str, str, str, str, str, str] | None")]
     fn pos_of<'p>(&'p self, py: Python<'p>, pos_id: usize) -> Option<&'p PyTuple> {
         let dic = self.dictionary.as_ref().unwrap();
         dic.pos.get(pos_id).map(|x| x.as_ref(py))
diff --git a/python/src/morpheme.rs b/python/src/morpheme.rs
index 0a18f6c4..522d8ecd 100644
--- a/python/src/morpheme.rs
+++ b/python/src/morpheme.rs
@@ -94,7 +94,7 @@ impl PyMorphemeListWrapper {
 impl PyMorphemeListWrapper {
     /// Returns an empty morpheme list with dictionary.
     #[classmethod]
-    #[pyo3(text_signature="(dict: Dictionary) -> MorphemeList")]
+    #[pyo3(text_signature = "(dict: Dictionary) -> MorphemeList")]
     fn empty(_cls: &PyType, py: Python, dict: &PyDictionary) -> PyResult<Self> {
         let cat = PyModule::import(py, "builtins")?.getattr("DeprecationWarning")?;
         PyErr::warn(
@@ -113,13 +113,13 @@ impl PyMorphemeListWrapper {
     }
 
     /// Returns the total cost of the path.
-    #[pyo3(text_signature="(self, /) -> int")]
+    #[pyo3(text_signature = "(self, /) -> int")]
     fn get_internal_cost(&self, py: Python) -> i32 {
         self.internal(py).get_internal_cost()
     }
 
     /// Returns the number of morpheme in this list.
-    #[pyo3(text_signature="(self, /) -> int")]
+    #[pyo3(text_signature = "(self, /) -> int")]
     fn size(&self, py: Python) -> usize {
         self.internal(py).len()
     }
@@ -282,14 +282,14 @@ impl PyMorpheme {
 #[pymethods]
 impl PyMorpheme {
     /// Returns the begin index of this in the input text.
-    #[pyo3(text_signature="(self, /) -> int")]
+    #[pyo3(text_signature = "(self, /) -> int")]
     fn begin(&self, py: Python) -> usize {
         // call codepoint version
         self.morph(py).begin_c()
     }
 
     /// Returns the end index of this in the input text.
-    #[pyo3(text_signature="(self, /) -> int")]
+    #[pyo3(text_signature = "(self, /) -> int")]
     fn end(&self, py: Python) -> usize {
         // call codepoint version
         self.morph(py).end_c()
@@ -298,7 +298,7 @@ impl PyMorpheme {
     /// Returns the substring of input text corresponding to the morpheme, or a projection if one is configured.
     ///
     /// See `Config.projection`.
-    #[pyo3(text_signature="(self, /) -> str")]
+    #[pyo3(text_signature = "(self, /) -> str")]
     fn surface<'py>(&'py self, py: Python<'py>) -> &'py PyString {
         let list = self.list(py);
         let morph = self.morph(py);
@@ -311,14 +311,14 @@ impl PyMorpheme {
     /// Returns the substring of input text corresponding to the morpheme regardless the configured projection.
     ///
     /// See `Config.projection`.
-    #[pyo3(text_signature="(self, /) -> str")]
+    #[pyo3(text_signature = "(self, /) -> str")]
     fn raw_surface<'py>(&'py self, py: Python<'py>) -> &'py PyString {
         PyString::new(py, self.morph(py).surface().deref())
     }
 
     /// Returns the part of speech as a six-element tuple.
     /// Tuple elements are four POS levels, conjugation type and conjugation form.
-    #[pyo3(text_signature="(self, /) -> tuple[str, str, str, str, str, str]")]
+    #[pyo3(text_signature = "(self, /) -> tuple[str, str, str, str, str, str]")]
     fn part_of_speech<'py>(&'py self, py: Python<'py>) -> Py<PyTuple> {
         let pos_id = self.part_of_speech_id(py);
         self.list(py)
@@ -329,25 +329,25 @@ impl PyMorpheme {
     }
 
     /// Returns the id of the part of speech in the dictionary.
-    #[pyo3(text_signature="(self, /) -> int")]
+    #[pyo3(text_signature = "(self, /) -> int")]
     pub fn part_of_speech_id(&self, py: Python) -> u16 {
         self.morph(py).part_of_speech_id()
     }
 
     /// Returns the dictionary form.
-    #[pyo3(text_signature="(self, /) -> str")]
+    #[pyo3(text_signature = "(self, /) -> str")]
     fn dictionary_form<'py>(&'py self, py: Python<'py>) -> PyObject {
         self.morph(py).get_word_info().dictionary_form().into_py(py)
     }
 
     /// Returns the normalized form.
-    #[pyo3(text_signature="(self, /) -> str")]
+    #[pyo3(text_signature = "(self, /) -> str")]
     fn normalized_form<'py>(&'py self, py: Python<'py>) -> PyObject {
         self.morph(py).get_word_info().normalized_form().into_py(py)
     }
 
     /// Returns the reading form.
-    #[pyo3(text_signature="(self, /) -> str")]
+    #[pyo3(text_signature = "(self, /) -> str")]
     fn reading_form<'py>(&'py self, py: Python<'py>) -> PyObject {
         self.morph(py).get_word_info().reading_form().into_py(py)
     }
@@ -362,12 +362,10 @@ impl PyMorpheme {
     /// :param add_single: return lists with the current morpheme if the split hasn't produced any elements.
     ///     When False is passed, empty lists are returned instead.
     ///
-    /// :type mode: sudachipy.SplitMode    
-    /// :type out: Optional[sudachipy.MorphemeList]
+    /// :type mode: SplitMode | None
+    /// :type out: MorphemeList | None
     /// :type add_single: bool
-    #[pyo3(
-        text_signature="(self, /, mode, out=None, add_single=False) -> MorphemeList"
-    )]
+    #[pyo3(text_signature = "(self, /, mode, out=None, add_single=False) -> MorphemeList")]
     fn split<'py>(
         &'py self,
         py: Python<'py>,
@@ -410,19 +408,19 @@ impl PyMorpheme {
     }
 
     /// Returns whether if this is out of vocabulary word.
-    #[pyo3(text_signature="(self, /) -> bool")]
+    #[pyo3(text_signature = "(self, /) -> bool")]
     fn is_oov(&self, py: Python) -> bool {
         self.morph(py).is_oov()
     }
 
     /// Returns word id of this word in the dictionary.
-    #[pyo3(text_signature="(self, /) -> int")]
+    #[pyo3(text_signature = "(self, /) -> int")]
     fn word_id(&self, py: Python) -> u32 {
         self.morph(py).word_id().as_raw()
     }
 
     /// Returns the dictionary id which this word belongs.
-    #[pyo3(text_signature="(self, /) -> int")]
+    #[pyo3(text_signature = "(self, /) -> int")]
     fn dictionary_id(&self, py: Python) -> i32 {
         let word_id = self.morph(py).word_id();
         if word_id.is_oov() {
@@ -433,7 +431,7 @@ impl PyMorpheme {
     }
 
     /// Returns the list of synonym group ids.
-    #[pyo3(text_signature="(self, /) -> List[int]")]
+    #[pyo3(text_signature = "(self, /) -> List[int]")]
     fn synonym_group_ids<'py>(&'py self, py: Python<'py>) -> &'py PyList {
         let mref = self.morph(py);
         let ids = mref.get_word_info().synonym_group_ids();
@@ -441,7 +439,7 @@ impl PyMorpheme {
     }
 
     /// Returns the word info.
-    #[pyo3(text_signature="(self, /) -> WordInfo")]
+    #[pyo3(text_signature = "(self, /) -> WordInfo")]
     fn get_word_info(&self, py: Python) -> PyResult<PyWordInfo> {
         let cat = PyModule::import(py, "builtins")?.getattr("DeprecationWarning")?;
         PyErr::warn(py, cat, "Users should not touch the raw WordInfo.", 1)?;
diff --git a/python/src/pos_matcher.rs b/python/src/pos_matcher.rs
index 16d1fa56..bb9749f2 100644
--- a/python/src/pos_matcher.rs
+++ b/python/src/pos_matcher.rs
@@ -125,8 +125,10 @@ impl PyPosMatcher {
 impl PyPosMatcher {
     /// Checks whether a morpheme has matching POS.
     ///
-    /// :param m: morpheme.
+    /// :param m: a morpheme to check.
     /// :return: if morpheme has matching POS.
+    ///
+    /// :type m: Morpheme
     pub fn __call__<'py>(&'py self, py: Python<'py>, m: &'py PyMorpheme) -> bool {
         let pos_id = m.part_of_speech_id(py);
         self.matcher.matches_id(pos_id)
diff --git a/python/src/tokenizer.rs b/python/src/tokenizer.rs
index 8c7c1c84..c14f7076 100644
--- a/python/src/tokenizer.rs
+++ b/python/src/tokenizer.rs
@@ -36,6 +36,9 @@ use crate::morpheme::{PyMorphemeListWrapper, PyProjector};
 /// C == long mode
 ///
 /// :param mode: string representation of the split mode. One of [A,B,C] in captital or lower case.
+///     If None, returns SplitMode.C.
+///
+/// :type mode: str | None
 #[pyclass(module = "sudachipy.tokenizer", name = "SplitMode", frozen)]
 #[derive(Clone, PartialEq, Eq, Copy, Debug)]
 #[repr(u8)]
@@ -67,9 +70,12 @@ impl From<Mode> for PySplitMode {
 
 #[pymethods]
 impl PySplitMode {
-    /// Parse SplitMode from a character.
+    /// Creates a split mode from a string value.
+    ///
+    /// :param mode: string representation of the split mode. One of [A,B,C] in capital or lower case.
+    ///     If None, returns SplitMode.C.
     ///
-    /// :param mode: str to parse. One of [A,B,C] in captital or lower case.
+    /// :type mode: str | None
     #[new]
     #[pyo3(
         text_signature="(mode=None) -> SplitMode",
@@ -133,8 +139,8 @@ impl PyTokenizer {
     ///    See https://worksapplications.github.io/sudachi.rs/python/topics/out_param.html for details.
     ///
     /// :type text: str
-    /// :type mode: sudachipy.SplitMode
-    /// :type out: sudachipy.MorphemeList
+    /// :type mode: SplitMode | str | None
+    /// :type out: MorphemeList | None
     #[pyo3(
         text_signature="(self, /, text: str, mode=None, logger=None, out=None) -> MorphemeList",
         signature=(text, mode=None, logger=None, out=None)

From 5d8620ee643096027a687275b26838cb70874a68 Mon Sep 17 00:00:00 2001
From: mh-northlander <mh.northlander+github@gmail.com>
Date: Mon, 8 Jul 2024 14:16:45 +0900
Subject: [PATCH 7/9] improve pyi

---
 python/py_src/sudachipy/sudachipy.pyi | 47 ++++++++++++++++++---------
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/python/py_src/sudachipy/sudachipy.pyi b/python/py_src/sudachipy/sudachipy.pyi
index 705b62af..0b1c4fc2 100644
--- a/python/py_src/sudachipy/sudachipy.pyi
+++ b/python/py_src/sudachipy/sudachipy.pyi
@@ -28,12 +28,20 @@ PartialPOS = Union[
     Tuple[()],
 ]
 
-# Fields that can be specified for partial dictionary loading.
-# See https://worksapplications.github.io/sudachi.rs/python/topics/subsetting.html.
+"""
+Fields that can be specified for partial dictionary loading.
+See https://worksapplications.github.io/sudachi.rs/python/topics/subsetting.html.
+"""
 FieldSet = Optional[Set[Literal["surface", "pos", "normalized_form", "dictionary_form", "reading_form",
                                 "word_structure", "split_a", "split_b", "synonym_group_id"]]]
 
 
+"""
+Strings that can be parsed as SplitMode
+"""
+SplitModeStr = Literal["A", "a", "B", "b", "C", "c"]
+
+
 class SplitMode:
     """
     Unit to split text.
@@ -48,11 +56,12 @@ class SplitMode:
     C: ClassVar[SplitMode] = ...
 
     @classmethod
-    def __init__(cls, mode: str = "C") -> None:
+    def __init__(cls, mode: Optional[SplitModeStr] = "C") -> None:
         """
         Creates a split mode from a string value.
 
         :param mode: string representation of the split mode. One of [A,B,C] in captital or lower case.
+            If None, returns SplitMode.C.
         """
         ...
 
@@ -88,10 +97,10 @@ class Dictionary:
         ...
 
     def create(self,
-               mode: Union[SplitMode, Literal["A", "B", "C"]] = SplitMode.C,
-               fields: FieldSet = None,
+               mode: Union[SplitMode, SplitModeStr, None] = SplitMode.C,
+               fields: Optional[FieldSet] = None,
                *,
-               projection: str = None) -> Tokenizer:
+               projection: Optional[str] = None) -> Tokenizer:
         """
         Creates a sudachi tokenizer.
 
@@ -118,12 +127,12 @@ class Dictionary:
         ...
 
     def pre_tokenizer(self,
-                      mode: Union[SplitMode, Literal["A", "B", "C"]] = "C",
-                      fields: FieldSet = None,
+                      mode: Union[SplitMode, SplitModeStr, None] = SplitMode.C,
+                      fields: Optional[FieldSet] = None,
                       handler: Optional[Callable[[
                           int, object, MorphemeList], list]] = None,
                       *,
-                      projection: str = None) -> object:
+                      projection: Optional[str] = None) -> object:
         """
         Creates HuggingFace Tokenizers-compatible PreTokenizer.
         Requires package `tokenizers` to be installed.
@@ -230,7 +239,10 @@ class Morpheme:
         """
         ...
 
-    def split(self, mode: Union[SplitMode, Literal["A", "B", "C"]], out: Optional[MorphemeList] = None, add_single: bool = True) -> MorphemeList:
+    def split(self,
+              mode: Union[SplitMode, SplitModeStr],
+              out: Optional[MorphemeList] = None,
+              add_single: bool = True) -> MorphemeList:
         """
         Returns sub-morphemes in the provided split mode.
 
@@ -288,7 +300,7 @@ class MorphemeList:
     def __init__(self) -> None: ...
 
     @classmethod
-    def empty(cls, dict) -> MorphemeList:
+    def empty(cls, dict: Dictionary) -> MorphemeList:
         """
         Returns an empty morpheme list with dictionary.
         """
@@ -306,7 +318,7 @@ class MorphemeList:
         """
         ...
 
-    def __getitem__(self, index) -> Morpheme: ...
+    def __getitem__(self, index: int) -> Morpheme: ...
     def __iter__(self) -> Iterator[Morpheme]: ...
     def __len__(self) -> int: ...
 
@@ -318,11 +330,13 @@ class Tokenizer:
     Create using Dictionary.create method.
     """
     SplitMode: ClassVar[SplitMode] = ...
+
     @classmethod
     def __init__(cls) -> None: ...
 
-    def tokenize(self, text: str,
-                 mode: Union[SplitMode, Literal["A", "B", "C"]] = ...,
+    def tokenize(self,
+                 text: str,
+                 mode: Union[SplitMode, SplitModeStr, None] = None,
                  out: Optional[MorphemeList] = None) -> MorphemeList:
         """
         Break text into morphemes.
@@ -359,6 +373,7 @@ class WordInfo:
     surface: ClassVar[str] = ...
     synonym_group_ids: ClassVar[List[int]] = ...
     word_structure: ClassVar[List[int]] = ...
+
     @classmethod
     def __init__(self) -> None: ...
     def length(self) -> int: ...
@@ -374,11 +389,11 @@ class PosMatcher:
     def __iter__(self) -> Iterator[POS]: ...
     def __len__(self) -> int: ...
 
-    def __call__(self, m: Morpheme) -> bool:
+    def __call__(self, /, m: Morpheme) -> bool:
         """
         Checks whether a morpheme has matching POS.
 
-        :param m: morpheme.
+        :param m: a morpheme to check.
         :return: if morpheme has matching POS.
         """
         ...

From d1c31655292adc80e1b9a1051bb4b90752500e6f Mon Sep 17 00:00:00 2001
From: mh-northlander <mh.northlander+github@gmail.com>
Date: Mon, 8 Jul 2024 14:51:00 +0900
Subject: [PATCH 8/9] add deprecated directive and fix

---
 python/py_src/sudachipy/sudachipy.pyi | 9 +++++++++
 python/src/build.rs                   | 4 ++--
 python/src/dictionary.rs              | 4 ++--
 python/src/morpheme.rs                | 6 ++++++
 python/src/tokenizer.rs               | 2 ++
 5 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/python/py_src/sudachipy/sudachipy.pyi b/python/py_src/sudachipy/sudachipy.pyi
index 0b1c4fc2..ca39a95c 100644
--- a/python/py_src/sudachipy/sudachipy.pyi
+++ b/python/py_src/sudachipy/sudachipy.pyi
@@ -47,7 +47,9 @@ class SplitMode:
     Unit to split text.
 
     A == short mode
+
     B == middle mode
+
     C == long mode
     """
 
@@ -205,6 +207,9 @@ class Morpheme:
     def get_word_info(self) -> WordInfo:
         """
         Returns the word info.
+
+        .. deprecated:: 0.6.0
+           Users should not touch the raw WordInfo.
         """
         ...
 
@@ -293,6 +298,7 @@ class Morpheme:
 class MorphemeList:
     """
     A list of morphemes.
+
     An object can not be instantiated manually.
     Use Tokenizer.tokenize("") to create an empty morpheme list.
     """
@@ -303,6 +309,9 @@ class MorphemeList:
     def empty(cls, dict: Dictionary) -> MorphemeList:
         """
         Returns an empty morpheme list with dictionary.
+
+        .. deprecated:: 0.6.0
+            Use Tokenizer.tokenize("") if you need.
         """
         ...
 
diff --git a/python/src/build.rs b/python/src/build.rs
index 2b2ce94f..b37ed807 100644
--- a/python/src/build.rs
+++ b/python/src/build.rs
@@ -65,7 +65,7 @@ fn create_file(p: &Path) -> std::io::Result<File> {
 /// :param output: Path to output built dictionray.
 /// :param description: A description text to embed in the dictionary.
 /// :return: A build report, list of (part, size, time).
-/// 
+///
 /// :type matrix: pathlib.Path | str | bytes
 /// :type lex: list[pathlib.Path | str | bytes]
 /// :type output: pathlib.Path | str
@@ -106,7 +106,7 @@ fn build_system_dic<'p>(
 /// :param output: Path to output built dictionray.
 /// :param description: A description text to embed in the dictionary.
 /// :return: A build report, list of (part, size, time).
-/// 
+///
 /// :type system: pathlib.Path | str
 /// :type lex: list[pathlib.Path | str | bytes]
 /// :type output: pathlib.Path | str
diff --git a/python/src/dictionary.rs b/python/src/dictionary.rs
index 2b5c849b..22241f95 100644
--- a/python/src/dictionary.rs
+++ b/python/src/dictionary.rs
@@ -87,7 +87,7 @@ impl PyDicData {
 /// :param config_path: path to the configuration JSON file, config json as a string, or a [sudachipy.Config] object.
 /// :param config: alias to config_path, only one of them can be specified at the same time.
 /// :param resource_dir: path to the resource directory folder.
-/// :param dict: type of pre-packaged dictionary, referring to sudachidict_<dict_type> packages on PyPI: https://pypi.org/search/?q=sudachidict.
+/// :param dict: type of pre-packaged dictionary, referring to sudachidict_<dict> packages on PyPI: https://pypi.org/search/?q=sudachidict.
 ///     Also, can be an _absolute_ path to a compiled dictionary file.
 /// :param dict_type: deprecated alias to dict.
 ///
@@ -114,7 +114,7 @@ impl PyDictionary {
     /// :param config_path: path to the configuration JSON file, config json as a string, or a [sudachipy.Config] object.
     /// :param config: alias to config_path, only one of them can be specified at the same time.
     /// :param resource_dir: path to the resource directory folder.
-    /// :param dict: type of pre-packaged dictionary, referring to sudachidict_<dict_type> packages on PyPI: https://pypi.org/search/?q=sudachidict.
+    /// :param dict: type of pre-packaged dictionary, referring to sudachidict_<dict> packages on PyPI: https://pypi.org/search/?q=sudachidict.
     ///     Also, can be an _absolute_ path to a compiled dictionary file.
     /// :param dict_type: deprecated alias to dict.
     ///
diff --git a/python/src/morpheme.rs b/python/src/morpheme.rs
index 522d8ecd..b9367e10 100644
--- a/python/src/morpheme.rs
+++ b/python/src/morpheme.rs
@@ -93,6 +93,9 @@ impl PyMorphemeListWrapper {
 #[pymethods]
 impl PyMorphemeListWrapper {
     /// Returns an empty morpheme list with dictionary.
+    ///
+    /// .. deprecated:: 0.6.0
+    ///     Use Tokenizer.tokenize("") if you need.
     #[classmethod]
     #[pyo3(text_signature = "(dict: Dictionary) -> MorphemeList")]
     fn empty(_cls: &PyType, py: Python, dict: &PyDictionary) -> PyResult<Self> {
@@ -439,6 +442,9 @@ impl PyMorpheme {
     }
 
     /// Returns the word info.
+    ///
+    /// .. deprecated:: 0.6.0
+    ///    Users should not touch the raw WordInfo.
     #[pyo3(text_signature = "(self, /) -> WordInfo")]
     fn get_word_info(&self, py: Python) -> PyResult<PyWordInfo> {
         let cat = PyModule::import(py, "builtins")?.getattr("DeprecationWarning")?;
diff --git a/python/src/tokenizer.rs b/python/src/tokenizer.rs
index c14f7076..d96763de 100644
--- a/python/src/tokenizer.rs
+++ b/python/src/tokenizer.rs
@@ -32,7 +32,9 @@ use crate::morpheme::{PyMorphemeListWrapper, PyProjector};
 /// Unit to split text.
 ///
 /// A == short mode
+///
 /// B == middle mode
+///
 /// C == long mode
 ///
 /// :param mode: string representation of the split mode. One of [A,B,C] in captital or lower case.

From 4a3da5bacd868112165ac5f3c5c49d5f82eba48f Mon Sep 17 00:00:00 2001
From: mh-northlander <mh.northlander+github@gmail.com>
Date: Wed, 25 Sep 2024 09:36:20 +0900
Subject: [PATCH 9/9] update Dictionary arg name

---
 python/README.md | 36 +++++++++++++++++-------------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/python/README.md b/python/README.md
index 4d95d7fb..b1ad3e5e 100644
--- a/python/README.md
+++ b/python/README.md
@@ -66,7 +66,7 @@ $ pip install sudachipy
 
 ### Step 2. Get a Dictionary
 
-You can get dictionary as a Python package. It make take a while to download the dictionary file (around 70MB for the `core` edition).
+You can get dictionary as a Python package. It may take a while to download the dictionary file (around 70MB for the `core` edition).
 
 ```bash
 $ pip install sudachidict_core
@@ -209,7 +209,7 @@ There are three editions of Sudachi Dictionary, namely, `small`, `core`, and `fu
 
 SudachiPy uses `sudachidict_core` by default.
 
-Dictionaries are installed as Python packages `sudachidict_small`, `sudachidict_core`, and `sudachidict_full`.
+Dictionaries can be installed as Python packages `sudachidict_small`, `sudachidict_core`, and `sudachidict_full`.
 
 * [SudachiDict-small · PyPI](https://pypi.org/project/SudachiDict-small/)
 * [SudachiDict-core · PyPI](https://pypi.org/project/SudachiDict-core/)
@@ -234,19 +234,19 @@ $ echo "外国人参政権" | sudachipy -s full
 
 ### Dictionary option: Python package
 
-You can specify the dictionary with the `Dicionary()` argument; `config_path` or `dict_type`.
+You can specify the dictionary with the `Dictionary()` argument; `config` or `dict`.
 
 ```python
-class Dictionary(config_path=None, resource_dir=None, dict_type=None)
+class Dictionary(config=None, resource_dir=None, dict=None)
 ```
 
-1. `config_path`
-    * You can specify the file path to the setting file with `config_path` (See [Dictionary in The Setting File](#Dictionary in The Setting File) for the detail).
+1. `config`
+    * You can specify the file path to the setting file with `config` (See [Dictionary in The Setting File](#dictionary-in-the-setting-file) for the detail).
     * If the dictionary file is specified in the setting file as `systemDict`, SudachiPy will use the dictionary.
-2. `dict_type`
-    * You can also specify the dictionary type with `dict_type`.
-    * The available arguments are `small`, `core`, or `full`.
-    * If different dictionaries are specified with `config_path` and `dict_type`, **a dictionary defined `dict_type` overrides** those defined in the config path.
+2. `dict`
+    * You can also specify the dictionary type with `dict`.
+    * The available arguments are `small`, `core`, `full`, or a path to the dictionary file.
+    * If different dictionaries are specified with `config` and `dict`, **the dictionary specified by `dict` overrides** the one defined in the config.
 
 ```python
 from sudachipy import Dictionary
@@ -255,16 +255,16 @@ from sudachipy import Dictionary
 tokenizer_obj = Dictionary().create()
 
 # The dictionary given by the `systemDict` key in the config file (/path/to/sudachi.json) will be used
-tokenizer_obj = Dictionary(config_path="/path/to/sudachi.json").create()
+tokenizer_obj = Dictionary(config="/path/to/sudachi.json").create()
 
-# The dictionary specified by `dict_type` will be set.
-tokenizer_obj = Dictionary(dict_type="core").create()  # sudachidict_core (same as default)
-tokenizer_obj = Dictionary(dict_type="small").create()  # sudachidict_small
-tokenizer_obj = Dictionary(dict_type="full").create()  # sudachidict_full
+# The dictionary specified by `dict` will be used.
+tokenizer_obj = Dictionary(dict="core").create()  # sudachidict_core (same as default)
+tokenizer_obj = Dictionary(dict="small").create()  # sudachidict_small
+tokenizer_obj = Dictionary(dict="full").create()  # sudachidict_full
 
-# The dictionary specified by `dict_type` overrides those defined in the config path.
+# The dictionary specified by `dict` overrides those defined in the config.
 # In the following code, `sudachidict_full` will be used regardless of a dictionary defined in the config file.
-tokenizer_obj = Dictionary(config_path="/path/to/sudachi.json", dict_type="full").create()
+tokenizer_obj = Dictionary(config="/path/to/sudachi.json", dict="full").create()
 ```
 
 
@@ -303,10 +303,8 @@ Then specify your `sudachi.json` with the `-r` option.
 $ sudachipy -r path/to/sudachi.json
 ```
 
-
 You can build a user dictionary with the subcommand `ubuild`.
 
-
 ```bash
 $ sudachipy ubuild -h
 usage: sudachipy ubuild [-h] [-o file] [-d string] -s file file [file ...]