From 113ad4cea2c77ea08ed097a299fa08ebb129da01 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Tue, 4 Jun 2024 11:01:37 +0900 Subject: [PATCH 1/8] update pyo3 to v0.21 --- python/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/Cargo.toml b/python/Cargo.toml index e1143743..4c5513d9 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -15,7 +15,7 @@ name = "sudachipy" crate-type = ["cdylib"] [dependencies] -pyo3 = { version = "0.20", features = ["extension-module"] } +pyo3 = { version = "0.21", features = ["extension-module", "gil-refs"] } thread_local = "1.1" # Apache 2.0/MIT scopeguard = "1" # Apache 2.0/MIT From 4d0d1c0d253af545e0a6006b012e2d74ca0fff59 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Tue, 4 Jun 2024 16:07:09 +0900 Subject: [PATCH 2/8] use Bound instead of PyCell --- python/src/dictionary.rs | 24 ++++++++++++------------ python/src/morpheme.rs | 10 +++++----- python/src/pos_matcher.rs | 4 ++-- python/src/pretokenizer.rs | 12 ++++++------ python/src/tokenizer.rs | 8 ++++---- 5 files changed, 29 insertions(+), 29 deletions(-) diff --git a/python/src/dictionary.rs b/python/src/dictionary.rs index bc333c8e..9a6f062d 100644 --- a/python/src/dictionary.rs +++ b/python/src/dictionary.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -319,7 +319,7 @@ impl PyDictionary { let projector = resolve_projection(passed, &dict.projection); let internal = PyPretokenizer::new(dict, mode, required_fields, handler, projector); - let internal_cell = PyCell::new(py, internal)?; + let internal_cell = Bound::new(py, internal)?; let module = py.import("tokenizers.pre_tokenizers")?; module .getattr("PreTokenizer")? @@ -340,18 +340,18 @@ impl PyDictionary { /// :type surface: str /// :type out: sudachipy.MorphemeList #[pyo3(text_signature = "($self, surface, out = None) -> sudachipy.MorphemeList")] - fn lookup<'p>( - &'p self, - py: Python<'p>, - surface: &'p str, - out: Option<&'p PyCell>, - ) -> PyResult<&'p PyCell> { + fn lookup<'py>( + &'py self, + py: Python<'py>, + surface: &'py str, + out: Option>, + ) -> PyResult> { let l = match out { Some(l) => l, - None => PyCell::new( - py, - PyMorphemeListWrapper::new(self.dictionary.clone().unwrap()), - )?, + None => { + let list = PyMorphemeListWrapper::new(self.dictionary.clone().unwrap()); + Bound::new(py, list)? + } }; // this needs to be a variable diff --git a/python/src/morpheme.rs b/python/src/morpheme.rs index ad3929dd..1c8cf553 100644 --- a/python/src/morpheme.rs +++ b/python/src/morpheme.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -124,7 +124,7 @@ impl PyMorphemeListWrapper { self.size(py) } - fn __getitem__(slf: &PyCell, mut idx: isize) -> PyResult { + fn __getitem__(slf: Bound, mut idx: isize) -> PyResult { let list = slf.borrow(); let py = slf.py(); let len = list.size(py) as isize; @@ -362,9 +362,9 @@ impl PyMorpheme { &'py self, py: Python<'py>, mode: &PyAny, - out: Option<&'py PyCell>, + out: Option>, add_single: Option, - ) -> PyResult<&'py PyCell> { + ) -> PyResult> { let list = self.list(py); let mode = extract_mode(py, mode)?; @@ -372,7 +372,7 @@ impl PyMorpheme { let out_cell = match out { None => { let list = list.empty_clone(py); - PyCell::new(py, list)? + Bound::new(py, list)? } Some(r) => r, }; diff --git a/python/src/pos_matcher.rs b/python/src/pos_matcher.rs index 7c6a884d..062d0d0c 100644 --- a/python/src/pos_matcher.rs +++ b/python/src/pos_matcher.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -210,7 +210,7 @@ impl PyPosIter { #[pymethods] impl PyPosIter { - fn __iter__(slf: &PyCell) -> &PyCell { + fn __iter__(slf: Bound) -> Bound { slf } diff --git a/python/src/pretokenizer.rs b/python/src/pretokenizer.rs index 755f040b..303f7645 100644 --- a/python/src/pretokenizer.rs +++ b/python/src/pretokenizer.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -157,11 +157,11 @@ impl PyPretokenizer { } /// Entry function for tokenization - pub fn pre_tokenize<'p>( - self_: &'p PyCell, - py: Python<'p>, - data: &'p PyAny, - ) -> PyResult<&'p PyAny> { + pub fn pre_tokenize<'py>( + self_: Bound<'py, Self>, + py: Python<'py>, + data: &'py PyAny, + ) -> PyResult<&'py PyAny> { data.call_method1("split", PyTuple::new(py, [self_])) } } diff --git a/python/src/tokenizer.rs b/python/src/tokenizer.rs index 558d02cb..5f364380 100644 --- a/python/src/tokenizer.rs +++ b/python/src/tokenizer.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -138,8 +138,8 @@ impl PyTokenizer { text: &'py str, mode: Option<&PyAny>, logger: Option, - out: Option<&'py PyCell>, - ) -> PyResult<&'py PyCell> { + out: Option>, + ) -> PyResult> { // restore default mode on scope exit let mode = match mode { None => None, @@ -164,7 +164,7 @@ impl PyTokenizer { let morphemes = MorphemeList::empty(dict); let wrapper = PyMorphemeListWrapper::from_components(morphemes, self.projection.clone()); - PyCell::new(py, wrapper)? + Bound::new(py, wrapper)? } Some(list) => list, }; From 2787346223d129f59adb1fc690789373acecd163 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Thu, 6 Jun 2024 10:34:58 +0900 Subject: [PATCH 3/8] deactivate gil-ref feature and fix related deprecation warnings --- python/Cargo.toml | 2 +- python/src/build.rs | 100 ++++++++++++++++++++++--------------- python/src/dictionary.rs | 83 +++++++++++++++--------------- python/src/lib.rs | 4 +- python/src/morpheme.rs | 36 ++++++------- python/src/pos_matcher.rs | 23 +++++---- python/src/pretokenizer.rs | 53 ++++++++++---------- python/src/projection.rs | 35 ++++++------- python/src/tokenizer.rs | 2 +- 9 files changed, 183 insertions(+), 155 deletions(-) diff --git a/python/Cargo.toml b/python/Cargo.toml index 4c5513d9..6e564c2e 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -15,7 +15,7 @@ name = "sudachipy" crate-type = ["cdylib"] [dependencies] -pyo3 = { version = "0.21", features = ["extension-module", "gil-refs"] } +pyo3 = { version = "0.21", features = ["extension-module"] } thread_local = "1.1" # Apache 2.0/MIT scopeguard = "1" # Apache 2.0/MIT diff --git a/python/src/build.rs b/python/src/build.rs index a6005b26..40e52c34 100644 --- a/python/src/build.rs +++ b/python/src/build.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,17 +26,17 @@ use sudachi::config::Config; use sudachi::dic::build::{DataSource, DictBuilder}; use sudachi::dic::dictionary::JapaneseDictionary; -pub fn register_functions(m: &PyModule) -> PyResult<()> { +pub fn register_functions(m: &Bound) -> PyResult<()> { m.add_function(wrap_pyfunction!(build_system_dic, m)?)?; m.add_function(wrap_pyfunction!(build_user_dic, m)?)?; Ok(()) } -fn to_stats(py: Python, builder: DictBuilder) -> PyResult<&PyList> { - let stats = PyList::empty(py); +fn to_stats(py: Python, builder: DictBuilder) -> PyResult> { + let stats = PyList::empty_bound(py); for p in builder.report() { - let t = PyTuple::new( + let t = PyTuple::new_bound( py, [ p.part().into_py(py), @@ -60,23 +60,26 @@ fn create_file(p: &Path) -> std::io::Result { #[pyfunction] #[pyo3(text_signature = "(matrix, lex, output, description=None) -> list")] -fn build_system_dic<'p>( - py: Python<'p>, - matrix: &'p PyAny, - lex: &'p PyList, - output: &'p PyAny, +fn build_system_dic<'py>( + py: Python<'py>, + matrix: &Bound<'py, PyAny>, + lex: &Bound<'py, PyList>, + output: &Bound<'py, PyAny>, description: Option<&str>, -) -> PyResult<&'p PyList> { +) -> PyResult> { let mut builder = DictBuilder::new_system(); description.map(|d| builder.set_description(d)); - let matrix_src = as_data_source(py, matrix)?; + let matrix_path = resolve_as_pypathstr(py, matrix)?; + let matrix_src = as_data_source(matrix_path.as_ref(), matrix)?; errors::wrap_ctx(builder.read_conn(matrix_src), matrix)?; for f in lex.iter() { - let lex_src = as_data_source(py, &f)?; + let lex_path = resolve_as_pypathstr(py, &f)?; + let lex_src = as_data_source(lex_path.as_ref(), &f)?; errors::wrap_ctx(builder.read_lexicon(lex_src), &f)?; } - let out_file = match as_data_source(py, output)? { + let out_path = resolve_as_pypathstr(py, output)?; + let out_file = match as_data_source(out_path.as_ref(), output)? { DataSource::File(p) => errors::wrap_ctx(create_file(p), p)?, DataSource::Data(_) => return errors::wrap(Err("can't use bytes for output")), }; @@ -89,14 +92,15 @@ fn build_system_dic<'p>( #[pyfunction] #[pyo3(text_signature = "(system, lex, output, description=None) -> list")] -fn build_user_dic<'p>( - py: Python<'p>, - system: &'p PyAny, - lex: &'p PyList, - output: &'p PyAny, +fn build_user_dic<'py>( + py: Python<'py>, + system: &Bound<'py, PyAny>, + lex: &Bound<'py, PyList>, + output: &Bound<'py, PyAny>, description: Option<&str>, -) -> PyResult<&'p PyList> { - let system_dic = match as_data_source(py, system)? { +) -> PyResult> { + let system_path = resolve_as_pypathstr(py, system)?; + let system_dic = match as_data_source(system_path.as_ref(), system)? { DataSource::File(f) => { let resource_path = get_default_resource_dir(py)?; let cfg = Config::minimal_at(resource_path).with_system_dic(f); @@ -113,10 +117,12 @@ fn build_user_dic<'p>( description.map(|d| builder.set_description(d)); for f in lex.iter() { - let lex_src = as_data_source(py, &f)?; + let lex_path = resolve_as_pypathstr(py, &f)?; + let lex_src = as_data_source(lex_path.as_ref(), &f)?; errors::wrap_ctx(builder.read_lexicon(lex_src), &f)?; } - let out_file = match as_data_source(py, output)? { + let out_path = resolve_as_pypathstr(py, output)?; + let out_file = match as_data_source(out_path.as_ref(), output)? { DataSource::File(p) => errors::wrap_ctx(create_file(p), p)?, DataSource::Data(_) => return errors::wrap(Err("can't use bytes for output")), }; @@ -127,25 +133,39 @@ fn build_user_dic<'p>( to_stats(py, builder) } -fn as_data_source<'p>(py: Python<'p>, data: &'p PyAny) -> PyResult> { - let path = py - .import("pathlib")? - .getattr("Path")? - .downcast::()?; +fn resolve_as_pypathstr<'py>( + py: Python<'py>, + data: &Bound<'py, PyAny>, +) -> PyResult>> { + let binding = py.import_bound("pathlib")?.getattr("Path")?; + let path = binding.downcast::()?; if data.is_instance(path)? { - let pypath = data.call_method0("resolve")?.str()?; - Ok(DataSource::File(Path::new(pypath.to_str()?))) + Ok(Some(data.call_method0("resolve")?.str()?)) } else if data.is_instance_of::() { - let pypath = data.str()?; - Ok(DataSource::File(Path::new(pypath.to_str()?))) - } else if data.is_instance_of::() { - let data = data.downcast::()?; - Ok(DataSource::Data(data.as_bytes())) + Ok(Some(data.str()?)) } else { - Err(pyo3::exceptions::PyValueError::new_err(format!( - "data source should can be only Path, bytes or str, was {}: {}", - data, - data.get_type() - ))) + Ok(None) + } +} + +fn as_data_source<'py>( + resolved_path: Option<&'py Bound<'py, PyString>>, + original_obj: &'py Bound<'py, PyAny>, +) -> PyResult> { + match resolved_path { + Some(pystr) => Ok(DataSource::File(Path::new(pystr.to_str()?))), + None => { + if original_obj.is_instance_of::() { + Ok(DataSource::Data( + original_obj.downcast::()?.as_bytes(), + )) + } else { + Err(pyo3::exceptions::PyValueError::new_err(format!( + "data source should can be only Path, bytes or str, was {}: {}", + original_obj, + original_obj.get_type() + ))) + } + } } } diff --git a/python/src/dictionary.rs b/python/src/dictionary.rs index 9a6f062d..251267ab 100644 --- a/python/src/dictionary.rs +++ b/python/src/dictionary.rs @@ -103,11 +103,11 @@ impl PyDictionary { #[pyo3(signature=(config_path = None, resource_dir = None, dict = None, dict_type = None, *, config = None))] fn new( py: Python, - config_path: Option<&PyAny>, + config_path: Option<&Bound>, resource_dir: Option, dict: Option<&str>, dict_type: Option<&str>, - config: Option<&PyAny>, + config: Option<&Bound>, ) -> PyResult { if config.is_some() && config_path.is_some() { return Err(SudachiErr::new_err("Both config and config_path options were specified at the same time, use one of them")); @@ -131,10 +131,10 @@ impl PyDictionary { }; if dict_type.is_some() { - let cat = PyModule::import(py, "builtins")?.getattr("DeprecationWarning")?; - PyErr::warn( + let cat = PyModule::import_bound(py, "builtins")?.getattr("DeprecationWarning")?; + PyErr::warn_bound( py, - cat, + &cat, "Parameter dict_type of Dictionary() is deprecated, use dict instead", 1, )?; @@ -189,7 +189,7 @@ impl PyDictionary { .pos_list .iter() .map(|pos| { - let tuple: Py = PyTuple::new(py, pos).into_py(py); + let tuple: Py = PyTuple::new_bound(py, pos).into_py(py); tuple }) .collect(); @@ -226,9 +226,9 @@ impl PyDictionary { fn create<'py>( &'py self, py: Python<'py>, - mode: Option<&'py PyAny>, - fields: Option<&'py PySet>, - projection: Option<&'py PyString>, + mode: Option<&Bound<'py, PyAny>>, + fields: Option<&Bound<'py, PySet>>, + projection: Option<&Bound<'py, PyString>>, ) -> PyResult { let mode = match mode { Some(m) => extract_mode(py, m)?, @@ -263,7 +263,11 @@ impl PyDictionary { /// /// :param target: can be either a callable or list of POS partial tuples #[pyo3(text_signature = "($self, target)")] - fn pos_matcher<'py>(&'py self, py: Python<'py>, target: &PyAny) -> PyResult { + fn pos_matcher<'py>( + &'py self, + py: Python<'py>, + target: &Bound<'py, PyAny>, + ) -> PyResult { PyPosMatcher::create(py, self.dictionary.as_ref().unwrap(), target) } @@ -286,21 +290,21 @@ impl PyDictionary { text_signature = "($self, mode, fields, handler) -> tokenizers.PreTokenizer", signature = (mode = None, fields = None, handler = None, *, projection = None) )] - fn pre_tokenizer<'p>( - &'p self, - py: Python<'p>, - mode: Option<&PyAny>, - fields: Option<&PySet>, + fn pre_tokenizer<'py>( + &'py self, + py: Python<'py>, + mode: Option<&Bound<'py, PyAny>>, + fields: Option<&Bound<'py, PySet>>, handler: Option>, - projection: Option<&PyString>, - ) -> PyResult<&'p PyAny> { + projection: Option<&Bound<'py, PyString>>, + ) -> PyResult> { let mode = match mode { Some(m) => extract_mode(py, m)?, None => Mode::C, }; let subset = parse_field_subset(fields)?; if let Some(h) = handler.as_ref() { - if !h.as_ref(py).is_callable() { + if !h.bind(py).is_callable() { return Err(SudachiErr::new_err("handler must be callable")); } } @@ -320,11 +324,11 @@ impl PyDictionary { let projector = resolve_projection(passed, &dict.projection); let internal = PyPretokenizer::new(dict, mode, required_fields, handler, projector); let internal_cell = Bound::new(py, internal)?; - let module = py.import("tokenizers.pre_tokenizers")?; + let module = py.import_bound("tokenizers.pre_tokenizers")?; module .getattr("PreTokenizer")? .getattr("custom")? - .call1(PyTuple::new(py, [internal_cell])) + .call1(PyTuple::new_bound(py, [internal_cell])) } /// Look up morphemes in the binary dictionary without performing the analysis. @@ -374,9 +378,9 @@ impl PyDictionary { /// Get POS Tuple by its id #[pyo3(text_signature = "($self, pos_id: int)")] - fn pos_of<'p>(&'p self, py: Python<'p>, pos_id: usize) -> Option<&'p PyTuple> { + fn pos_of<'py>(&'py self, py: Python<'py>, pos_id: usize) -> Option<&Bound<'py, PyTuple>> { let dic = self.dictionary.as_ref().unwrap(); - dic.pos.get(pos_id).map(|x| x.as_ref(py)) + dic.pos.get(pos_id).map(|x| x.bind(py)) } fn __repr__(&self) -> PyResult { @@ -411,10 +415,9 @@ fn config_repr(cfg: &Config) -> Result { Ok(result) } -pub(crate) fn extract_mode<'py>(py: Python<'py>, mode: &'py PyAny) -> PyResult { +pub(crate) fn extract_mode<'py>(py: Python<'py>, mode: &Bound<'py, PyAny>) -> PyResult { if mode.is_instance_of::() { - let mode = mode.str()?.to_str()?; - Mode::from_str(mode).map_err(|e| SudachiErr::new_err(e).into()) + Mode::from_str(mode.str()?.to_str()?).map_err(|e| SudachiErr::new_err(e).into()) } else if mode.is_instance_of::() { let mode = mode.extract::()?; Ok(Mode::from(mode)) @@ -427,9 +430,10 @@ fn read_config_from_fs(path: Option<&Path>) -> PyResult { wrap(ConfigBuilder::from_opt_file(path)) } -fn read_config(config_opt: &PyAny) -> PyResult { +fn read_config(config_opt: &Bound) -> PyResult { if config_opt.is_instance_of::() { - let config_str = config_opt.str()?.to_str()?.trim(); + let config_pystr = config_opt.str()?; + let config_str = config_pystr.to_str()?.trim(); // looks like json if config_str.starts_with("{") && config_str.ends_with("}") { let result = ConfigBuilder::from_bytes(config_str.as_bytes()); @@ -445,10 +449,10 @@ fn read_config(config_opt: &PyAny) -> PyResult { ))); } let py = config_opt.py(); - let cfg_type = py.import("sudachipy.config")?.getattr("Config")?; - if config_opt.is_instance(cfg_type)? { + let cfg_type = py.import_bound("sudachipy.config")?.getattr("Config")?; + if config_opt.is_instance(&cfg_type)? { let cfg_as_str = config_opt.call_method0("as_jsons")?; - return read_config(cfg_as_str); + return read_config(&cfg_as_str); } Err(SudachiErr::new_err(( format!("passed config was not a string, json object or sudachipy.config.Config object"), @@ -457,24 +461,22 @@ fn read_config(config_opt: &PyAny) -> PyResult { } pub(crate) fn read_default_config(py: Python) -> PyResult { - let path = PyModule::import(py, "sudachipy")?.getattr("_DEFAULT_SETTINGFILE")?; + let path = PyModule::import_bound(py, "sudachipy")?.getattr("_DEFAULT_SETTINGFILE")?; let path = path.downcast::()?.to_str()?; let path = PathBuf::from(path); wrap_ctx(ConfigBuilder::from_opt_file(Some(&path)), &path) } pub(crate) fn get_default_resource_dir(py: Python) -> PyResult { - let path = PyModule::import(py, "sudachipy")?.getattr("_DEFAULT_RESOURCEDIR")?; + let path = PyModule::import_bound(py, "sudachipy")?.getattr("_DEFAULT_RESOURCEDIR")?; let path = path.downcast::()?.to_str()?; Ok(PathBuf::from(path)) } fn find_dict_path(py: Python, dict_type: &str) -> PyResult { - let pyfunc = PyModule::import(py, "sudachipy")?.getattr("_find_dict_path")?; - let path = pyfunc - .call1((dict_type,))? - .downcast::()? - .to_str()?; + let pyfunc = PyModule::import_bound(py, "sudachipy")?.getattr("_find_dict_path")?; + let path = pyfunc.call1((dict_type,))?; + let path = path.downcast::()?.to_str()?; Ok(PathBuf::from(path)) } @@ -491,15 +493,14 @@ fn locate_system_dict(py: Python, path: &Path) -> PyResult { } } -fn parse_field_subset(data: Option<&PySet>) -> PyResult { +fn parse_field_subset(data: Option<&Bound>) -> PyResult { if data.is_none() { return Ok(InfoSubset::all()); } let mut subset = InfoSubset::empty(); - for el in data.unwrap().iter() { - let s = el.str()?.to_str()?; - subset |= match s { + for elem in data.unwrap().iter() { + subset |= match elem.str()?.to_str()? { "surface" => InfoSubset::SURFACE, "pos" | "pos_id" => InfoSubset::POS_ID, "normalized_form" => InfoSubset::NORMALIZED_FORM, diff --git a/python/src/lib.rs b/python/src/lib.rs index 68a9c91d..f2c13703 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,7 +28,7 @@ mod word_info; /// module root #[pymodule] -fn sudachipy(_py: Python, m: &PyModule) -> PyResult<()> { +fn sudachipy(_py: Python, m: &Bound) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/python/src/morpheme.rs b/python/src/morpheme.rs index 1c8cf553..69418d32 100644 --- a/python/src/morpheme.rs +++ b/python/src/morpheme.rs @@ -91,11 +91,11 @@ impl PyMorphemeListWrapper { /// Returns an empty morpheme list with dictionary #[classmethod] #[pyo3(text_signature = "(dict: sudachipy.Dictionary) -> sudachipy.MorphemeList")] - fn empty(_cls: &PyType, py: Python, dict: &PyDictionary) -> PyResult { - let cat = PyModule::import(py, "builtins")?.getattr("DeprecationWarning")?; - PyErr::warn( + fn empty(_cls: &Bound, py: Python, dict: &PyDictionary) -> PyResult { + let cat = PyModule::import_bound(py, "builtins")?.getattr("DeprecationWarning")?; + PyErr::warn_bound( py, - cat, + &cat, "Use Tokenizer.tokenize(\"\") if you need an empty MorphemeList.", 1, )?; @@ -150,7 +150,7 @@ impl PyMorphemeListWrapper { }) } - fn __str__<'py>(&'py self, py: Python<'py>) -> &PyString { + fn __str__<'py>(&'py self, py: Python<'py>) -> Bound<'py, PyString> { // do a simple tokenization __str__ let list = self.internal(py); let mut result = String::with_capacity(list.surface().len() * 2); @@ -161,10 +161,10 @@ impl PyMorphemeListWrapper { result.push_str(" "); } } - PyString::new(py, result.as_str()) + PyString::new_bound(py, result.as_str()) } - fn __repr__(slf: Py, py: Python) -> PyResult<&PyString> { + fn __repr__(slf: Py, py: Python) -> PyResult> { let self_ref = slf.borrow(py); let list = self_ref.internal(py); let mut result = String::with_capacity(list.surface().len() * 10); @@ -182,7 +182,7 @@ impl PyMorphemeListWrapper { result.push_str(",\n"); } result.push_str("]>"); - Ok(PyString::new(py, result.as_str())) + Ok(PyString::new_bound(py, result.as_str())) } fn __iter__(slf: Py) -> PyMorphemeIter { @@ -292,19 +292,19 @@ impl PyMorpheme { /// Returns the substring of input text corresponding to the morpheme, or a projection if one is configured #[pyo3(text_signature = "($self) -> str")] - fn surface<'py>(&'py self, py: Python<'py>) -> &'py PyString { + fn surface<'py>(&'py self, py: Python<'py>) -> Bound<'py, PyString> { let list = self.list(py); let morph = self.morph(py); match list.projection() { - None => PyString::new(py, morph.surface().deref()), + None => PyString::new_bound(py, morph.surface().deref()), Some(proj) => proj.project(morph.deref(), py), } } /// Returns the substring of input text corresponding to the morpheme regardless the configured projection #[pyo3(text_signature = "($self) -> str")] - fn raw_surface<'py>(&'py self, py: Python<'py>) -> &'py PyString { - PyString::new(py, self.morph(py).surface().deref()) + fn raw_surface<'py>(&'py self, py: Python<'py>) -> Bound<'py, PyString> { + PyString::new_bound(py, self.morph(py).surface().deref()) } /// Returns the part of speech as a six-element tuple. @@ -361,7 +361,7 @@ impl PyMorpheme { fn split<'py>( &'py self, py: Python<'py>, - mode: &PyAny, + mode: &Bound<'py, PyAny>, out: Option>, add_single: Option, ) -> PyResult> { @@ -424,17 +424,17 @@ impl PyMorpheme { /// Returns the list of synonym group ids #[pyo3(text_signature = "($self) -> List[int]")] - fn synonym_group_ids<'py>(&'py self, py: Python<'py>) -> &'py PyList { + fn synonym_group_ids<'py>(&'py self, py: Python<'py>) -> Bound { let mref = self.morph(py); let ids = mref.get_word_info().synonym_group_ids(); - PyList::new(py, ids) + PyList::new_bound(py, ids) } /// Returns the word info #[pyo3(text_signature = "($self) -> sudachipy.WordInfo")] fn get_word_info(&self, py: Python) -> PyResult { - let cat = PyModule::import(py, "builtins")?.getattr("DeprecationWarning")?; - PyErr::warn(py, cat, "Users should not touch the raw WordInfo.", 1)?; + let cat = PyModule::import_bound(py, "builtins")?.getattr("DeprecationWarning")?; + PyErr::warn_bound(py, &cat, "Users should not touch the raw WordInfo.", 1)?; Ok(self.morph(py).get_word_info().clone().into()) } @@ -445,7 +445,7 @@ impl PyMorpheme { m.end_c() - m.begin_c() } - pub fn __str__<'py>(&'py self, py: Python<'py>) -> &'py PyString { + pub fn __str__<'py>(&'py self, py: Python<'py>) -> Bound<'py, PyString> { self.surface(py) } diff --git a/python/src/pos_matcher.rs b/python/src/pos_matcher.rs index 062d0d0c..f0753f4b 100644 --- a/python/src/pos_matcher.rs +++ b/python/src/pos_matcher.rs @@ -36,20 +36,20 @@ impl PyPosMatcher { pub(crate) fn create<'py>( py: Python<'py>, dic: &'py Arc, - data: &'py PyAny, + data: &Bound<'py, PyAny>, ) -> PyResult { if data.is_callable() { Self::create_from_fn(dic, data, py) } else { let iter = data.iter()?; - Self::create_from_iter(dic, iter) + Self::create_from_iter(dic, &iter) } } - fn create_from_fn(dic: &Arc, func: &PyAny, py: Python) -> PyResult { + fn create_from_fn(dic: &Arc, func: &Bound, py: Python) -> PyResult { let mut data = Vec::new(); for (pos_id, pos) in dic.pos.iter().enumerate() { - let args = PyTuple::new(py, &[pos]); + let args = PyTuple::new_bound(py, &[pos]); if func.call1(args)?.downcast::()?.is_true() { data.push(pos_id as u16); } @@ -60,10 +60,11 @@ impl PyPosMatcher { }) } - fn create_from_iter(dic: &Arc, data: &PyIterator) -> PyResult { + fn create_from_iter(dic: &Arc, data: &Bound) -> PyResult { let mut result = Vec::new(); for item in data { - let item = item?.downcast::()?; + let item = item?; + let item = item.downcast::()?; Self::match_pos_elements(&mut result, dic.as_ref(), item)?; } Ok(Self { @@ -72,7 +73,11 @@ impl PyPosMatcher { }) } - fn match_pos_elements(data: &mut Vec, dic: &PyDicData, elem: &PyTuple) -> PyResult<()> { + fn match_pos_elements( + data: &mut Vec, + dic: &PyDicData, + elem: &Bound, + ) -> PyResult<()> { let start_len = data.len(); let elen = elem.len(); @@ -214,7 +219,7 @@ impl PyPosIter { slf } - fn __next__<'py>(&'py mut self, py: Python<'py>) -> Option<&'py PyTuple> { + fn __next__<'py>(&'py mut self, py: Python<'py>) -> Option<&Bound<'py, PyTuple>> { let idx = self.index; self.index += 1; if idx >= self.data.len() { @@ -222,6 +227,6 @@ impl PyPosIter { } let pos_id = self.data[idx]; let pos = &self.dic.pos[pos_id as usize]; - Some(pos.as_ref(py)) + Some(pos.bind(py)) } } diff --git a/python/src/pretokenizer.rs b/python/src/pretokenizer.rs index 303f7645..cd15b1b3 100644 --- a/python/src/pretokenizer.rs +++ b/python/src/pretokenizer.rs @@ -126,13 +126,14 @@ impl PyPretokenizer { /// /// Implementation uses Sudachi to perform the analysis, then uses slice method /// of the passed parameter to create output data - pub fn __call__<'p>( - &'p self, - py: Python<'p>, - index: &'p PyAny, - string: &'p PyAny, - ) -> PyResult<&'p PyAny> { - let input_data = string.str()?.to_str()?; + pub fn __call__<'py>( + &'py self, + py: Python<'py>, + index: &Bound<'py, PyAny>, + string: &Bound<'py, PyAny>, + ) -> PyResult> { + let pystr = string.str()?; + let input_data = pystr.to_str()?; // tokenization itself should work without GIL, we have thread-local tokenizers here py.allow_threads(|| self.tokenizer_cell().borrow_mut().tokenize(input_data))?; // then prepare results with GIL @@ -144,14 +145,14 @@ impl PyPretokenizer { let py_ref = morphs.borrow(py); let morphs = py_ref.internal(py); match self.projection.as_deref() { - None => make_result_for_surface(py, morphs, string), - Some(p) => make_result_for_projection(py, morphs, p), + None => make_result_for_surface(py, morphs, string).map(|bl| bl.into_any()), + Some(p) => make_result_for_projection(py, morphs, p).map(|bl| bl.into_any()), } } Some(h) => { - let mrp: &PyAny = morphs.as_ref(py); - let args = PyTuple::new(py, &[index, string, mrp]); - h.as_ref(py).call1(args) + let mrp: &Bound = morphs.bind(py); + let args = PyTuple::new_bound(py, &[index, string, mrp]); + h.bind(py).call1(args) } } } @@ -160,22 +161,22 @@ impl PyPretokenizer { pub fn pre_tokenize<'py>( self_: Bound<'py, Self>, py: Python<'py>, - data: &'py PyAny, - ) -> PyResult<&'py PyAny> { - data.call_method1("split", PyTuple::new(py, [self_])) + data: &Bound<'py, PyAny>, + ) -> PyResult> { + data.call_method1("split", PyTuple::new_bound(py, [self_])) } } fn make_result_for_surface<'py>( py: Python<'py>, morphs: &PyMorphemeList, - string: &'py PyAny, -) -> PyResult<&'py PyAny> { - let result = PyList::empty(py); + string: &Bound<'py, PyAny>, +) -> PyResult> { + let result = PyList::empty_bound(py); for idx in 0..morphs.len() { let node = morphs.get(idx); - let slice = PySlice::new(py, node.begin_c() as isize, node.end_c() as isize, 1); - let args = PyTuple::new(py, [slice]); + let slice = PySlice::new_bound(py, node.begin_c() as isize, node.end_c() as isize, 1); + let args = PyTuple::new_bound(py, [slice]); let substring = string.call_method1(intern!(py, "slice"), args)?; result.append(substring)?; } @@ -186,20 +187,20 @@ fn make_result_for_projection<'py>( py: Python<'py>, morphs: &PyMorphemeList, proj: &dyn MorphemeProjection, -) -> PyResult<&'py PyAny> { - let result = PyList::empty(py); +) -> PyResult> { + let result = PyList::empty_bound(py); let nstring = { static NORMALIZED_STRING: GILOnceCell> = pyo3::sync::GILOnceCell::new(); NORMALIZED_STRING.get_or_try_init(py, || -> PyResult> { - let ns = py.import("tokenizers")?.getattr("NormalizedString")?; - let tpe = ns.downcast::(); - tpe.map(|x| x.into_py(py)).map_err(|e| e.into()) + let ns = py.import_bound("tokenizers")?.getattr("NormalizedString")?; + let tpe = ns.downcast::()?; + Ok(tpe.clone().unbind()) })? }; for idx in 0..morphs.len() { let node = morphs.get(idx); let value = proj.project(&node, py); - let args = PyTuple::new(py, [value]); + let args = PyTuple::new_bound(py, [value]); let substring = nstring.call1(py, args)?; result.append(substring)?; } diff --git a/python/src/projection.rs b/python/src/projection.rs index 8bea35be..8c7dd142 100644 --- a/python/src/projection.rs +++ b/python/src/projection.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Works Applications Co., Ltd. + * Copyright (c) 2023-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ use crate::dictionary::PyDicData; use crate::morpheme::PyProjector; +use pyo3::prelude::*; use pyo3::types::PyString; use pyo3::{PyResult, Python}; use std::convert::TryFrom; @@ -27,14 +28,14 @@ use sudachi::pos::PosMatcher; use sudachi::prelude::Morpheme; pub(crate) trait MorphemeProjection { - fn project<'py>(&self, m: &Morpheme>, py: Python<'py>) -> &'py PyString; + fn project<'py>(&self, m: &Morpheme>, py: Python<'py>) -> Bound<'py, PyString>; } struct Surface {} impl MorphemeProjection for Surface { - fn project<'py>(&self, m: &Morpheme>, py: Python<'py>) -> &'py PyString { - PyString::new(py, m.surface().deref()) + fn project<'py>(&self, m: &Morpheme>, py: Python<'py>) -> Bound<'py, PyString> { + PyString::new_bound(py, m.surface().deref()) } } @@ -43,8 +44,8 @@ struct Mapped Fn(&'a Morpheme<'a, Arc>) -> &'a str> { } impl Fn(&'a Morpheme<'a, Arc>) -> &'a str> MorphemeProjection for Mapped { - fn project<'py>(&self, m: &Morpheme>, py: Python<'py>) -> &'py PyString { - PyString::new(py, (self.func)(m)) + fn project<'py>(&self, m: &Morpheme>, py: Python<'py>) -> Bound<'py, PyString> { + PyString::new_bound(py, (self.func)(m)) } } @@ -60,11 +61,11 @@ impl DictionaryAndSurface { } impl MorphemeProjection for DictionaryAndSurface { - fn project<'py>(&self, m: &Morpheme>, py: Python<'py>) -> &'py PyString { + fn project<'py>(&self, m: &Morpheme>, py: Python<'py>) -> Bound<'py, PyString> { if self.matcher.matches_id(m.part_of_speech_id()) { - PyString::new(py, m.surface().deref()) + PyString::new_bound(py, m.surface().deref()) } else { - PyString::new(py, m.dictionary_form()) + PyString::new_bound(py, m.dictionary_form()) } } } @@ -81,11 +82,11 @@ impl NormalizedAndSurface { } impl MorphemeProjection for NormalizedAndSurface { - fn project<'py>(&self, m: &Morpheme>, py: Python<'py>) -> &'py PyString { + fn project<'py>(&self, m: &Morpheme>, py: Python<'py>) -> Bound<'py, PyString> { if self.matcher.matches_id(m.part_of_speech_id()) { - PyString::new(py, m.surface().deref()) + PyString::new_bound(py, m.surface().deref()) } else { - PyString::new(py, m.normalized_form()) + PyString::new_bound(py, m.normalized_form()) } } } @@ -102,11 +103,11 @@ impl NormalizedNouns { } impl MorphemeProjection for NormalizedNouns { - fn project<'py>(&self, m: &Morpheme>, py: Python<'py>) -> &'py PyString { + fn project<'py>(&self, m: &Morpheme>, py: Python<'py>) -> Bound<'py, PyString> { if self.matcher.matches_id(m.part_of_speech_id()) { - PyString::new(py, m.normalized_form()) + PyString::new_bound(py, m.normalized_form()) } else { - PyString::new(py, m.surface().deref()) + PyString::new_bound(py, m.surface().deref()) } } } @@ -164,7 +165,7 @@ pub(crate) fn resolve_projection(base: PyProjector, fallback: &PyProjector) -> P } pub(crate) fn parse_projection( - value: &PyString, + value: &Bound, dict: &D, ) -> PyResult<(PyProjector, SurfaceProjection)> { value.to_str().and_then(|s| parse_projection_raw(s, dict)) @@ -189,7 +190,7 @@ pub(crate) fn parse_projection_raw( } pub(crate) fn parse_projection_opt( - value: Option<&PyString>, + value: Option<&Bound>, dict: &D, ) -> PyResult<(PyProjector, SurfaceProjection)> { match value { diff --git a/python/src/tokenizer.rs b/python/src/tokenizer.rs index 5f364380..cc8142e7 100644 --- a/python/src/tokenizer.rs +++ b/python/src/tokenizer.rs @@ -136,7 +136,7 @@ impl PyTokenizer { &'py mut self, py: Python<'py>, text: &'py str, - mode: Option<&PyAny>, + mode: Option<&Bound<'py, PyAny>>, logger: Option, out: Option>, ) -> PyResult> { From 73c8cd94e533a932a8ed94a08f523d915bdeabf5 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Mon, 3 Jun 2024 14:47:35 +0900 Subject: [PATCH 4/8] update dependencies --- Cargo.lock | 338 ++++++++++++++++------------------------- python/Cargo.toml | 2 +- sudachi-cli/Cargo.toml | 2 +- sudachi/Cargo.toml | 10 +- 4 files changed, 136 insertions(+), 216 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e9ad71bc..73ca27fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,47 +19,48 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.6.13" +version = "0.6.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb" +checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" dependencies = [ "anstyle", "anstyle-parse", "anstyle-query", "anstyle-wincon", "colorchoice", + "is_terminal_polyfill", "utf8parse", ] [[package]] name = "anstyle" -version = "1.0.6" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" +checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" [[package]] name = "anstyle-parse" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" +checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.0.2" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" +checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391" dependencies = [ "windows-sys", ] [[package]] name = "anstyle-wincon" -version = "3.0.2" +version = "3.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" dependencies = [ "anstyle", "windows-sys", @@ -92,12 +93,6 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - [[package]] name = "bitflags" version = "2.5.0" @@ -106,9 +101,9 @@ checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" [[package]] name = "bumpalo" -version = "3.15.4" +version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "cast" @@ -200,9 +195,9 @@ checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" [[package]] name = "colorchoice" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" [[package]] name = "criterion" @@ -261,9 +256,9 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.19" +version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" [[package]] name = "crunchy" @@ -301,9 +296,9 @@ dependencies = [ [[package]] name = "either" -version = "1.10.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" +checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" [[package]] name = "equivalent" @@ -313,9 +308,9 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" dependencies = [ "libc", "windows-sys", @@ -334,15 +329,15 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.0.2" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "658bd65b1cf4c852a3cc96f18a8ce7b5640f6b703f905c7d74532294c2a63984" +checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" [[package]] name = "half" -version = "2.4.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5eceaaeec696539ddaf7b333340f1af35a5aa87ae3e4f3ead0532f72affab2e" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" dependencies = [ "cfg-if", "crunchy", @@ -350,9 +345,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.14.3" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" [[package]] name = "heck" @@ -374,13 +369,13 @@ checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" [[package]] name = "honggfuzz" -version = "0.5.55" +version = "0.5.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "848e9c511092e0daa0a35a63e8e6e475a3e8f870741448b9f6028d69b142f18e" +checksum = "7c76b6234c13c9ea73946d1379d33186151148e0da231506b964b44f3d023505" dependencies = [ "arbitrary", "lazy_static", - "memmap2 0.5.10", + "memmap2", "rustc_version", ] @@ -411,6 +406,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" + [[package]] name = "itertools" version = "0.10.5" @@ -422,9 +423,9 @@ dependencies = [ [[package]] name = "itertools" -version = "0.12.1" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" dependencies = [ "either", ] @@ -466,9 +467,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.153" +version = "0.2.155" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" [[package]] name = "libloading" @@ -477,14 +478,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" dependencies = [ "cfg-if", - "windows-targets 0.52.4", + "windows-targets", ] [[package]] name = "linux-raw-sys" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" [[package]] name = "lock_api" @@ -503,18 +504,9 @@ checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" [[package]] name = "memchr" -version = "2.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" - -[[package]] -name = "memmap2" -version = "0.5.10" +version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" -dependencies = [ - "libc", -] +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" [[package]] name = "memmap2" @@ -552,9 +544,9 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", ] @@ -573,9 +565,9 @@ checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" [[package]] name = "parking_lot" -version = "0.12.1" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" dependencies = [ "lock_api", "parking_lot_core", @@ -583,22 +575,22 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.9" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ "cfg-if", "libc", "redox_syscall", "smallvec", - "windows-targets 0.48.5", + "windows-targets", ] [[package]] name = "plotters" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45" +checksum = "a15b6eccb8484002195a3e44fe65a4ce8e93a625797a063735536fd59cb01cf3" dependencies = [ "num-traits", "plotters-backend", @@ -609,15 +601,15 @@ dependencies = [ [[package]] name = "plotters-backend" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609" +checksum = "414cec62c6634ae900ea1c56128dfe87cf63e7caece0852ec76aba307cebadb7" [[package]] name = "plotters-svg" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab" +checksum = "81b30686a7d9c3e010b84284bdd26a29f2138574f52f5eb6f794fc0ad924e705" dependencies = [ "plotters-backend", ] @@ -630,18 +622,18 @@ checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" [[package]] name = "proc-macro2" -version = "1.0.79" +version = "1.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" +checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" dependencies = [ "unicode-ident", ] [[package]] name = "pyo3" -version = "0.20.3" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53bdbb96d49157e65d45cc287af5f32ffadd5f4761438b527b055fb0d4bb8233" +checksum = "a5e00b96a521718e08e03b1a622f01c8a8deb50719335de3f60b3b3950f069d8" dependencies = [ "cfg-if", "indoc", @@ -657,9 +649,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.20.3" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "deaa5745de3f5231ce10517a1f5dd97d53e5a2fd77aa6b5842292085831d48d7" +checksum = "7883df5835fafdad87c0d888b266c8ec0f4c9ca48a5bed6bbb592e8dedee1b50" dependencies = [ "once_cell", "target-lexicon", @@ -667,9 +659,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.20.3" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b42531d03e08d4ef1f6e85a2ed422eb678b8cd62b762e53891c05faf0d4afa" +checksum = "01be5843dc60b916ab4dad1dca6d20b9b4e6ddc8e15f50c47fe6d85f1fb97403" dependencies = [ "libc", "pyo3-build-config", @@ -677,9 +669,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.20.3" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7305c720fa01b8055ec95e484a6eca7a83c841267f0dd5280f0c8b8551d2c158" +checksum = "77b34069fc0682e11b31dbd10321cbf94808394c56fd996796ce45217dfac53c" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -689,9 +681,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.20.3" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c7e9b68bb9c3149c5b0cade5d07f953d6d125eb4337723c4ccdb665f1f96185" +checksum = "08260721f32db5e1a5beae69a55553f56b99bd0e1c3e6e0a5e8851a9d0f5a85c" dependencies = [ "heck 0.4.1", "proc-macro2", @@ -702,9 +694,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.35" +version = "1.0.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" dependencies = [ "proc-macro2", ] @@ -731,11 +723,11 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.4.1" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" dependencies = [ - "bitflags 1.3.2", + "bitflags", ] [[package]] @@ -778,11 +770,11 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.32" +version = "0.38.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89" +checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" dependencies = [ - "bitflags 2.5.0", + "bitflags", "errno", "libc", "linux-raw-sys", @@ -791,9 +783,9 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" [[package]] name = "same-file" @@ -812,24 +804,24 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "semver" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" [[package]] name = "serde" -version = "1.0.197" +version = "1.0.203" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.197" +version = "1.0.203" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" dependencies = [ "proc-macro2", "quote", @@ -838,9 +830,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.115" +version = "1.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd" +checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" dependencies = [ "itoa", "ryu", @@ -871,18 +863,18 @@ name = "sudachi" version = "0.6.9-a1" dependencies = [ "aho-corasick", - "bitflags 2.5.0", + "bitflags", "claim", "csv", "default_input_text", "fancy-regex", "indexmap", - "itertools 0.12.1", + "itertools 0.13.0", "join_katakana_oov", "join_numeric", "lazy_static", "libloading", - "memmap2 0.9.4", + "memmap2", "nom", "regex", "serde", @@ -900,7 +892,7 @@ version = "0.6.9-a1" dependencies = [ "cfg-if", "clap", - "memmap2 0.9.4", + "memmap2", "sudachi", ] @@ -926,9 +918,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.55" +version = "2.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "002a1b3dbf967edfafc32655d0f377ab0bb7b994aa1d32c8cc7e9b8bf3ebb8f0" +checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" dependencies = [ "proc-macro2", "quote", @@ -955,18 +947,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.58" +version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" +checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.58" +version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" +checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" dependencies = [ "proc-macro2", "quote", @@ -1109,159 +1101,87 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - [[package]] name = "winapi-util" -version = "0.1.6" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" dependencies = [ - "winapi", + "windows-sys", ] -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - [[package]] name = "windows-sys" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.4", -] - -[[package]] -name = "windows-targets" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" -dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", + "windows-targets", ] [[package]] name = "windows-targets" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" dependencies = [ - "windows_aarch64_gnullvm 0.52.4", - "windows_aarch64_msvc 0.52.4", - "windows_i686_gnu 0.52.4", - "windows_i686_msvc 0.52.4", - "windows_x86_64_gnu 0.52.4", - "windows_x86_64_gnullvm 0.52.4", - "windows_x86_64_msvc 0.52.4", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", ] [[package]] name = "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" [[package]] name = "windows_aarch64_msvc" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" [[package]] name = "windows_i686_gnu" -version = "0.48.5" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" [[package]] -name = "windows_i686_gnu" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" - -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" +name = "windows_i686_gnullvm" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" [[package]] name = "windows_i686_msvc" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" [[package]] name = "windows_x86_64_gnu" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" [[package]] name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" [[package]] name = "windows_x86_64_msvc" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" [[package]] name = "yada" diff --git a/python/Cargo.toml b/python/Cargo.toml index 6e564c2e..53cd97e5 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -16,8 +16,8 @@ crate-type = ["cdylib"] [dependencies] pyo3 = { version = "0.21", features = ["extension-module"] } -thread_local = "1.1" # Apache 2.0/MIT scopeguard = "1" # Apache 2.0/MIT +thread_local = "1.1" # Apache 2.0/MIT [dependencies.sudachi] path = "../sudachi" diff --git a/sudachi-cli/Cargo.toml b/sudachi-cli/Cargo.toml index c5070424..14aeebb5 100644 --- a/sudachi-cli/Cargo.toml +++ b/sudachi-cli/Cargo.toml @@ -14,8 +14,8 @@ license.workspace = true sudachi = { path = "../sudachi" } cfg-if = "1.0.0" # MIT/Apache 2.0 -memmap2 = "0.9" # MIT/Apache 2.0 clap = { version = "4.5", features = ["derive"] } # MIT/Apache 2.0 +memmap2 = "0.9" # MIT/Apache 2.0 [[bin]] name = "sudachi" diff --git a/sudachi/Cargo.toml b/sudachi/Cargo.toml index 76b4cfe4..76e5f72c 100644 --- a/sudachi/Cargo.toml +++ b/sudachi/Cargo.toml @@ -12,15 +12,15 @@ license.workspace = true [dependencies] # this should be sorted aho-corasick = "1" # MIT/Apache 2.0 -bitflags = "2.0" # MIT/Apache 2.0 -csv = "1.1" # Unilicense/MIT +bitflags = "2.5" # MIT/Apache 2.0 +csv = "1.3" # Unilicense/MIT fancy-regex = "0.13" # MIT -indexmap = "2.0" # MIT/Apache 2.0 -itertools = "0.12" # MIT/Apachie 2.0 +indexmap = "2.2" # MIT/Apache 2.0 +itertools = "0.13" # MIT/Apachie 2.0 lazy_static = "1.4" # MIT/Apache 2.0 libloading = "0.8" # ISC (MIT-compatible) -nom = "7" # MIT memmap2 = "0.9" # MIT/Apache 2.0 +nom = "7" # MIT regex = "1" # MIT/Apache 2.0 serde = { version = "1.0", features = ["derive"] } # MIT/Apache 2.0 serde_json = "1.0" # MIT/Apache 2.0 From 4345772882bd4a87511ac136e8906b38c77581c1 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Fri, 7 Jun 2024 09:09:16 +0900 Subject: [PATCH 5/8] use pyo3::intern macro inside pretokenizer --- python/src/dictionary.rs | 10 +++++++--- python/src/pretokenizer.rs | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/python/src/dictionary.rs b/python/src/dictionary.rs index 251267ab..e9cbf1ed 100644 --- a/python/src/dictionary.rs +++ b/python/src/dictionary.rs @@ -461,20 +461,24 @@ fn read_config(config_opt: &Bound) -> PyResult { } pub(crate) fn read_default_config(py: Python) -> PyResult { - let path = PyModule::import_bound(py, "sudachipy")?.getattr("_DEFAULT_SETTINGFILE")?; + let path = py + .import_bound("sudachipy")? + .getattr("_DEFAULT_SETTINGFILE")?; let path = path.downcast::()?.to_str()?; let path = PathBuf::from(path); wrap_ctx(ConfigBuilder::from_opt_file(Some(&path)), &path) } pub(crate) fn get_default_resource_dir(py: Python) -> PyResult { - let path = PyModule::import_bound(py, "sudachipy")?.getattr("_DEFAULT_RESOURCEDIR")?; + let path = py + .import_bound("sudachipy")? + .getattr("_DEFAULT_RESOURCEDIR")?; let path = path.downcast::()?.to_str()?; Ok(PathBuf::from(path)) } fn find_dict_path(py: Python, dict_type: &str) -> PyResult { - let pyfunc = PyModule::import_bound(py, "sudachipy")?.getattr("_find_dict_path")?; + let pyfunc = py.import_bound("sudachipy")?.getattr("_find_dict_path")?; let path = pyfunc.call1((dict_type,))?; let path = path.downcast::()?.to_str()?; Ok(PathBuf::from(path)) diff --git a/python/src/pretokenizer.rs b/python/src/pretokenizer.rs index cd15b1b3..20e5cf65 100644 --- a/python/src/pretokenizer.rs +++ b/python/src/pretokenizer.rs @@ -163,7 +163,7 @@ impl PyPretokenizer { py: Python<'py>, data: &Bound<'py, PyAny>, ) -> PyResult> { - data.call_method1("split", PyTuple::new_bound(py, [self_])) + data.call_method1(intern!(py, "split"), PyTuple::new_bound(py, [self_])) } } @@ -190,7 +190,7 @@ fn make_result_for_projection<'py>( ) -> PyResult> { let result = PyList::empty_bound(py); let nstring = { - static NORMALIZED_STRING: GILOnceCell> = pyo3::sync::GILOnceCell::new(); + static NORMALIZED_STRING: GILOnceCell> = GILOnceCell::new(); NORMALIZED_STRING.get_or_try_init(py, || -> PyResult> { let ns = py.import_bound("tokenizers")?.getattr("NormalizedString")?; let tpe = ns.downcast::()?; From 3fd3e52e268b6906fd9eda2e6ad8c84abc44979e Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Mon, 8 Jul 2024 11:04:18 +0900 Subject: [PATCH 6/8] use get_all for wordinfo --- python/src/word_info.rs | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/python/src/word_info.rs b/python/src/word_info.rs index 4f74d0f1..eb51a28d 100644 --- a/python/src/word_info.rs +++ b/python/src/word_info.rs @@ -18,29 +18,18 @@ use pyo3::prelude::*; use sudachi::dic::lexicon::word_infos::{WordInfo, WordInfoData}; -#[pyclass(module = "sudachipy.wordinfo", name = "WordInfo")] +#[pyclass(module = "sudachipy.wordinfo", name = "WordInfo", get_all)] pub struct PyWordInfo { - #[pyo3(get)] surface: String, - #[pyo3(get)] head_word_length: u16, - #[pyo3(get)] pos_id: u16, - #[pyo3(get)] normalized_form: String, - #[pyo3(get)] dictionary_form_word_id: i32, - #[pyo3(get)] dictionary_form: String, - #[pyo3(get)] reading_form: String, - #[pyo3(get)] a_unit_split: Vec, - #[pyo3(get)] b_unit_split: Vec, - #[pyo3(get)] word_structure: Vec, - #[pyo3(get)] synonym_group_ids: Vec, } From 75cda40da26e0917c55a2f3a9e78421b9b6f9399 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Thu, 7 Nov 2024 17:24:02 +0900 Subject: [PATCH 7/8] add note to the help of pycli -d option and warn on its use --- python/py_src/sudachipy/command_line.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/python/py_src/sudachipy/command_line.py b/python/py_src/sudachipy/command_line.py index 07f59c19..e5cd87d1 100644 --- a/python/py_src/sudachipy/command_line.py +++ b/python/py_src/sudachipy/command_line.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 Works Applications Co., Ltd. +# Copyright (c) 2019-2024 Works Applications Co., Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,6 +24,13 @@ from . import sudachipy +logging.basicConfig( + style="{", + format='{levelname} {asctime} [{module}:{funcName}:{lineno}] {message}', + datefmt="%m-%d-%Y %H:%M:%S", +) + + def _set_default_subparser(self, name, args=None): """ copy and modify code from https://bitbucket.org/ruamel/std.argparse @@ -97,14 +104,13 @@ def _command_tokenize(args, print_usage): if args.fpath_out: output = open(args.fpath_out, "w", encoding="utf-8") - stdout_logger = logging.getLogger(__name__) - handler = logging.StreamHandler(sys.stdout) - handler.setLevel(logging.DEBUG) - stdout_logger.addHandler(handler) - stdout_logger.setLevel(logging.DEBUG) - stdout_logger.propagate = False + logger = logging.getLogger(__name__) + logger.setLevel(logging.DEBUG) print_all = args.a + debug = args.d + if debug: + logger.warning("-d option is not implemented in python.") try: dict_ = Dictionary(config_path=args.fpath_setting, @@ -217,7 +223,7 @@ def main(): parser_tk.add_argument("-a", action="store_true", help="print all of the fields") parser_tk.add_argument("-d", action="store_true", - help="print the debug information") + help="print the debug information (not implemented yet)") parser_tk.add_argument("-v", "--version", action="store_true", dest="version", help="print sudachipy version") parser_tk.add_argument("in_files", metavar="file", From 1cad6c95f463e83c6a774e145ff8c55b93ad580c Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Thu, 7 Nov 2024 17:24:21 +0900 Subject: [PATCH 8/8] rename pos_list and fmt --- python/py_src/sudachipy/command_line.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/python/py_src/sudachipy/command_line.py b/python/py_src/sudachipy/command_line.py index e5cd87d1..e7574bf1 100644 --- a/python/py_src/sudachipy/command_line.py +++ b/python/py_src/sudachipy/command_line.py @@ -58,7 +58,7 @@ def _set_default_subparser(self, name, args=None): argparse.ArgumentParser.set_default_subparser = _set_default_subparser -def run(tokenizer, input_, output, print_all, morphs, is_stdout): +def run(tokenizer, input_, output, print_all, pos_list, is_stdout): # get an empty MorphemeList for memory reuse mlist = tokenizer.tokenize("") for line in input_: @@ -67,7 +67,7 @@ def run(tokenizer, input_, output, print_all, morphs, is_stdout): for m in tokenizer.tokenize(line, out=mlist): list_info = [ m.surface(), - morphs[m.part_of_speech_id()], + pos_list[m.part_of_speech_id()], m.normalized_form()] if print_all: list_info += [ @@ -116,14 +116,15 @@ def _command_tokenize(args, print_usage): dict_ = Dictionary(config_path=args.fpath_setting, dict_type=args.system_dict_type) # empty matcher - get all POS tags - all_morphs = dict_.pos_matcher([()]) + all_pos_matcher = dict_.pos_matcher([()]) # precompute output POS strings - morphs = [",".join(ms) for ms in all_morphs] + pos_list = [",".join(ms) for ms in all_pos_matcher] tokenizer_obj = dict_.create(mode=args.mode) input_ = fileinput.input( args.in_files, openhook=fileinput.hook_encoded("utf-8")) - run(tokenizer_obj, input_, output, print_all, morphs, is_stdout=args.fpath_out is None) + run(tokenizer_obj, input_, output, print_all, + pos_list, is_stdout=args.fpath_out is None) finally: if args.fpath_out: output.close() @@ -145,7 +146,8 @@ def _command_build(args, print_usage): out_file = Path(args.out_file) if out_file.exists(): - print("File", out_file, "already exists, refusing to overwrite it", file=sys.stderr) + print("File", out_file, + "already exists, refusing to overwrite it", file=sys.stderr) return description = args.description or "" @@ -167,7 +169,8 @@ def _command_build(args, print_usage): def _command_user_build(args, print_usage): system = Path(args.system_dic) if not system.exists(): - print("System dictionary file", system, "does not exist", file=sys.stderr) + print("System dictionary file", system, + "does not exist", file=sys.stderr) return print_usage() in_files = [] @@ -180,7 +183,8 @@ def _command_user_build(args, print_usage): out_file = Path(args.out_file) if out_file.exists(): - print("File", out_file, "already exists, refusing to overwrite it", file=sys.stderr) + print("File", out_file, + "already exists, refusing to overwrite it", file=sys.stderr) return description = args.description or ""