From c943da8d452b28ff291f4671d1cab50b0078d3e9 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Mon, 10 Jun 2024 09:41:05 +0900 Subject: [PATCH 1/2] use crate::errors to send err --- python/src/build.rs | 4 +-- python/src/dictionary.rs | 67 +++++++++++++++++--------------------- python/src/errors.rs | 8 ++++- python/src/morpheme.rs | 31 +++++++----------- python/src/pos_matcher.rs | 4 +-- python/src/pretokenizer.rs | 4 +-- python/src/projection.rs | 18 ++++------ python/src/tokenizer.rs | 30 ++++++++--------- 8 files changed, 74 insertions(+), 92 deletions(-) diff --git a/python/src/build.rs b/python/src/build.rs index a6005b26..6b3bd0ca 100644 --- a/python/src/build.rs +++ b/python/src/build.rs @@ -142,8 +142,8 @@ fn as_data_source<'p>(py: Python<'p>, data: &'p PyAny) -> PyResult()?; Ok(DataSource::Data(data.as_bytes())) } else { - Err(pyo3::exceptions::PyValueError::new_err(format!( - "data source should can be only Path, bytes or str, was {}: {}", + errors::wrap(Err(format!( + "data source should be Path, bytes or str, was {}: {}", data, data.get_type() ))) diff --git a/python/src/dictionary.rs b/python/src/dictionary.rs index bc333c8e..802e23c2 100644 --- a/python/src/dictionary.rs +++ b/python/src/dictionary.rs @@ -24,7 +24,6 @@ use std::str::FromStr; use std::sync::Arc; use sudachi::analysis::Mode; -use crate::errors::{wrap, wrap_ctx, SudachiError as SudachiErr}; use sudachi::analysis::stateless_tokenizer::DictionaryAccess; use sudachi::config::{Config, ConfigBuilder, SurfaceProjection}; use sudachi::dic::dictionary::JapaneseDictionary; @@ -35,6 +34,7 @@ use sudachi::plugin::input_text::InputTextPlugin; use sudachi::plugin::oov::OovProviderPlugin; use sudachi::plugin::path_rewrite::PathRewritePlugin; +use crate::errors; use crate::morpheme::{PyMorphemeListWrapper, PyProjector}; use crate::pos_matcher::PyPosMatcher; use crate::pretokenizer::PyPretokenizer; @@ -110,7 +110,7 @@ impl PyDictionary { config: Option<&PyAny>, ) -> PyResult { if config.is_some() && 
config_path.is_some() { - return Err(SudachiErr::new_err("Both config and config_path options were specified at the same time, use one of them")); + return errors::wrap(Err("Both config and config_path options were specified at the same time, use one of them")); } let default_config = read_default_config(py)?; @@ -131,13 +131,10 @@ impl PyDictionary { }; if dict_type.is_some() { - let cat = PyModule::import(py, "builtins")?.getattr("DeprecationWarning")?; - PyErr::warn( + errors::warn_deprecation( py, - cat, "Parameter dict_type of Dictionary() is deprecated, use dict instead", - 1, - )?; + )? } let config_builder = match resource_dir { @@ -177,12 +174,10 @@ impl PyDictionary { } } - let jdic = JapaneseDictionary::from_cfg(&config).map_err(|e| { - SudachiErr::new_err(format!( - "Error while constructing dictionary: {}", - e.to_string() - )) - })?; + let jdic = errors::wrap_ctx( + JapaneseDictionary::from_cfg(&config), + "Error while constructing dictionary", + )?; let pos_data = jdic .grammar() @@ -238,7 +233,7 @@ impl PyDictionary { let mut required_fields = self.config.projection.required_subset(); let dict = self.dictionary.as_ref().unwrap().clone(); let projobj = if let Some(s) = projection { - let proj = wrap(SurfaceProjection::try_from(s.to_str()?))?; + let proj = errors::wrap(SurfaceProjection::try_from(s.to_str()?))?; required_fields = proj.required_subset(); Some(morpheme_projection(proj, &dict)) } else { @@ -301,7 +296,7 @@ impl PyDictionary { let subset = parse_field_subset(fields)?; if let Some(h) = handler.as_ref() { if !h.as_ref(py).is_callable() { - return Err(SudachiErr::new_err("handler must be callable")); + return errors::wrap(Err("handler must be callable")); } } @@ -357,12 +352,12 @@ impl PyDictionary { // this needs to be a variable let mut borrow = l.try_borrow_mut(); let out_list = match borrow { - Err(_) => return Err(SudachiErr::new_err("out was used twice at the same time")), Ok(ref mut ms) => ms.internal_mut(py), + Err(_) => return 
errors::wrap(Err("out was used twice at the same time")), }; out_list.clear(); - wrap_ctx(out_list.lookup(surface, InfoSubset::all()), surface)?; + errors::wrap_ctx(out_list.lookup(surface, InfoSubset::all()), surface)?; Ok(l) } @@ -380,7 +375,7 @@ impl PyDictionary { } fn __repr__(&self) -> PyResult { - wrap(config_repr(&self.config)) + errors::wrap(config_repr(&self.config)) } } @@ -413,18 +408,21 @@ fn config_repr(cfg: &Config) -> Result { pub(crate) fn extract_mode<'py>(py: Python<'py>, mode: &'py PyAny) -> PyResult { if mode.is_instance_of::() { - let mode = mode.str()?.to_str()?; - Mode::from_str(mode).map_err(|e| SudachiErr::new_err(e).into()) + errors::wrap(Mode::from_str(mode.str()?.to_str()?)) } else if mode.is_instance_of::() { let mode = mode.extract::()?; Ok(Mode::from(mode)) } else { - Err(SudachiErr::new_err(("unknown mode", mode.into_py(py)))) + errors::wrap(Err(format!( + "mode should be sudachipy.SplitMode or str, was {}: {}", + mode, + mode.get_type() + ))) } } fn read_config_from_fs(path: Option<&Path>) -> PyResult { - wrap(ConfigBuilder::from_opt_file(path)) + errors::wrap(ConfigBuilder::from_opt_file(path)) } fn read_config(config_opt: &PyAny) -> PyResult { @@ -433,13 +431,13 @@ fn read_config(config_opt: &PyAny) -> PyResult { // looks like json if config_str.starts_with("{") && config_str.ends_with("}") { let result = ConfigBuilder::from_bytes(config_str.as_bytes()); - return wrap(result); + return errors::wrap(result); } let p = Path::new(config_str); if p.exists() && p.is_file() { return read_config_from_fs(Some(p)); } - return Err(SudachiErr::new_err(format!( + return errors::wrap(Err(format!( "config file [{}] do not exist or is not a file", p.display() ))); @@ -450,9 +448,10 @@ fn read_config(config_opt: &PyAny) -> PyResult { let cfg_as_str = config_opt.call_method0("as_jsons")?; return read_config(cfg_as_str); } - Err(SudachiErr::new_err(( - format!("passed config was not a string, json object or sudachipy.config.Config object"), - 
config_opt.into_py(py), + errors::wrap(Err(format!( + "config should be sudachipy.Config or str which represents a file path or json obj, was {}: {}", + config_opt, + config_opt.get_type() ))) } @@ -460,7 +459,7 @@ pub(crate) fn read_default_config(py: Python) -> PyResult { let path = PyModule::import(py, "sudachipy")?.getattr("_DEFAULT_SETTINGFILE")?; let path = path.downcast::()?.to_str()?; let path = PathBuf::from(path); - wrap_ctx(ConfigBuilder::from_opt_file(Some(&path)), &path) + errors::wrap_ctx(ConfigBuilder::from_opt_file(Some(&path)), &path) } pub(crate) fn get_default_resource_dir(py: Python) -> PyResult { @@ -484,10 +483,7 @@ fn locate_system_dict(py: Python, path: &Path) -> PyResult { } match path.to_str() { Some(name @ ("small" | "core" | "full")) => find_dict_path(py, name), - _ => Err(SudachiErr::new_err(format!( - "invalid dictionary path {:?}", - path - ))), + _ => errors::wrap(Err(format!("invalid dictionary path {:?}", path))), } } @@ -509,12 +505,7 @@ fn parse_field_subset(data: Option<&PySet>) -> PyResult { "split_a" => InfoSubset::SPLIT_A, "split_b" => InfoSubset::SPLIT_B, "synonym_group_id" => InfoSubset::SYNONYM_GROUP_ID, - x => { - return Err(SudachiErr::new_err(format!( - "Invalid WordInfo field name {}", - x - ))) - } + x => return errors::wrap(Err(format!("Invalid WordInfo field name {}", x))), }; } Ok(subset) diff --git a/python/src/errors.rs b/python/src/errors.rs index 04827fd4..da72601a 100644 --- a/python/src/errors.rs +++ b/python/src/errors.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,8 @@ * limitations under the License. 
*/ +use pyo3::exceptions::PyDeprecationWarning; +use pyo3::prelude::*; use pyo3::{import_exception, PyResult}; use std::fmt::{Debug, Display}; @@ -33,3 +35,7 @@ pub fn wrap_ctx(v: Result, ctx: &C) -> P Err(e) => Err(SudachiError::new_err(format!("{:?}: {}", ctx, e))), } } + +pub fn warn_deprecation(py: Python<'_>, msg: &str) -> PyResult<()> { + PyErr::warn(py, &py.get_type::<PyDeprecationWarning>(), msg, 1) +} diff --git a/python/src/morpheme.rs b/python/src/morpheme.rs index ad3929dd..fd097336 100644 --- a/python/src/morpheme.rs +++ b/python/src/morpheme.rs @@ -18,13 +18,14 @@ use std::fmt::Write; use std::ops::Deref; use std::sync::Arc; -use pyo3::exceptions::{PyException, PyIndexError}; +use pyo3::exceptions::PyIndexError; use pyo3::prelude::*; use pyo3::types::{PyList, PyString, PyTuple, PyType}; use sudachi::prelude::{Morpheme, MorphemeList}; use crate::dictionary::{extract_mode, PyDicData, PyDictionary}; +use crate::errors; use crate::projection::MorphemeProjection; use crate::word_info::PyWordInfo; @@ -92,12 +93,9 @@ impl PyMorphemeListWrapper { #[classmethod] #[pyo3(text_signature = "(dict: sudachipy.Dictionary) -> sudachipy.MorphemeList")] fn empty(_cls: &PyType, py: Python, dict: &PyDictionary) -> PyResult { - let cat = PyModule::import(py, "builtins")?.getattr("DeprecationWarning")?; - PyErr::warn( + errors::warn_deprecation( py, - cat, "Use Tokenizer.tokenize(\"\") if you need an empty MorphemeList.", - 1, )?; let cloned = dict.dictionary.as_ref().unwrap().clone(); @@ -176,9 +174,7 @@ impl PyMorphemeListWrapper { list: slf.clone_ref(py), index: i, }; - pymorph - .write_repr(py, &mut result) - .map_err(|_| PyException::new_err("format failed"))?; + errors::wrap_ctx(pymorph.write_repr(py, &mut result), "format failed")?; result.push_str(",\n"); } result.push_str("]>"); @@ -380,16 +376,14 @@ impl PyMorpheme { let mut borrow = out_cell.try_borrow_mut(); let out_ref = match borrow { Ok(ref mut v) => v.internal_mut(py), - Err(_) => return Err(PyException::new_err("out was used
twice")), + Err(_) => return errors::wrap(Err("out was used twice at the same time")), }; out_ref.clear(); - let splitted = list - .internal(py) - .split_into(mode, self.index, out_ref) - .map_err(|e| { - PyException::new_err(format!("Error while splitting morpheme: {}", e.to_string())) - })?; + let splitted = errors::wrap_ctx( + list.internal(py).split_into(mode, self.index, out_ref), + "Error while splitting morpheme", + )?; if add_single.unwrap_or(true) && !splitted { list.internal(py) @@ -433,9 +427,7 @@ impl PyMorpheme { /// Returns the word info #[pyo3(text_signature = "($self) -> sudachipy.WordInfo")] fn get_word_info(&self, py: Python) -> PyResult { - let cat = PyModule::import(py, "builtins")?.getattr("DeprecationWarning")?; - PyErr::warn(py, cat, "Users should not touch the raw WordInfo.", 1)?; - + errors::warn_deprecation(py, "Users should not touch the raw WordInfo.")?; Ok(self.morph(py).get_word_info().clone().into()) } @@ -451,8 +443,7 @@ impl PyMorpheme { pub fn __repr__<'py>(&'py self, py: Python<'py>) -> PyResult { let mut result = String::new(); - self.write_repr(py, &mut result) - .map_err(|_| PyException::new_err("failed to format repr"))?; + errors::wrap_ctx(self.write_repr(py, &mut result), "failed to format repr")?; Ok(result) } } diff --git a/python/src/pos_matcher.rs b/python/src/pos_matcher.rs index 7c6a884d..f0a53b64 100644 --- a/python/src/pos_matcher.rs +++ b/python/src/pos_matcher.rs @@ -16,7 +16,6 @@ use std::sync::Arc; -use pyo3::exceptions::PyException; use pyo3::prelude::*; use pyo3::types::{PyBool, PyIterator, PyTuple}; @@ -24,6 +23,7 @@ use sudachi::analysis::stateless_tokenizer::DictionaryAccess; use sudachi::pos::PosMatcher; use crate::dictionary::PyDicData; +use crate::errors; use crate::morpheme::PyMorpheme; #[pyclass(name = "PosMatcher", module = "sudachipy")] @@ -106,7 +106,7 @@ impl PyPosMatcher { } if start_len == data.len() { - Err(PyException::new_err(format!( + errors::wrap(Err(format!( "POS {:?} did not match any 
elements", elem.repr()? ))) diff --git a/python/src/pretokenizer.rs b/python/src/pretokenizer.rs index 755f040b..49cf1a29 100644 --- a/python/src/pretokenizer.rs +++ b/python/src/pretokenizer.rs @@ -15,7 +15,7 @@ */ use crate::dictionary::PyDicData; -use crate::errors::wrap; +use crate::errors; use crate::morpheme::{PyMorphemeList, PyMorphemeListWrapper, PyProjector}; use pyo3::intern; use pyo3::prelude::*; @@ -49,7 +49,7 @@ impl PerThreadPreTokenizer { pub fn tokenize(&mut self, data: &str) -> PyResult<()> { self.tokenizer.reset().push_str(data); - wrap(self.tokenizer.do_tokenize())?; + errors::wrap(self.tokenizer.do_tokenize())?; Ok(()) } diff --git a/python/src/projection.rs b/python/src/projection.rs index 8bea35be..7739c7bc 100644 --- a/python/src/projection.rs +++ b/python/src/projection.rs @@ -15,6 +15,7 @@ */ use crate::dictionary::PyDicData; +use crate::errors; use crate::morpheme::PyProjector; use pyo3::types::PyString; use pyo3::{PyResult, Python}; @@ -174,18 +175,13 @@ pub(crate) fn parse_projection_raw( value: &str, dict: &D, ) -> PyResult<(PyProjector, SurfaceProjection)> { - match SurfaceProjection::try_from(value) { - Ok(v) => { - if v == SurfaceProjection::Surface { - Ok((None, SurfaceProjection::Surface)) - } else { - Ok((Some(morpheme_projection(v, dict)), v)) - } + errors::wrap_ctx(SurfaceProjection::try_from(value).map(|v| { + if v == SurfaceProjection::Surface { + (None, SurfaceProjection::Surface) + } else { + (Some(morpheme_projection(v, dict)), v) } - Err(e) => Err(crate::errors::SudachiError::new_err(format!( - "invalid surface projection: {e:?}" - ))), - } + }), "invalid surface projection") } pub(crate) fn parse_projection_opt( diff --git a/python/src/tokenizer.rs b/python/src/tokenizer.rs index 558d02cb..18ec0a63 100644 --- a/python/src/tokenizer.rs +++ b/python/src/tokenizer.rs @@ -26,7 +26,7 @@ use sudachi::dic::subset::InfoSubset; use sudachi::prelude::*; use crate::dictionary::{extract_mode, PyDicData}; -use 
crate::errors::SudachiError as SudachiPyErr; +use crate::errors; use crate::morpheme::{PyMorphemeListWrapper, PyProjector}; /// Unit to split text @@ -74,11 +74,7 @@ impl PySplitMode { Some(m) => m, None => return Ok(PySplitMode::C), }; - - match Mode::from_str(mode) { - Ok(m) => Ok(m.into()), - Err(e) => Err(SudachiPyErr::new_err(e.to_string())), - } + errors::wrap(Mode::from_str(mode).map(|m| m.into())) } } @@ -151,12 +147,13 @@ impl PyTokenizer { }); // analysis can be done without GIL - let err = py.allow_threads(|| { - tokenizer.reset().push_str(text); - tokenizer.do_tokenize() - }); - - err.map_err(|e| SudachiPyErr::new_err(format!("Tokenization error: {}", e.to_string())))?; + errors::wrap_ctx( + py.allow_threads(|| { + tokenizer.reset().push_str(text); + tokenizer.do_tokenize() + }), + "Error during tokenization", + )?; let out_list = match out { None => { @@ -172,12 +169,13 @@ impl PyTokenizer { let mut borrow = out_list.try_borrow_mut(); let morphemes = match borrow { Ok(ref mut ms) => ms.internal_mut(py), - Err(e) => return Err(SudachiPyErr::new_err("out was used twice at the same time")), + Err(_) => return errors::wrap(Err("out was used twice at the same time")), }; - morphemes - .collect_results(tokenizer.deref_mut()) - .map_err(|e| SudachiPyErr::new_err(format!("Tokenization error: {}", e.to_string())))?; + errors::wrap_ctx( + morphemes.collect_results(tokenizer.deref_mut()), + "Error during tokenization", + )?; Ok(out_list) } From a4a47e21c6b27ffd39cfa2dbebc4d51f85b1c0e3 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Fri, 25 Oct 2024 16:20:38 +0900 Subject: [PATCH 2/2] cargo fmt --- python/src/projection.rs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/python/src/projection.rs b/python/src/projection.rs index 7739c7bc..9140e747 100644 --- a/python/src/projection.rs +++ b/python/src/projection.rs @@ -175,13 +175,16 @@ pub(crate) fn parse_projection_raw( value: &str, dict: &D, ) -> PyResult<(PyProjector, 
SurfaceProjection)> { - errors::wrap_ctx(SurfaceProjection::try_from(value).map(|v| { - if v == SurfaceProjection::Surface { - (None, SurfaceProjection::Surface) - } else { - (Some(morpheme_projection(v, dict)), v) - } - }), "invalid surface projection") + errors::wrap_ctx( + SurfaceProjection::try_from(value).map(|v| { + if v == SurfaceProjection::Surface { + (None, SurfaceProjection::Surface) + } else { + (Some(morpheme_projection(v, dict)), v) + } + }), + "invalid surface projection", + ) } pub(crate) fn parse_projection_opt(