Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use crate::errors to send error #273

Merged
merged 3 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions python/src/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,8 @@ fn as_data_source<'py>(
original_obj.downcast::<PyBytes>()?.as_bytes(),
))
} else {
Err(pyo3::exceptions::PyValueError::new_err(format!(
"data source should can be only Path, bytes or str, was {}: {}",
errors::wrap(Err(format!(
"data source should be only Path, bytes or str, was {}: {}",
original_obj,
original_obj.get_type()
)))
Expand Down
73 changes: 32 additions & 41 deletions python/src/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ use std::str::FromStr;
use std::sync::Arc;
use sudachi::analysis::Mode;

use crate::errors::{wrap, wrap_ctx, SudachiError as SudachiErr};
use sudachi::analysis::stateless_tokenizer::DictionaryAccess;
use sudachi::config::{Config, ConfigBuilder, SurfaceProjection};
use sudachi::dic::dictionary::JapaneseDictionary;
Expand All @@ -35,6 +34,7 @@ use sudachi::plugin::input_text::InputTextPlugin;
use sudachi::plugin::oov::OovProviderPlugin;
use sudachi::plugin::path_rewrite::PathRewritePlugin;

use crate::errors;
use crate::morpheme::{PyMorphemeListWrapper, PyProjector};
use crate::pos_matcher::PyPosMatcher;
use crate::pretokenizer::PyPretokenizer;
Expand Down Expand Up @@ -137,7 +137,7 @@ impl PyDictionary {
config: Option<&Bound<PyAny>>,
) -> PyResult<Self> {
if config.is_some() && config_path.is_some() {
return Err(SudachiErr::new_err("Both config and config_path options were specified at the same time, use one of them"));
return errors::wrap(Err("Both config and config_path options were specified at the same time, use one of them"));
}

let default_config = read_default_config(py)?;
Expand All @@ -158,13 +158,10 @@ impl PyDictionary {
};

if dict_type.is_some() {
let cat = PyModule::import_bound(py, "builtins")?.getattr("DeprecationWarning")?;
PyErr::warn_bound(
errors::warn_deprecation(
py,
&cat,
"Parameter dict_type of Dictionary() is deprecated, use dict instead",
1,
)?;
)?
}

let config_builder = match resource_dir {
Expand Down Expand Up @@ -204,12 +201,10 @@ impl PyDictionary {
}
}

let jdic = JapaneseDictionary::from_cfg(&config).map_err(|e| {
SudachiErr::new_err(format!(
"Error while constructing dictionary: {}",
e.to_string()
))
})?;
let jdic = errors::wrap_ctx(
JapaneseDictionary::from_cfg(&config),
"Error while constructing dictionary",
)?;

let pos_data = jdic
.grammar()
Expand Down Expand Up @@ -257,20 +252,19 @@ impl PyDictionary {
)]
fn create<'py>(
&'py self,
py: Python<'py>,
mode: Option<&Bound<'py, PyAny>>,
fields: Option<&Bound<'py, PySet>>,
projection: Option<&Bound<'py, PyString>>,
) -> PyResult<PyTokenizer> {
let mode = match mode {
Some(m) => extract_mode(py, m)?,
Some(m) => extract_mode(m)?,
None => Mode::C,
};
let fields = parse_field_subset(fields)?;
let mut required_fields = self.config.projection.required_subset();
let dict = self.dictionary.as_ref().unwrap().clone();
let projobj = if let Some(s) = projection {
let proj = wrap(SurfaceProjection::try_from(s.to_str()?))?;
let proj = errors::wrap(SurfaceProjection::try_from(s.to_str()?))?;
required_fields = proj.required_subset();
Some(morpheme_projection(proj, &dict))
} else {
Expand Down Expand Up @@ -331,13 +325,13 @@ impl PyDictionary {
projection: Option<&Bound<'py, PyString>>,
) -> PyResult<Bound<'py, PyAny>> {
let mode = match mode {
Some(m) => extract_mode(py, m)?,
Some(m) => extract_mode(m)?,
None => Mode::C,
};
let subset = parse_field_subset(fields)?;
if let Some(h) = handler.as_ref() {
if !h.bind(py).is_callable() {
return Err(SudachiErr::new_err("handler must be callable"));
return errors::wrap(Err("handler must be callable"));
}
}

Expand Down Expand Up @@ -394,12 +388,12 @@ impl PyDictionary {
// this needs to be a variable
let mut borrow = l.try_borrow_mut();
let out_list = match borrow {
Err(_) => return Err(SudachiErr::new_err("out was used twice at the same time")),
Ok(ref mut ms) => ms.internal_mut(py),
Err(_) => return errors::wrap(Err("out was used twice at the same time")),
};

out_list.clear();
wrap_ctx(out_list.lookup(surface, InfoSubset::all()), surface)?;
errors::wrap_ctx(out_list.lookup(surface, InfoSubset::all()), surface)?;
Ok(l)
}

Expand All @@ -422,7 +416,7 @@ impl PyDictionary {
}

fn __repr__(&self) -> PyResult<String> {
wrap(config_repr(&self.config))
errors::wrap(config_repr(&self.config))
}
}

Expand Down Expand Up @@ -453,19 +447,23 @@ fn config_repr(cfg: &Config) -> Result<String, std::fmt::Error> {
Ok(result)
}

pub(crate) fn extract_mode<'py>(py: Python<'py>, mode: &Bound<'py, PyAny>) -> PyResult<Mode> {
pub(crate) fn extract_mode<'py>(mode: &Bound<'py, PyAny>) -> PyResult<Mode> {
if mode.is_instance_of::<PyString>() {
Mode::from_str(mode.str()?.to_str()?).map_err(|e| SudachiErr::new_err(e).into())
errors::wrap(Mode::from_str(mode.str()?.to_str()?))
} else if mode.is_instance_of::<PySplitMode>() {
let mode = mode.extract::<PySplitMode>()?;
Ok(Mode::from(mode))
} else {
Err(SudachiErr::new_err(("unknown mode", mode.into_py(py))))
errors::wrap(Err(format!(
"mode should be sudachipy.SplitMode or str, was {}: {}",
mode,
mode.get_type()
)))
}
}

fn read_config_from_fs(path: Option<&Path>) -> PyResult<ConfigBuilder> {
wrap(ConfigBuilder::from_opt_file(path))
errors::wrap(ConfigBuilder::from_opt_file(path))
}

fn read_config(config_opt: &Bound<PyAny>) -> PyResult<ConfigBuilder> {
Expand All @@ -475,13 +473,13 @@ fn read_config(config_opt: &Bound<PyAny>) -> PyResult<ConfigBuilder> {
// looks like json
if config_str.starts_with("{") && config_str.ends_with("}") {
let result = ConfigBuilder::from_bytes(config_str.as_bytes());
return wrap(result);
return errors::wrap(result);
}
let p = Path::new(config_str);
if p.exists() && p.is_file() {
return read_config_from_fs(Some(p));
}
return Err(SudachiErr::new_err(format!(
return errors::wrap(Err(format!(
"config file [{}] do not exist or is not a file",
p.display()
)));
Expand All @@ -492,9 +490,10 @@ fn read_config(config_opt: &Bound<PyAny>) -> PyResult<ConfigBuilder> {
let cfg_as_str = config_opt.call_method0("as_jsons")?;
return read_config(&cfg_as_str);
}
Err(SudachiErr::new_err((
format!("passed config was not a string, json object or sudachipy.config.Config object"),
config_opt.into_py(py),
errors::wrap(Err(format!(
"config should be sudachipy.Config or str which represents a file path or json obj, was {}: {}",
config_opt,
config_opt.get_type()
)))
}

Expand All @@ -504,7 +503,7 @@ pub(crate) fn read_default_config(py: Python) -> PyResult<ConfigBuilder> {
.getattr("_DEFAULT_SETTINGFILE")?;
let path = path.downcast::<PyString>()?.to_str()?;
let path = PathBuf::from(path);
wrap_ctx(ConfigBuilder::from_opt_file(Some(&path)), &path)
errors::wrap_ctx(ConfigBuilder::from_opt_file(Some(&path)), &path)
}

pub(crate) fn get_default_resource_dir(py: Python) -> PyResult<PathBuf> {
Expand All @@ -528,10 +527,7 @@ fn locate_system_dict(py: Python, path: &Path) -> PyResult<PathBuf> {
}
match path.to_str() {
Some(name @ ("small" | "core" | "full")) => find_dict_path(py, name),
_ => Err(SudachiErr::new_err(format!(
"invalid dictionary path {:?}",
path
))),
_ => errors::wrap(Err(format!("invalid dictionary path {:?}", path))),
}
}

Expand All @@ -552,12 +548,7 @@ fn parse_field_subset(data: Option<&Bound<PySet>>) -> PyResult<InfoSubset> {
"split_a" => InfoSubset::SPLIT_A,
"split_b" => InfoSubset::SPLIT_B,
"synonym_group_id" => InfoSubset::SYNONYM_GROUP_ID,
x => {
return Err(SudachiErr::new_err(format!(
"Invalid WordInfo field name {}",
x
)))
}
x => return errors::wrap(Err(format!("Invalid WordInfo field name {}", x))),
};
}
Ok(subset)
Expand Down
8 changes: 7 additions & 1 deletion python/src/errors.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,6 +14,8 @@
* limitations under the License.
*/

use pyo3::exceptions::PyDeprecationWarning;
use pyo3::prelude::*;
use pyo3::{import_exception, PyResult};
use std::fmt::{Debug, Display};

Expand All @@ -33,3 +35,7 @@ pub fn wrap_ctx<T, E: Display, C: Debug + ?Sized>(v: Result<T, E>, ctx: &C) -> P
Err(e) => Err(SudachiError::new_err(format!("{:?}: {}", ctx, e))),
}
}

/// Issues a Python `DeprecationWarning` carrying `msg`.
///
/// The warning is raised through [`PyErr::warn_bound`] with a stack level of 1.
///
/// # Errors
///
/// Propagates whatever error `PyErr::warn_bound` returns.
pub fn warn_deprecation(py: Python<'_>, msg: &str) -> PyResult<()> {
    // Resolve the warning category type object first, then hand it to PyO3.
    let category = py.get_type_bound::<PyDeprecationWarning>();
    PyErr::warn_bound(py, &category, msg, 1)
}
33 changes: 12 additions & 21 deletions python/src/morpheme.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@ use std::fmt::Write;
use std::ops::Deref;
use std::sync::Arc;

use pyo3::exceptions::{PyException, PyIndexError};
use pyo3::exceptions::PyIndexError;
use pyo3::prelude::*;
use pyo3::types::{PyList, PyString, PyTuple, PyType};

use sudachi::prelude::{Morpheme, MorphemeList};

use crate::dictionary::{extract_mode, PyDicData, PyDictionary};
use crate::errors;
use crate::projection::MorphemeProjection;
use crate::word_info::PyWordInfo;

Expand Down Expand Up @@ -99,12 +100,9 @@ impl PyMorphemeListWrapper {
#[classmethod]
#[pyo3(text_signature = "(dict: Dictionary) -> MorphemeList")]
fn empty(_cls: &Bound<PyType>, py: Python, dict: &PyDictionary) -> PyResult<Self> {
let cat = PyModule::import_bound(py, "builtins")?.getattr("DeprecationWarning")?;
PyErr::warn_bound(
errors::warn_deprecation(
py,
&cat,
"Use Tokenizer.tokenize(\"\") if you need an empty MorphemeList.",
1,
)?;

let cloned = dict.dictionary.as_ref().unwrap().clone();
Expand Down Expand Up @@ -183,9 +181,7 @@ impl PyMorphemeListWrapper {
list: slf.clone_ref(py),
index: i,
};
pymorph
.write_repr(py, &mut result)
.map_err(|_| PyException::new_err("format failed"))?;
errors::wrap_ctx(pymorph.write_repr(py, &mut result), "format failed")?;
result.push_str(",\n");
}
result.push_str("]>");
Expand Down Expand Up @@ -378,7 +374,7 @@ impl PyMorpheme {
) -> PyResult<Bound<'py, PyMorphemeListWrapper>> {
let list = self.list(py);

let mode = extract_mode(py, mode)?;
let mode = extract_mode(mode)?;

let out_cell = match out {
None => {
Expand All @@ -391,16 +387,14 @@ impl PyMorpheme {
let mut borrow = out_cell.try_borrow_mut();
let out_ref = match borrow {
Ok(ref mut v) => v.internal_mut(py),
Err(_) => return Err(PyException::new_err("out was used twice")),
Err(_) => return errors::wrap(Err("out was used twice at the same time")),
};

out_ref.clear();
let splitted = list
.internal(py)
.split_into(mode, self.index, out_ref)
.map_err(|e| {
PyException::new_err(format!("Error while splitting morpheme: {}", e.to_string()))
})?;
let splitted = errors::wrap_ctx(
list.internal(py).split_into(mode, self.index, out_ref),
"Error while splitting morpheme",
)?;

if add_single.unwrap_or(true) && !splitted {
list.internal(py)
Expand Down Expand Up @@ -447,9 +441,7 @@ impl PyMorpheme {
/// Users should not touch the raw WordInfo.
#[pyo3(text_signature = "(self, /) -> WordInfo")]
fn get_word_info(&self, py: Python) -> PyResult<PyWordInfo> {
let cat = PyModule::import_bound(py, "builtins")?.getattr("DeprecationWarning")?;
PyErr::warn_bound(py, &cat, "Users should not touch the raw WordInfo.", 1)?;

errors::warn_deprecation(py, "Users should not touch the raw WordInfo.")?;
Ok(self.morph(py).get_word_info().clone().into())
}

Expand All @@ -465,8 +457,7 @@ impl PyMorpheme {

pub fn __repr__<'py>(&'py self, py: Python<'py>) -> PyResult<String> {
let mut result = String::new();
self.write_repr(py, &mut result)
.map_err(|_| PyException::new_err("failed to format repr"))?;
errors::wrap_ctx(self.write_repr(py, &mut result), "failed to format repr")?;
Ok(result)
}
}
4 changes: 2 additions & 2 deletions python/src/pos_matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@

use std::sync::Arc;

use pyo3::exceptions::PyException;
use pyo3::prelude::*;
use pyo3::types::{PyBool, PyIterator, PyTuple};

use sudachi::analysis::stateless_tokenizer::DictionaryAccess;
use sudachi::pos::PosMatcher;

use crate::dictionary::PyDicData;
use crate::errors;
use crate::morpheme::PyMorpheme;

/// A part-of-speech matcher which checks if a morpheme belongs to a set of part of speech.
Expand Down Expand Up @@ -116,7 +116,7 @@ impl PyPosMatcher {
}

if start_len == data.len() {
Err(PyException::new_err(format!(
errors::wrap(Err(format!(
"POS {:?} did not match any elements",
elem.repr()?
)))
Expand Down
4 changes: 2 additions & 2 deletions python/src/pretokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
*/

use crate::dictionary::PyDicData;
use crate::errors::wrap;
use crate::errors;
use crate::morpheme::{PyMorphemeList, PyMorphemeListWrapper, PyProjector};
use pyo3::intern;
use pyo3::prelude::*;
Expand Down Expand Up @@ -49,7 +49,7 @@ impl PerThreadPreTokenizer {

pub fn tokenize(&mut self, data: &str) -> PyResult<()> {
self.tokenizer.reset().push_str(data);
wrap(self.tokenizer.do_tokenize())?;
errors::wrap(self.tokenizer.do_tokenize())?;
Ok(())
}

Expand Down
Loading