Skip to content

Commit

Permalink
Merge pull request #263 from WorksApplications/fix/clippy-warnings-no…
Browse files Browse the repository at this point in the history
…breaking

Fix clippy warnings (no breaking changes)
  • Loading branch information
mh-northlander authored Nov 11, 2024
2 parents 4e72d54 + 8676342 commit e158aeb
Show file tree
Hide file tree
Showing 66 changed files with 386 additions and 427 deletions.
19 changes: 13 additions & 6 deletions python/src/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,21 @@
* limitations under the License.
*/

use crate::dictionary::get_default_resource_dir;
use crate::errors;
use pyo3::prelude::*;
use pyo3::types::{PyBytes, PyList, PyString, PyTuple, PyType};
use std::fs::{File, OpenOptions};
use std::io::BufWriter;
use std::path::Path;

use pyo3::prelude::*;
use pyo3::types::{PyBytes, PyList, PyString, PyTuple, PyType};

use sudachi::analysis::stateless_tokenizer::DictionaryAccess;
use sudachi::config::Config;
use sudachi::dic::build::{DataSource, DictBuilder};
use sudachi::dic::dictionary::JapaneseDictionary;

use crate::dictionary::get_default_resource_dir;
use crate::errors;

pub fn register_functions(m: &Bound<PyModule>) -> PyResult<()> {
m.add_function(wrap_pyfunction!(build_system_dic, m)?)?;
m.add_function(wrap_pyfunction!(build_user_dic, m)?)?;
Expand Down Expand Up @@ -80,7 +83,9 @@ fn build_system_dic<'py>(
description: Option<&str>,
) -> PyResult<Bound<'py, PyList>> {
let mut builder = DictBuilder::new_system();
description.map(|d| builder.set_description(d));
if let Some(d) = description {
builder.set_description(d)
}

let matrix_path = resolve_as_pypathstr(py, matrix)?;
let matrix_src = as_data_source(matrix_path.as_ref(), matrix)?;
Expand Down Expand Up @@ -138,7 +143,9 @@ fn build_user_dic<'py>(
};

let mut builder = DictBuilder::new_user(&system_dic);
description.map(|d| builder.set_description(d));
if let Some(d) = description {
builder.set_description(d)
}

for f in lex.iter() {
let lex_path = resolve_as_pypathstr(py, &f)?;
Expand Down
11 changes: 6 additions & 5 deletions python/src/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,18 @@
* limitations under the License.
*/

use pyo3::prelude::*;
use pyo3::types::{PySet, PyString, PyTuple};
use std::convert::TryFrom;
use std::fmt::Write;
use std::ops::Deref;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;
use sudachi::analysis::Mode;

use pyo3::prelude::*;
use pyo3::types::{PySet, PyString, PyTuple};

use sudachi::analysis::stateless_tokenizer::DictionaryAccess;
use sudachi::analysis::Mode;
use sudachi::config::{Config, ConfigBuilder, SurfaceProjection};
use sudachi::dic::dictionary::JapaneseDictionary;
use sudachi::dic::grammar::Grammar;
Expand Down Expand Up @@ -447,7 +448,7 @@ fn config_repr(cfg: &Config) -> Result<String, std::fmt::Error> {
Ok(result)
}

pub(crate) fn extract_mode<'py>(mode: &Bound<'py, PyAny>) -> PyResult<Mode> {
pub(crate) fn extract_mode(mode: &Bound<'_, PyAny>) -> PyResult<Mode> {
if mode.is_instance_of::<PyString>() {
errors::wrap(Mode::from_str(mode.str()?.to_str()?))
} else if mode.is_instance_of::<PySplitMode>() {
Expand All @@ -471,7 +472,7 @@ fn read_config(config_opt: &Bound<PyAny>) -> PyResult<ConfigBuilder> {
let config_pystr = config_opt.str()?;
let config_str = config_pystr.to_str()?.trim();
// looks like json
if config_str.starts_with("{") && config_str.ends_with("}") {
if config_str.starts_with('{') && config_str.ends_with('}') {
let result = ConfigBuilder::from_bytes(config_str.as_bytes());
return errors::wrap(result);
}
Expand Down
3 changes: 2 additions & 1 deletion python/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,11 @@
* limitations under the License.
*/

use std::fmt::{Debug, Display};

use pyo3::exceptions::PyDeprecationWarning;
use pyo3::prelude::*;
use pyo3::{import_exception, PyResult};
use std::fmt::{Debug, Display};

// Sudachi exception class is defined in Python
import_exception!(sudachipy.errors, SudachiError);
Expand Down
4 changes: 2 additions & 2 deletions python/src/morpheme.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ impl PyMorphemeListWrapper {
for (i, m) in list.iter().enumerate() {
result.push_str(m.surface().deref());
if i + 1 != nmorphs {
result.push_str(" ");
result.push(' ');
}
}
PyString::new_bound(py, result.as_str())
Expand Down Expand Up @@ -196,7 +196,7 @@ impl PyMorphemeListWrapper {
}

fn __bool__(&self, py: Python) -> bool {
self.internal(py).len() != 0
!self.internal(py).is_empty()
}
}

Expand Down
3 changes: 1 addition & 2 deletions python/src/pos_matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ impl PyPosMatcher {
fn create_from_fn(dic: &Arc<PyDicData>, func: &Bound<PyAny>, py: Python) -> PyResult<Self> {
let mut data = Vec::new();
for (pos_id, pos) in dic.pos.iter().enumerate() {
let args = PyTuple::new_bound(py, &[pos]);
let args = PyTuple::new_bound(py, [pos]);
if func.call1(args)?.downcast::<PyBool>()?.is_true() {
data.push(pos_id as u16);
}
Expand Down Expand Up @@ -198,7 +198,6 @@ impl PyPosMatcher {
let max_id = self.dic.pos.len();
// map -> filter chain is needed to handle exactly u16::MAX POS entries
let values = (0..max_id)
.into_iter()
.map(|x| x as u16)
.filter(|id| !self.matcher.matches_id(*id));
let matcher = PosMatcher::new(values);
Expand Down
18 changes: 10 additions & 8 deletions python/src/pretokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,23 @@
* limitations under the License.
*/

use crate::dictionary::PyDicData;
use crate::errors;
use crate::morpheme::{PyMorphemeList, PyMorphemeListWrapper, PyProjector};
use std::cell::RefCell;
use std::sync::Arc;

use pyo3::intern;
use pyo3::prelude::*;
use pyo3::sync::GILOnceCell;
use pyo3::types::{PyList, PySlice, PyTuple, PyType};
use std::cell::RefCell;
use std::sync::Arc;
use thread_local::ThreadLocal;

use crate::projection::MorphemeProjection;
use sudachi::analysis::stateful_tokenizer::StatefulTokenizer;
use sudachi::dic::subset::InfoSubset;
use sudachi::prelude::Mode;
use thread_local::ThreadLocal;

use crate::dictionary::PyDicData;
use crate::errors;
use crate::morpheme::{PyMorphemeList, PyMorphemeListWrapper, PyProjector};
use crate::projection::MorphemeProjection;

/// This struct performs actual tokenization
/// There should be at most one instance per thread of execution
Expand Down Expand Up @@ -152,7 +154,7 @@ impl PyPretokenizer {
}
Some(h) => {
let mrp: &Bound<PyAny> = morphs.bind(py);
let args = PyTuple::new_bound(py, &[index, string, mrp]);
let args = PyTuple::new_bound(py, [index, string, mrp]);
h.bind(py).call1(args)
}
}
Expand Down
20 changes: 11 additions & 9 deletions python/src/projection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,23 @@
* limitations under the License.
*/

use crate::dictionary::PyDicData;
use crate::errors;
use crate::morpheme::PyProjector;
use pyo3::prelude::*;
use pyo3::types::PyString;
use pyo3::{PyResult, Python};
use std::convert::TryFrom;
use std::ops::Deref;
use std::sync::Arc;

use pyo3::prelude::*;
use pyo3::types::PyString;
use pyo3::{PyResult, Python};

use sudachi::analysis::stateless_tokenizer::DictionaryAccess;
use sudachi::config::SurfaceProjection;
use sudachi::pos::PosMatcher;
use sudachi::prelude::Morpheme;

use crate::dictionary::PyDicData;
use crate::errors;
use crate::morpheme::PyProjector;

pub(crate) trait MorphemeProjection {
fn project<'py>(&self, m: &Morpheme<Arc<PyDicData>>, py: Python<'py>) -> Bound<'py, PyString>;
}
Expand Down Expand Up @@ -114,9 +117,8 @@ impl MorphemeProjection for NormalizedNouns {
}

fn conjugating_matcher<D: DictionaryAccess>(dic: &D) -> PosMatcher {
make_matcher(dic, |pos| match pos[0].deref() {
"動詞" | "形容詞" | "助動詞" => true,
_ => false,
make_matcher(dic, |pos| {
matches!(pos[0].deref(), "動詞" | "形容詞" | "助動詞")
})
}

Expand Down
3 changes: 1 addition & 2 deletions python/src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ use std::sync::Arc;
use pyo3::prelude::*;

use sudachi::analysis::stateful_tokenizer::StatefulTokenizer;

use sudachi::dic::subset::InfoSubset;
use sudachi::prelude::*;

Expand Down Expand Up @@ -157,7 +156,7 @@ impl PyTokenizer {
None => None,
Some(m) => Some(extract_mode(m)?),
};
let default_mode = mode.map(|m| self.tokenizer.set_mode(m.into()));
let default_mode = mode.map(|m| self.tokenizer.set_mode(m));
let mut tokenizer = scopeguard::guard(&mut self.tokenizer, |t| {
default_mode.map(|m| t.set_mode(m));
});
Expand Down
4 changes: 2 additions & 2 deletions sudachi-cli/src/build.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,12 +14,12 @@
* limitations under the License.
*/

use memmap2::Mmap;
use std::fs::{File, OpenOptions};
use std::io::{BufWriter, Write};
use std::path::{Path, PathBuf};

use clap::{Args, Subcommand};
use memmap2::Mmap;

use sudachi::analysis::stateless_tokenizer::DictionaryAccess;
use sudachi::config::Config;
Expand Down
17 changes: 6 additions & 11 deletions sudachi-cli/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -34,22 +34,17 @@ use sudachi::prelude::*;
#[cfg(feature = "bake_dictionary")]
const BAKED_DICTIONARY_BYTES: &[u8] = include_bytes!(env!("SUDACHI_DICT_PATH"));

#[derive(Clone, Debug, Eq, PartialEq)]
#[derive(Clone, Debug, Eq, PartialEq, Default)]
pub enum SentenceSplitMode {
/// Do both sentence splitting and analysis
#[default]
Default,
/// Do only sentence splitting and not analysis
Only,
/// Do only analysis without sentence splitting
None,
}

impl Default for SentenceSplitMode {
fn default() -> Self {
SentenceSplitMode::Default
}
}

impl FromStr for SentenceSplitMode {
type Err = &'static str;

Expand Down Expand Up @@ -156,7 +151,7 @@ fn main() {
// output: stdout or file
let inner_writer: Box<dyn Write> = match &args.output_file {
Some(output_path) => Box::new(
File::create(&output_path)
File::create(output_path)
.unwrap_or_else(|_| panic!("Failed to open output file {:?}", &output_path)),
),
None => Box::new(io::stdout()),
Expand Down Expand Up @@ -207,10 +202,10 @@ fn strip_eol(data: &str) -> &str {
let mut bytes = data.as_bytes();
let mut len = bytes.len();
if len > 1 && bytes[len - 1] == b'\n' {
len = len - 1;
len -= 1;
bytes = &bytes[..len];
if len > 1 && bytes[len - 1] == b'\r' {
len = len - 1;
len -= 1;
bytes = &bytes[..len];
}
}
Expand Down
4 changes: 2 additions & 2 deletions sudachi-cli/src/output.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -44,7 +44,7 @@ impl Wakachi {

impl<T: DictionaryAccess> SudachiOutput<T> for Wakachi {
fn write(&self, writer: &mut Writer, morphemes: &MorphemeList<T>) -> SudachiResult<()> {
if morphemes.len() == 0 {
if morphemes.is_empty() {
writer.write_all(b"\n")?;
return Ok(());
}
Expand Down
18 changes: 8 additions & 10 deletions sudachi/src/analysis/created.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -40,7 +40,7 @@ impl CreatedWords {
const MAX_SHIFT: Carrier = CreatedWords::MAX_VALUE - 1;

pub fn empty() -> CreatedWords {
return Default::default();
Default::default()
}

pub fn single<Pos: Into<i64>>(length: Pos) -> CreatedWords {
Expand All @@ -55,7 +55,7 @@ impl CreatedWords {
#[must_use]
pub fn add_word<P: Into<i64>>(&self, length: P) -> CreatedWords {
let mask = CreatedWords::single(length);
return self.add(mask);
self.add(mask)
}

#[must_use]
Expand All @@ -67,21 +67,19 @@ impl CreatedWords {
let mask = CreatedWords::single(length);
if (self.0 & mask.0) == 0 {
HasWord::No
} else if length.into() >= CreatedWords::MAX_VALUE as _ {
HasWord::Maybe
} else {
if length.into() >= CreatedWords::MAX_VALUE as _ {
HasWord::Maybe
} else {
HasWord::Yes
}
HasWord::Yes
}
}

pub fn is_empty(&self) -> bool {
return self.0 == 0;
self.0 == 0
}

pub fn not_empty(&self) -> bool {
return !self.is_empty();
!self.is_empty()
}
}

Expand Down
Loading

0 comments on commit e158aeb

Please sign in to comment.