Skip to content

Commit

Permalink
Merge pull request #47 from rskmoi/v0.3.1
Browse files: browse the repository at this point in the history
V0.3.1
  • Loading branch information
rskmoi authored Oct 14, 2024
2 parents 5fb901d + 6642857 commit 7f4edf3
Show file tree
Hide file tree
Showing 9 changed files with 33 additions and 36 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12']
python-version: [ '3.9', '3.10', '3.11', '3.12', '3.13' ]
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install libomp if the operating system is macOS.
Expand Down
21 changes: 11 additions & 10 deletions namedivider/cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from pathlib import Path

import typer
from tqdm import tqdm

from namedivider.divider.basic_name_divider import BasicNameDivider
from namedivider.divider.config import BasicNameDividerConfig, GBDTNameDividerConfig
Expand Down Expand Up @@ -75,8 +74,9 @@ def file(
with open(undivided_name_text, "rb") as f:
undivided_names = f.read().decode(encoding).strip().split("\n")
divided_names = []
for _undivided_name in tqdm(undivided_names):
divided_names.append(str(divider.divide_name(_undivided_name)))
with typer.progressbar(undivided_names) as bar:
for _undivided_name in bar:
divided_names.append(str(divider.divide_name(_undivided_name)))
print("\n".join(divided_names))


Expand Down Expand Up @@ -116,13 +116,14 @@ def accuracy(
divided_names = f.read().decode(encoding).strip().split("\n")
is_correct_list = []
wrong_list = []
for _divided_name in tqdm(divided_names):
_undivided_name = _divided_name.replace(separator, "")
_divided_name_pred = str(divider.divide_name(_undivided_name))
is_correct = _divided_name == _divided_name_pred
is_correct_list.append(is_correct)
if not is_correct:
wrong_list.append(f"True: {_divided_name}, Pred: {_divided_name_pred}")
with typer.progressbar(divided_names) as bar:
for _divided_name in bar:
_undivided_name = _divided_name.replace(separator, "")
_divided_name_pred = str(divider.divide_name(_undivided_name))
is_correct = _divided_name == _divided_name_pred
is_correct_list.append(is_correct)
if not is_correct:
wrong_list.append(f"True: {_divided_name}, Pred: {_divided_name_pred}")
print(f"{sum(is_correct_list) / len(is_correct_list):.04}")
if len(wrong_list) != 0:
print("\n".join(wrong_list))
Expand Down
7 changes: 3 additions & 4 deletions namedivider/divider/gbdt_name_divider.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import _pickle as pickle # type: ignore
from dataclasses import asdict
from pathlib import Path
from typing import Optional, cast

import _pickle as pickle # type: ignore
import lightgbm as lgb
import pandas as pd

from namedivider.divider.config import GBDTNameDividerConfig
from namedivider.divider.name_divider_base import _NameDivider
Expand Down Expand Up @@ -47,7 +46,7 @@ def calc_score(self, family: str, given: str) -> float:
:return: Score of dividing.
"""
feature = self.feature_extractor.get_features(family=family, given=given)
df = pd.DataFrame([asdict(feature)])
score_list = self.model.predict(df)
feature_list = [list(asdict(feature).values())]
score_list = self.model.predict(feature_list)
score = cast(float, score_list[0])
return score
9 changes: 5 additions & 4 deletions namedivider/util.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import urllib.request
from pathlib import Path
from typing import Union

import requests

CURRENT_DIR = Path(__file__).resolve().parent
DEFAULT_CACHE_DIR = Path("~/.cache/namedivider-python").expanduser()
GBDT_MODEL_V1_URL = "https://github.com/rskmoi/namedivider-python/releases/download/Models/gbdt_model_v1.txt"
Expand Down Expand Up @@ -43,7 +42,8 @@ def download_family_name_pickle_if_needed(path: Union[str, Path]) -> None:
return None
DEFAULT_CACHE_DIR.mkdir(exist_ok=True, parents=True)
print("Download FamilyNameRepository from GitHub...")
content = requests.get(FAMILY_NAME_REPOSITORY_URL).content
with urllib.request.urlopen(FAMILY_NAME_REPOSITORY_URL) as response:
content = response.read()
with open(path, "wb") as f:
f.write(content)

Expand All @@ -59,6 +59,7 @@ def download_gbdt_model_v1_if_needed(path: Union[str, Path]) -> None:
return None
DEFAULT_CACHE_DIR.mkdir(exist_ok=True, parents=True)
print("Download GBDT Model from GitHub...")
content = requests.get(GBDT_MODEL_V1_URL).content
with urllib.request.urlopen(GBDT_MODEL_V1_URL) as response:
content = response.read()
with open(path, "wb") as f:
f.write(content)
2 changes: 1 addition & 1 deletion namedivider/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.3.0"
__version__ = "0.3.1"
8 changes: 3 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,17 @@ authors = [
]
classifiers = [
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
]
dependencies = [
"lightgbm>=3.3",
"numpy",
"pandas",
"regex",
"requests",
"tqdm",
"typer>=0.3.2",
]

Expand Down Expand Up @@ -62,15 +60,15 @@ module = "namedivider"
ignore_missing_imports = true

[tool.ruff]
select = [
lint.select = [
"E", # pycodestyle errors
"W", # pycodestyle warnings
"F", # pyflakes
"I", # isort
"C", # flake8-comprehensions
"B", # flake8-bugbear
]
ignore = [
lint.ignore = [
"E501", # line too long, handled by black
"B008", # do not perform function calls in argument defaults
"C901", # too complex
Expand Down
12 changes: 5 additions & 7 deletions requirements-test.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
pytest >=7.1.3,<8.0.0
mypy == 1.4.0
ruff == 0.0.275
black == 23.3.0
coverage == 7.3.2
mypy == 1.11.2
ruff == 0.6.8
black == 23.12.1
coverage == 7.6.1

# stubs
types-requests
pandas-stubs
types-regex
types-tqdm
types-regex
2 changes: 1 addition & 1 deletion scripts/format.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
ruff namedivider tests --fix
ruff check namedivider tests --fix
black namedivider tests
2 changes: 1 addition & 1 deletion scripts/lint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ set -e
set -x

mypy namedivider
ruff namedivider tests
ruff check namedivider tests
black namedivider tests --check

0 comments on commit 7f4edf3

Please sign in to comment.