Skip to content

Commit

Permalink
🐞 fix(TOK): 读取模型时强制utf-8编码
Browse files (browse the repository at this point in the history)
  • Loading branch information
yansh97 committed Oct 12, 2024
1 parent 5556740 commit 3485185
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 6 deletions.
2 changes: 1 addition & 1 deletion jieba3/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from jieba3.tok import _cut_query, _cut_text

-__version__ = "1.0.1"
+__version__ = "1.0.2"


class jieba3(BaseModel):
Expand Down
8 changes: 4 additions & 4 deletions jieba3/tok.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,19 @@ class ModelParams(BaseModel):


BASE_MODEL_PARAMS: Final[ModelParams] = ModelParams.model_validate_json(
-json_data=(Path(__file__).parent / "model.base.json").read_text()
+json_data=(Path(__file__).parent / "model.base.json").read_text(encoding="utf-8")
)
BASE_MODEL_FREQ: Final[dict[str, int]] = BASE_MODEL_PARAMS.freq
BASE_MODEL_TOTAL: Final[int] = BASE_MODEL_PARAMS.total

SMALL_MODEL_PARAMS: Final[ModelParams] = ModelParams.model_validate_json(
-json_data=(Path(__file__).parent / "model.small.json").read_text()
+json_data=(Path(__file__).parent / "model.small.json").read_text(encoding="utf-8")
)
SMALL_MODEL_FREQ: Final[dict[str, int]] = SMALL_MODEL_PARAMS.freq
SMALL_MODEL_TOTAL: Final[int] = SMALL_MODEL_PARAMS.total

LARGE_MODEL_PARAMS: Final[ModelParams] = ModelParams.model_validate_json(
-json_data=(Path(__file__).parent / "model.large.json").read_text()
+json_data=(Path(__file__).parent / "model.large.json").read_text(encoding="utf-8")
)
LARGE_MODEL_FREQ: Final[dict[str, int]] = LARGE_MODEL_PARAMS.freq
LARGE_MODEL_TOTAL: Final[int] = LARGE_MODEL_PARAMS.total
Expand All @@ -53,7 +53,7 @@ class HMMParams(BaseModel):


HMM_PARAMS: Final[HMMParams] = HMMParams.model_validate_json(
-json_data=(Path(__file__).parent / "hmm.json").read_text()
+json_data=(Path(__file__).parent / "hmm.json").read_text(encoding="utf-8")
)

HMM_STATE_PROB: Final[dict[State, float]] = HMM_PARAMS.state_prob
Expand Down
3 changes: 2 additions & 1 deletion test/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Callable

import jieba

import jieba3

jieba.initialize()
Expand All @@ -30,7 +31,7 @@ def test_mode(
jieba_total_time: float = 0
jieba3_total_time: float = 0
size: float = path.stat().st_size / 1024 / 1024
-lines: list[str] = path.read_text().splitlines()
+lines: list[str] = path.read_text(encoding="utf-8").splitlines()
N = 10
for _ in range(N):
for line in lines:
Expand Down

0 comments on commit 3485185

Please sign in to comment.