Skip to content

Commit

Permalink
Upgrade to python3.11 fix
Browse files Browse the repository at this point in the history
Upgrade to python3.11
  • Loading branch information
michael-lerner authored Jan 17, 2023
2 parents a2f98d6 + 9f1bc36 commit c171ebb
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 14 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "pywordsegment"
version = "0.4.0"
version = "0.4.1"
authors = ["Gal Ben David <[email protected]>"]
edition = "2021"
description = "Concatenated-word segmentation Python library written in Rust"
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ sdist-include = [

[tool.poetry]
name = "pywordsegment"
version = "0.4.0"
version = "0.4.1"
authors = ["Gal Ben David <[email protected]>"]
description = "Concatenated-word segmentation Python library written in Rust"
readme = "README.md"
Expand Down
50 changes: 38 additions & 12 deletions pywordsegment/__init__.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,54 @@
import gzip
import importlib.resources
import sys
import typing

from . import pywordsegment

# Major and minor components of the running interpreter's version, captured
# once at import time; WordSegmenter.load() compares them to decide which
# importlib.resources API to use for the bundled data files.
PY_VERSION_MAJOR, PY_VERSION_MINOR = sys.version_info[:2]

class WordSegmenter:
word_segmenter: pywordsegment.WordSegmenter = None

@staticmethod
def load() -> None:
if WordSegmenter.word_segmenter is None:
unigrams_serialized = gzip.decompress(
data=importlib.resources.read_binary(
package=__package__,
resource='unigrams.msgpack.gz',
),
)
bigrams_serialized = gzip.decompress(
data=importlib.resources.read_binary(
package=__package__,
resource='bigrams.msgpack.gz',
),
)
if PY_VERSION_MAJOR >= 3 and PY_VERSION_MINOR >= 11:
with importlib.resources.files(
__package__,
).joinpath(
'unigrams.msgpack.gz',
).open(
'rb',
) as unigrams_msgpack, importlib.resources.files(
__package__,
).joinpath(
'bigrams.msgpack.gz',
).open(
'rb',
) as bigrams_msgpack:
unigrams_serialized = gzip.decompress(
data=unigrams_msgpack.read(),
)
bigrams_serialized = gzip.decompress(
data=bigrams_msgpack.read(),
)

else:
unigrams_serialized = gzip.decompress(
data=importlib.resources.read_binary(
package=__package__,
resource='unigrams.msgpack.gz',
),
)

bigrams_serialized = gzip.decompress(
data=importlib.resources.read_binary(
package=__package__,
resource='bigrams.msgpack.gz',
),
)

WordSegmenter.word_segmenter = pywordsegment.WordSegmenter(
unigrams_serialized=unigrams_serialized,
Expand Down

0 comments on commit c171ebb

Please sign in to comment.