Skip to content

Commit

Permalink
Fix misuse of Vocab() in Language instantiation (#42)
Browse files Browse the repository at this point in the history
* Fix misuse of `Vocab()` in Language instantiation

Related: #41
Keep in mind: when instantiating a Language, pass `True` as the vocab argument instead of `Vocab()`.

* The bug is fixed for some reason

Only two lines were replaced.
  • Loading branch information
tamuhey authored Apr 10, 2020
1 parent ff98a92 commit ff6bdbc
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 5 deletions.
5 changes: 3 additions & 2 deletions camphr/lang/juman/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
from spacy.tokens import Doc, Token

from camphr.consts import JUMAN_LINES, KEY_FSTRING
from .tag_map import TAG_MAP
from camphr.lang.stop_words import STOP_WORDS
from camphr.utils import SerializationMixin, get_juman_command

from .tag_map import TAG_MAP

ShortUnitWord = namedtuple(
"ShortUnitWord", ["surface", "lemma", "pos", "fstring", "space"]
)
Expand Down Expand Up @@ -103,8 +104,8 @@ def _dtokens_to_doc(self, dtokens: List[ShortUnitWord]) -> Doc:
spaces = [x.space for x in dtokens]
doc = Doc(self.vocab, words=words, spaces=spaces)
for token, dtoken in zip(doc, dtokens):
token.lemma_ = dtoken.lemma
token.tag_ = dtoken.pos
token.lemma_ = dtoken.lemma
token._.set(self.key_fstring, dtoken.fstring)
doc.is_tagged = True
return doc
Expand Down
3 changes: 1 addition & 2 deletions camphr/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from omegaconf import OmegaConf
from spacy.language import Language
from spacy.pipeline import Pipe
from spacy.vocab import Vocab
from toolz import merge
from typing_extensions import Literal

Expand Down Expand Up @@ -81,7 +80,7 @@ def create_lang(cfg: LangConfig) -> Language:
)
if cfg.torch:
kwargs["meta"] = merge(kwargs.get("meta", {}), {"lang": cfg.name})
return TorchLanguage(Vocab(), optimizer_config=cfg.optimizer, **kwargs)
return TorchLanguage(True, optimizer_config=cfg.optimizer, **kwargs)
return spacy.blank(cfg.name, **kwargs)


Expand Down
2 changes: 1 addition & 1 deletion tests/lang/juman/test_lemmatization.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@
[("新しく", "新しい"), ("赤く", "赤い"), ("すごく", "すごい"), ("いただきました", "いただく"), ("なった", "なる")],
)
@pytest.mark.skipif(not check_juman(), reason="Is juman necessary?")
def test_mecab_lemmatizer_assigns(juman_tokenizer, word, lemma):
def test_lemmatizer_assigns(juman_tokenizer, word, lemma):
test_lemma = juman_tokenizer(word)[0].lemma_
assert test_lemma == lemma

0 comments on commit ff6bdbc

Please sign in to comment.