Fix misuse of `Vocab()` in Language instantiation (#42)

* Fix misuse of `Vocab()` in Language instantiation. Related: #41. Keep in mind not to pass `Vocab()` when instantiating a Language; pass `True` instead. * The bug is fixed, for some reason, by only swapping two lines.
PKSHATechnology-Research · Apr 10, 2020 · ff6bdbc · ff6bdbc
1 parent ff98a92
commit ff6bdbc
Show file tree

Hide file tree

Showing 3 changed files with 5 additions and 5 deletions.
diff --git a/camphr/lang/juman/__init__.py b/camphr/lang/juman/__init__.py
@@ -8,10 +8,11 @@
 from spacy.tokens import Doc, Token
 
 from camphr.consts import JUMAN_LINES, KEY_FSTRING
-from .tag_map import TAG_MAP
 from camphr.lang.stop_words import STOP_WORDS
 from camphr.utils import SerializationMixin, get_juman_command
 
+from .tag_map import TAG_MAP
+
 ShortUnitWord = namedtuple(
     "ShortUnitWord", ["surface", "lemma", "pos", "fstring", "space"]
 )
@@ -103,8 +104,8 @@ def _dtokens_to_doc(self, dtokens: List[ShortUnitWord]) -> Doc:
         spaces = [x.space for x in dtokens]
         doc = Doc(self.vocab, words=words, spaces=spaces)
         for token, dtoken in zip(doc, dtokens):
-            token.lemma_ = dtoken.lemma
             token.tag_ = dtoken.pos
+            token.lemma_ = dtoken.lemma
             token._.set(self.key_fstring, dtoken.fstring)
         doc.is_tagged = True
         return doc

diff --git a/camphr/models.py b/camphr/models.py
@@ -11,7 +11,6 @@
 from omegaconf import OmegaConf
 from spacy.language import Language
 from spacy.pipeline import Pipe
-from spacy.vocab import Vocab
 from toolz import merge
 from typing_extensions import Literal
 
@@ -81,7 +80,7 @@ def create_lang(cfg: LangConfig) -> Language:
     )
     if cfg.torch:
         kwargs["meta"] = merge(kwargs.get("meta", {}), {"lang": cfg.name})
-        return TorchLanguage(Vocab(), optimizer_config=cfg.optimizer, **kwargs)
+        return TorchLanguage(True, optimizer_config=cfg.optimizer, **kwargs)
     return spacy.blank(cfg.name, **kwargs)
 
 

diff --git a/tests/lang/juman/test_lemmatization.py b/tests/lang/juman/test_lemmatization.py
@@ -9,6 +9,6 @@
     [("新しく", "新しい"), ("赤く", "赤い"), ("すごく", "すごい"), ("いただきました", "いただく"), ("なった", "なる")],
 )
 @pytest.mark.skipif(not check_juman(), reason="Is juman necessary?")
-def test_mecab_lemmatizer_assigns(juman_tokenizer, word, lemma):
+def test_lemmatizer_assigns(juman_tokenizer, word, lemma):
     test_lemma = juman_tokenizer(word)[0].lemma_
     assert test_lemma == lemma