Skip to content

Commit

Permalink
Remove recursive parser (#12)
Browse files Browse the repository at this point in the history
* remove recursive parser
* fix 🐛
  • Loading branch information
patrickphat authored Sep 2, 2020
1 parent e136f7c commit 9167792
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 14 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name='urbamt',
version='0.0.1-b1',
version='0.0.1-b2',
author="Patrick Phat Nguyen",
author_email="[email protected]",
description="Universal Rule-based Machine Translation Toolkit (URBaMT)",
Expand Down
35 changes: 23 additions & 12 deletions urbamt/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from .utils.tree_manipulation import translate_tree_grammar
from .utils.misc import remove_trailing_space
import nltk
from nltk import RecursiveDescentParser as Parser
from nltk.parse.chart import BottomUpLeftCornerChartParser as Parser

class URBAMT_Translator:
""""""
Expand All @@ -11,7 +11,6 @@ def __init__(self,
src_to_tgt_grammar: Dict,
src_to_tgt_dictionary: Dict):
"""Initialize the translator
Args:
src_grammar (str): source language grammar written in nltk style
E.g: src_grammar = \"""
Expand All @@ -28,14 +27,10 @@ def __init__(self,
JJ -> 'good'
NN -> 'school'
\"""
src_to_tgt_grammar (Dict): Transition between source grammar and target grammar as a dict
E.g: src2target_grammar = {
"NP1 -> JJ NN": "NP1 -> NN JJ"
}
src_to_tgt_dictionary (Dict): Dictionary of word-by-word transition from src language to target language
E.g: en_to_vi_dict = {
"I":"tôi",
Expand All @@ -54,31 +49,47 @@ def __init__(self,
def __process_text_input(txt):
    """Clean up one raw input sentence before it is tokenized.

    Args:
        txt: The sentence text to clean.

    Returns:
        Whatever ``remove_trailing_space`` returns for ``txt``.
    """
    # NOTE(review): this takes no ``self`` yet is called as
    # ``self.__process_text_input(sentence)``; presumably a ``@staticmethod``
    # decorator sits just above this hunk -- confirm against the full file.
    cleaned = remove_trailing_space(txt)
    return cleaned

def translate(self, sentences: List[str] or str, allow_multiple_translation=False):
    """Translate one sentence or a list of sentences.

    Each sentence is cleaned, split on whitespace and handed to
    ``self.parser``. The first parse tree produced is reordered with
    ``translate_tree_grammar`` using ``self.src_to_tgt_grammar``, then every
    word is replaced through ``self.src_to_tgt_dictionary`` (words missing
    from the dictionary are kept unchanged).

    Args:
        sentences: A single sentence or a list of sentences. A bare string
            is treated as a one-element list. (The annotation
            ``List[str] or str`` evaluates to ``List[str]`` at runtime; it
            is kept as-is so no new import is required.)
        allow_multiple_translation: Accepted for forward compatibility but
            not used yet -- only the first parse of each sentence is
            translated.

    Returns:
        List[str]: One translated sentence per input sentence, in order.

    Raises:
        ValueError: If the parser yields no tree for at least one sentence.
            The message lists every sentence that failed.
    """
    if isinstance(sentences, str):
        sentences = [sentences]

    translated_sentences = []
    failed_sentences = []

    for sentence in sentences:
        sentence = self.__process_text_input(sentence)

        # The parser yields candidate trees lazily; an exhausted iterator
        # means the grammar did not match this sentence.
        first_tree = next(iter(self.parser.parse(sentence.split())), None)
        if first_tree is None:
            failed_sentences.append(sentence)
            continue

        # Reorder the sentence according to the target grammar.
        reordered = translate_tree_grammar(first_tree, self.src_to_tgt_grammar)

        # Word-by-word lookup; unknown words pass through untouched.
        translated_sentences.append(
            ' '.join(self.src_to_tgt_dictionary.get(word, word)
                     for word in reordered.split())
        )

    # Test the list itself rather than the joined string, so a failed
    # sentence that is empty after cleaning is still reported instead of
    # being silently dropped from the output.
    if failed_sentences:
        failed_report = '\n'.join(failed_sentences)
        raise ValueError(f"Please check your grammar again, failed to translated these sentence \n {failed_report}")

    return translated_sentences
1 change: 0 additions & 1 deletion urbamt/utils/tree_manipulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ def calculate_displacement(src_grammar, tgt_grammar):
src_grammar_lst = src_grammar.split()
tgt_grammar_lst = tgt_grammar.split()

print(src_grammar_lst.index("->"))
src_grammar_lst = src_grammar_lst[src_grammar_lst.index("->")+1:]
tgt_grammar_lst = tgt_grammar_lst[tgt_grammar_lst.index("->")+1:]
displacement = []
Expand Down

0 comments on commit 9167792

Please sign in to comment.