Skip to content

Commit

Permalink
Merge branch 'pre-release-02-06-2019'
Browse files: browse the repository at this point in the history
# Conflicts:
#	ken.py
  • Loading branch information
malakhovks committed Jun 2, 2019
2 parents 89f1ec0 + b28590f commit 4b89975
Show file tree
Hide file tree
Showing 5 changed files with 1,187 additions and 74 deletions.
35 changes: 3 additions & 32 deletions ken.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -135,11 +135,6 @@ def text_normalization_default(raw_text):
# remove leading and trailing spaces
line = line.strip()
raw_text_list.append(line)
# TODO Remove debug log in production release
# print('Included line: ' + line)
# else:
# # TODO Remove debug log in production release
# print('Excluded line: ' + line)
# yet_raw_text = '\n'.join(raw_text_list)
yet_raw_text = ' '.join(raw_text_list)
return yet_raw_text
Expand Down Expand Up @@ -271,12 +266,6 @@ def parcexml_Generator():
# default sentence normalization + spaCy doc init
doc = NLP_EN(text_normalized)

# TODO Remove debug log in production release
# print('''
# sentences\t{num_sent}
# '''.format(
# num_sent=len(list(doc.sents)),))

"""
# create the <parce.xml> file structure
"""
Expand Down Expand Up @@ -398,11 +387,8 @@ def parcexml_Generator():
# create full <parce.xml> file structure
root_element.append(new_sentence_element)

# TODO Remove debug log in production release
# print ET.tostring(root_element, encoding='utf8', method='xml')
return ET.tostring(root_element, encoding='utf8', method='xml')
except:
# print "Unexpected error:", sys.exc_info()
return abort(500)
file.close()
return abort(400)
Expand Down Expand Up @@ -604,10 +590,6 @@ def get_terms_list():
'''
if doc_for_tokens[0].pos_ not in ['DET', 'PUNCT']:

# print('two-word term lemma ---> ' + chunk.lemma_ +' POS[0]:'+ doc_for_tokens[0].pos_ + ' POS[0]:'+ doc_for_tokens[0].tag_ + ' HEAD[0]:' + doc_for_tokens[0].head.lower_ +' POS[1]:' + doc_for_tokens[1].pos_ + ' POS[1]:'+ doc_for_tokens[1].tag_ + ' HEAD[1]:' + doc_for_tokens[1].head.lower_)

# print('--------------------')

# If two-word term already exists in two_word_terms_help_list
# if chunk.lower_ in two_word_terms_help_list:
if chunk.lemma_ in two_word_terms_help_list:
Expand Down Expand Up @@ -760,9 +742,6 @@ def get_terms_list():
# add relup/reldown
if chunk.root.lemma_ not in one_word_terms_help_list:

# print('root NOUN not exists in one_word_terms_help_list --->> ' + chunk.root.lemma_)
# print('--------------------')

one_word_terms_help_list.append(chunk.root.lemma_)

# create and append <wcount>
Expand Down Expand Up @@ -814,8 +793,6 @@ def get_terms_list():
if t.lemma_ != chunk.root.lemma_:
if t.pos_ in ['NOUN']:

# print('-------->>>>>>' + t.lemma_)

if t.lemma_ in one_word_terms_help_list:

relup_index = 0
Expand Down Expand Up @@ -852,8 +829,6 @@ def get_terms_list():

if t.lemma_ not in one_word_terms_help_list:

# print('if t.lemma_ not in one_word_terms_help_list ----->>>>>>' + t.lemma_)

relup_index = 0
reldown_index = 0
sent_pos_helper = []
Expand Down Expand Up @@ -917,16 +892,13 @@ def get_terms_list():
'''
# extract three-word terms
'''
if len(doc_for_tokens) == 3:
# if len(doc_for_tokens) == 3:

# print('three-word term lemma ---> ' + chunk.lemma_ +' POS[0]:'+ doc_for_tokens[0].pos_ + ' POS[1]:' + doc_for_tokens[1].pos_ + ' POS[2]:' + doc_for_tokens[2].pos_)
print('--------------------')
# print('three-word term lemma ---> ' + chunk.lemma_ +' POS[0]:'+ doc_for_tokens[0].pos_ + ' POS[1]:' + doc_for_tokens[1].pos_ + ' POS[2]:' + doc_for_tokens[2].pos_)
# print('--------------------')

if len(doc_for_tokens) > 3:

# print('multi-word term lemma ---> ' + chunk.lemma_)
# print('--------------------')

if doc_for_tokens[0].pos_ not in ['DET', 'PUNCT']:

# If multiple-word term already exists in multiple_word_terms_help_list
Expand Down Expand Up @@ -985,7 +957,6 @@ def get_terms_list():

return ET.tostring(root_termsintext_element, encoding='utf8', method='xml')
except:
# print "Unexpected error:", sys.exc_info()
return abort(500)
file.close()
return abort(400)
Expand Down
Loading

0 comments on commit 4b89975

Please sign in to comment.