Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixpetrarch2 #5

Merged
merged 2 commits into from
Jul 9, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 47 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,29 +36,59 @@ Example Python Usage

```
import requests
import json
import json
from pprint import pprint

headers = {'Content-Type': 'application/json'}
data = {'text': "At least 37 people are dead after Islamist radical group Boko Haram assaulted a town in northeastern Nigeria.", 'id': 'abc123', 'date':
'20010101'}
data = {'text':"A Tunisian court has jailed a Nigerian student for two years for helping young militants join an armed Islamic group in Lebanon, his lawyer said Wednesday.", 'id': 'abc123', 'date':'20010101'}
data = json.dumps(data)
r = requests.get('http://localhost:5002/hypnos/extract', data=data,
headers=headers)
r.json()
r = requests.get('http://localhost:5002/hypnos/extract', data=data, headers=headers)
pprint(r.json())
```

Returns:

```
{u'abc123': {u'meta': {u'date': u'20010101'},
u'sents': {u'0': {u'content': u'At least 37 people are dead after Islamist
radical group Boko Haram assaulted a town in northeastern Nigeria .',
u'events': [[u'NGAREBMUS', u'NGA', u'190']],
u'issues': [[u'ID_EXTREMISM', 1], [u'NAMED_TERROR_GROUP', 1]],
u'parsed': u'(ROOT (S (NP (QP (IN AT ) (JJS LEAST ) (CD 37 ) )
(NNS PEOPLE ) ) (VP (VBP ARE ) (ADJP (JJ DEAD ) ) (SBAR (IN AFTER
) (S (NP (JJ ISLAMIST ) (JJ RADICAL ) (NN GROUP ) (NNP BOKO )
(NNP HARAM ) ) (VP (VBD ASSAULTED ) (NP (NP (DT A ) (NN TOWN ) )
(PP (IN IN ) (NP (JJ NORTHEASTERN ) (NNP NIGERIA ) ) ) ) ) ) ) )
(. . ) ) )'}}}}
{'abc123': {'meta': {'date': '20010101', 'verbs': []},
'sents': {'0': {'content': 'A Tunisian court has jailed a Nigerian '
'student for two years for helping '
'young militants join an armed Islamic '
'group in Lebanon , his lawyer said '
'Wednesday .',
'events': [['TUNJUD', 'NGAEDU', '173']],
'issues': [['STUDENTS', 1],
['NAMED_TERROR_GROUP', 1]],
'meta': {'actorroot': [['', '']],
'actortext': [['Tunisian court',
'Nigerian student']],
'eventtext': ['has jailed'],
'nouns': [[[' TUNISIAN', ' COURT'],
['TUNJUD'],
[['TUN', []], ['~']]],
[[' NIGERIAN', ' STUDENT'],
['NGAEDU'],
[['NGA', []], ['~']]],
[[' ARMED ISLAMIC GROUP'],
['DZAREB'],
[['DZAREB', []]]],
[[' LEBANON'],
['LBN'],
[['LBN', []]]],
[[' LAWYER'],
['~JUD'],
[['~']]]]},
'parsed': '(SBAR (S (NP (DT A ) (JJ TUNISIAN ) '
'(NN COURT ) ) (VP (VBZ HAS ) (VP '
'(VBN JAILED ) (NP (DT A ) (JJ '
'NIGERIAN ) (NN STUDENT ) ) (PP (IN '
'FOR ) (NP (CD TWO ) (NNS YEARS ) ) '
') (PP (IN FOR ) (S (VP (VBG HELPING '
') (NP (JJ YOUNG ) (NNS MILITANTS ) '
') ) ) ) ) ) ) (S (VP (VBP JOIN '
') (NP (DT AN ) (JJ ARMED ) (JJ '
'ISLAMIC ) (NN GROUP ) ) (PP (IN IN '
') (NP (NNP LEBANON ) ) ) ) ) (, , '
') (S (NP (PRP$ HIS ) (NN LAWYER ) ) '
'(VP (VBD SAID ) (NP (NNP WEDNESDAY ) '
') ) ) (. . ) ) '}}}}
```
22 changes: 16 additions & 6 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,17 @@ def bad_request(error):
def not_found(error):
    """Build the JSON ``{'error': 'Not found'}`` response with status 404.

    The ``error`` argument is accepted but not inspected.
    """
    payload = jsonify({'error': 'Not found'})
    return make_response(payload, 404)

def format_parsed_str(parsed_str):
    """Flatten a bracketed parse string into one upper-cased line.

    Steps, in order:

    1. If the stripped text starts with ``(ROOT`` and ends with ``)``, drop
       that wrapper; otherwise, if the stripped text is wrapped in one extra
       pair of parentheses, drop the outer pair. Any other input is left
       as-is.
    2. Split on newlines, discard blank lines, strip each remaining line and
       append a single trailing space.
    3. Replace every ``)`` with ``' ) '`` and upper-case the text.
    4. Concatenate the pieces and return the result.
    """
    trimmed = parsed_str.strip()
    if trimmed.startswith("(ROOT") and trimmed.endswith(")"):
        # Remove the leading "(ROOT" (5 chars) and the matching final ")".
        body = trimmed[5:-1].strip()
    elif trimmed[1:].strip().startswith("("):
        # Text looks like "( (...) )": peel off the outermost parentheses.
        body = trimmed[1:-1]
    else:
        body = parsed_str

    pieces = []
    for raw_line in body.split('\n'):
        text = raw_line.strip()
        if not text:
            continue
        padded = text + ' '
        pieces.append(padded.replace(')', ' ) ').upper())
    return ''.join(pieces)

class ExtractAPI(Resource):
def __init__(self):
Expand All @@ -39,9 +50,8 @@ def get(self):
date = args['date']

out = send_to_ccnlp(text)

event_dict = process_corenlp(out, date, storyid)

event_updated = send_to_petr(event_dict)

return event_updated
Expand All @@ -62,7 +72,7 @@ def send_to_petr(event_dict):
events_data = json.dumps({'events': event_dict})
petr_url = 'http://petrarch:5001/petrarch/code'
events_r = requests.post(petr_url, data=events_data, headers=headers)
event_updated = process_results(events_r.json())
event_updated = events_r.json()

return event_updated

Expand All @@ -73,10 +83,10 @@ def process_corenlp(output, date, STORYID):
event_dict[STORYID]['meta'] = {}
event_dict[STORYID]['meta']['date'] = date
for i, _ in enumerate(output['sentences']):
sents = output['sentences']
sent = output['sentences'][i]
event_dict[STORYID]['sents'][str(i)] = {}
event_dict[STORYID]['sents'][str(i)]['content'] = ' '.join(sents[i]['tokens'])
event_dict[STORYID]['sents'][str(i)]['parsed'] = sents[i]['parse'].upper().replace(')', ' )')
event_dict[STORYID]['sents'][str(i)]['content'] = ' '.join(sent['tokens'])
event_dict[STORYID]['sents'][str(i)]['parsed'] = format_parsed_str(sent['parse'])

return event_dict

Expand Down
30 changes: 25 additions & 5 deletions petrarch/petrarch_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@

cwd = os.path.abspath(os.path.dirname(__file__))

# Initialise PETRARCH2 at module import time (these calls previously lived
# under the ``if __name__ == '__main__'`` guard at the bottom of this file,
# where they are now commented out) -- presumably so the dictionaries are
# loaded regardless of how the module is started; confirm with the deployment.
# The value returned by _get_data for PETR_config.ini is passed to
# parse_Config, and read_dictionaries is called afterwards.
config = petrarch2.utilities._get_data('data/config/','PETR_config.ini')
petrarch2.PETRreader.parse_Config(config)
petrarch2.read_dictionaries()
# Disabled debugging output:
#print(config)
#print(getattr(petrarch2,"VerbDict"))

@app.errorhandler(400)
def bad_request(error):
Expand Down Expand Up @@ -39,18 +44,33 @@ def post(self):
except Exception as e:
sys.stderr.write("An error occurred with PETR. {}\n".format(e))
event_dict_updated = event_dict

for key in event_dict_updated:
event_dict_updated[key]['meta']['verbs']=[]
for sent in event_dict_updated[key]['sents']:
try:
temp_meta = event_dict_updated[key]['sents'][sent]['meta']
event_dict_updated[key]['sents'][sent]['meta']={'actortext':list(temp_meta['actortext'].values()),
'eventtext':list(temp_meta['eventtext'].values()),
'nouns':temp_meta['nouns'],
'actorroot':list(temp_meta['actorroot'].values())}
except:
event_dict_updated[key]['sents'][sent]['meta']={'actortext':[[]],
'eventtext':[[]],
'nouns':[],
'actorroot':[[]]}

return event_dict_updated


api.add_resource(CodeAPI, '/petrarch/code')

if __name__ == '__main__':
config = petrarch2.utilities._get_data('data/config/', 'PETR_config.ini')
print("reading config")
petrarch2.PETRreader.parse_Config(config)
print("reading dicts")
petrarch2.read_dictionaries()
#config = petrarch2.utilities._get_data('data/config/', 'PETR_config.ini')
#print("reading config")
#petrarch2.PETRreader.parse_Config(config)
#print("reading dicts")
#petrarch2.read_dictionaries()

http_server = HTTPServer(WSGIContainer(app))
http_server.listen(5001)
Expand Down