Skip to content

Commit

Permalink
Merge pull request #5 from benradford/fixpetrarch2
Browse files Browse the repository at this point in the history
Fixpetrarch2
  • Loading branch information
ahalterman authored Jul 9, 2019
2 parents 9ca2bd3 + f0730eb commit f9822ab
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 28 deletions.
64 changes: 47 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,29 +36,59 @@ Example Python Usage

```
import requests
import json
import json
from pprint import pprint
headers = {'Content-Type': 'application/json'}
data = {'text': "At least 37 people are dead after Islamist radical group Boko Haram assaulted a town in northeastern Nigeria.", 'id': 'abc123', 'date':
'20010101'}
data = {'text':"A Tunisian court has jailed a Nigerian student for two years for helping young militants join an armed Islamic group in Lebanon, his lawyer said Wednesday.", 'id': 'abc123', 'date':'20010101'}
data = json.dumps(data)
r = requests.get('http://localhost:5002/hypnos/extract', data=data,
headers=headers)
r.json()
r = requests.get('http://localhost:5002/hypnos/extract', data=data, headers=headers)
pprint(r.json())
```

Returns:

```
{u'abc123': {u'meta': {u'date': u'20010101'},
u'sents': {u'0': {u'content': u'At least 37 people are dead after Islamist
radical group Boko Haram assaulted a town in northeastern Nigeria .',
u'events': [[u'NGAREBMUS', u'NGA', u'190']],
u'issues': [[u'ID_EXTREMISM', 1], [u'NAMED_TERROR_GROUP', 1]],
u'parsed': u'(ROOT (S (NP (QP (IN AT ) (JJS LEAST ) (CD 37 ) )
(NNS PEOPLE ) ) (VP (VBP ARE ) (ADJP (JJ DEAD ) ) (SBAR (IN AFTER
) (S (NP (JJ ISLAMIST ) (JJ RADICAL ) (NN GROUP ) (NNP BOKO )
(NNP HARAM ) ) (VP (VBD ASSAULTED ) (NP (NP (DT A ) (NN TOWN ) )
(PP (IN IN ) (NP (JJ NORTHEASTERN ) (NNP NIGERIA ) ) ) ) ) ) ) )
(. . ) ) )'}}}}
{'abc123': {'meta': {'date': '20010101', 'verbs': []},
'sents': {'0': {'content': 'A Tunisian court has jailed a Nigerian '
'student for two years for helping '
'young militants join an armed Islamic '
'group in Lebanon , his lawyer said '
'Wednesday .',
'events': [['TUNJUD', 'NGAEDU', '173']],
'issues': [['STUDENTS', 1],
['NAMED_TERROR_GROUP', 1]],
'meta': {'actorroot': [['', '']],
'actortext': [['Tunisian court',
'Nigerian student']],
'eventtext': ['has jailed'],
'nouns': [[[' TUNISIAN', ' COURT'],
['TUNJUD'],
[['TUN', []], ['~']]],
[[' NIGERIAN', ' STUDENT'],
['NGAEDU'],
[['NGA', []], ['~']]],
[[' ARMED ISLAMIC GROUP'],
['DZAREB'],
[['DZAREB', []]]],
[[' LEBANON'],
['LBN'],
[['LBN', []]]],
[[' LAWYER'],
['~JUD'],
[['~']]]]},
'parsed': '(SBAR (S (NP (DT A ) (JJ TUNISIAN ) '
'(NN COURT ) ) (VP (VBZ HAS ) (VP '
'(VBN JAILED ) (NP (DT A ) (JJ '
'NIGERIAN ) (NN STUDENT ) ) (PP (IN '
'FOR ) (NP (CD TWO ) (NNS YEARS ) ) '
') (PP (IN FOR ) (S (VP (VBG HELPING '
') (NP (JJ YOUNG ) (NNS MILITANTS ) '
') ) ) ) ) ) ) (S (VP (VBP JOIN '
') (NP (DT AN ) (JJ ARMED ) (JJ '
'ISLAMIC ) (NN GROUP ) ) (PP (IN IN '
') (NP (NNP LEBANON ) ) ) ) ) (, , '
') (S (NP (PRP$ HIS ) (NN LAWYER ) ) '
'(VP (VBD SAID ) (NP (NNP WEDNESDAY ) '
') ) ) (. . ) ) '}}}}
```
22 changes: 16 additions & 6 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,17 @@ def bad_request(error):
def not_found(error):
return make_response(jsonify({'error': 'Not found'}), 404)

def format_parsed_str(parsed_str):
    """Collapse a bracketed parse-tree string into one upper-cased line.

    The input is first unwrapped:

    * if, ignoring surrounding whitespace, it starts with ``(ROOT`` and ends
      with ``)``, that outer wrapper is removed and the remainder is stripped;
    * otherwise, if the character after the first one is (after optional
      whitespace) an opening parenthesis, the first and last characters are
      dropped;
    * otherwise the string is used unchanged.

    The result is split on newlines; blank lines are discarded, each remaining
    line is stripped and given a single trailing space, every ``)`` is
    replaced by `` ) `` and the text is upper-cased. The pieces are then
    concatenated without any additional separator.

    :param parsed_str: parse-tree text, possibly spanning several lines.
    :returns: the flattened, upper-cased tree string.
    """
    trimmed = parsed_str.strip()

    if trimmed.startswith("(ROOT") and trimmed.endswith(")"):
        # Drop the leading "(ROOT" (5 characters) and its closing ")".
        body = trimmed[5:-1].strip()
    elif trimmed[1:].strip().startswith("("):
        # Anonymous outer wrapper such as "( (S ...))": peel one layer.
        body = trimmed[1:-1]
    else:
        # Nothing to unwrap; keep the caller's text exactly as given.
        body = parsed_str

    pieces = []
    for raw_line in body.split('\n'):
        stripped = raw_line.strip()
        if not stripped:
            # Skip empty and whitespace-only lines.
            continue
        pieces.append((stripped + ' ').replace(')', ' ) ').upper())

    return ''.join(pieces)

class ExtractAPI(Resource):
def __init__(self):
Expand All @@ -39,9 +50,8 @@ def get(self):
date = args['date']

out = send_to_ccnlp(text)

event_dict = process_corenlp(out, date, storyid)

event_updated = send_to_petr(event_dict)

return event_updated
Expand All @@ -62,7 +72,7 @@ def send_to_petr(event_dict):
events_data = json.dumps({'events': event_dict})
petr_url = 'http://petrarch:5001/petrarch/code'
events_r = requests.post(petr_url, data=events_data, headers=headers)
event_updated = process_results(events_r.json())
event_updated = events_r.json()

return event_updated

Expand All @@ -73,10 +83,10 @@ def process_corenlp(output, date, STORYID):
event_dict[STORYID]['meta'] = {}
event_dict[STORYID]['meta']['date'] = date
for i, _ in enumerate(output['sentences']):
sents = output['sentences']
sent = output['sentences'][i]
event_dict[STORYID]['sents'][str(i)] = {}
event_dict[STORYID]['sents'][str(i)]['content'] = ' '.join(sents[i]['tokens'])
event_dict[STORYID]['sents'][str(i)]['parsed'] = sents[i]['parse'].upper().replace(')', ' )')
event_dict[STORYID]['sents'][str(i)]['content'] = ' '.join(sent['tokens'])
event_dict[STORYID]['sents'][str(i)]['parsed'] = format_parsed_str(sent['parse'])

return event_dict

Expand Down
30 changes: 25 additions & 5 deletions petrarch/petrarch_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@

cwd = os.path.abspath(os.path.dirname(__file__))

config = petrarch2.utilities._get_data('data/config/','PETR_config.ini')
petrarch2.PETRreader.parse_Config(config)
petrarch2.read_dictionaries()
#print(config)
#print(getattr(petrarch2,"VerbDict"))

@app.errorhandler(400)
def bad_request(error):
Expand Down Expand Up @@ -39,18 +44,33 @@ def post(self):
except Exception as e:
sys.stderr.write("An error occurred with PETR. {}\n".format(e))
event_dict_updated = event_dict

for key in event_dict_updated:
event_dict_updated[key]['meta']['verbs']=[]
for sent in event_dict_updated[key]['sents']:
try:
temp_meta = event_dict_updated[key]['sents'][sent]['meta']
event_dict_updated[key]['sents'][sent]['meta']={'actortext':list(temp_meta['actortext'].values()),
'eventtext':list(temp_meta['eventtext'].values()),
'nouns':temp_meta['nouns'],
'actorroot':list(temp_meta['actorroot'].values())}
except:
event_dict_updated[key]['sents'][sent]['meta']={'actortext':[[]],
'eventtext':[[]],
'nouns':[],
'actorroot':[[]]}

return event_dict_updated


api.add_resource(CodeAPI, '/petrarch/code')

if __name__ == '__main__':
config = petrarch2.utilities._get_data('data/config/', 'PETR_config.ini')
print("reading config")
petrarch2.PETRreader.parse_Config(config)
print("reading dicts")
petrarch2.read_dictionaries()
#config = petrarch2.utilities._get_data('data/config/', 'PETR_config.ini')
#print("reading config")
#petrarch2.PETRreader.parse_Config(config)
#print("reading dicts")
#petrarch2.read_dictionaries()

http_server = HTTPServer(WSGIContainer(app))
http_server.listen(5001)
Expand Down

0 comments on commit f9822ab

Please sign in to comment.