diff --git a/README.md b/README.md index 16ed1fd..05b8499 100644 --- a/README.md +++ b/README.md @@ -36,29 +36,59 @@ Example Python Usage ``` import requests -import json +import json +from pprint import pprint headers = {'Content-Type': 'application/json'} -data = {'text': "At least 37 people are dead after Islamist radical group Boko Haram assaulted a town in northeastern Nigeria.", 'id': 'abc123', 'date': -'20010101'} +data = {'text':"A Tunisian court has jailed a Nigerian student for two years for helping young militants join an armed Islamic group in Lebanon, his lawyer said Wednesday.", 'id': 'abc123', 'date':'20010101'} data = json.dumps(data) -r = requests.get('http://localhost:5002/hypnos/extract', data=data, - headers=headers) -r.json() +r = requests.get('http://localhost:5002/hypnos/extract', data=data, headers=headers) +pprint(r.json()) ``` Returns: ``` -{u'abc123': {u'meta': {u'date': u'20010101'}, - u'sents': {u'0': {u'content': u'At least 37 people are dead after Islamist - radical group Boko Haram assaulted a town in northeastern Nigeria .', - u'events': [[u'NGAREBMUS', u'NGA', u'190']], - u'issues': [[u'ID_EXTREMISM', 1], [u'NAMED_TERROR_GROUP', 1]], - u'parsed': u'(ROOT (S (NP (QP (IN AT ) (JJS LEAST ) (CD 37 ) ) - (NNS PEOPLE ) ) (VP (VBP ARE ) (ADJP (JJ DEAD ) ) (SBAR (IN AFTER - ) (S (NP (JJ ISLAMIST ) (JJ RADICAL ) (NN GROUP ) (NNP BOKO ) - (NNP HARAM ) ) (VP (VBD ASSAULTED ) (NP (NP (DT A ) (NN TOWN ) ) - (PP (IN IN ) (NP (JJ NORTHEASTERN ) (NNP NIGERIA ) ) ) ) ) ) ) ) - (. . ) ) )'}}}} +{'abc123': {'meta': {'date': '20010101', 'verbs': []}, + 'sents': {'0': {'content': 'A Tunisian court has jailed a Nigerian ' + 'student for two years for helping ' + 'young militants join an armed Islamic ' + 'group in Lebanon , his lawyer said ' + 'Wednesday .', + 'events': [['TUNJUD', 'NGAEDU', '173']], + 'issues': [['STUDENTS', 1], + ['NAMED_TERROR_GROUP', 1]], + 'meta': {'actorroot': [['', '']], + 'actortext': [['Tunisian court', + 'Nigerian student']], + 'eventtext': ['has jailed'], + 'nouns': [[[' TUNISIAN', ' COURT'], + ['TUNJUD'], + [['TUN', []], ['~']]], + [[' NIGERIAN', ' STUDENT'], + ['NGAEDU'], + [['NGA', []], ['~']]], + [[' ARMED ISLAMIC GROUP'], + ['DZAREB'], + [['DZAREB', []]]], + [[' LEBANON'], + ['LBN'], + [['LBN', []]]], + [[' LAWYER'], + ['~JUD'], + [['~']]]]}, + 'parsed': '(SBAR (S (NP (DT A ) (JJ TUNISIAN ) ' + '(NN COURT ) ) (VP (VBZ HAS ) (VP ' + '(VBN JAILED ) (NP (DT A ) (JJ ' + 'NIGERIAN ) (NN STUDENT ) ) (PP (IN ' + 'FOR ) (NP (CD TWO ) (NNS YEARS ) ) ' + ') (PP (IN FOR ) (S (VP (VBG HELPING ' + ') (NP (JJ YOUNG ) (NNS MILITANTS ) ' + ') ) ) ) ) ) ) (S (VP (VBP JOIN ' + ') (NP (DT AN ) (JJ ARMED ) (JJ ' + 'ISLAMIC ) (NN GROUP ) ) (PP (IN IN ' + ') (NP (NNP LEBANON ) ) ) ) ) (, , ' + ') (S (NP (PRP$ HIS ) (NN LAWYER ) ) ' + '(VP (VBD SAID ) (NP (NNP WEDNESDAY ) ' + ') ) ) (. . ) ) '}}}} ``` diff --git a/app.py b/app.py index af8ec2d..7a232cc 100644 --- a/app.py +++ b/app.py @@ -22,6 +22,17 @@ def bad_request(error): def not_found(error): return make_response(jsonify({'error': 'Not found'}), 404) +def format_parsed_str(parsed_str): + if parsed_str.strip().startswith("(ROOT") and parsed_str.strip().endswith(")"): + parsed_str = parsed_str.strip()[5:-1].strip() + elif parsed_str.strip()[1:].strip().startswith("("): + parsed_str = parsed_str.strip()[1:-1] + parsed = parsed_str.split('\n') + parsed = [line.strip() + ' ' for line in [line1.strip() for line1 in + parsed if line1] if line] + parsed = [line.replace(')', ' ) ').upper() for line in parsed] + treestr = ''.join(parsed) + return treestr class ExtractAPI(Resource): def __init__(self): @@ -39,9 +50,8 @@ def get(self): date = args['date'] out = send_to_ccnlp(text) - + event_dict = process_corenlp(out, date, storyid) - event_updated = send_to_petr(event_dict) return event_updated @@ -62,7 +72,7 @@ def send_to_petr(event_dict): events_data = json.dumps({'events': event_dict}) petr_url = 'http://petrarch:5001/petrarch/code' events_r = requests.post(petr_url, data=events_data, headers=headers) - event_updated = process_results(events_r.json()) + event_updated = events_r.json() return event_updated @@ -73,10 +83,10 @@ def process_corenlp(output, date, STORYID): event_dict[STORYID]['meta'] = {} event_dict[STORYID]['meta']['date'] = date for i, _ in enumerate(output['sentences']): - sents = output['sentences'] + sent = output['sentences'][i] event_dict[STORYID]['sents'][str(i)] = {} - event_dict[STORYID]['sents'][str(i)]['content'] = ' '.join(sents[i]['tokens']) - event_dict[STORYID]['sents'][str(i)]['parsed'] = sents[i]['parse'].upper().replace(')', ' )') + event_dict[STORYID]['sents'][str(i)]['content'] = ' '.join(sent['tokens']) + event_dict[STORYID]['sents'][str(i)]['parsed'] = format_parsed_str(sent['parse']) return event_dict diff --git a/petrarch/petrarch_app.py b/petrarch/petrarch_app.py index dd9f162..e6feb1a 100755 --- a/petrarch/petrarch_app.py +++ b/petrarch/petrarch_app.py @@ -12,6 +12,11 @@ cwd = os.path.abspath(os.path.dirname(__file__)) +config = petrarch2.utilities._get_data('data/config/','PETR_config.ini') +petrarch2.PETRreader.parse_Config(config) +petrarch2.read_dictionaries() +#print(config) +#print(getattr(petrarch2,"VerbDict")) @app.errorhandler(400) def bad_request(error): @@ -39,6 +44,21 @@ def post(self): except Exception as e: sys.stderr.write("An error occurred with PETR. {}\n".format(e)) event_dict_updated = event_dict + + for key in event_dict_updated: + event_dict_updated[key]['meta']['verbs']=[] + for sent in event_dict_updated[key]['sents']: + try: + temp_meta = event_dict_updated[key]['sents'][sent]['meta'] + event_dict_updated[key]['sents'][sent]['meta']={'actortext':list(temp_meta['actortext'].values()), + 'eventtext':list(temp_meta['eventtext'].values()), + 'nouns':temp_meta['nouns'], + 'actorroot':list(temp_meta['actorroot'].values())} + except: + event_dict_updated[key]['sents'][sent]['meta']={'actortext':[[]], + 'eventtext':[[]], + 'nouns':[], + 'actorroot':[[]]} return event_dict_updated @@ -46,11 +66,11 @@ def post(self): api.add_resource(CodeAPI, '/petrarch/code') if __name__ == '__main__': - config = petrarch2.utilities._get_data('data/config/', 'PETR_config.ini') - print("reading config") - petrarch2.PETRreader.parse_Config(config) - print("reading dicts") - petrarch2.read_dictionaries() + #config = petrarch2.utilities._get_data('data/config/', 'PETR_config.ini') + #print("reading config") + #petrarch2.PETRreader.parse_Config(config) + #print("reading dicts") + #petrarch2.read_dictionaries() http_server = HTTPServer(WSGIContainer(app)) http_server.listen(5001)