Skip to content

Commit

Permalink
Merge pull request #1 from openeventdata/petrarch2
Browse files Browse the repository at this point in the history
Petrarch2
  • Loading branch information
johnb30 authored Jul 20, 2016
2 parents 9925767 + d7eef42 commit 9ca2bd3
Show file tree
Hide file tree
Showing 7 changed files with 95 additions and 41 deletions.
16 changes: 10 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
[![Circle CI](https://circleci.com/gh/caerusassociates/hypnos.svg?style=svg)](https://circleci.com/gh/caerusassociates/hypnos)
[![Code Health](https://landscape.io/github/openeventdata/hypnos/petrarch2/landscape.svg?style=flat)](https://landscape.io/github/openeventdata/hypnos/petrarch2)

hypnos
======

A RESTful API around the [PETRARCH](https://github.com/openeventdata/petrarch)
**Note: hypnos now works with PETRARCH2 by default.**

A RESTful API around the [PETRARCH2](https://github.com/openeventdata/petrarch2)
event data coder. Using `docker-compose`, this setup also integrates the
Stanford [CoreNLP](http://nlp.stanford.edu/software/corenlp.shtml) parser
using Casey Hilland's [docker container](https://github.com/chilland/ccNLP).
Expand All @@ -16,8 +19,7 @@ Data Alliance.
Running
-------

Running the system is as simple as `cd`ing into the `hypnos` directory and
using
Running the system is as simple as `cd`ing into the `hypnos` directory and using

`docker-compose up`

Expand All @@ -29,13 +31,15 @@ to run in the background.

This assumes that you have `docker-compose` and `docker` installed.

Usage
Example Python Usage
-----

```
import requests
import json
headers = {'Content-Type': 'application/json'}
data = {'text': "At least 37 people are dead after Islamist radical group Boko
Haram assaulted a town in northeastern Nigeria.", 'id': 'abc123', 'date':
data = {'text': "At least 37 people are dead after Islamist radical group Boko Haram assaulted a town in northeastern Nigeria.", 'id': 'abc123', 'date':
'20010101'}
data = json.dumps(data)
r = requests.get('http://localhost:5002/hypnos/extract', data=data,
Expand Down
41 changes: 26 additions & 15 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,34 +38,45 @@ def get(self):
storyid = args['id']
date = args['date']

headers = {'Content-Type': 'application/json'}
core_data = json.dumps({'text': text})
ccnlp = os.environ.get('CCNLP_PORT_5000_TCP_ADDR')
ccnlp_url = 'http://{}:5000/process'.format(ccnlp)
r = requests.post(ccnlp_url, data=core_data, headers=headers)
out = r.json()
out = send_to_ccnlp(text)

event_dict = process_corenlp(out, date, storyid)

events_data = json.dumps({'events': event_dict})
petr = os.environ.get('PETRARCH_PORT_5001_TCP_ADDR')
petr_url = 'http://{}:5001/petrarch/code'.format(petr)
events_r = requests.post(petr_url, data=events_data, headers=headers)
event_updated = process_results(events_r.json())
event_updated = send_to_petr(event_dict)

return event_updated


def send_to_ccnlp(text):
    """Send ``text`` to the ccnlp parsing service and return its reply.

    The text is wrapped as ``{'text': text}``, serialised to JSON and POSTed
    to ``http://ccnlp:5000/process`` with a JSON content type.

    Args:
        text: The raw text to be parsed.

    Returns:
        The response body decoded from JSON.
    """
    payload = json.dumps({'text': text})
    response = requests.post(
        'http://ccnlp:5000/process',
        data=payload,
        headers={'Content-Type': 'application/json'},
    )
    return response.json()


def send_to_petr(event_dict):
    """Send an event dictionary to the PETRARCH service for coding.

    The dictionary is wrapped as ``{'events': event_dict}``, serialised to
    JSON and POSTed to ``http://petrarch:5001/petrarch/code``. The decoded
    JSON reply is then passed through ``process_results``.

    Args:
        event_dict: The story/sentence structure to be coded.

    Returns:
        Whatever ``process_results`` returns for the decoded reply.
    """
    body = json.dumps({'events': event_dict})
    response = requests.post(
        'http://petrarch:5001/petrarch/code',
        data=body,
        headers={'Content-Type': 'application/json'},
    )
    return process_results(response.json())


def process_corenlp(output, date, STORYID):
    """Reshape parser output into the event dictionary sent on for coding.

    Args:
        output: Decoded parser response. It must contain a ``'sentences'``
            list whose items each provide ``'tokens'`` (a list of strings)
            and ``'parse'`` (a string).
        date: Story date; stored unchanged under ``meta.date``.
        STORYID: Identifier used as the single top-level key of the result.

    Returns:
        ``{STORYID: {'sents': {...}, 'meta': {'date': date}}}`` where
        ``sents`` maps each sentence's position, as a string (``'0'``,
        ``'1'``, ...), to a dict with ``'content'`` and ``'parsed'``.

    Raises:
        KeyError: If ``'sentences'``, ``'tokens'`` or ``'parse'`` is missing.
    """
    sentences = {}
    for index, sentence in enumerate(output['sentences']):
        # String keys look the same before and after a JSON round trip,
        # because JSON object keys are always strings.
        sentences[str(index)] = {
            'content': ' '.join(sentence['tokens']),
            # Upper-case the parse tree and put a space before every ')'.
            # NOTE(review): presumably the bracket format the coder expects
            # -- confirm against the PETRARCH input requirements.
            'parsed': sentence['parse'].upper().replace(')', ' )'),
        }

    return {STORYID: {'sents': sentences, 'meta': {'date': date}}}

Expand Down
24 changes: 13 additions & 11 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
hypnos:
build: .
links:
- ccnlp
- petrarch
ports:
- "5002:5002"
ccnlp:
image: caerusassociates/ccnlp:1.0.0
petrarch:
build: petrarch/.
version: '2'
services:
hypnos:
build: .
links:
- ccnlp
- petrarch
ports:
- "5002:5002"
ccnlp:
image: caerusassociates/ccnlp:1.0.0
petrarch:
build: petrarch/.
6 changes: 3 additions & 3 deletions petrarch/Dockerfile
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
FROM ubuntu:14.04

MAINTAINER John Beieler <jbeieler@caerusassociates.com>
MAINTAINER John Beieler <johnb30@gmail.com>

RUN apt-get update && apt-get install -y git python-dev python-pip
RUN apt-get update && apt-get install -y git python-dev python-pip

RUN pip install https://github.com/openeventdata/petrarch/archive/0.3.0.zip
RUN pip install git+https://github.com/openeventdata/petrarch2.git

ADD . /src

Expand Down
16 changes: 10 additions & 6 deletions petrarch/petrarch_app.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
from petrarch import petrarch
import sys
from petrarch2 import petrarch2
from tornado.ioloop import IOLoop
from tornado.wsgi import WSGIContainer
from tornado.httpserver import HTTPServer
Expand Down Expand Up @@ -33,20 +34,23 @@ def post(self):
args = self.reqparse.parse_args()
event_dict = args['events']

print(event_dict)
event_dict_updated = petrarch.do_coding(event_dict, None)
try:
event_dict_updated = petrarch2.do_coding(event_dict)
except Exception as e:
sys.stderr.write("An error occurred with PETR. {}\n".format(e))
event_dict_updated = event_dict

return event_dict_updated


api.add_resource(CodeAPI, '/petrarch/code')

if __name__ == '__main__':
config = petrarch.utilities._get_data('data/config/', 'PETR_config.ini')
config = petrarch2.utilities._get_data('data/config/', 'PETR_config.ini')
print("reading config")
petrarch.PETRreader.parse_Config(config)
petrarch2.PETRreader.parse_Config(config)
print("reading dicts")
petrarch.read_dictionaries()
petrarch2.read_dictionaries()

http_server = HTTPServer(WSGIContainer(app))
http_server.listen(5001)
Expand Down
Empty file modified petrarch/requirements.txt
100644 → 100755
Empty file.
33 changes: 33 additions & 0 deletions usage/parse_mongo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import json
import requests
from pymongo import MongoClient

# Example client: read articles from MongoDB, send each one through the
# hypnos extraction endpoint, and print any events found in the replies.
connection = MongoClient()
db = connection.lexisnexis
collection = db["test"]

# Only the first 500 documents of the collection are processed.
t = collection.find().limit(500)

output = []  # replies without a 'status' key
junk = []    # replies with a 'status' key (presumably errors -- confirm)

# TODO (carried over from the original script): figure out /process and /code

headers = {'Content-Type': 'application/json'}

for doc in t:
    # Every article is submitted with the same fixed date, '20010101'.
    data = json.dumps({'text': doc['article_body'], 'id': doc['doc_id'],
                       'date': '20010101'})
    r = requests.get('http://localhost:5002/hypnos/extract', data=data,
                     headers=headers)
    rj = r.json()
    if 'status' in rj:
        junk.append(rj)
    else:
        output.append(rj)


for o in output:
    # The story is read through the reply's first top-level key.
    # next(iter(o)) works on both Python 2 and 3, unlike o.keys()[0].
    story = o[next(iter(o))]
    # Only the sentence values are needed, so iterate .values(); this and
    # the print() call below behave the same on Python 2 and Python 3.
    for s in story['sents'].values():
        if s['events']:
            print(s['events'])

0 comments on commit 9ca2bd3

Please sign in to comment.