-
Notifications
You must be signed in to change notification settings - Fork 0
/
serve.py
62 lines (46 loc) · 1.78 KB
/
serve.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from logging import getLogger
import os
import requests
from fastapi import FastAPI, Request, HTTPException
from pydantic import BaseModel
from typing import Optional
from chemdataextractor import Document
# Module-level logger, named after this module.
LOGGER = getLogger(__name__)

# The ROOT_PATH environment variable is handed to FastAPI as ``root_path``;
# when the variable is unset, None is passed instead.
app = FastAPI(root_path=os.getenv('ROOT_PATH'))
class TextRequest(BaseModel):
    """Request body accepted by the POST endpoint.

    Only ``content`` is read by the handler in this file; the remaining
    fields are validated by pydantic but not otherwise inspected here.
    """

    # Required string; its value is not examined in this file.
    type: str
    # Optional free-form dict; None when the client omits it.
    features: Optional[dict] = None
    # The text to be analysed.
    content: str
    # Required string; presumably the MIME type of ``content`` (not checked here).
    mimeType: str
@app.post("/")
async def processPOST(request: TextRequest):
    """Annotate the posted text with chemical entities, sentences and tokens.

    The request's ``content`` is parsed with ChemDataExtractor's ``Document``.
    Each chemical entity mention, sentence and token is converted to an
    annotation dict via ``annotation``. Entity annotations whose surface
    string matches a name in one of the document's serialized records get
    that record's fields merged into their ``features``. Token annotations
    additionally carry ``category`` (POS tag), ``normalized`` and ``length``.

    Args:
        request: Parsed request body; only ``request.content`` is used.

    Returns:
        ``{"response": {"type": "annotations", "annotations": {...}}}`` where
        the inner ``annotations`` dict has the keys ``ChemicalEntity``,
        ``Sentence`` and ``Token``, each mapping to a list of annotation dicts.
    """
    # The text to analyse comes from the request's "content" field.
    doc = Document(request.content)
    cems = [annotation(cem) for cem in doc.cems]

    # Index the serialized records by every name they list, so a mention can
    # be matched to its record through its surface string.
    records = {}
    for record in doc.records.serialize():
        # A record is not guaranteed to have a "names" entry (e.g. one that is
        # identified only by a label); skip those instead of raising KeyError.
        for name in record.get("names", ()):
            records[name] = record

    for cem in cems:
        record = records.get(cem["features"]["string"])
        if record is not None:
            # Record fields are layered on top of the existing features.
            cem["features"] = {**cem["features"], **record}

    sentences = []
    tokens = []
    for element in doc.elements:
        for sentence in element.sentences:
            sentences.append(annotation(sentence))
            # Assumes pos_tagged_tokens is aligned one-to-one with tokens and
            # that each entry holds the tag at index 1 - TODO confirm.
            for token, tagged in zip(sentence.tokens, sentence.pos_tagged_tokens):
                annot = annotation(token)
                features = annot["features"]
                features["category"] = tagged[1]
                features["normalized"] = token.lex.normalized
                features["length"] = token.lex.length
                tokens.append(annot)

    return {
        "response": {
            "type": "annotations",
            "annotations": {
                "ChemicalEntity": cems,
                "Sentence": sentences,
                "Token": tokens,
            },
        }
    }
def annotation(cem):
    """Build an annotation dict from a text span.

    ``cem`` is any object exposing ``start``, ``end`` and ``text`` attributes.
    In this file that is a chemical entity mention (CEM), a sentence or a
    token.

    Returns:
        A dict with the span's ``start`` and ``end`` plus a ``features`` dict
        holding the span's text under the key ``string``.
    """
    begin, finish = cem.start, cem.end
    features = {'string': cem.text}
    return {'start': begin, 'end': finish, 'features': features}