-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added impresso NER proxy service (#413)
- Loading branch information
Showing
9 changed files
with
334 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Impresso Named Entity Recognition Entity",
  "description": "Impresso NER entity",
  "type": "object",
  "additionalProperties": false,
  "properties": {
    "id": {
      "type": "string",
      "description": "ID of the entity"
    },
    "type": {
      "type": "string",
      "description": "Type of the entity",
      "enum": [
        "pers",
        "pers.ind",
        "pers.coll",
        "pers.ind.articleauthor",
        "org",
        "org.adm",
        "org.ent",
        "org.ent.pressagency",
        "prod",
        "prod.media",
        "prod.doctr",
        "time",
        "time.date.abs",
        "loc",
        "loc.adm.town",
        "loc.adm.reg",
        "loc.adm.nat",
        "loc.adm.sup",
        "loc.phys.geo",
        "loc.phys.hydro",
        "loc.phys.astro",
        "loc.oro",
        "loc.fac",
        "loc.add.phys",
        "loc.add.elec",
        "loc.unk"
      ]
    },
    "surfaceForm": {
      "type": "string",
      "description": "Surface form of the entity"
    },
    "offset": {
      "type": "object",
      "additionalProperties": false,
      "properties": {
        "start": {
          "type": "integer",
          "description": "Start offset of the entity in the text"
        },
        "end": {
          "type": "integer",
          "description": "End offset of the entity in the text"
        }
      },
      "required": ["start", "end"]
    },
    "isTypeNested": {
      "type": "boolean",
      "description": "Whether the entity type is nested"
    },
    "confidence": {
      "type": "object",
      "additionalProperties": false,
      "properties": {
        "ner": {
          "type": "number",
          "description": "Confidence score for the named entity recognition"
        },
        "nel": {
          "type": "number",
          "description": "Confidence score for the named entity linking"
        }
      },
      "required": ["ner"]
    },
    "wikidata": {
      "type": "object",
      "additionalProperties": false,
      "properties": {
        "id": {
          "type": "string",
          "description": "Wikidata ID of the entity"
        },
        "wikipediaPageName": {
          "type": "string",
          "description": "Wikipedia page name of the entity"
        }
      },
      "required": ["id"]
    },
    "function": {
      "type": "string",
      "description": "Function of the entity"
    },
    "name": {
      "type": "string",
      "description": "Name of the entity"
    }
  },
  "required": ["id", "type", "surfaceForm", "offset", "isTypeNested", "confidence"]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Impresso Named Entity Recognition Request",
  "description": "Request body for the Impresso NER endpoint",
  "type": "object",
  "additionalProperties": false,
  "properties": {
    "text": {
      "type": "string",
      "description": "Text to be processed for named entity recognition"
    }
  },
  "required": ["text"]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Impresso Named Entity Recognition Response",
  "description": "Response of the Impresso NER endpoint",
  "type": "object",
  "additionalProperties": false,
  "properties": {
    "modelId": {
      "type": "string",
      "description": "ID of the model used for the named entity recognition"
    },
    "text": {
      "type": "string",
      "description": "Text processed for named entity recognition"
    },
    "timestamp": {
      "type": "string",
      "format": "date-time",
      "description": "Timestamp of when named entity recognition was performed"
    },
    "entities": {
      "type": "array",
      "description": "Entities recognized in the text",
      "items": {
        "$ref": "ImpressoNerEntity.json"
      }
    }
  },
  "required": ["modelId", "text", "timestamp", "entities"]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
import type { Params } from '@feathersjs/feathers' | ||
import axios, { AxiosResponse } from 'axios' | ||
|
||
/** Body accepted by the NER service's `create` method. */
export interface RequestPayload {
  /** Text to run named entity recognition on. */
  text: string
}
|
||
/** JSON body posted to the downstream NER endpoint. */
interface DownstreamRequestBody {
  /** The input text; the downstream API expects it under the `data` key. */
  data: string
}
|
||
/**
 * Entity type labels used by the NER service.
 *
 * See
 * https://github.com/impresso/impresso-annotation/blob/740a31e2c925e4a4d59be97710e390871754674d/frontend/impresso_annotation/templates/landing_page.html#L157
 */
type NerType =
  | 'pers'
  | 'pers.ind'
  | 'pers.coll'
  | 'pers.ind.articleauthor'
  | 'org'
  | 'org.adm'
  | 'org.ent'
  | 'org.ent.pressagency'
  | 'prod'
  | 'prod.media'
  | 'prod.doctr'
  | 'time'
  | 'time.date.abs'
  | 'loc'
  | 'loc.adm.town'
  | 'loc.adm.reg'
  | 'loc.adm.nat'
  | 'loc.adm.sup'
  | 'loc.phys.geo'
  | 'loc.phys.hydro'
  | 'loc.phys.astro'
  | 'loc.oro'
  | 'loc.fac'
  | 'loc.add.phys'
  | 'loc.add.elec'
  | 'loc.unk'
|
||
/** A single named entity in the raw shape returned by the downstream NER service. */
interface DownstreamNes {
  /** Named entity linking confidence score. */
  confidence_nel?: number
  /** Named entity recognition confidence score. */
  confidence_ner: number
  id: string
  /** Left offset. */
  lOffset: number
  /** Whether the entity is nested. */
  nested: boolean
  /** Right offset. */
  rOffset: number
  /** Surface form (text). */
  surface: string
  type: NerType

  /** Wikidata ID. */
  wkd_id?: string
  /** Wikipedia page name. */
  wkpedia_pagename?: string

  /** Function. */
  function?: string
  /** Entity name. */
  name?: string
}
|
||
/** Raw response body returned by the downstream NER service. */
interface DownstreamResponse {
  /** Model id. */
  sys_id: string
  /** Input text. */
  text: string
  /** ISO timestamp. */
  ts: string
  /** Entities found in the text, in the downstream field naming. */
  nes: DownstreamNes[]
}
|
||
/** A named entity as exposed by this service's public response. */
export interface ImpressoNerEntity {
  id: string
  type: NerType
  /** Text of the entity as it appears in the input. */
  surfaceForm: string
  /** Start and end offsets of the entity in the text. */
  offset: { start: number; end: number }
  isTypeNested: boolean
  /** Confidence scores: `ner` for recognition, `nel` for linking (optional). */
  confidence: { ner: number; nel?: number }
  /** Wikidata link details; absent when the entity is not linked. */
  wikidata?: {
    id: string
    wikipediaPageName?: string
  }
  function?: string
  name?: string
}
|
||
/** Public response of the NER service's `create` method. */
export interface ImpressoNerResponse {
  /** ID of the model that produced the entities. */
  modelId: string
  /** The text that was processed. */
  text: string
  /** Timestamp reported by the downstream service. */
  timestamp: string
  entities: ImpressoNerEntity[]
}
|
||
/** Construction options for `ImpressoNerService`. */
export interface ImpressoNerServiceOptions {
  /** URL of the downstream NER endpoint that requests are forwarded to. */
  impressoNerServiceUrl: string
}
|
||
/**
 * Service that forwards text to the downstream Impresso NER endpoint and
 * returns the result converted to the public `ImpressoNerResponse` shape.
 */
export class ImpressoNerService {
  /** URL of the downstream NER endpoint that `create` posts to. */
  url: string

  constructor(options: ImpressoNerServiceOptions) {
    this.url = options.impressoNerServiceUrl
  }

  /**
   * Runs named entity recognition on `data.text` via the downstream service.
   *
   * @param data - request payload holding the text to analyse.
   * @param params - service call params; currently unused.
   * @returns the downstream result mapped to `ImpressoNerResponse`.
   * @throws Error when the downstream service answers with a status other than 200.
   *   Transport-level failures (no response at all) still reject with axios's own error.
   */
  async create(data: RequestPayload, params: Params): Promise<ImpressoNerResponse> {
    const { text } = data
    const response = await axios.post<DownstreamResponse, AxiosResponse<DownstreamResponse>, DownstreamRequestBody>(
      this.url,
      { data: text },
      // By default axios rejects the promise for any non-2xx status, which would
      // skip the status check below. Accept every status so that the failure is
      // logged and reported through the single error path here.
      { validateStatus: () => true }
    )
    if (response.status !== 200) {
      console.error(`Failed to fetch downstream data. Error (${response.status}): `, response.data)
      throw new Error('Failed to fetch downstream data')
    }
    return convertDownstreamResponse(response.data)
  }
}
|
||
/**
 * Maps the raw downstream response onto the public response shape:
 * `sys_id` -> `modelId`, `ts` -> `timestamp`, and each item of `nes` is
 * converted with `convertDownstreamEntity`.
 */
const convertDownstreamResponse = (response: DownstreamResponse): ImpressoNerResponse => ({
  modelId: response.sys_id,
  text: response.text,
  timestamp: response.ts,
  entities: response.nes.map(convertDownstreamEntity),
})
|
||
/**
 * Maps one raw downstream entity onto the public `ImpressoNerEntity` shape.
 *
 * Optional fields (`confidence.nel`, `wikidata`, `wikidata.wikipediaPageName`,
 * `function`, `name`) are only included when the downstream value is neither
 * `null` nor `undefined`. `wikidata` is additionally omitted when the
 * downstream Wikidata ID is the literal string `'NIL'`.
 */
const convertDownstreamEntity = (entity: DownstreamNes): ImpressoNerEntity => ({
  id: entity.id,
  type: entity.type,
  surfaceForm: entity.surface,
  offset: { start: entity.lOffset, end: entity.rOffset },
  isTypeNested: entity.nested,
  confidence: {
    ner: entity.confidence_ner,
    ...(entity.confidence_nel != null ? { nel: entity.confidence_nel } : {}),
  },
  ...(entity.wkd_id != null && entity.wkd_id !== 'NIL'
    ? {
        wikidata: {
          id: entity.wkd_id,
          ...(entity.wkpedia_pagename != null ? { wikipediaPageName: entity.wkpedia_pagename } : {}),
        },
      }
    : {}),
  ...(entity.function != null ? { function: entity.function } : {}),
  ...(entity.name != null ? { name: entity.name } : {}),
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
import { authenticateAround as authenticate } from '../../hooks/authenticate' | ||
import { rateLimit } from '../../hooks/rateLimiter' | ||
|
||
// Hooks applied around every method of the NER service: the authentication
// hook is configured with `allowUnauthenticated: false`, and is followed by the
// rate limiter hook.
export default {
  around: {
    all: [authenticate({ allowUnauthenticated: false }), rateLimit()],
  },
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
import type { ServiceSwaggerOptions } from 'feathers-swagger' | ||
import { getRequestBodyContent, getStandardResponses } from '../../util/openapi' | ||
|
||
/**
 * Swagger/OpenAPI description of the NER service. Only `create` is documented:
 * it takes an `ImpressoNerRequest` body and answers with an
 * `ImpressoNerResponse` (no pagination wrapper).
 */
export const docs: ServiceSwaggerOptions = {
  description: 'Impresso Named Entity Recognition',
  securities: ['create'],
  operations: {
    create: {
      operationId: 'performNer',
      description: 'Perform Named Entity Recognition of a text',
      requestBody: {
        content: getRequestBodyContent('ImpressoNerRequest'),
      },
      responses: getStandardResponses({
        method: 'create',
        schema: 'ImpressoNerResponse',
        standardPagination: false,
      }),
    },
  },
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import { ServiceOptions } from '@feathersjs/feathers' | ||
import { createSwaggerServiceOptions } from 'feathers-swagger' | ||
import { ImpressoApplication } from '../../types' | ||
import { ImpressoNerService } from './impresso-ner.class' | ||
import hooks from './impresso-ner.hooks' | ||
import { docs } from './impresso-ner.schema' | ||
|
||
/**
 * Registers the NER proxy service under `/tools/ner`, exposing only the
 * `create` method, and attaches its hooks.
 *
 * The downstream URL is read from the `impressoNerServiceUrl` app setting and
 * falls back to the public Impresso annotation endpoint when it is not set.
 */
export default (app: ImpressoApplication) => {
  const url = app.get('impressoNerServiceUrl') ?? 'https://impresso-annotation.epfl.ch/api/ner/'
  const service = new ImpressoNerService({ impressoNerServiceUrl: url })

  app.use('/tools/ner', service, {
    events: [],
    methods: ['create'],
    docs: createSwaggerServiceOptions({ schemas: {}, docs }),
  } as ServiceOptions)
  app.service('/tools/ner').hooks(hooks)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters