Skip to content

Commit

Permalink
added impresso NER proxy service (#413)
Browse files Browse the repository at this point in the history
  • Loading branch information
theorm authored Aug 30, 2024
1 parent 98abb53 commit 056f54a
Show file tree
Hide file tree
Showing 9 changed files with 334 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/configuration.ts
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ export interface Configuration {
celeryClient?: CeleryClient
media?: MediaConfiguration
solr: SolrConfiguration

impressoNerServiceUrl?: string
}

const configurationSchema: JSONSchemaDefinition = {
Expand Down
107 changes: 107 additions & 0 deletions src/schema/schemas/ImpressoNerEntity.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Impresso Named Entity Recognition Entity",
  "description": "Impresso NER entity",
  "type": "object",
  "additionalProperties": false,
  "properties": {
    "id": {
      "type": "string",
      "description": "ID of the entity"
    },
    "type": {
      "type": "string",
      "description": "Type of the entity",
      "enum": [
        "pers",
        "pers.ind",
        "pers.coll",
        "pers.ind.articleauthor",
        "org",
        "org.adm",
        "org.ent",
        "org.ent.pressagency",
        "prod",
        "prod.media",
        "prod.doctr",
        "time",
        "time.date.abs",
        "loc",
        "loc.adm.town",
        "loc.adm.reg",
        "loc.adm.nat",
        "loc.adm.sup",
        "loc.phys.geo",
        "loc.phys.hydro",
        "loc.phys.astro",
        "loc.oro",
        "loc.fac",
        "loc.add.phys",
        "loc.add.elec",
        "loc.unk"
      ]
    },
    "surfaceForm": {
      "type": "string",
      "description": "Surface form of the entity"
    },
    "offset": {
      "type": "object",
      "additionalProperties": false,
      "properties": {
        "start": {
          "type": "integer",
          "description": "Start offset of the entity in the text"
        },
        "end": {
          "type": "integer",
          "description": "End offset of the entity in the text"
        }
      },
      "required": ["start", "end"]
    },
    "isTypeNested": {
      "type": "boolean",
      "description": "Whether the entity type is nested"
    },
    "confidence": {
      "type": "object",
      "additionalProperties": false,
      "properties": {
        "ner": {
          "type": "number",
          "description": "Confidence score for the named entity recognition"
        },
        "nel": {
          "type": "number",
          "description": "Confidence score for the named entity linking"
        }
      },
      "required": ["ner"]
    },
    "wikidata": {
      "type": "object",
      "additionalProperties": false,
      "properties": {
        "id": {
          "type": "string",
          "description": "Wikidata ID of the entity"
        },
        "wikipediaPageName": {
          "type": "string",
          "description": "Wikipedia page name of the entity"
        }
      },
      "required": ["id"]
    },
    "function": {
      "type": "string",
      "description": "Function of the entity"
    },
    "name": {
      "type": "string",
      "description": "Name of the entity"
    }
  },
  "required": ["id", "type", "surfaceForm", "offset", "isTypeNested", "confidence"]
}
14 changes: 14 additions & 0 deletions src/schema/schemas/ImpressoNerRequest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Impresso Named Entity Recognition Request",
"description": "Request body for the Impresso NER endpoint",
"type": "object",
"additionalProperties": false,
"properties": {
"text": {
"type": "string",
"description": "Text to be processed for named entity recognition"
}
},
"required": ["text"]
}
29 changes: 29 additions & 0 deletions src/schema/schemas/ImpressoNerResponse.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Impresso Named Entity Recognition Response",
"description": "Response of the Impresso NER endpoint",
"type": "object",
"additionalProperties": false,
"properties": {
"modelId": {
"type": "string",
"description": "ID of the model used for the named entity recognition"
},
"text": {
"type": "string",
"description": "Text processed for named entity recognition"
},
"timestamp": {
"type": "string",
"format": "date-time",
"description": "Timestamp of when named entity recognition was performed"
},
"entities": {
"type": "array",
"items": {
"$ref": "ImpressoNerEntity.json"
}
}
},
"required": ["modelId", "text", "timestamp", "entities"]
}
134 changes: 134 additions & 0 deletions src/services/impresso-ner/impresso-ner.class.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import type { Params } from '@feathersjs/feathers'
import axios, { AxiosResponse } from 'axios'

/** Request body accepted by the NER service's `create` method. */
export interface RequestPayload {
// Text to analyse; it is forwarded to the downstream endpoint as the `data` field.
text: string
}

/** JSON body that is POSTed to the downstream NER endpoint. */
interface DownstreamRequestBody {
// Holds the caller-supplied text.
data: string
}

/**
 * Entity type labels used for the `type` field of both the downstream
 * entities and the entities exposed by this service.
 *
 * See
 * https://github.com/impresso/impresso-annotation/blob/740a31e2c925e4a4d59be97710e390871754674d/frontend/impresso_annotation/templates/landing_page.html#L157
 */
type NerType =
| 'pers'
| 'pers.ind'
| 'pers.coll'
| 'pers.ind.articleauthor'
| 'org'
| 'org.adm'
| 'org.ent'
| 'org.ent.pressagency'
| 'prod'
| 'prod.media'
| 'prod.doctr'
| 'time'
| 'time.date.abs'
| 'loc'
| 'loc.adm.town'
| 'loc.adm.reg'
| 'loc.adm.nat'
| 'loc.adm.sup'
| 'loc.phys.geo'
| 'loc.phys.hydro'
| 'loc.phys.astro'
| 'loc.oro'
| 'loc.fac'
| 'loc.add.phys'
| 'loc.add.elec'
| 'loc.unk'

/**
 * A single named entity in the shape returned by the downstream NER endpoint
 * (an element of `DownstreamResponse.nes`).
 */
interface DownstreamNes {
confidence_nel?: number // named entity linking confidence score
confidence_ner: number // named entity recognition confidence score
id: string
lOffset: number // left offset
nested: boolean // is nested
rOffset: number // right offset
surface: string // surface form (text)
type: NerType

wkd_id?: string // Wikidata ID; the value 'NIL' is treated as "no Wikidata link" when converting
wkpedia_pagename?: string // Wikipedia page name

function?: string // function
name?: string // entity name
}

/** Response body returned by the downstream NER endpoint. */
interface DownstreamResponse {
sys_id: string // model id
text: string // input text
ts: string // ISO timestamp
nes: DownstreamNes[] // recognised entities
}

/**
 * A named entity as exposed by this service: the downstream entity with its
 * fields renamed and grouped.
 */
export interface ImpressoNerEntity {
id: string
type: NerType
// Surface form (text) of the entity.
surfaceForm: string
// Left (`start`) and right (`end`) offsets reported by the downstream service.
offset: { start: number; end: number }
isTypeNested: boolean
// Recognition (`ner`) and optional linking (`nel`) confidence scores.
confidence: { ner: number; nel?: number }
// Only set when the downstream entity has a Wikidata ID other than 'NIL'.
wikidata?: {
id: string
wikipediaPageName?: string
}
function?: string
name?: string
}

/** Result returned by the NER service's `create` method. */
export interface ImpressoNerResponse {
// Model id reported by the downstream service (`sys_id`).
modelId: string
// Input text echoed by the downstream service.
text: string
// Timestamp string reported by the downstream service (`ts`).
timestamp: string
// Converted entities, one per downstream `nes` element.
entities: ImpressoNerEntity[]
}

/** Constructor options for `ImpressoNerService`. */
export interface ImpressoNerServiceOptions {
// URL of the downstream NER endpoint that requests are POSTed to.
impressoNerServiceUrl: string
}

/**
 * Service that proxies named entity recognition requests to the downstream
 * Impresso NER HTTP endpoint and converts its response into the public
 * `ImpressoNerResponse` shape.
 */
export class ImpressoNerService {
  /** URL of the downstream NER endpoint that requests are POSTed to. */
  url: string

  constructor(options: ImpressoNerServiceOptions) {
    this.url = options.impressoNerServiceUrl
  }

  /**
   * Runs named entity recognition on the given text.
   *
   * @param data - request payload; its `text` is sent downstream as `data`.
   * @param params - Feathers call params (currently unused).
   * @returns the downstream response converted to `ImpressoNerResponse`.
   * @throws Error when the downstream endpoint answers with any status other
   *   than 200. Transport-level failures (no HTTP response at all) are still
   *   rejected by axios itself.
   */
  async create(data: RequestPayload, params: Params) {
    const { text } = data
    const response = await axios.post<DownstreamResponse, AxiosResponse<DownstreamResponse>, DownstreamRequestBody>(
      this.url,
      { data: text },
      // By default axios rejects the promise for every non-2xx status, which
      // would bypass the status check below (and its logging) for 4xx/5xx
      // replies. Accept every status here so the check below is the single
      // place where downstream HTTP failures are handled.
      { validateStatus: () => true }
    )
    if (response.status !== 200) {
      console.error(`Failed to fetch downstream data. Error (${response.status}): `, response.data)
      throw new Error('Failed to fetch downstream data')
    }
    return convertDownstreamResponse(response.data)
  }
}

/**
 * Maps a downstream NER response onto the public response shape, converting
 * every downstream entity along the way.
 */
const convertDownstreamResponse = (response: DownstreamResponse): ImpressoNerResponse => {
  const { sys_id: modelId, text, ts: timestamp, nes } = response
  const entities = nes.map(downstreamEntity => convertDownstreamEntity(downstreamEntity))
  return { modelId, text, timestamp, entities }
}

/**
 * Maps one downstream entity onto the public entity shape.
 *
 * `wikidata` is attached only when the downstream Wikidata ID is present and
 * is not the 'NIL' placeholder; `function` and `name` are attached only when
 * present.
 */
const convertDownstreamEntity = (entity: DownstreamNes): ImpressoNerEntity => {
  const wikidataId = entity.wkd_id

  // Mandatory part of the entity; optional keys are appended below in the
  // same order as before: wikidata, function, name.
  const converted: ImpressoNerEntity = {
    id: entity.id,
    type: entity.type,
    surfaceForm: entity.surface,
    offset: { start: entity.lOffset, end: entity.rOffset },
    isTypeNested: entity.nested,
    confidence: { ner: entity.confidence_ner, nel: entity.confidence_nel },
  }

  if (wikidataId != null && wikidataId != 'NIL') {
    converted.wikidata = { id: wikidataId, wikipediaPageName: entity.wkpedia_pagename }
  }
  if (entity.function != null) {
    converted.function = entity.function
  }
  if (entity.name != null) {
    converted.name = entity.name
  }

  return converted
}
8 changes: 8 additions & 0 deletions src/services/impresso-ner/impresso-ner.hooks.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import { authenticateAround as authenticate } from '../../hooks/authenticate'
import { rateLimit } from '../../hooks/rateLimiter'

/**
 * Hooks for the NER service: every method requires an authenticated caller
 * and then passes through the rate limiter.
 */
const impressoNerHooks = {
  around: {
    all: [authenticate({ allowUnauthenticated: false }), rateLimit()],
  },
}

export default impressoNerHooks
21 changes: 21 additions & 0 deletions src/services/impresso-ner/impresso-ner.schema.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import type { ServiceSwaggerOptions } from 'feathers-swagger'
import { getRequestBodyContent, getStandardResponses } from '../../util/openapi'

// Request body of the `create` operation: an `ImpressoNerRequest` document.
const performNerRequestBody = {
  content: getRequestBodyContent('ImpressoNerRequest'),
}

// Responses of the `create` operation: a single (non-paginated) `ImpressoNerResponse`.
const performNerResponses = getStandardResponses({
  method: 'create',
  schema: 'ImpressoNerResponse',
  standardPagination: false,
})

/** OpenAPI documentation for the NER service (only `create` is exposed). */
export const docs: ServiceSwaggerOptions = {
  description: 'Impresso Named Entity Recognition',
  securities: ['create'],
  operations: {
    create: {
      operationId: 'performNer',
      description: 'Perform Named Entity Recognition of a text',
      requestBody: performNerRequestBody,
      responses: performNerResponses,
    },
  },
}
18 changes: 18 additions & 0 deletions src/services/impresso-ner/impresso-ner.service.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import { ServiceOptions } from '@feathersjs/feathers'
import { createSwaggerServiceOptions } from 'feathers-swagger'
import { ImpressoApplication } from '../../types'
import { ImpressoNerService } from './impresso-ner.class'
import hooks from './impresso-ner.hooks'
import { docs } from './impresso-ner.schema'

/**
 * Registers the NER proxy service under `/tools/ner` and attaches its hooks.
 *
 * The downstream endpoint is read from the `impressoNerServiceUrl` setting,
 * with the public Impresso annotation endpoint as the fallback.
 */
export default (app: ImpressoApplication) => {
  const servicePath = '/tools/ner'
  const impressoNerServiceUrl = app.get('impressoNerServiceUrl') ?? 'https://impresso-annotation.epfl.ch/api/ner/'
  const nerService = new ImpressoNerService({ impressoNerServiceUrl })

  // Only `create` is exposed and the service emits no events.
  const serviceOptions = {
    events: [],
    methods: ['create'],
    docs: createSwaggerServiceOptions({ schemas: {}, docs }),
  } as ServiceOptions

  app.use(servicePath, nerService, serviceOptions)
  app.service(servicePath).hooks(hooks)
}
1 change: 1 addition & 0 deletions src/services/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const publicApiServices = [
'newspapers',
'search-facets',
'entities',
'impresso-ner',
]

const internalApiServices = [
Expand Down

0 comments on commit 056f54a

Please sign in to comment.