Skip to content

Commit

Permalink
[feature] Entities Wikidata id filter (#406)
Browse files Browse the repository at this point in the history
* added patch endpoint to collectable items

* small change to make it redeploy

* added wikidata id filter
  • Loading branch information
theorm authored Aug 16, 2024
1 parent ae3a047 commit 28b162c
Show file tree
Hide file tree
Showing 9 changed files with 327 additions and 231 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -261,3 +261,4 @@ The 'impresso - Media Monitoring of the Past' project is funded by the Swiss Nat
Copyright (C) 2020 The _impresso_ team. Contributors to this program include: [Daniele Guido](https://github.com/danieleguido), [Roman Kalyakin](https://github.com/theorm), [Thijs van Beek](https://github.com/tvanbeek).
This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. See the [GNU Affero General Public License](https://github.com/impresso/impresso-middle-layer/blob/master/LICENSE) for more details.

17 changes: 9 additions & 8 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@
"graphology-pagerank": "^1.1.0",
"helmet": "^3.21.1",
"http-proxy-middleware": "^2.0.1",
"impresso-jscommons": "https://github.com/impresso/impresso-jscommons/tarball/v1.4.1",
"impresso-jscommons": "https://github.com/impresso/impresso-jscommons/tarball/v1.4.3",
"json2csv": "^4.3.3",
"jsonschema": "^1.4.1",
"lodash": "^4.17.21",
Expand Down Expand Up @@ -131,13 +131,13 @@
},
"devDependencies": {
"@openapi-contrib/json-schema-to-openapi-schema": "3.0.1",
"@stoplight/spectral-cli": "6.11.1",
"@stoplight/spectral-owasp-ruleset": "2.0.1",
"@types/cache-manager": "^2.10.3",
"@types/ioredis": "^4.28.5",
"@types/generic-pool": "^3.1.9",
"@types/ioredis": "^4.28.5",
"@types/mocha": "10.0.6",
"@types/node-fetch": "^2.5.6",
"@stoplight/spectral-cli": "6.11.1",
"@stoplight/spectral-owasp-ruleset": "2.0.1",
"eslint": "^8.18.0",
"eslint-config-standard": "^17.0.0",
"eslint-plugin-import": "^2.26.0",
Expand All @@ -147,8 +147,8 @@
"mocha": "10.4.0",
"nodemon": "3.1.0",
"prettier": "3.2.5",
"ts-node": "10.9.2",
"typescript": "5.4.3",
"typescript-cp": "0.1.9",
"ts-node": "10.9.2"
"typescript-cp": "0.1.9"
}
}
3 changes: 3 additions & 0 deletions src/configuration.ts
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@ export interface Configuration {
useDbUserInRequestContext?: boolean
problemUriBase?: string
features?: FeaturesConfiguration
paginate?: PaginationConfiguration

// TODO: move to services:
authentication: LocalAuthenticationConfiguration
/**
* Configuration for the auth strategy in Public API
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
import { CachedSolrClient } from '../../cachedSolr'
import { ImpressoApplication } from '../../types'
import { Service as SequelizeService } from '../sequelize.service'
import User from '../../models/users.model'
import { Params } from '@feathersjs/feathers'
import { Filter } from 'impresso-jscommons'
import { buildSequelizeWikidataIdFindEntitiesCondition, sortFindEntitiesFilters } from './util'

/* eslint-disable no-unused-vars */
const debug = require('debug')('impresso/services:entities')
const lodash = require('lodash')
Expand All @@ -7,35 +15,88 @@ const { NotFound } = require('@feathersjs/errors')
const wikidata = require('../wikidata')

const Entity = require('../../models/entities.model')
const SequelizeService = require('../sequelize.service')
const { measureTime } = require('../../util/instruments')
const { buildSearchEntitiesSolrQuery } = require('./logic')

/**
 * Mixin for service call params that carry a processed query next to the raw one.
 *
 * NOTE(review): presumably both fields are populated by the validation hooks
 * before the service method runs - confirm against the hooks file.
 */
interface Sanitized<T> {
// Query values in the shape described by T.
sanitized: T
// Untyped copy of the query; `find` spreads it into the `info` field of its response.
originalQuery: any
}

/**
 * Mixin for service call params that may carry a user record.
 */
interface WithUser {
// Optional `User` model instance.
// NOTE(review): presumably the authenticated caller - confirm where it is set.
user?: User
}

/**
 * Query parameters read by the entities `find` method.
 */
interface FindQuery {
// Filters to apply; `find` splits them into Solr filters and `wikidataId` filters.
filters: Filter[]
// Page size; forwarded to the Solr query builder and echoed in the response.
limit?: number
// Page offset; forwarded to the Solr query builder and echoed in the response.
offset?: number
// Ordering; forwarded to the Solr query builder as `orderBy`.
order_by?: string
// NOTE(review): presumably toggles Wikidata resolution of results - confirm against the method body.
resolve?: string
}

class Service {
constructor({ app }) {
app: ImpressoApplication
name: string
sequelizeService: SequelizeService
solr: CachedSolrClient

constructor({ app }: { app: ImpressoApplication }) {
this.app = app
this.name = 'entities'
this.sequelizeService = new SequelizeService({
app,
app: app as any as null,
name: this.name,
})
/** @type {import('../../cachedSolr').CachedSolrClient} */
this.solr = app.service('cachedSolr')
}

async create(data, params) {
async create(data: any, params: any) {
params.query = data
return this.find(params)
}

async find(params) {
debug('[find] with params:', params.query)
async find(params: Params<FindQuery> & Sanitized<FindQuery> & WithUser) {
const qp = params.query!
debug('[find] with params:', qp)

// split filters into solr and sequelize filters
const { solrFilters, sequelizeFilters } = sortFindEntitiesFilters(qp.filters)
// build sequelize condition for wikidata IDs
const sequelizeWikidataFindEntitiesCondition = buildSequelizeWikidataIdFindEntitiesCondition(sequelizeFilters)

// if condition was built - run the query against the db
// and collect matched entity ids
let constraintIds: string[] | undefined = undefined
if (sequelizeWikidataFindEntitiesCondition != null) {
const records = await this.sequelizeService.find({
findAllOnly: true,
query: {
limit: 1000000,
offset: 0,
},
where: sequelizeWikidataFindEntitiesCondition,
})

constraintIds = records.data.map((d: any) => d.uid)
}

debug('[find] constraintIds:', constraintIds)

// if ids were collected - add them as a filter for solr
const uidFilter: Filter | undefined =
constraintIds != null
? {
type: 'uid',
q: constraintIds,
}
: undefined

const query = buildSearchEntitiesSolrQuery({
filters: params.query.filters,
orderBy: params.query.order_by,
limit: params.query.limit,
offset: params.query.offset,
filters: uidFilter != null ? [uidFilter, ...solrFilters] : solrFilters,
orderBy: qp.order_by,
limit: qp.limit,
offset: qp.offset,
})
debug('[find] solr query:', query)

Expand All @@ -52,8 +113,8 @@ class Service {
return {
total: 0,
data: [],
limit: params.query.limit,
offset: params.query.offset,
limit: qp.limit,
offset: qp.offset,
info: {
...params.originalQuery,
},
Expand All @@ -62,7 +123,7 @@ class Service {
// generate the sequelize clause.
const where = {
id: {
[Op.in]: entities.map(d => d.uid),
[Op.in]: entities.map((d: any) => d.uid),
},
}
// get sequelize results
Expand All @@ -83,9 +144,9 @@ class Service {
const sequelizeEntitiesIndex = lodash.keyBy(sequelizeResult.data, 'uid')
const result = {
total: solrResult.response.numFound,
limit: params.query.limit,
offset: params.query.offset,
data: entities.map(d => {
limit: qp.limit,
offset: qp.offset,
data: entities.map((d: any) => {
if (sequelizeEntitiesIndex[d.uid]) {
// enrich with wikidataID
d.wikidataId = sequelizeEntitiesIndex[d.uid].wikidataId
Expand All @@ -112,18 +173,18 @@ class Service {
const wkdIds = lodash(sequelizeEntitiesIndex).map('wikidataId').compact().value()

debug('[find] wikidata loading:', wkdIds.length)
const resolvedEntities = {}
const resolvedEntities: Record<string, any> = {}

return Promise.all(
wkdIds.map(wkdId =>
wkdIds.map((wkdId: string) =>
measureTime(
() =>
wikidata
.resolve({
ids: [wkdId],
cache: this.app.service('redisClient').client,
})
.then(resolved => {
.then((resolved: any) => {
resolvedEntities[wkdId] = resolved[wkdId]
}),
'entities.find.wikidata.get'
Expand All @@ -132,7 +193,7 @@ class Service {
)
.then(res => {
debug('[find] wikidata success!')
result.data = result.data.map(d => {
result.data = result.data.map((d: any) => {
if (d.wikidataId) {
d.wikidata = resolvedEntities[d.wikidataId]
}
Expand All @@ -146,7 +207,7 @@ class Service {
})
}

async get(id, params) {
async get(id: string, params: any) {
return this.find({
...params,
query: {
Expand All @@ -168,20 +229,20 @@ class Service {
})
}

async update(id, data, params) {
async update(id: string, data: any, params: any) {
return data
}

async patch(id, data, params) {
async patch(id: string, data: any, params: any) {
return data
}

async remove(id, params) {
async remove(id: string, params: any) {
return { id }
}
}

module.exports = function (options) {
module.exports = function (options: any) {
return new Service(options)
}

Expand Down
8 changes: 6 additions & 2 deletions src/services/entities/entities.hooks.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { SolrNamespaces } from '../../solr'

const { validate, validateEach, queryWithCommonParams, utils } = require('../../hooks/params')
const { qToSolrFilter, filtersToSolrQuery } = require('../../hooks/search')

Expand Down Expand Up @@ -49,7 +51,7 @@ export default {
defaultValue: 'OR',
},
type: {
choices: ['string', 'type', 'uid'],
choices: ['string', 'type', 'uid', 'wikidataId'],
required: true,
// transform is required because they should be related to the entities namespace.
// transform: (d) => {
Expand All @@ -65,7 +67,9 @@ export default {
}
),
qToSolrFilter('string'),
filtersToSolrQuery(),
filtersToSolrQuery({
solrIndexProvider: () => SolrNamespaces.Entities,
}),
queryWithCommonParams(),
],
get: [],
Expand Down
30 changes: 30 additions & 0 deletions src/services/entities/util.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import { Filter } from 'impresso-jscommons'
import { Op } from 'sequelize'

/**
 * Result of partitioning entity filters with `sortFindEntitiesFilters`.
 */
interface FilterTuple {
// Filters whose `type` is anything other than 'wikidataId'.
solrFilters: Filter[]
// Filters whose `type` is exactly 'wikidataId'.
sequelizeFilters: Filter[]
}

/**
 * Partition entity filters into two groups, preserving their relative order.
 *
 * @param filters - filters supplied to the entities `find` call.
 * @returns `sequelizeFilters` holds every filter whose `type` is 'wikidataId';
 * `solrFilters` holds all the others.
 */
export const sortFindEntitiesFilters = (filters: Filter[]): FilterTuple => {
  const solrFilters: Filter[] = []
  const sequelizeFilters: Filter[] = []

  // Single pass: each filter lands in exactly one of the two buckets.
  filters.forEach(filter => {
    const bucket = filter.type === 'wikidataId' ? sequelizeFilters : solrFilters
    bucket.push(filter)
  })

  return { solrFilters, sequelizeFilters }
}

/**
 * Build a Sequelize `where` fragment constraining the `wikidataId` column.
 *
 * Only filters with `type === 'wikidataId'` and a non-null `q` contribute.
 * Each contributes one clause: `Op.notIn` when the filter's `context` is
 * 'exclude', `Op.in` otherwise. A string `q` is wrapped in a one-element
 * array; any other `q` is passed through unchanged. All clauses are combined
 * under `Op.and`.
 *
 * @param filters - candidate filters; unsupported ones are ignored.
 * @returns the condition object, or `undefined` when no filter contributed.
 */
export const buildSequelizeWikidataIdFindEntitiesCondition = (filters: Filter[]): Record<string, any> | undefined => {
  const clauses: Record<string | symbol, unknown>[] = []

  filters.forEach(filter => {
    // Skip anything that is not a usable wikidataId filter.
    if (filter.type !== 'wikidataId' || filter.q == null) {
      return
    }

    const ids = typeof filter.q === 'string' ? [filter.q] : filter.q
    if (filter.context === 'exclude') {
      clauses.push({ [Op.notIn]: ids })
    } else {
      clauses.push({ [Op.in]: ids })
    }
  })

  return clauses.length > 0 ? { wikidataId: { [Op.and]: clauses } } : undefined
}
Loading

0 comments on commit 28b162c

Please sign in to comment.