From 8ffae6277fd4b081a12ab2b56bb73ac1ad80490a Mon Sep 17 00:00:00 2001 From: Roman Kalyakin Date: Thu, 2 May 2024 20:40:25 +0200 Subject: [PATCH] search-facets service refactored and schema added --- src/configuration.ts | 4 + src/hooks/resolvers.js | 98 --- src/hooks/resolvers.ts | 116 +++ src/hooks/search.js | 225 +++--- src/models/common.ts | 4 + src/models/generated/schemas.d.ts | 745 ++++++++++++++++++ src/schema/schemas/SearchFacet.json | 39 + src/schema/schemas/SearchFacetBucket.json | 32 + .../schemas/SearchFacetRangeBucket.json | 26 + src/schema/schemas/Topic.json | 18 + src/schema/schemas/Year.json | 22 + src/schema/schemas/YearWeights.json | 29 + src/services/index.ts | 5 +- .../search-facets/search-facets.class.js | 186 ----- .../search-facets/search-facets.class.ts | 233 ++++++ .../search-facets/search-facets.hooks.js | 125 --- .../search-facets/search-facets.hooks.ts | 105 +++ .../search-facets/search-facets.schema.ts | 156 ++++ .../search-facets/search-facets.service.js | 16 - .../search-facets/search-facets.service.ts | 31 + src/types.ts | 2 +- src/util/{instruments.js => instruments.ts} | 4 +- 22 files changed, 1677 insertions(+), 544 deletions(-) delete mode 100644 src/hooks/resolvers.js create mode 100644 src/hooks/resolvers.ts create mode 100644 src/models/common.ts create mode 100644 src/schema/schemas/SearchFacet.json create mode 100644 src/schema/schemas/SearchFacetBucket.json create mode 100644 src/schema/schemas/SearchFacetRangeBucket.json create mode 100644 src/schema/schemas/Topic.json create mode 100644 src/schema/schemas/Year.json create mode 100644 src/schema/schemas/YearWeights.json delete mode 100644 src/services/search-facets/search-facets.class.js create mode 100644 src/services/search-facets/search-facets.class.ts delete mode 100644 src/services/search-facets/search-facets.hooks.js create mode 100644 src/services/search-facets/search-facets.hooks.ts create mode 100644 src/services/search-facets/search-facets.schema.ts delete mode 
100644 src/services/search-facets/search-facets.service.js create mode 100644 src/services/search-facets/search-facets.service.ts rename src/util/{instruments.js => instruments.ts} (83%) diff --git a/src/configuration.ts b/src/configuration.ts index 74ad204f..1edab70b 100644 --- a/src/configuration.ts +++ b/src/configuration.ts @@ -3,6 +3,7 @@ import type { FromSchema, JSONSchemaDefinition } from '@feathersjs/schema' import { Ajv, getValidator } from '@feathersjs/schema' import type { RedisClientOptions } from 'redis' import type { RateLimiterConfiguration } from './services/internal/rateLimiter/redis' +import { Sequelize } from 'sequelize' export type RedisConfiguration = RedisClientOptions & { enable?: boolean; host?: string } @@ -12,6 +13,9 @@ export interface Configuration { redis?: RedisConfiguration rateLimiter?: RateLimiterConfiguration & { enabled?: boolean } publicApiPrefix?: string + + // TODO: move to services: + sequelizeClient?: Sequelize } const configurationSchema: JSONSchemaDefinition = { diff --git a/src/hooks/resolvers.js b/src/hooks/resolvers.js deleted file mode 100644 index 4f419bd2..00000000 --- a/src/hooks/resolvers.js +++ /dev/null @@ -1,98 +0,0 @@ -const lodash = require('lodash') -const debug = require('debug')('impresso/hooks/resolvers') -const Collection = require('../models/collections.model') - -const resolveTextReuseClusters = () => async (context) => { - if (context.path !== 'search-facets' || context.method !== 'get') { - throw new Error( - 'resolveTextReuseClusters hook can only be used with search-facets service' - ) - } - const uids = context.result - .filter((d) => d.type === 'textReuseCluster') - .reduce((acc, d) => acc.concat(d.buckets.map((di) => di.val)), []) - - if (!uids.length) { - return - } - debug('resolveTextReuseClusters uids:', uids) - // get text reuse clusters as dictionary from text-reuse-clusters service - const index = await context.app - .service('text-reuse-passages') - .find({ - query: { - filters: [{ type: 
'textReuseCluster', q: uids }], - groupby: 'textReuseClusterId', - limit: uids.length, - }, - }) - .then(({ data }) => { - debug('resolveTextReuseClusters data:', data.length) - return lodash.keyBy(data, 'textReuseCluster.id') - }) - .catch((err) => { - console.error('hook resolveTextReuseClusters ERROR') - console.error(err) - }) - debug('resolveTextReuseClusters index keys:', Object.keys(index)) - context.result = context.result.map((d) => { - if (d.type !== 'textReuseCluster') { - return d - } - d.buckets = d.buckets.map((b) => { - b.item = index[b.val] - return b - }) - return d - }) -} - -const resolveCollections = () => async (context) => { - let uids = [] - // collect the uids list based on the different service - if (context.path === 'search-facets') { - uids = context.result - .filter((d) => d.type === 'collection') - .reduce((acc, d) => acc.concat(d.buckets.map((di) => di.uid)), []) - } else { - throw new Error( - 'context path is not registered to be used with resolveCollections hook' - ) - } - - if (!uids.length) { - return - } - // get collections as dictionary - const index = await Collection.sequelize(context.app.get('sequelizeClient')) - .findAll({ - where: { - uid: uids, - }, - }) - .then((rows) => - lodash.keyBy( - rows.map((r) => r.toJSON()), - 'uid' - ) - ) - .catch((err) => { - console.error('hook resolveCollections ERROR') - console.error(err) - }) - - if (context.path === 'search-facets') { - context.result = context.result.map((d) => { - if (d.type !== 'collection') { - return d - } - d.buckets = d.buckets.map((b) => { - b.item = index[b.uid] - return b - }) - return d - }) - } -} - -module.exports = { resolveCollections, resolveTextReuseClusters } diff --git a/src/hooks/resolvers.ts b/src/hooks/resolvers.ts new file mode 100644 index 00000000..6aa8ab28 --- /dev/null +++ b/src/hooks/resolvers.ts @@ -0,0 +1,116 @@ +import lodash from 'lodash' +import Collection from '../models/collections.model' +import { HookContext } from 
'@feathersjs/feathers' +import { Service as SearchFacetService } from '../services/search-facets/search-facets.class' +import { ImpressoApplication } from '../types' +import { FindResponse } from '../models/common' +import { SearchFacet, SearchFacetBucket } from '../models/generated/schemas' +const debug = require('debug')('impresso/hooks/resolvers') + +const supportedMethods = ['get', 'find'] + +const isSearchFacetBucket = (bucket: any): bucket is SearchFacetBucket => { // a string 'val' distinguishes it from SearchFacetRangeBucket (numeric 'val') + return typeof bucket.val === 'string' +} + +const resultAsList = (result: FindResponse<SearchFacet> | SearchFacet | undefined): SearchFacet[] => { // normalises a hook result to a list of facets + if (result == null) return [] + + if ('data' in result && Array.isArray(result.data)) { // list response wrapping facets in 'data' + return result.data + } else { // single facet object + return [result as SearchFacet] + } +} + +const assertCorrectServiceAndMethods = ( + hookName: string, + context: HookContext +) => { + if (!(context.service instanceof SearchFacetService)) + throw new Error(`${hookName} hook can only be used with ${SearchFacetService.name} service`) + + if (!supportedMethods.includes(context.method)) + throw new Error(`${hookName} hook can only be used with methods: ${supportedMethods}. 
Got: ${context.method}`) +} + +export const resolveTextReuseClusters = () => async (context: HookContext) => { + assertCorrectServiceAndMethods(resolveTextReuseClusters.name, context) + + const items = resultAsList(context.result) + + const uids = items + .filter(d => d.type === 'textReuseCluster') + .reduce((acc, d) => acc.concat(d.buckets.filter(isSearchFacetBucket).map(di => di.val)), [] as string[]) + + if (!uids.length) return + + debug('resolveTextReuseClusters uids:', uids) + // get text reuse clusters as dictionary from text-reuse-passages service + const index = await context.app + .service('text-reuse-passages') + .find({ + query: { + filters: [{ type: 'textReuseCluster', q: uids }], + groupby: 'textReuseClusterId', + limit: uids.length, + }, + }) + .then(({ data }: { data: any }) => { + debug('resolveTextReuseClusters data:', data.length) + return lodash.keyBy(data, 'textReuseCluster.id') + }) + .catch((err: Error) => { // NOTE(review): error is logged and swallowed, so index is undefined below + console.error('hook resolveTextReuseClusters ERROR') + console.error(err) + }) + debug('resolveTextReuseClusters index keys:', Object.keys(index)) + + items.forEach(d => { + if (d.type !== 'textReuseCluster') return + d.buckets.forEach(b => { + if (isSearchFacetBucket(b)) { + b.item = index[b.val] + } + }) + }) +} + +export const resolveCollections = () => async (context: HookContext) => { + assertCorrectServiceAndMethods(resolveCollections.name, context) + + const items = resultAsList(context.result) + + const uids = items + .filter(d => d.type === 'collection') + .reduce((acc, d) => acc.concat(d.buckets.filter(isSearchFacetBucket).map(di => di.val)), [] as string[]) + + if (!uids.length) return + + // get collections as dictionary + const client = context.app.get('sequelizeClient') + + const index = await Collection.sequelize(client) + .findAll({ + where: { + uid: uids, + }, + }) + .then((rows: any[]) => + lodash.keyBy( + rows.map(r => r.toJSON()), + 'uid' + ) + ) + .catch((err: Error) => { // NOTE(review): error is logged and swallowed, so index is undefined below + console.error('hook 
resolveCollections ERROR') + console.error(err) + }) + + items.forEach(d => { + if (d.type !== 'collection') return + d.buckets.filter(isSearchFacetBucket).forEach(b => { + b.item = index[b.val] + }) + }) +} diff --git a/src/hooks/search.js b/src/hooks/search.js index 5a7a0b3c..e1d1b9e8 100644 --- a/src/hooks/search.js +++ b/src/hooks/search.js @@ -1,37 +1,37 @@ -const debug = require('debug')('impresso/hooks:search'); -const lodash = require('lodash'); -const config = require('@feathersjs/configuration')()(); +const debug = require('debug')('impresso/hooks:search') +const lodash = require('lodash') +const config = require('@feathersjs/configuration')()() -const { - filtersToQueryAndVariables, -} = require('../util/solr'); -const { SolrNamespaces } = require('../solr'); +const { filtersToQueryAndVariables } = require('../util/solr') +const { SolrNamespaces } = require('../solr') /** * Transform q param in a nice string filter. * @param {String} type filter type, gets transkated to actual solr fields. 
* @return {null} [description] */ -const qToSolrFilter = (type = 'string') => (context) => { - if (context.type !== 'before') { - throw new Error('[qToSolrFilter] hook should only be used as a \'before\' hook.'); - } - if (typeof context.params.sanitized !== 'object') { - throw new Error('[qToSolrFilter] hook should be used after a \'validate\' hook.'); - } - if (!Array.isArray(context.params.sanitized.filters)) { - context.params.sanitized.filters = []; - } - if (context.params.sanitized.q) { - context.params.sanitized.filters.unshift({ - context: 'include', - type, - fuzzy: false, - standalone: false, - q: context.params.sanitized.q, - }); +// prettier-ignore +const qToSolrFilter = (type = 'string') => + context => { + if (context.type !== 'before') { + throw new Error("[qToSolrFilter] hook should only be used as a 'before' hook.") + } + if (typeof context.params.sanitized !== 'object') { + throw new Error("[qToSolrFilter] hook should be used after a 'validate' hook.") + } + if (!Array.isArray(context.params.sanitized.filters)) { + context.params.sanitized.filters = [] + } + if (context.params.sanitized.q) { + context.params.sanitized.filters.unshift({ + context: 'include', + type, + fuzzy: false, + standalone: false, + q: context.params.sanitized.q, + }) + } } -}; /** * filtersToSolrQuery transform string filters @@ -40,116 +40,115 @@ const qToSolrFilter = (type = 'string') => (context) => { * @param {function} solrIndexProvider - a function that takes context * and returns the Solr index filters should be validated against. 
*/ -const filtersToSolrQuery = ({ - overrideOrderBy = true, - prop = 'params', - solrIndexProvider = () => SolrNamespaces.Search, -} = {}) => async (context) => { - const prefix = `[filtersToSolrQuery (${context.path}.${context.method})]`; - if (context.type !== 'before') { - throw new Error(`${prefix} hook should only be used as a 'before' hook.`); - } - if (typeof context[prop].sanitized !== 'object') { - context[prop].sanitized = {}; - } - if (!Array.isArray(context[prop].sanitized.filters)) { - context[prop].sanitized.filters = []; - } - if (!context[prop].sanitized.filters.length && !context[prop].sanitized.q) { - // nothing is give, wildcard then. - debug(`${prefix} with 'solr query': *:*`); - context[prop].sanitized.sq = '*:*'; - context[prop].sanitized.queryComponents = []; - return; - } +// prettier-ignore +const filtersToSolrQuery = + ({ + overrideOrderBy = true, + prop = 'params', + solrIndexProvider = () => SolrNamespaces.Search + } = {}) => async context => { + const prefix = `[filtersToSolrQuery (${context.path}.${context.method})]` + if (context.type !== 'before') { + throw new Error(`${prefix} hook should only be used as a 'before' hook.`) + } + if (typeof context[prop].sanitized !== 'object') { + context[prop].sanitized = {} + } + if (!Array.isArray(context[prop].sanitized.filters)) { + context[prop].sanitized.filters = [] + } + if (!context[prop].sanitized.filters.length && !context[prop].sanitized.q) { + // nothing is give, wildcard then. 
+ debug(`${prefix} with 'solr query': *:*`) + context[prop].sanitized.sq = '*:*' + context[prop].sanitized.queryComponents = [] + return + } - const { query, variables: vars } = filtersToQueryAndVariables( - context[prop].sanitized.filters, - solrIndexProvider(context), - ); + const { query, variables: vars } = filtersToQueryAndVariables( + context[prop].sanitized.filters, + solrIndexProvider(context) + ) - // prepend order by if it is not relevance - if (overrideOrderBy && Object.keys(vars).length) { - // relevance direction - let direction = 'desc'; - if (context[prop].sanitized.order_by && context[prop].sanitized.order_by.indexOf('score asc') > -1) { - direction = 'asc'; - } - const varsOrderBy = Object.keys(vars).map(v => `\${${v}} ${direction}`); - // if order by is by relevance: - if (context[prop].sanitized.order_by && context[prop].sanitized.order_by.indexOf('score') === 0) { - context[prop].sanitized.order_by = varsOrderBy - .concat(context[prop].sanitized.order_by.split(',')) - .join(','); - } else if (context[prop].sanitized.order_by) { - context[prop].sanitized.order_by = context[prop].sanitized.order_by - .split(',') - .concat(varsOrderBy) - .join(','); - } else { - context[prop].sanitized.order_by = varsOrderBy.join(','); + // prepend order by if it is not relevance + if (overrideOrderBy && Object.keys(vars).length) { + // relevance direction + let direction = 'desc' + if (context[prop].sanitized.order_by && context[prop].sanitized.order_by.indexOf('score asc') > -1) { + direction = 'asc' + } + const varsOrderBy = Object.keys(vars).map(v => `\${${v}} ${direction}`) + // if order by is by relevance: + if (context[prop].sanitized.order_by && context[prop].sanitized.order_by.indexOf('score') === 0) { + context[prop].sanitized.order_by = varsOrderBy.concat(context[prop].sanitized.order_by.split(',')).join(',') + } else if (context[prop].sanitized.order_by) { + context[prop].sanitized.order_by = 
context[prop].sanitized.order_by.split(',').concat(varsOrderBy).join(',') + } else { + context[prop].sanitized.order_by = varsOrderBy.join(',') + } } - } - debug(`${prefix} query order_by:`, context[prop].sanitized.order_by); - debug(`${prefix} vars =`, vars, context[prop].sanitized); + debug(`${prefix} query order_by:`, context[prop].sanitized.order_by) + debug(`${prefix} vars =`, vars, context[prop].sanitized) - // context[prop].query.order_by.push() + // context[prop].query.order_by.push() - context[prop].sanitized.sq = query; - // context[prop].sanitized.sfq = filterQueries.join(' AND '); - context[prop].sanitized.sv = vars; - // NOTE: `queryComponents` should be deprecated - const filters = lodash.groupBy(context[prop].sanitized.filters, 'type'); - context[prop].sanitized.queryComponents = [].concat( - filters.isFront, - filters.years, - filters.newspaper, - filters.topic, - filters.person, - filters.location, - filters.collection, - filters.language, - filters.daterange, - filters.type, - filters.country, - filters.string, - filters.title, - filters.issue, - filters.page, - ).filter(d => typeof d !== 'undefined'); - debug(`${prefix} with 'solr query': ${context[prop].sanitized.sq}`); -}; + context[prop].sanitized.sq = query + // context[prop].sanitized.sfq = filterQueries.join(' AND '); + context[prop].sanitized.sv = vars + // NOTE: `queryComponents` should be deprecated + const filters = lodash.groupBy(context[prop].sanitized.filters, 'type') + context[prop].sanitized.queryComponents = [] + .concat( + filters.isFront, + filters.years, + filters.newspaper, + filters.topic, + filters.person, + filters.location, + filters.collection, + filters.language, + filters.daterange, + filters.type, + filters.country, + filters.string, + filters.title, + filters.issue, + filters.page + ) + .filter(d => typeof d !== 'undefined') + debug(`${prefix} with 'solr query': ${context[prop].sanitized.sq}`) + } /** * check if there are any params to be added to our beloved facets. 
should follow facets validation * @return {[type]} [description] */ -const filtersToSolrFacetQuery = () => async (context) => { +const filtersToSolrFacetQuery = () => async context => { if (!context.params.sanitized.facets) { - debug('[filtersToSolrFacetQuery] WARN no facets requested.'); - return; + debug('[filtersToSolrFacetQuery] WARN no facets requested.') + return } if (typeof context.params.sanitized !== 'object') { - throw new Error('[filtersToSolrFacetQuery] hook should be used after a \'validate\' hook.'); + throw new Error("[filtersToSolrFacetQuery] hook should be used after a 'validate' hook.") } - const facets = JSON.parse(context.params.sanitized.facets); - debug('[filtersToSolrFacetQuery] on facets:', facets); + const facets = JSON.parse(context.params.sanitized.facets) + debug('[filtersToSolrFacetQuery] on facets:', facets) if (!Array.isArray(context.params.sanitized.facetfilters)) { - context.params.sanitized.facetfilters = []; + context.params.sanitized.facetfilters = [] } // apply facets recursively based on facet name - Object.keys(facets).forEach((key) => { - const filter = context.params.sanitized.facetfilters.find(d => d.name === key); + Object.keys(facets).forEach(key => { + const filter = context.params.sanitized.facetfilters.find(d => d.name === key) if (filter) { - debug(`[filtersToSolrFacetQuery] on facet ${key}:`, filter); + debug(`[filtersToSolrFacetQuery] on facet ${key}:`, filter) } - }); -}; + }) +} module.exports = { queries: config.solr.queries, filtersToSolrQuery, qToSolrFilter, filtersToSolrFacetQuery, -}; +} diff --git a/src/models/common.ts b/src/models/common.ts new file mode 100644 index 00000000..cd2ecec8 --- /dev/null +++ b/src/models/common.ts @@ -0,0 +1,4 @@ +import { BaseFind } from './generated/schemas' +export interface FindResponse<T> extends Omit<BaseFind, 'data'> { // BaseFind with its 'data' member replaced by a typed list + data: T[] +} diff --git a/src/models/generated/schemas.d.ts b/src/models/generated/schemas.d.ts index 5c9a7fdf..6eaf6488 100644 --- a/src/models/generated/schemas.d.ts +++ 
b/src/models/generated/schemas.d.ts @@ -622,6 +622,646 @@ export interface Page { } +export type StatusOfTheCollection = string; +export type NumberOfItemsInTheCollection = number | string; +export type UniqueIdentifierForTheUser = string; +export type UniqueUsernameForTheUserForOtherHumans = string; + +/** + * An object containing search results for a facet + */ +export interface SearchFacet { + /** + * The type of facet + */ + type: string; + /** + * The number of buckets in the facet + */ + numBuckets: number; + buckets: SearchFacetBucket[] | SearchFacetRangeBucket[]; + /** + * TODO + */ + min?: { + [k: string]: unknown; + }; + /** + * TODO + */ + max?: { + [k: string]: unknown; + }; + /** + * TODO + */ + gap?: { + [k: string]: unknown; + }; +} +/** + * Facet bucket + */ +export interface SearchFacetBucket { + /** + * Number of items in the bucket + */ + count: number; + /** + * Value of the 'type' element + */ + val: string; + /** + * UID of the 'type' element. Same as 'val' + */ + uid?: string; + /** + * The item in the bucket. 
Particular objct schema depends on the facet type + */ + item?: Newspaper | Collection | Entity | Topic | Year; +} +/** + * A newspaper + */ +export interface Newspaper { + /** + * The unique identifier of the newspaper + */ + uid: string; + /** + * The acronym of the newspaper + */ + acronym: string; + /** + * The labels of the newspaper + */ + labels: string[]; + /** + * Language codes of the languages used in the newspaper + */ + languages: string[]; + /** + * TODO + */ + properties?: NewspaperProperty[]; + /** + * TODO + */ + included: boolean; + /** + * Title of the newspaper + */ + name: string; + /** + * Last available year of the newspaper articles + */ + endYear: string; + /** + * First available year of the newspaper articles + */ + startYear: string; + firstIssue: NewspaperIssue; + lastIssue: NewspaperIssue1; + /** + * The number of articles in the newspaper + */ + countArticles: number; + /** + * The number of issues in the newspaper + */ + countIssues: number; + /** + * The number of pages in the newspaper + */ + countPages: number; + /** + * TODO + */ + fetched?: boolean; + /** + * The number of years of the newspaper available + */ + deltaYear: number; +} +export interface NewspaperProperty { + /** + * The name of the property + */ + name: string; + /** + * The value of the property + */ + value: string; + /** + * The label of the property + */ + label: string; + /** + * Whether the value is a URL + */ + isUrl?: boolean; + [k: string]: unknown; +} +/** + * First available issue of the newspaper + */ +export interface NewspaperIssue { + /** + * The unique identifier of the issue + */ + uid: string; + /** + * TODO + */ + cover: string; + /** + * The labels of the issue + */ + labels: string[]; + /** + * TODO + */ + fresh: boolean; + /** + * TODO: list available options + */ + accessRights: string; + /** + * The date of the issue + */ + date: string; + /** + * The year of the issue + */ + year: string; +} +/** + * Last available issue of the newspaper + 
*/ +export interface NewspaperIssue1 { + /** + * The unique identifier of the issue + */ + uid: string; + /** + * TODO + */ + cover: string; + /** + * The labels of the issue + */ + labels: string[]; + /** + * TODO + */ + fresh: boolean; + /** + * TODO: list available options + */ + accessRights: string; + /** + * The date of the issue + */ + date: string; + /** + * The year of the issue + */ + year: string; +} +/** + * Description of the collection object (Collection class) + */ +export interface Collection { + uid: string; + name: string; + description: string; + status: StatusOfTheCollection; + creationDate: string; + lastModifiedDate: string; + countItems: NumberOfItemsInTheCollection; + creator: BaseUser; + labels?: string[]; +} +export interface BaseUser { + uid: UniqueIdentifierForTheUser; + username: UniqueUsernameForTheUserForOtherHumans; + [k: string]: unknown; +} +/** + * An entity like location, person, etc + */ +export interface Entity { + /** + * Unique identifier of the entity + */ + uid: string; + /** + * Relevance of the entity in the document + */ + relevance: number; +} +/** + * A topic (TODO) + */ +export interface Topic { + /** + * The unique identifier of the topic + */ + uid: string; + /** + * The language code of the topic + */ + language: string; +} +/** + * A year (TODO) + */ +export interface Year { + /** + * Numeric representation of the year + */ + uid?: number; + values?: YearWeights; + refs?: YearWeights1; +} +/** + * Weights values (TODO) + */ +export interface YearWeights { + /** + * Number of content items + */ + c?: number; + /** + * Number of articles + */ + a?: number; + /** + * Number of pages + */ + p?: number; + /** + * Number of issues + */ + i?: number; + /** + * Number of images (with or without vectors) + */ + m?: number; +} +/** + * Weights references (TODO) + */ +export interface YearWeights1 { + /** + * Number of content items + */ + c?: number; + /** + * Number of articles + */ + a?: number; + /** + * Number of pages 
+ */ + p?: number; + /** + * Number of issues + */ + i?: number; + /** + * Number of images (with or without vectors) + */ + m?: number; +} +/** + * Facet bucket + */ +export interface SearchFacetRangeBucket { + /** + * Number of items in the bucket + */ + count: number; + /** + * Value of the 'type' element + */ + val: number; + /** + * Lower bound of the range + */ + lower?: number; + /** + * Lower bound of the range + */ + upper?: number; +} + + +export type StatusOfTheCollection = string; +export type NumberOfItemsInTheCollection = number | string; +export type UniqueIdentifierForTheUser = string; +export type UniqueUsernameForTheUserForOtherHumans = string; + +/** + * Facet bucket + */ +export interface SearchFacetBucket { + /** + * Number of items in the bucket + */ + count: number; + /** + * Value of the 'type' element + */ + val: string; + /** + * UID of the 'type' element. Same as 'val' + */ + uid?: string; + /** + * The item in the bucket. Particular objct schema depends on the facet type + */ + item?: Newspaper | Collection | Entity | Topic | Year; +} +/** + * A newspaper + */ +export interface Newspaper { + /** + * The unique identifier of the newspaper + */ + uid: string; + /** + * The acronym of the newspaper + */ + acronym: string; + /** + * The labels of the newspaper + */ + labels: string[]; + /** + * Language codes of the languages used in the newspaper + */ + languages: string[]; + /** + * TODO + */ + properties?: NewspaperProperty[]; + /** + * TODO + */ + included: boolean; + /** + * Title of the newspaper + */ + name: string; + /** + * Last available year of the newspaper articles + */ + endYear: string; + /** + * First available year of the newspaper articles + */ + startYear: string; + firstIssue: NewspaperIssue; + lastIssue: NewspaperIssue1; + /** + * The number of articles in the newspaper + */ + countArticles: number; + /** + * The number of issues in the newspaper + */ + countIssues: number; + /** + * The number of pages in the newspaper 
+ */ + countPages: number; + /** + * TODO + */ + fetched?: boolean; + /** + * The number of years of the newspaper available + */ + deltaYear: number; +} +export interface NewspaperProperty { + /** + * The name of the property + */ + name: string; + /** + * The value of the property + */ + value: string; + /** + * The label of the property + */ + label: string; + /** + * Whether the value is a URL + */ + isUrl?: boolean; + [k: string]: unknown; +} +/** + * First available issue of the newspaper + */ +export interface NewspaperIssue { + /** + * The unique identifier of the issue + */ + uid: string; + /** + * TODO + */ + cover: string; + /** + * The labels of the issue + */ + labels: string[]; + /** + * TODO + */ + fresh: boolean; + /** + * TODO: list available options + */ + accessRights: string; + /** + * The date of the issue + */ + date: string; + /** + * The year of the issue + */ + year: string; +} +/** + * Last available issue of the newspaper + */ +export interface NewspaperIssue1 { + /** + * The unique identifier of the issue + */ + uid: string; + /** + * TODO + */ + cover: string; + /** + * The labels of the issue + */ + labels: string[]; + /** + * TODO + */ + fresh: boolean; + /** + * TODO: list available options + */ + accessRights: string; + /** + * The date of the issue + */ + date: string; + /** + * The year of the issue + */ + year: string; +} +/** + * Description of the collection object (Collection class) + */ +export interface Collection { + uid: string; + name: string; + description: string; + status: StatusOfTheCollection; + creationDate: string; + lastModifiedDate: string; + countItems: NumberOfItemsInTheCollection; + creator: BaseUser; + labels?: string[]; +} +export interface BaseUser { + uid: UniqueIdentifierForTheUser; + username: UniqueUsernameForTheUserForOtherHumans; + [k: string]: unknown; +} +/** + * An entity like location, person, etc + */ +export interface Entity { + /** + * Unique identifier of the entity + */ + uid: string; + /** + 
* Relevance of the entity in the document + */ + relevance: number; +} +/** + * A topic (TODO) + */ +export interface Topic { + /** + * The unique identifier of the topic + */ + uid: string; + /** + * The language code of the topic + */ + language: string; +} +/** + * A year (TODO) + */ +export interface Year { + /** + * Numeric representation of the year + */ + uid?: number; + values?: YearWeights; + refs?: YearWeights1; +} +/** + * Weights values (TODO) + */ +export interface YearWeights { + /** + * Number of content items + */ + c?: number; + /** + * Number of articles + */ + a?: number; + /** + * Number of pages + */ + p?: number; + /** + * Number of issues + */ + i?: number; + /** + * Number of images (with or without vectors) + */ + m?: number; +} +/** + * Weights references (TODO) + */ +export interface YearWeights1 { + /** + * Number of content items + */ + c?: number; + /** + * Number of articles + */ + a?: number; + /** + * Number of pages + */ + p?: number; + /** + * Number of issues + */ + i?: number; + /** + * Number of images (with or without vectors) + */ + m?: number; +} + + +/** + * Facet bucket + */ +export interface SearchFacetRangeBucket { + /** + * Number of items in the bucket + */ + count: number; + /** + * Value of the 'type' element + */ + val: number; + /** + * Lower bound of the range + */ + lower?: number; + /** + * Lower bound of the range + */ + upper?: number; +} + + /** * ID of the text reuse passage */ @@ -857,6 +1497,21 @@ export interface ClusterDetails { } +/** + * A topic (TODO) + */ +export interface Topic { + /** + * The unique identifier of the topic + */ + uid: string; + /** + * The language code of the topic + */ + language: string; +} + + /** * User details */ @@ -910,3 +1565,93 @@ export interface APIVersion { }; }; } + + +/** + * A year (TODO) + */ +export interface Year { + /** + * Numeric representation of the year + */ + uid?: number; + values?: YearWeights; + refs?: YearWeights1; +} +/** + * Weights values (TODO) + 
*/ +export interface YearWeights { + /** + * Number of content items + */ + c?: number; + /** + * Number of articles + */ + a?: number; + /** + * Number of pages + */ + p?: number; + /** + * Number of issues + */ + i?: number; + /** + * Number of images (with or without vectors) + */ + m?: number; +} +/** + * Weights references (TODO) + */ +export interface YearWeights1 { + /** + * Number of content items + */ + c?: number; + /** + * Number of articles + */ + a?: number; + /** + * Number of pages + */ + p?: number; + /** + * Number of issues + */ + i?: number; + /** + * Number of images (with or without vectors) + */ + m?: number; +} + + +/** + * Total items counts within a year + */ +export interface YearWeights { + /** + * Number of content items + */ + c?: number; + /** + * Number of articles + */ + a?: number; + /** + * Number of pages + */ + p?: number; + /** + * Number of issues + */ + i?: number; + /** + * Number of images (with or without vectors) + */ + m?: number; +} diff --git a/src/schema/schemas/SearchFacet.json b/src/schema/schemas/SearchFacet.json new file mode 100644 index 00000000..a1b790cd --- /dev/null +++ b/src/schema/schemas/SearchFacet.json @@ -0,0 +1,39 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "title": "Search Facet", + "description": "An object containing search results for a facet", + "additionalProperties": false, + "required": ["type", "numBuckets", "buckets"], + "properties": { + "type": { + "type": "string", + "description": "The type of facet" + }, + "numBuckets": { + "type": "integer", + "description": "The number of buckets in the facet" + }, + "buckets": { + "oneOf": [ + { + "type": "array", + "items": { "$ref": "./SearchFacetBucket.json" } + }, + { + "type": "array", + "items": { "$ref": "./SearchFacetRangeBucket.json" } + } + ] + }, + "min": { + "description": "TODO" + }, + "max": { + "description": "TODO" + }, + "gap": { + "description": "TODO" + } + } +} diff --git 
a/src/schema/schemas/SearchFacetBucket.json b/src/schema/schemas/SearchFacetBucket.json new file mode 100644 index 00000000..1d2947b4 --- /dev/null +++ b/src/schema/schemas/SearchFacetBucket.json @@ -0,0 +1,32 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "title": "Search Facet Bucket", + "description": "Facet bucket", + "additionalProperties": false, + "required": ["count", "val"], + "properties": { + "count": { + "type": "integer", + "description": "Number of items in the bucket" + }, + "val": { + "type": "string", + "description": "Value of the 'type' element" + }, + "uid": { + "type": "string", + "description": "UID of the 'type' element. Same as 'val'" + }, + "item": { + "description": "The item in the bucket. Particular object schema depends on the facet type", + "oneOf": [ + { "$ref": "./Newspaper.json" }, + { "$ref": "./Collection.json" }, + { "$ref": "./Entity.json" }, + { "$ref": "./Topic.json" }, + { "$ref": "./Year.json" } + ] + } + } +} diff --git a/src/schema/schemas/SearchFacetRangeBucket.json b/src/schema/schemas/SearchFacetRangeBucket.json new file mode 100644 index 00000000..b08a8eb0 --- /dev/null +++ b/src/schema/schemas/SearchFacetRangeBucket.json @@ -0,0 +1,26 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "title": "Search Facet Range Bucket", + "description": "Facet bucket", + "additionalProperties": false, + "required": ["count", "val"], + "properties": { + "count": { + "type": "integer", + "description": "Number of items in the bucket" + }, + "val": { + "type": "integer", + "description": "Value of the 'type' element" + }, + "lower": { + "type": "integer", + "description": "Lower bound of the range" + }, + "upper": { + "type": "integer", + "description": "Upper bound of the range" + } + } +} diff --git a/src/schema/schemas/Topic.json b/src/schema/schemas/Topic.json new file mode 100644 index 00000000..6bebd179 --- /dev/null +++ b/src/schema/schemas/Topic.json @@ -0,0 
+1,18 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "title": "Topic", + "description": "A topic (TODO)", + "additionalProperties": false, + "required": ["uid", "language"], + "properties": { + "uid": { + "type": "string", + "description": "The unique identifier of the topic" + }, + "language": { + "type": "string", + "description": "The language code of the topic" + } + } +} diff --git a/src/schema/schemas/Year.json b/src/schema/schemas/Year.json new file mode 100644 index 00000000..d5ac0745 --- /dev/null +++ b/src/schema/schemas/Year.json @@ -0,0 +1,22 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "title": "Year", + "description": "A year (TODO)", + "additionalProperties": false, + "required": ["y"], + "properties": { + "uid": { + "type": "integer", + "description": "Numeric representation of the year" + }, + "values": { + "description": "Weights values (TODO)", + "$ref": "./YearWeights.json" + }, + "refs": { + "description": "Weights references (TODO)", + "$ref": "./YearWeights.json" + } + } +} diff --git a/src/schema/schemas/YearWeights.json b/src/schema/schemas/YearWeights.json new file mode 100644 index 00000000..b1406c7c --- /dev/null +++ b/src/schema/schemas/YearWeights.json @@ -0,0 +1,29 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "title": "Year Weights", + "description": "Total items counts within a year", + "additionalProperties": false, + "properties": { + "c": { + "type": "number", + "description": "Number of content items" + }, + "a": { + "type": "number", + "description": "Number of articles" + }, + "p": { + "type": "number", + "description": "Number of pages" + }, + "i": { + "type": "number", + "description": "Number of issues" + }, + "m": { + "type": "number", + "description": "Number of images (with or without vectors)" + } + } +} diff --git a/src/services/index.ts b/src/services/index.ts index df2c6507..a15a5929 100644 --- 
a/src/services/index.ts +++ b/src/services/index.ts @@ -16,6 +16,7 @@ const publicApiServices = [ 'text-reuse-clusters', 'version', 'newspapers', + 'search-facets', ] const internalApiServices = [ @@ -41,7 +42,6 @@ const internalApiServices = [ 'mentions', 'filepond', 'embeddings', - 'search-facets', 'table-of-contents', 'search-queries-comparison', 'me', @@ -70,6 +70,7 @@ export default (app: ImpressoApplication) => { services.forEach((service: string) => { const path = `./${service}/${service}.service` const module = require(path) - app.configure(module) + if (typeof module === 'function') app.configure(module) + else app.configure(module.default) }) } diff --git a/src/services/search-facets/search-facets.class.js b/src/services/search-facets/search-facets.class.js deleted file mode 100644 index 80f201e7..00000000 --- a/src/services/search-facets/search-facets.class.js +++ /dev/null @@ -1,186 +0,0 @@ -const lodash = require('lodash') -const { NotFound, NotImplemented, BadRequest } = require('@feathersjs/errors') -const debug = require('debug')('impresso/services:search-facets') -const SearchFacet = require('../../models/search-facets.model') -const { SolrMappings } = require('../../data/constants') -const { measureTime } = require('../../util/instruments') -const { areCacheableFacets, isCacheableQuery } = require('../../util/cache') - -const getFacetTypes = (typeString, index) => { - const validTypes = Object.keys(SolrMappings[index].facets) - const types = typeString.split(',') - - types.forEach(type => { - if (!validTypes.includes(type)) { - throw new BadRequest(`Unknown facet type in index ${index}: ${type}`) - } - }) - - if (!types.length) { - throw new NotFound() - } else if (types.length > 2) { - // limit number of facets per requests. 
- throw new NotImplemented() - } - return types -} - -const getRangeFacetMetadata = facet => { - if (facet.type !== 'range') return {} - return { - min: facet.start, - max: facet.end, - gap: facet.gap, - } -} - -class Service { - constructor({ app, name }) { - this.app = app - this.name = name - - /** @type {import('../../cachedSolr').CachedSolrClient} */ - this.solr = app.service('cachedSolr') - } - - async get(type, params) { - const { index } = params.query - const types = getFacetTypes(type, index) - - // init with limit and skip - const facetsq = { - offset: params.query.skip, - limit: params.query.limit, - sort: params.query.order_by, - } - if (params.sanitized.rangeStart) { - facetsq.start = params.sanitized.rangeStart - } - if (params.sanitized.rangeEnd) { - facetsq.end = params.sanitized.rangeEnd - } - if (params.sanitized.rangeGap) { - facetsq.gap = params.sanitized.rangeGap - } - if (params.sanitized.rangeInclude) { - facetsq.include = params.sanitized.rangeInclude - } - - const canBeCached = areCacheableFacets(types) && isCacheableQuery(params.sanitized.filters) - - // facets is an Object, will be stringified for the solr query. - // eslint-disable-next-line max-len - // '{"newspaper":{"type":"terms","field":"meta_journal_s","mincount":1,"limit":20,"numBuckets":true}}' - const facets = lodash(types) - .map(d => { - const facet = { - k: d, - ...SolrMappings[index].facets[d], - ...facetsq, - other: 'all', - } - if (type === 'collection') { - facet.prefix = params.authenticated ? params.user.uid : '-' - } - return facet - }) - .keyBy('k') - .mapValues(v => lodash.omit(v, 'k')) - .value() - - debug( - `[get] "${type}" (${canBeCached ? 
'cached' : 'not cached'}):`, - `index: ${index}`, - 'facets:', - facets, - 'groupby', - params.sanitized.groupby || 'none' - ) - const query = { - q: params.sanitized.sq, - 'json.facet': JSON.stringify(facets), - start: 0, - rows: 0, - hl: false, - vars: params.sanitized.sv, - } - - if (params.sanitized.groupby) { - query.fq = `{!collapse field=${params.sanitized.groupby}}` - } - const result = await measureTime( - () => this.solr.get(query, index, { skipCache: true }), //! canBeCached }), - 'search-facets.get.solr.facets' - ) - return types.map(t => { - const rangeFacetMetadata = getRangeFacetMetadata(SolrMappings[index].facets[t]) - // check that facetsq params are all defined - if (!isNaN(facetsq.start)) { - rangeFacetMetadata.min = facetsq.start - } - if (!isNaN(facetsq.end)) { - rangeFacetMetadata.max = facetsq.end - } - if (!isNaN(facetsq.gap)) { - rangeFacetMetadata.gap = facetsq.gap - } - return new SearchFacet({ - type: t, - // default min max and gap values from default solr config - ...result.facets[t], - ...rangeFacetMetadata, - numBuckets: result.facets[t] ? result.facets[t].numBuckets || result.facets[t].buckets.length : 0, - }) - }) - } - - async find(params) { - debug(`find '${this.name}': query:`, params.sanitized, params.sanitized.sv) - - // TODO: we may want to skip caching if facets requested contain 'collection' - // However I (RK) could not find where this endpoint is used to understand what `facets` is. 
- const canBeCached = isCacheableQuery(params.sanitized.filters) - - // TODO: transform params.query.filters to match solr syntax - const result = await await measureTime( - () => - this.app.get('solrClient').findAll( - { - q: params.sanitized.sq, - // fq: params.sanitized.sfq, - facets: params.query.facets, - limit: 0, - skip: 0, - fl: 'id', - vars: params.sanitized.sv, - }, - { skipCache: !canBeCached } - ), - 'search-facets.find.solr.facets' - ) - - const total = result.response.numFound - - debug(`find '${this.name}': SOLR found ${total} using SOLR params:`, result.responseHeader.params) - return { - data: Object.keys(result.facets).map(type => { - if (typeof result.facets[type] === 'object') { - return new SearchFacet({ - type, - ...result.facets[type], - }) - } - return { - type, - count: result.facets[type], - } - }), - } - } -} - -module.exports = function (options) { - return new Service(options) -} - -module.exports.Service = Service diff --git a/src/services/search-facets/search-facets.class.ts b/src/services/search-facets/search-facets.class.ts new file mode 100644 index 00000000..138f84fb --- /dev/null +++ b/src/services/search-facets/search-facets.class.ts @@ -0,0 +1,233 @@ +import { Params } from '@feathersjs/feathers' +import lodash from 'lodash' +import { CachedSolrClient } from '../../cachedSolr' +import { SolrMappings } from '../../data/constants' +import { FindResponse } from '../../models/common' +import type { Filter, SearchFacet } from '../../models/generated/schemas' +import SearchFacetModel from '../../models/search-facets.model' +import { ImpressoApplication } from '../../types' +import { areCacheableFacets, isCacheableQuery } from '../../util/cache' +import { measureTime } from '../../util/instruments' +import { IndexId } from './search-facets.schema' + +const debug = require('debug')('impresso/services:search-facets') + +type FacetMetadata = any + +export const getIndexMeta = (indexId: IndexId) => { + switch (indexId) { + case 'search': 
+ return SolrMappings.search + case 'tr-clusters': + return SolrMappings['tr_clusters'] + case 'tr-passages': + return SolrMappings['tr_passages'] + default: + throw new Error(`Unknown index: ${indexId}`) + } +} + +const getRangeFacetMetadata = (facet: FacetMetadata) => { + if (facet.type !== 'range') return {} + return { + min: facet.start, + max: facet.end, + gap: facet.gap, + } +} + +interface GetQuery { + skip?: number + limit?: number + order_by?: string +} + +interface FindQuery extends GetQuery { + facets: string[] +} + +interface SanitizedGetParams { + rangeStart?: number + rangeEnd?: number + rangeGap?: number + rangeInclude?: any + filters?: Filter[] + groupby?: string + sq?: string + sv?: string[] + facets?: string[] +} + +interface FacetsQueryPart { + offset?: number + limit?: number + sort?: string + start?: number + end?: number + gap?: number + include?: any +} + +interface ServiceOptions { + app: ImpressoApplication + name: string + index: IndexId +} + +export class Service { + app: ImpressoApplication + name: string + index: IndexId + solr: CachedSolrClient + + constructor({ app, name, index }: ServiceOptions) { + this.app = app + this.name = name + this.index = index + this.solr = app.service('cachedSolr') + } + + async get(type: string, params: Params): Promise { + // const { index } = params.query + // const types = getFacetTypes(type, this.index) + + // init with limit and skip + const facetsq: FacetsQueryPart = { + offset: params.query?.skip, + limit: params.query?.limit, + sort: params.query?.order_by, + } + + const sanitizedParams = (params as any).sanitized as SanitizedGetParams + + const result = await this._getFacetsFromSolr( + [type], + this.index, + facetsq, + params.authenticated ?? 
false, + (params as any)?.user?.uid, + sanitizedParams + ) + + return result[0] + } + + async find(params: Params): Promise> { + const facetsq: FacetsQueryPart = { + offset: params.query?.skip, + limit: params.query?.limit, + sort: params.query?.order_by, + } + + const sanitizedParams = (params as any).sanitized as SanitizedGetParams + + const result = await this._getFacetsFromSolr( + sanitizedParams.facets ?? [], + this.index, + facetsq, + params.authenticated ?? false, + (params as any)?.user?.uid, + sanitizedParams + ) + + return { + data: result, + limit: facetsq.limit ?? 0, + skip: facetsq.offset ?? 0, + total: 0, + info: {}, + } + } + + async _getFacetsFromSolr( + types: string[], + index: IndexId, + facetsQueryPart: FacetsQueryPart, + isAuthenticated: boolean, + userId: string, + sanitizedParams: SanitizedGetParams + ): Promise { + if (types.length === 0) return [] + + const facetsq = { ...facetsQueryPart } + + if (sanitizedParams.rangeStart) { + facetsq.start = sanitizedParams.rangeStart + } + if (sanitizedParams.rangeEnd) { + facetsq.end = sanitizedParams.rangeEnd + } + if (sanitizedParams.rangeGap) { + facetsq.gap = sanitizedParams.rangeGap + } + if (sanitizedParams.rangeInclude) { + facetsq.include = sanitizedParams.rangeInclude + } + + const canBeCached = areCacheableFacets(types) && isCacheableQuery(sanitizedParams.filters ?? []) + + const indexFacets = getIndexMeta(index).facets as Record + + const facets = lodash(types) + .map((d: string) => { + const facet = { + k: d, + ...indexFacets[d], + ...facetsq, + other: 'all', + } + if (types.includes('collection')) { + facet.prefix = isAuthenticated ? userId : '-' + } + return facet + }) + .keyBy('k') + .mapValues((v: Record) => lodash.omit(v, 'k')) + .value() + + debug( + `[get] "${types.join(', ')}" (${canBeCached ? 
'cached' : 'not cached'}):`, + `index: ${index}`, + 'facets:', + facets, + 'groupby', + sanitizedParams.groupby || 'none' + ) + const query: Record = { + q: sanitizedParams.sq, + 'json.facet': JSON.stringify(facets), + start: 0, + rows: 0, + hl: false, + vars: sanitizedParams.sv, + } + + if (sanitizedParams.groupby) { + query.fq = `{!collapse field=${sanitizedParams.groupby}}` + } + const result = await measureTime( + () => this.solr.get(query, index, { skipCache: true }), //! canBeCached }), + 'search-facets.get.solr.facets' + ) + return types.map(t => { + const rangeFacetMetadata = getRangeFacetMetadata(indexFacets[t]) + // check that facetsq params are all defined + if (facetsQueryPart.start == null || !isNaN(facetsQueryPart.start)) { + rangeFacetMetadata.min = facetsQueryPart.start + } + if (facetsQueryPart.end == null || !isNaN(facetsQueryPart.end)) { + rangeFacetMetadata.max = facetsQueryPart.end + } + if (facetsQueryPart.gap == null || !isNaN(facetsQueryPart.gap)) { + rangeFacetMetadata.gap = facetsQueryPart.gap + } + return new SearchFacetModel({ + type: t, + // default min max and gap values from default solr config + ...result.facets[t], + ...rangeFacetMetadata, + numBuckets: result.facets[t] ? 
result.facets[t].numBuckets || result.facets[t].buckets.length : 0, + }) + }) + } +} diff --git a/src/services/search-facets/search-facets.hooks.js b/src/services/search-facets/search-facets.hooks.js deleted file mode 100644 index 3d15a230..00000000 --- a/src/services/search-facets/search-facets.hooks.js +++ /dev/null @@ -1,125 +0,0 @@ -const { authenticate } = require('../../hooks/authenticate') -const { - eachFilterValidator, - paramsValidator, -} = require('../search/search.validators') -const { - validate, - validateEach, - queryWithCommonParams, - utils, -} = require('../../hooks/params') -const { filtersToSolrQuery } = require('../../hooks/search') -const { - resolveCollections, - resolveTextReuseClusters, -} = require('../../hooks/resolvers') -const { SolrMappings } = require('../../data/constants') -const { SolrNamespaces } = require('../../solr') - -const DefaultIndex = 'search' -const SupportedIndexes = Object.keys(SolrMappings) - -module.exports = { - before: { - all: [], - get: [ - authenticate('jwt', { - allowUnauthenticated: true, - }), - - validate({ - index: { - choices: SupportedIndexes, - defaultValue: DefaultIndex, - }, - q: paramsValidator.q, - order_by: { - before: (d) => (Array.isArray(d) ? 
d.pop() : d), - defaultValue: '-count', - choices: ['-count', 'count'], - transform: (d) => - utils.translate(d, { - '-count': { - count: 'desc', - }, - count: { - count: 'asc', - }, - }), - }, - }), - - // validate groupby params against index - (context) => { - const { index, groupby } = context.params.query - // if group by exists and it is a string - if (typeof groupby === 'string' && groupby.length > 0) { - if (!Object.keys(SolrMappings[index].facets).includes(groupby)) { - throw new Error( - `Invalid groupby parameter for index ${index}: ${groupby}` - ) - } - context.params.groupby = context.params.sanitized.groupby = - SolrMappings[index].facets[groupby].field - } - }, - - validateEach('filters', eachFilterValidator), - filtersToSolrQuery({ - overrideOrderBy: false, - solrIndexProvider: (context) => - context.params.query.index || SolrNamespaces.Search, - }), - (context) => { - const { rangeStart, rangeEnd, rangeGap, rangeInclude } = - context.params.query - if (['edge', 'all', 'upper'].includes(rangeInclude)) { - context.params.sanitized.rangeInclude = rangeInclude - } - // if they are all provided, verify that they are integer - if (!isNaN(rangeStart) && !isNaN(rangeEnd) && !isNaN(rangeGap)) { - if ( - !Number.isInteger(Number(rangeStart)) || - !Number.isInteger(Number(rangeEnd)) || - !Number.isInteger(Number(rangeGap)) - ) { - throw new Error( - `Invalid range parameters: rangeStart=${rangeStart}, rangeEnd=${rangeEnd}, rangeGap=${rangeGap}` - ) - } - context.params.sanitized.rangeGap = context.params.query.rangeGap - context.params.sanitized.rangeStart = context.params.query.rangeStart - context.params.sanitized.rangeEnd = context.params.query.rangeEnd - } - }, - queryWithCommonParams(), - ], - create: [], - update: [], - patch: [], - remove: [], - }, - - after: { - all: [], - find: [ - // resolve(), - ], - get: [resolveCollections(), resolveTextReuseClusters()], - create: [], - update: [], - patch: [], - remove: [], - }, - - error: { - all: [], - find: 
[], - get: [], - create: [], - update: [], - patch: [], - remove: [], - }, -} diff --git a/src/services/search-facets/search-facets.hooks.ts b/src/services/search-facets/search-facets.hooks.ts new file mode 100644 index 00000000..6f5ad241 --- /dev/null +++ b/src/services/search-facets/search-facets.hooks.ts @@ -0,0 +1,105 @@ +import { HookContext } from '@feathersjs/feathers' +import { authenticateAround as authenticate } from '../../hooks/authenticate' +import { queryWithCommonParams, utils, validate, validateEach } from '../../hooks/params' +import { rateLimit } from '../../hooks/rateLimiter' +import { resolveCollections, resolveTextReuseClusters } from '../../hooks/resolvers' +import { filtersToSolrQuery } from '../../hooks/search' +import { ImpressoApplication } from '../../types' +import { eachFilterValidator, paramsValidator } from '../search/search.validators' +import { getIndexMeta } from './search-facets.class' +import { IndexId, OrderByChoices, facetTypes } from './search-facets.schema' + +const getAndFindHooks = (index: IndexId) => [ + validate({ + q: paramsValidator.q, + order_by: { + before: (d: any) => (Array.isArray(d) ? 
d.pop() : d), + defaultValue: '-count', + choices: OrderByChoices, + transform: (d: any) => + utils.translate(d, { + '-count': { + count: 'desc', + }, + count: { + count: 'asc', + }, + }), + }, + groupby: { + required: false, + fn: (value?: string) => { + if (typeof value === 'string' && value.length > 0) { + if (!facetTypes[index].includes(value)) { + return false + } + } + return true + }, + message: `Invalid groupby parameter for index ${index}`, + transform(value: string) { + const meta = getIndexMeta(index) + const facets: Record = meta.facets + return facets[value].field + }, + }, + }), + validateEach('filters', eachFilterValidator), + + filtersToSolrQuery({ + overrideOrderBy: false, + solrIndexProvider: (_: HookContext) => index.replace('-', '_'), + } as unknown as any), + + (context: HookContext) => { + const { rangeStart, rangeEnd, rangeGap, rangeInclude } = context.params.query + if (['edge', 'all', 'upper'].includes(rangeInclude)) { + context.params.sanitized.rangeInclude = rangeInclude + } + // if they are all provided, verify that they are integer + if (!isNaN(rangeStart) && !isNaN(rangeEnd) && !isNaN(rangeGap)) { + if ( + !Number.isInteger(Number(rangeStart)) || + !Number.isInteger(Number(rangeEnd)) || + !Number.isInteger(Number(rangeGap)) + ) { + throw new Error( + `Invalid range parameters: rangeStart=${rangeStart}, rangeEnd=${rangeEnd}, rangeGap=${rangeGap}` + ) + } + context.params.sanitized.rangeGap = context.params.query.rangeGap + context.params.sanitized.rangeStart = context.params.query.rangeStart + context.params.sanitized.rangeEnd = context.params.query.rangeEnd + } + }, +] + +export const getHooks = (index: IndexId) => ({ + around: { + all: [authenticate({ allowUnauthenticated: true }), rateLimit()], + }, + before: { + get: [...getAndFindHooks(index), queryWithCommonParams()], + find: [ + ...getAndFindHooks(index), + (context: HookContext) => { + const value = context.params.query.facets + + if (Array.isArray(value) && value.length > 0 && 
typeof value[0] === 'string') { + const unknownFacets = value.filter(d => !facetTypes[index].includes(d)) + + if (unknownFacets.length > 0) { + throw new Error(`Invalid facets for index ${index}: ${unknownFacets}`) + } + } + context.params.sanitized.facets = value + }, + queryWithCommonParams(), + ], + }, + + after: { + find: [resolveCollections(), resolveTextReuseClusters()], + get: [resolveCollections(), resolveTextReuseClusters()], + }, +}) diff --git a/src/services/search-facets/search-facets.schema.ts b/src/services/search-facets/search-facets.schema.ts new file mode 100644 index 00000000..4676f3b9 --- /dev/null +++ b/src/services/search-facets/search-facets.schema.ts @@ -0,0 +1,156 @@ +import type { ServiceSwaggerOptions } from 'feathers-swagger' +import { SolrMappings } from '../../data/constants' +import { QueryParameter, getSchemaRef, getStandardParameters, getStandardResponses } from '../../util/openapi' + +const SupportedIndexes = Object.keys(SolrMappings) + +export type IndexId = 'search' | 'tr-clusters' | 'tr-passages' + +export const facetTypes: Record = { + search: Object.keys(SolrMappings.search.facets), + 'tr-clusters': Object.keys(SolrMappings['tr_clusters'].facets), + 'tr-passages': Object.keys(SolrMappings['tr_passages'].facets), +} + +const facetNames: Record = { + search: 'search index', + 'tr-clusters': 'text reuse clusters index', + 'tr-passages': 'text reuse passages index', +} + +export const OrderByChoices = ['-count', 'count'] + +const getGetParameters = (index: IndexId): QueryParameter[] => [ + { + in: 'query', + name: 'q', + required: false, + schema: { + type: 'string', + }, + description: 'Search term', + }, + { + in: 'query', + name: 'order_by', + required: false, + schema: { + type: 'string', + enum: OrderByChoices, + }, + description: 'Order by', + }, + { + in: 'query', + name: 'groupby', + required: false, + schema: { + type: 'string', + enum: facetTypes[index], + }, + description: 'Group by', + }, + { + in: 'query', + name: 
'filters', + required: false, + schema: { + type: 'array', + items: getSchemaRef('Filter'), + }, + description: 'Filters to apply', + }, + { + in: 'query', + name: 'rangeStart', + required: false, + schema: { + type: 'number', + }, + description: 'Range start', + }, + { + in: 'query', + name: 'rangeEnd', + required: false, + schema: { + type: 'number', + }, + description: 'Range end', + }, + { + in: 'query', + name: 'rangeGap', + required: false, + schema: { + type: 'number', + }, + description: 'Range gap', + }, + { + in: 'query', + name: 'rangeInclude', + required: false, + schema: { + type: 'string', + enum: ['edge', 'all', 'upper'], + }, + description: 'Range include', + }, +] + +const getFindParameters = (index: IndexId): QueryParameter[] => [ + { + in: 'query', + name: 'facets[]', + required: true, + schema: { + type: 'array', + items: { + type: 'string', + enum: facetTypes[index], + }, + }, + description: 'Facets to return', + }, +] + +export const getDocs = (index: IndexId): ServiceSwaggerOptions => ({ + description: `${facetNames[index]} facets`, + securities: ['get', 'find'], + operations: { + find: { + description: `Get mutliple ${facetNames[index]} facets`, + parameters: [ + ...getFindParameters(index), + ...getGetParameters(index), + ...getStandardParameters({ method: 'find' }), + ], + responses: getStandardResponses({ + method: 'find', + schema: 'SearchFacet', + }), + }, + get: { + description: `Get a single ${facetNames[index]} facet`, + parameters: [ + { + in: 'path', + name: 'id', + required: true, + schema: { + type: 'string', + enum: facetTypes[index], + }, + description: 'Type of the facet', + }, + ...getGetParameters(index), + ...getStandardParameters({ method: 'find' }), + ], + responses: getStandardResponses({ + method: 'get', + schema: 'SearchFacet', + }), + }, + }, +}) diff --git a/src/services/search-facets/search-facets.service.js b/src/services/search-facets/search-facets.service.js deleted file mode 100644 index b5b9cdfe..00000000 --- 
a/src/services/search-facets/search-facets.service.js +++ /dev/null @@ -1,16 +0,0 @@ -// Initializes the `search-facets` service on path `/search-facets` -const createService = require('./search-facets.class.js'); -const hooks = require('./search-facets.hooks'); - -module.exports = function (app) { - // Initialize our service with any options it requires - app.use('/search-facets', createService({ - app, - name: 'search-facets', - })); - - // Get our initialized service so that we can register hooks - const service = app.service('search-facets'); - - service.hooks(hooks); -}; diff --git a/src/services/search-facets/search-facets.service.ts b/src/services/search-facets/search-facets.service.ts new file mode 100644 index 00000000..b0ca7de8 --- /dev/null +++ b/src/services/search-facets/search-facets.service.ts @@ -0,0 +1,31 @@ +import { createSwaggerServiceOptions } from 'feathers-swagger' +import { getDocs, IndexId } from './search-facets.schema' +import { SolrMappings } from '../../data/constants' +import { Service } from './search-facets.class' +import { getHooks } from './search-facets.hooks' +import { ImpressoApplication } from '../../types' +import { ServiceOptions } from '@feathersjs/feathers' + +const SupportedIndexes: IndexId[] = Object.keys(SolrMappings).map(key => key.replace('_', '-')) as IndexId[] + +export default (app: ImpressoApplication) => { + // Initialize our service with any options it requires + + SupportedIndexes.forEach(index => { + app.use( + `search-facets/${index}`, + new Service({ + app, + index, + name: `search-facets-${index}`, + }), + { + events: [], + docs: createSwaggerServiceOptions({ schemas: {}, docs: getDocs(index) }), + } as ServiceOptions + ) + // Get our initialized service so that we can register hooks + const service = app.service(`search-facets/${index}`) + service.hooks(getHooks(index)) + }) +} diff --git a/src/types.ts b/src/types.ts index 0b78d831..ef3d450a 100644 --- a/src/types.ts +++ b/src/types.ts @@ -10,4 +10,4 @@ 
interface AppServices { cachedSolr: CachedSolrClient } -export type ImpressoApplication = Application +export type ImpressoApplication = Application, Configuration> diff --git a/src/util/instruments.js b/src/util/instruments.ts similarity index 83% rename from src/util/instruments.js rename to src/util/instruments.ts index 4ed4022a..12978561 100644 --- a/src/util/instruments.js +++ b/src/util/instruments.ts @@ -1,6 +1,6 @@ import { logger } from '../logger' -async function measureTime(fn, label, doLog = undefined) { +export async function measureTime(fn: () => Promise, label: string, doLog = undefined) { const hrstart = process.hrtime() const onEnd = () => { const hrend = process.hrtime(hrstart) @@ -21,5 +21,3 @@ async function measureTime(fn, label, doLog = undefined) { throw e }) } - -module.exports = { measureTime }