Skip to content

Commit

Permalink
added image facets, filters and order by for images
Browse files Browse the repository at this point in the history
  • Loading branch information
theorm committed Jan 18, 2025
1 parent d5ca242 commit 098a08d
Show file tree
Hide file tree
Showing 13 changed files with 147 additions and 48 deletions.
18 changes: 18 additions & 0 deletions src/data/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,24 @@ export const SolrMappings: Record<string, ISolrMappings> = Object.freeze({
},
},
},
images: {
facets: {
newspaper: {
type: 'terms',
field: 'meta_journal_s',
mincount: 1,
limit: 20,
numBuckets: true,
},
year: {
type: 'terms',
field: 'meta_year_i',
mincount: 1,
limit: 400, // 400 years
numBuckets: true,
},
},
},
})

/* Check that facets are a subset of filter types */
Expand Down
9 changes: 9 additions & 0 deletions src/data/stats.yml
Original file line number Diff line number Diff line change
Expand Up @@ -110,3 +110,12 @@ indexes:
date:
field: meta_date_dt
limit: 186
images:
facets:
term:
newspaper:
field: meta_journal_s
limit: 100
year:
field: meta_year_i
limit: 400
15 changes: 15 additions & 0 deletions src/hooks/parameters.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import type { Application, HookContext } from '@feathersjs/feathers'
import { parseFilters } from '../util/queryParameters'
import { AppServices, ImpressoApplication } from '../types'

export const decodeJsonQueryParameters = (parametersNames: string[]) => async (context: HookContext<Application>) => {
const { query } = context.params
Expand Down Expand Up @@ -31,3 +33,16 @@ export const decodePathParameters = (parametersNames: string[]) => async (contex
}
}
}

/**
 * Converts filters in query parameters to canonical format.
 *
 * @param queryParameter name of the query parameter that carries the filters (defaults to `filters`).
 * @returns a hook function that replaces the parameter value with the output of `parseFilters`.
 * @throws Error when the hook is registered as anything other than a `before` hook.
 */
export const sanitizeFilters =
  (queryParameter = 'filters') =>
  (context: HookContext<ImpressoApplication, AppServices>) => {
    if (context.type !== 'before') {
      throw new Error('The sanitizeFilters hook should be used as a before hook only')
    }

    // `params.query` can be missing when the service is called without a query;
    // start from an empty query instead of failing on an undefined property access.
    const query = context.params.query ?? {}
    query[queryParameter] = parseFilters(query[queryParameter])
    context.params.query = query
  }
4 changes: 2 additions & 2 deletions src/middleware/openApiValidator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,11 @@ const installMiddleware = (app: ImpressoApplication & Application) => {
app.set('openApiMiddlewareOpts', options)
app.set('openApiValidatorMiddlewares', middlewares)

// TODO: an ugly way to handle `filters` query parameter before it reqches validation
// TODO: an ugly way to handle `filters` query parameter before it reaches validation
// Move this somewhere where it's more explicit
app.use((req, res, next) => {
if (req.query.filters != null) {
req.query.filters = parseFilters(req.query.filters)
req.query.filters = parseFilters(req.query.filters) as any as string[]
}
next()
})
Expand Down
25 changes: 21 additions & 4 deletions src/services/images/images.class.ts
Original file line number Diff line number Diff line change
@@ -1,19 +1,30 @@
import { NotFound } from '@feathersjs/errors'
import { ClientService, Id, Params } from '@feathersjs/feathers'
import { SimpleSolrClient } from '../../internalServices/simpleSolr'
import { Filter } from '../../models'
import { PublicFindResponse } from '../../models/common'
import { Image } from '../../models/generated/schemas'
import { Image as ImageDocument } from '../../models/generated/solr'
import { SolrNamespaces } from '../../solr'
import { filtersToSolrQueries } from '../../util/solr'

const DefaultLimit = 10
const ImageSimilarityVectorField: keyof ImageDocument = 'dinov2_emb_v1024'

/**
 * Translation table from the public `order_by` values to the Solr sort clause
 * sent with the select request.
 */
const OrderByParamToSolrFieldMap = {
  date: 'meta_date_dt asc',
  '-date': 'meta_date_dt desc',
}

/** Any key of the translation table, i.e. an accepted `order_by` value. */
type OrderByParam = keyof typeof OrderByParamToSolrFieldMap

/** Every accepted `order_by` value, in the order the table declares them. */
export const OrderByChoices = Object.keys(OrderByParamToSolrFieldMap) as OrderByParam[]

/**
 * Query parameters accepted by `Images.find`.
 */
export interface FindQuery {
// ID of a reference image. NOTE(review): presumably drives the similarity search; the code using it is not visible here — confirm.
similar_to_image_id?: string
// Search term. NOTE(review): how it is turned into a Solr query is not visible in this excerpt — confirm.
term?: string
// Page size; `find` falls back to `DefaultLimit` when it is omitted.
limit?: number
// Pagination offset; `find` falls back to 0 when it is omitted.
offset?: number
// Filters passed to `filtersToSolrQueries` for the images namespace; `find` treats a missing value as an empty list.
filters?: Filter[]
// Sort key; `find` translates it to a Solr sort clause through `OrderByParamToSolrFieldMap`.
order_by?: OrderByParam
}

export class Images implements Pick<ClientService<Image, unknown, unknown, PublicFindResponse<Image>>, 'find' | 'get'> {
Expand All @@ -22,6 +33,10 @@ export class Images implements Pick<ClientService<Image, unknown, unknown, Publi
async find(params?: Params<FindQuery>): Promise<PublicFindResponse<Image>> {
const limit = params?.query?.limit ?? DefaultLimit
const offset = params?.query?.offset ?? 0
const filters = params?.query?.filters ?? []
const sort = params?.query?.order_by != null ? OrderByParamToSolrFieldMap[params?.query?.order_by] : undefined

const filterQueryParts = filtersToSolrQueries(filters, SolrNamespaces.Images)

const queryParts: string[] = []

Expand Down Expand Up @@ -52,17 +67,19 @@ export class Images implements Pick<ClientService<Image, unknown, unknown, Publi
const results = await this.solrClient.select<ImageDocument>(SolrNamespaces.Images, {
body: {
query,
filter: filterQueryParts.join(' AND '),
limit,
offset,
...(sort != null ? { sort } : {}),
},
})

return {
data: results?.response?.docs?.map(toImage) ?? [],
pagination: {
limit: 0,
offset: 0,
total: 0,
limit,
offset,
total: results?.response?.numFound ?? 0,
},
}
}
Expand All @@ -86,7 +103,7 @@ const toImage = (doc: ImageDocument): Image => {
uid: doc.id!,
...(doc.linked_ci_s != null ? { contentItemUid: doc.linked_ci_s } : {}),
issueUid: doc.meta_issue_id_s!,
previewUrl: doc.iiif_url_s ?? doc.iiif_link_s!,
previewUrl: doc.iiif_link_s! ?? doc.iiif_url_s!,
...(doc.caption_txt != null ? { caption: doc.caption_txt.join('\n') } : {}),
}
}
8 changes: 5 additions & 3 deletions src/services/images/images.hooks.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import { HookMap } from '@feathersjs/feathers'
import { authenticateAround as authenticate } from '../../hooks/authenticate'
import { rateLimit } from '../../hooks/rateLimiter'
import { AppServices, ImpressoApplication } from '../../types'
import { sanitizeFilters } from '../../hooks/parameters'

export default {
around: {
all: [authenticate({ allowUnauthenticated: true }), rateLimit()],
},
} satisfies HookMap<ImpressoApplication, AppServices>
before: {
find: [sanitizeFilters('filters')],
},
}
22 changes: 21 additions & 1 deletion src/services/images/images.schema.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
import { ServiceSwaggerOptions } from 'feathers-swagger'
import { getStandardParameters, getStandardResponses, MethodParameter, QueryParameter } from '../../util/openapi'
import {
filtersQueryParameter,
getStandardParameters,
getStandardResponses,
MethodParameter,
QueryParameter,
} from '../../util/openapi'
import { OrderByChoices } from './images.class'

const parameterTerm: QueryParameter = {
in: 'query',
Expand All @@ -23,9 +30,22 @@ const similarToImageId: QueryParameter = {
description: 'Find images similar to the image with the given ID',
}

/**
 * OpenAPI definition of the optional `order_by` query parameter.
 * The allowed values come from `OrderByChoices`, imported from `./images.class`.
 */
const parameterOrderBy: QueryParameter = {
in: 'query',
name: 'order_by',
required: false,
schema: {
type: 'string',
enum: OrderByChoices,
},
description: 'Order by',
}

// Parameters declared for `find`: the term, similarity, ordering and filters parameters,
// followed by the standard `find` parameters with a maximum page size of 100.
const findParameters: MethodParameter[] = [
parameterTerm,
similarToImageId,
parameterOrderBy,
filtersQueryParameter,
...getStandardParameters({ method: 'find', maxPageSize: 100 }),
]
// `get` only declares the standard parameters for the `get` method.
const getParameters: MethodParameter[] = [...getStandardParameters({ method: 'get' })]
Expand Down
2 changes: 2 additions & 0 deletions src/services/search-facets/search-facets.class.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ export const getIndexMeta = (indexId: IndexId) => {
return SolrMappings['tr_clusters']
case 'tr-passages':
return SolrMappings['tr_passages']
case 'images':
return SolrMappings.images
default:
throw new Error(`Unknown index: ${indexId}`)
}
Expand Down
4 changes: 3 additions & 1 deletion src/services/search-facets/search-facets.schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,20 @@ import {

const SupportedIndexes = Object.keys(SolrMappings)

export type IndexId = 'search' | 'tr-clusters' | 'tr-passages'
export type IndexId = 'search' | 'tr-clusters' | 'tr-passages' | 'images'

/**
 * Facet names available for each index, read from the keys of the `facets`
 * object of the corresponding `SolrMappings` entry. The hyphenated index ids
 * (`tr-clusters`, `tr-passages`) map to underscored mapping keys.
 */
export const facetTypes: Record<IndexId, string[]> = {
search: Object.keys(SolrMappings.search.facets),
'tr-clusters': Object.keys(SolrMappings['tr_clusters'].facets),
'tr-passages': Object.keys(SolrMappings['tr_passages'].facets),
images: Object.keys(SolrMappings.images.facets),
}

// Human-readable label for each index id.
// NOTE(review): presumably interpolated into generated API descriptions; the usage is not visible here — confirm.
const facetNames: Record<IndexId, string> = {
search: 'search index',
'tr-clusters': 'text reuse clusters index',
'tr-passages': 'text reuse passages index',
images: 'images index',
}

// Ordering choices declared by the search-facets schema.
// NOTE(review): a leading '-' presumably selects descending order — confirm against the consumer.
export const OrderByChoices = ['-count', 'count', '-value', 'value']
Expand Down
14 changes: 4 additions & 10 deletions src/services/text-reuse-clusters/text-reuse-clusters.class.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@ import type {
GetTextReuseClusterResponse,
} from './models/generated'
import { FindQueyParameters } from './text-reuse-clusters.schema'
import { NewspapersService } from '../newspapers/newspapers.class'
import { SimpleSolrClient } from '../../internalServices/simpleSolr'
import { getToSelect } from '../../util/solr/adapters'
import { MediaSources } from '../media-sources/media-sources.class'
import { OpenPermissions } from '../../util/bigint'
import { filtersToSolrQueries } from '../../util/solr'
import { Filter } from '../../models'

const { mapValues, groupBy, values, uniq, clone, get } = require('lodash')
const { mapValues, groupBy, clone, get } = require('lodash')
const { NotFound } = require('@feathersjs/errors')
const { protobuf } = require('impresso-jscommons')
const {
Expand All @@ -31,9 +32,7 @@ const {
parseConnectedClustersCountResponse,
} = require('../../logic/textReuse/solr')
const { parseOrderBy } = require('../../util/queryParameters')
const { sameTypeFiltersToQuery } = require('../../util/solr')
const { SolrNamespaces } = require('../../solr')
const Newspaper = require('../../models/newspapers.model')

interface ClusterIdAndTextAndPermission {
id: any
Expand All @@ -60,11 +59,6 @@ function buildResponseClusters(

/**
 * Deserializes a search query with `protobuf.searchQuery` and returns the
 * `filters` property of the decoded object.
 */
const deserializeFilters = (serializedFilters: string) => {
  const { filters } = protobuf.searchQuery.deserialize(serializedFilters)
  return filters
}

function filtersToSolrQueries(filters: any, namespace = SolrNamespaces.TextReusePassages) {
const filtersGroupsByType = values(groupBy(filters, 'type'))
return uniq(filtersGroupsByType.map((f: any) => sameTypeFiltersToQuery(f, namespace)))
}

// Maps the public `order_by` key to the passage field used for sorting;
// `find` passes this map to `parseOrderBy`.
export const OrderByKeyToField = {
'passages-count': PassageFields.ClusterSize,
}
Expand Down Expand Up @@ -148,7 +142,7 @@ export class TextReuseClusters {
async find(params: Params<FindQueyParameters>): Promise<FindTextReuseClustersResponse> {
const { text, offset = 0, limit = 10, order_by: orderBy } = params.query ?? {}
const { filters }: Pick<FindQueyParameters, 'filters'> = (params as any).sanitized ?? {}
const filterQueryParts = filtersToSolrQueries(filters, SolrNamespaces.TextReusePassages)
const filterQueryParts = filtersToSolrQueries(filters as Filter[], SolrNamespaces.TextReusePassages)
const [orderByField, orderByDescending] = parseOrderBy(orderBy, OrderByKeyToField)
const query = getTextReusePassagesClusterIdsSearchRequestForText(
text,
Expand Down
11 changes: 7 additions & 4 deletions src/util/queryParameters.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import lodash from 'lodash'
import { protobuf } from 'impresso-jscommons'
import { Filter } from '../models'

export const parseOrderBy = (orderBy: string, keyFieldMap: Record<string, string> = {}) => {
if (orderBy == null) return []
Expand All @@ -19,21 +20,23 @@ export const parseOrderBy = (orderBy: string, keyFieldMap: Record<string, string
*
* @return {object[]} List of filters as objects
*/
export const parseFilters = (value?: string | string[] | object | object[]): Filter[] => {
  // Nothing supplied: the canonical form is an empty filter list.
  if (value == null) return []

  // Decodes one serialized filter expression. JSON is tried first; anything that
  // is not valid JSON is treated as a protobuf-serialized search query, whose
  // `filters` list is returned. Errors raised by the protobuf decoder propagate.
  const parseSerialized = (serialized: string): Filter[] => {
    try {
      return [JSON.parse(serialized)]
    } catch (error) {
      const decoded = protobuf.searchQuery.deserialize(serialized)
      return decoded.filters as Filter[]
    }
  }

  if (Array.isArray(value)) {
    // Already a list of filter objects (an empty list also lands here): returned as is.
    if (value.every(item => lodash.isObjectLike(item))) return value as Filter[]

    // A list of serialized filters: every item gets the same JSON -> protobuf
    // fallback as a single string, so a repeated protobuf-encoded parameter no
    // longer fails with a SyntaxError. A protobuf item may expand to several filters.
    if (value.every(item => lodash.isString(item))) {
      return (value as string[]).flatMap(item => parseSerialized(item))
    }

    // Mixed content is passed through untouched for downstream validation to reject.
    return value as Filter[]
  }

  // A single filter object is wrapped into a list.
  if (lodash.isObjectLike(value)) return [value] as Filter[]

  if (lodash.isString(value)) return parseSerialized(value)

  // Any other runtime value is passed through untouched, as before.
  return value as Filter[]
}
Loading

0 comments on commit 098a08d

Please sign in to comment.