From 080faf893489c912942259ce8e90e61156e09d8f Mon Sep 17 00:00:00 2001 From: Ash Date: Mon, 18 Nov 2024 21:25:49 +0000 Subject: [PATCH] wip(sanity): groq2024 search --- .../common/deriveSearchWeightsFromType2024.ts | 223 ++++++++++++++++++ .../sanity/src/core/search/common/index.ts | 1 + .../sanity/src/core/search/common/types.ts | 13 +- .../search/groq2024/createGroq2024Search.ts | 171 ++++++++++++++ .../sanity/src/core/search/groq2024/index.ts | 1 + packages/sanity/src/core/search/search.ts | 9 +- 6 files changed, 415 insertions(+), 3 deletions(-) create mode 100644 packages/sanity/src/core/search/common/deriveSearchWeightsFromType2024.ts create mode 100644 packages/sanity/src/core/search/groq2024/createGroq2024Search.ts create mode 100644 packages/sanity/src/core/search/groq2024/index.ts diff --git a/packages/sanity/src/core/search/common/deriveSearchWeightsFromType2024.ts b/packages/sanity/src/core/search/common/deriveSearchWeightsFromType2024.ts new file mode 100644 index 000000000000..9e9c58095611 --- /dev/null +++ b/packages/sanity/src/core/search/common/deriveSearchWeightsFromType2024.ts @@ -0,0 +1,223 @@ +import {type CrossDatasetType, type SchemaType, type SearchConfiguration} from '@sanity/types' +import {toString as pathToString} from '@sanity/util/paths' + +import {isRecord} from '../../util' +import {type DeriveSearchWeightsFromTypeOptions} from './deriveSearchWeightsFromType' +import {type SearchSpec} from './types' + +interface SearchWeightEntry { + path: string + weight: number + type?: 'string' | 'pt' +} + +const CACHE = new WeakMap() +const PREVIEW_FIELD_WEIGHT_MAP = { + title: 10, + subtitle: 5, + description: 1.5, +} +const BASE_WEIGHTS: Record> = { + _id: {weight: 1}, + _type: {weight: 1}, +} +const builtInObjectTypes = ['reference', 'crossDatasetReference'] + +const getTypeChain = (type: SchemaType | undefined): SchemaType[] => + type ? [type, ...getTypeChain(type.type)] : [] + +const isPtField = (type: SchemaType | undefined) => + type?.jsonType === 'array' && + type.of.some((arrType) => getTypeChain(arrType).some(({name}) => name === 'block')) + +const isStringField = (schemaType: SchemaType | undefined): boolean => + schemaType ? schemaType?.jsonType === 'string' : false + +const isSearchConfiguration = (options: unknown): options is SearchConfiguration => + isRecord(options) && 'search' in options && isRecord(options.search) + +function isSchemaType(input: SchemaType | CrossDatasetType | undefined): input is SchemaType { + return typeof input !== 'undefined' && 'name' in input +} + +function getLeafWeights( + schemaType: SchemaType | CrossDatasetType | undefined, + maxDepth: number, + getWeight: (schemaType: SchemaType, path: string) => number | null, +): Record { + function traverse( + type: SchemaType | undefined, + path: string, + depth: number, + ): SearchWeightEntry[] { + if (!type) return [] + if (depth > maxDepth) return [] + + const typeChain = getTypeChain(type) + + if (isStringField(type) || isPtField(type)) { + const weight = getWeight(type, path) + + if (typeof weight !== 'number') return [] + return [{path, weight}] + } + + const results: SearchWeightEntry[] = [] + const objectTypes = typeChain.filter( + (t): t is Extract => + t.jsonType === 'object' && !!t.fields?.length && !builtInObjectTypes.includes(t.name), + ) + for (const objectType of objectTypes) { + // TODO: Allow override of aliased types. + for (const field of objectType.fields) { + const nextPath = pathToString([path, field.name].filter(Boolean)) + results.push(...traverse(field.type, nextPath, depth + 1)) + } + } + + const arrayTypes = typeChain.filter( + (t): t is Extract => + t.jsonType === 'array' && !!t.of?.length, + ) + for (const arrayType of arrayTypes) { + for (const arrayItemType of arrayType.of) { + const nextPath = `${path}[]` + results.push(...traverse(arrayItemType, nextPath, depth + 1)) + } + } + + return results + } + + // Cross Dataset Reference are not part of the schema, so we should not attempt to reconcile them. + if (!isSchemaType(schemaType)) { + return {} + } + + return traverse(schemaType, '', 0).reduce>( + (acc, {path, weight, type}) => { + acc[path] = {weight, type, path} + return acc + }, + {}, + ) +} + +const getUserSetWeight = (schemaType: SchemaType) => { + const searchOptions = getTypeChain(schemaType) + .map((type) => type.options) + .find(isSearchConfiguration) + + return typeof searchOptions?.search?.weight === 'number' ? searchOptions.search.weight : null +} + +const getHiddenWeight = (schemaType: SchemaType) => { + const hidden = getTypeChain(schemaType).some((type) => type.hidden) + return hidden ? 0 : null +} + +const getDefaultWeights = (schemaType: SchemaType) => { + // if there is no user set weight or a `0` weight due to be hidden, + // then we can return the default weight of `1` + const result = getUserSetWeight(schemaType) ?? getHiddenWeight(schemaType) + return typeof result === 'number' ? null : 1 +} + +const getPreviewWeights = ( + schemaType: SchemaType | CrossDatasetType | undefined, + maxDepth: number, + isCrossDataset?: boolean, +): Record | null => { + const select = schemaType?.preview?.select + if (!select) return null + + const selectionKeysBySelectionPath = Object.fromEntries( + Object.entries(select).map(([selectionKey, selectionPath]) => [ + // replace indexed paths with `[]` + // e.g. `arrayOfObjects.0.myField` becomes `arrayOfObjects[].myField` + selectionPath.replace(/\.\d+/g, '[]'), + selectionKey, + ]), + ) + + const defaultWeights = getLeafWeights(schemaType, maxDepth, getDefaultWeights) + const nestedWeightsBySelectionPath = Object.fromEntries( + Object.entries(defaultWeights) + .map(([path, {type}]) => ({path, type})) + .filter(({path}) => selectionKeysBySelectionPath[path]) + .map(({path, type}) => [ + path, + { + type, + weight: + PREVIEW_FIELD_WEIGHT_MAP[ + selectionKeysBySelectionPath[path] as keyof typeof PREVIEW_FIELD_WEIGHT_MAP + ], + }, + ]), + ) + + if (isCrossDataset) { + return Object.fromEntries( + Object.entries(selectionKeysBySelectionPath).map(([path, previewFieldName]) => { + return [ + path, + { + path, + type: 'string', + weight: + PREVIEW_FIELD_WEIGHT_MAP[previewFieldName as keyof typeof PREVIEW_FIELD_WEIGHT_MAP], + }, + ] + }), + ) + } + + return getLeafWeights(schemaType, maxDepth, (_, path) => { + const nested = nestedWeightsBySelectionPath[path] + return nested ? nested.weight : null + }) +} + +// export interface DeriveSearchWeightsFromTypeOptions { +// schemaType: SchemaType | CrossDatasetType +// maxDepth: number +// isCrossDataset?: boolean +// processPaths?: (paths: SearchPath[]) => SearchPath[] +// } + +export function deriveSearchWeightsFromType2024({ + schemaType, + maxDepth, + isCrossDataset, + processPaths = (paths) => paths, +}: DeriveSearchWeightsFromTypeOptions): SearchSpec { + const cached = CACHE.get(schemaType) + if (cached) return cached + + const userSetWeights = getLeafWeights(schemaType, maxDepth, getUserSetWeight) + const hiddenWeights = getLeafWeights(schemaType, maxDepth, getHiddenWeight) + const defaultWeights = getLeafWeights(schemaType, maxDepth, getDefaultWeights) + const previewWeights = getPreviewWeights(schemaType, maxDepth, isCrossDataset) + + const weights: Record> = { + ...BASE_WEIGHTS, + ...defaultWeights, + ...hiddenWeights, + ...previewWeights, + ...userSetWeights, + } + + const result = { + typeName: isSchemaType(schemaType) ? schemaType.name : schemaType.type, + paths: processPaths( + Object.entries(weights).map(([path, {type, weight}]) => ({ + path, + weight, + })), + ), + } + + CACHE.set(schemaType, result) + return result +} diff --git a/packages/sanity/src/core/search/common/index.ts b/packages/sanity/src/core/search/common/index.ts index c8242bfd8a42..927a375afc13 100644 --- a/packages/sanity/src/core/search/common/index.ts +++ b/packages/sanity/src/core/search/common/index.ts @@ -1,3 +1,4 @@ export * from './deriveSearchWeightsFromType' +export * from './deriveSearchWeightsFromType2024' export * from './getSearchableTypes' export * from './types' diff --git a/packages/sanity/src/core/search/common/types.ts b/packages/sanity/src/core/search/common/types.ts index 38b35536d869..6bf04c794a7c 100644 --- a/packages/sanity/src/core/search/common/types.ts +++ b/packages/sanity/src/core/search/common/types.ts @@ -93,7 +93,18 @@ export interface WeightedSearchResults { /** * @internal */ -export type SearchStrategyFactory = ( +export interface Groq2024SearchResults { + type: 'groq2024' + hits: SearchHit[] + nextCursor?: never +} + +/** + * @internal + */ +export type SearchStrategyFactory< + TResult extends TextSearchResults | WeightedSearchResults | Groq2024SearchResults, +> = ( types: (SchemaType | CrossDatasetType)[], client: SanityClient, commonOpts: SearchFactoryOptions, diff --git a/packages/sanity/src/core/search/groq2024/createGroq2024Search.ts b/packages/sanity/src/core/search/groq2024/createGroq2024Search.ts new file mode 100644 index 000000000000..f9939b0e3dcb --- /dev/null +++ b/packages/sanity/src/core/search/groq2024/createGroq2024Search.ts @@ -0,0 +1,171 @@ +import {DEFAULT_MAX_FIELD_DEPTH} from '@sanity/schema/_internal' +import {type CrossDatasetType, type SanityDocumentLike, type SchemaType} from '@sanity/types' +import {map, tap} from 'rxjs' + +import {removeDupes} from '../../util/draftUtils' +import {deriveSearchWeightsFromType2024} from '../common/deriveSearchWeightsFromType2024' +import { + type Groq2024SearchResults, + type SearchSort, + type SearchStrategyFactory, + type SearchTerms, +} from '../common/types' + +interface SearchParams extends Record { + __types: string[] + __limit: number + __query: string +} + +const FINDABILITY_MVI = 5 +const DEFAULT_LIMIT = 1_000 + +function getSearchTerms( + searchParams: string | SearchTerms, + types: (SchemaType | CrossDatasetType)[], +) { + if (typeof searchParams === 'string') { + return { + query: searchParams, + types: types, + } + } + return searchParams.types.length ? searchParams : {...searchParams, types} +} + +function isSchemaType( + maybeSchemaType: SchemaType | CrossDatasetType | undefined, +): maybeSchemaType is SchemaType { + return typeof maybeSchemaType !== 'undefined' && 'name' in maybeSchemaType +} + +function toOrderClause(orderBy: SearchSort[]): string { + function wrapFieldWithFn(ordering: SearchSort): string { + return ordering.mapWith ? `${ordering.mapWith}(${ordering.field})` : ordering.field + } + + return (orderBy || []) + .map((ordering) => + [wrapFieldWithFn(ordering), (ordering.direction || '').toLowerCase()] + .map((str) => str.trim()) + .filter(Boolean) + .join(' '), + ) + .join(',') +} + +/** + * @internal + */ +export const createGroq2024Search: SearchStrategyFactory = ( + typesFromFactory, + client, + factoryOptions, +) => { + return function search(searchParams, searchOptions = {}) { + const searchTerms = getSearchTerms(searchParams, typesFromFactory) + + const specs = searchTerms.types + .map((schemaType) => + deriveSearchWeightsFromType2024({ + schemaType, + maxDepth: searchOptions.maxDepth || DEFAULT_MAX_FIELD_DEPTH, + isCrossDataset: searchOptions.isCrossDataset, + processPaths: (paths) => paths.filter(({weight}) => weight !== 1), + }), + ) + .filter(({paths}) => paths.length !== 0) + + const score = specs + .map((spec) => { + return spec.paths.map((path) => { + if (path.weight === 0) { + // If the weight is zero, exclude it from matching. + // + // References are not implicitly matched. Therefore, if the path is inside a dereference and the weight is zero, + // we can simply omit it. + // if (path.fullyQualifiedPath !== path.path) { + // if (path.fullyQualifiedPath.includes('->')) { + // return null + // } + // TODO: Switch back on when CL bug fixed. + // debugger + // return `!(_type == "${spec.typeName}" && text::matchQuery(${path.path}, $__query))` + } + // If the weight is greater than zero, boost it. + // return `boost(_type == "${spec.typeName}" && text::matchQuery(${path.fullyQualifiedPath}, $__query), ${path.weight})` + return `boost(_type == "${spec.typeName}" && text::matchQuery(${path.path}, $__query), ${path.weight})` + }) + }) + .flat() + .filter((spec) => spec !== null) + .concat([`text::matchQuery(nestedAttributes(), $__query)`]) + + // debugger + + const filters = [ + '_type in $__types', + // TODO: Can perspectives help here? + searchOptions.includeDrafts === false && "!(_id in path('drafts.**'))", + factoryOptions.filter ? `(${factoryOptions.filter})` : false, + searchTerms.filter ? `(${searchTerms.filter})` : false, + '!(_id in path("versions.**"))', + ].filter((baseFilter) => typeof baseFilter === 'string') + + const sortOrder = toOrderClause(searchOptions?.sort ?? [{field: '_score', direction: 'desc'}]) + // TODO: `_score` not required. + const projectionFields = ['_type', '_id', '_score'] + const projection = projectionFields.join(', ') + + const query = [ + `*[${filters.join(' && ')}]`, + ['|', `score(${score.join(', ')})`], + ['|', `order(${sortOrder})`], + `[_score > 0]`, + `[0...$__limit]`, + `{${projection}}`, + ] + .flat() + .join(' ') + + // if (searchOptions?.__unstable_extendedProjection) { + // debugger + // } + + // ...new Set( + // typesFromFactory.concat( + // typeof searchParams === 'object' && 'types' in searchParams ? searchParams.types : [], + // ), + // ), + + const params: SearchParams = { + __types: searchTerms.types.map((type) => (isSchemaType(type) ? type.name : type.type)), + __limit: searchOptions?.limit ?? DEFAULT_LIMIT, + __query: typeof searchParams === 'string' ? searchParams : searchParams.query, + ...factoryOptions.params, + // ...searchOptions.params, + } + + const pragma = [`findability-mvi:${FINDABILITY_MVI}`] + .concat(searchOptions?.comments || []) + .map((s) => `// ${s}`) + .join('\n') + + const options = {} + + return client.observable + .withConfig({ + // TODO: Use stable API version. + apiVersion: 'vX', + }) + .fetch([pragma, query].join('\n'), params, options) + .pipe( + // TODO: Can perspectives help here? + factoryOptions.unique ? map(removeDupes) : tap(), + map((hits) => ({ + type: 'groq2024', + hits: hits.map((hit) => ({hit})), + })), + ) + } +} diff --git a/packages/sanity/src/core/search/groq2024/index.ts b/packages/sanity/src/core/search/groq2024/index.ts new file mode 100644 index 000000000000..63755a3c0b6f --- /dev/null +++ b/packages/sanity/src/core/search/groq2024/index.ts @@ -0,0 +1 @@ +export {createGroq2024Search} from './createGroq2024Search' diff --git a/packages/sanity/src/core/search/search.ts b/packages/sanity/src/core/search/search.ts index 393c403d900e..9b599a2a1d8d 100644 --- a/packages/sanity/src/core/search/search.ts +++ b/packages/sanity/src/core/search/search.ts @@ -1,18 +1,23 @@ import {type SearchStrategy} from '@sanity/types' import { + type Groq2024SearchResults, type SearchStrategyFactory, type TextSearchResults, type WeightedSearchResults, } from './common' +import {createGroq2024Search} from './groq2024' import {createTextSearch} from './text-search' import {createWeightedSearch} from './weighted' const searchStrategies = { groqLegacy: createWeightedSearch, textSearch: createTextSearch, - groq2024: createWeightedSearch, -} satisfies Record> + groq2024: createGroq2024Search, +} satisfies Record< + SearchStrategy, + SearchStrategyFactory +> const DEFAULT_SEARCH_STRATEGY: SearchStrategy = 'groqLegacy'