diff --git a/dev/test-studio/sanity.config.ts b/dev/test-studio/sanity.config.ts index 31b82b3dea3..8b85fb5ead5 100644 --- a/dev/test-studio/sanity.config.ts +++ b/dev/test-studio/sanity.config.ts @@ -78,7 +78,9 @@ const sharedSettings = definePlugin({ enabled: true, }, }, - + search: { + strategy: 'groq2024', + }, document: { actions: documentActions, inspectors: (prev, ctx) => { diff --git a/packages/@sanity/types/src/search/types.ts b/packages/@sanity/types/src/search/types.ts index 6025fc16dc5..1eacc85da7d 100644 --- a/packages/@sanity/types/src/search/types.ts +++ b/packages/@sanity/types/src/search/types.ts @@ -1,7 +1,7 @@ /** * @public */ -export const searchStrategies = ['groqLegacy', 'textSearch'] as const +export const searchStrategies = ['groqLegacy', 'textSearch', 'groq2024'] as const /** * @public diff --git a/packages/sanity/src/core/config/types.ts b/packages/sanity/src/core/config/types.ts index 8fc8064afd4..1cf519a9edb 100644 --- a/packages/sanity/src/core/config/types.ts +++ b/packages/sanity/src/core/config/types.ts @@ -396,6 +396,8 @@ export interface PluginOptions { * - `"groqLegacy"` (default): Use client-side tokenization and schema introspection to search * using the GROQ Query API. * - `"textSearch"` (deprecated): Perform full text searching using the Text Search API. + * - `"groq2024"`: (experimental) Perform full text searching using the GROQ Query API and its + * new `text::matchQuery` function. */ strategy?: SearchStrategy diff --git a/packages/sanity/src/core/search/common/__tests__/tokens.test.ts b/packages/sanity/src/core/search/common/__tests__/tokens.test.ts new file mode 100644 index 00000000000..b2dd9184a96 --- /dev/null +++ b/packages/sanity/src/core/search/common/__tests__/tokens.test.ts @@ -0,0 +1,65 @@ +import {describe, expect, it} from 'vitest' + +import {isExactMatchToken, isNegationToken, isPrefixToken, prefixLast} from '../token' + +describe('isNegationToken', () => { + it('identifies negation tokens', () => { + expect(isNegationToken('-test')).toBe(true) + expect(isNegationToken('--')).toBe(true) + expect(isNegationToken('test')).toBe(false) + expect(isNegationToken('test-')).toBe(false) + expect(isNegationToken(undefined)).toBe(false) + }) +}) + +describe('isPrefixToken', () => { + it('identifies prefix tokens', () => { + expect(isPrefixToken('test*')).toBe(true) + expect(isPrefixToken('test')).toBe(false) + expect(isPrefixToken('*test')).toBe(false) + expect(isPrefixToken(undefined)).toBe(false) + }) +}) + +describe('prefixLast', () => { + it('transforms the final non-negation token into a wildcard prefix', () => { + expect(prefixLast('a')).toBe('a*') + expect(prefixLast('a b')).toBe('a b*') + expect(prefixLast('a -b')).toBe('a* -b') + expect(prefixLast('a "bc" d')).toBe('a "bc" d*') + expect(prefixLast('ab "cd"')).toBe('ab* "cd"') + expect(prefixLast('a --')).toBe('a* --') + }) + + it('does not transform the final non-negation token if it is already a wildcard prefix', () => { + expect(prefixLast('a*')).toBe('a*') + expect(prefixLast('a* -b')).toBe('a* -b') + }) + + it('does not transform any tokens if only negation tokens are present', () => { + expect(prefixLast('-a -b')).toBe('-a -b') + expect(prefixLast('--')).toBe('--') + }) + + it('trims tokens', () => { + expect(prefixLast('a "ab c" d')).toBe('a "ab c" d*') + }) + + it('preserves quoted tokens', () => { + expect(prefixLast('"a b" c d')).toBe('"a b" c d*') + expect(prefixLast('"a b" c d "ef" "g "')).toBe('"a b" c d* "ef" "g "') + expect(prefixLast('"a " b" c d')).toBe('"a " b c d*') + 
}) +}) + +describe('isExactMatchToken', () => { + it('recognises that a token is encased in quote marks', () => { + expect(isExactMatchToken(undefined)).toBe(false) + expect(isExactMatchToken('"a"')).toBe(true) + expect(isExactMatchToken('"a b"')).toBe(true) + expect(isExactMatchToken('"a')).toBe(false) + expect(isExactMatchToken('a"')).toBe(false) + expect(isExactMatchToken('"a b')).toBe(false) + expect(isExactMatchToken('a b"')).toBe(false) + }) +}) diff --git a/packages/sanity/src/core/search/common/deriveSearchWeightsFromType2024.ts b/packages/sanity/src/core/search/common/deriveSearchWeightsFromType2024.ts new file mode 100644 index 00000000000..9a8ade9904b --- /dev/null +++ b/packages/sanity/src/core/search/common/deriveSearchWeightsFromType2024.ts @@ -0,0 +1,257 @@ +import { + type CrossDatasetType, + type SchemaType, + type SearchConfiguration, + type SlugSchemaType, +} from '@sanity/types' +import {toString as pathToString} from '@sanity/util/paths' + +import {isRecord} from '../../util' +import {type SearchPath, type SearchSpec} from './types' + +interface SearchWeightEntry { + path: string + weight: number + type?: 'string' | 'pt' +} + +const CACHE = new WeakMap() + +const PREVIEW_FIELD_WEIGHT_MAP = { + title: 10, + subtitle: 5, + description: 1.5, +} +const BASE_WEIGHTS: Record> = { + _id: {weight: 1}, + _type: {weight: 1}, +} + +// Any object type whose fields should not be considered for custom weighting. +// +// Search may still match on their fields, but will not traverse their schema to find custom +// weights. +// +// Some types, such as `slug`, may instead determine weights using a specialised implementation. +const ignoredBuiltInObjectTypes = ['reference', 'crossDatasetReference', 'slug'] + +const getTypeChain = (type: SchemaType | undefined): SchemaType[] => + type ? [type, ...getTypeChain(type.type)] : [] + +const isPtField = (type: SchemaType | undefined) => + type?.jsonType === 'array' && + type.of.some((arrType) => getTypeChain(arrType).some(({name}) => name === 'block')) + +const isStringField = (schemaType: SchemaType | undefined): boolean => + schemaType ? schemaType?.jsonType === 'string' : false + +const isSlugField = (schemaType: SchemaType | undefined): schemaType is SlugSchemaType => { + const typeChain = getTypeChain(schemaType) + return typeChain.some(({jsonType, name}) => jsonType === 'object' && name === 'slug') +} + +const isSearchConfiguration = (options: unknown): options is SearchConfiguration => + isRecord(options) && 'search' in options && isRecord(options.search) + +function isSchemaType(input: SchemaType | CrossDatasetType | undefined): input is SchemaType { + return typeof input !== 'undefined' && 'name' in input +} + +function getFullyQualifiedPath(schemaType: SchemaType, path: string): string { + // Slug field weights should be applied to the object's `current` field. 
+ if (isSlugField(schemaType)) { + return [path, 'current'].join('.') + } + + return path +} + +function getLeafWeights( + schemaType: SchemaType | CrossDatasetType | undefined, + maxDepth: number, + getWeight: (schemaType: SchemaType, path: string) => number | null, +): Record<string, SearchWeightEntry> { + function traverse( + type: SchemaType | undefined, + path: string, + depth: number, + ): SearchWeightEntry[] { + if (!type) return [] + if (depth > maxDepth) return [] + + const typeChain = getTypeChain(type) + + if (isStringField(type) || isPtField(type)) { + const weight = getWeight(type, path) + + if (typeof weight !== 'number') return [] + return [{path, weight}] + } + + if (isSlugField(type)) { + const weight = getWeight(type, path) + if (typeof weight !== 'number') return [] + return [{path: getFullyQualifiedPath(type, path), weight}] + } + + const results: SearchWeightEntry[] = [] + + const objectTypes = typeChain.filter( + (t): t is Extract<SchemaType, {jsonType: 'object'}> => + t.jsonType === 'object' && + !!t.fields?.length && + !ignoredBuiltInObjectTypes.includes(t.name), + ) + for (const objectType of objectTypes) { + for (const field of objectType.fields) { + const nextPath = pathToString([path, field.name].filter(Boolean)) + results.push(...traverse(field.type, nextPath, depth + 1)) + } + } + + const arrayTypes = typeChain.filter( + (t): t is Extract<SchemaType, {jsonType: 'array'}> => + t.jsonType === 'array' && !!t.of?.length, + ) + for (const arrayType of arrayTypes) { + for (const arrayItemType of arrayType.of) { + const nextPath = `${path}[]` + results.push(...traverse(arrayItemType, nextPath, depth + 1)) + } + } + + return results + } + + // Cross Dataset References are not part of the schema, so we should not attempt to reconcile them. + if (!isSchemaType(schemaType)) { + return {} + } + + return traverse(schemaType, '', 0).reduce<Record<string, SearchWeightEntry>>( + (acc, {path, weight, type}) => { + acc[path] = {weight, type, path} + return acc + }, + {}, + ) +} + +const getUserSetWeight = (schemaType: SchemaType) => { + const searchOptions = getTypeChain(schemaType) + .map((type) => type.options) + .find(isSearchConfiguration) + + return typeof searchOptions?.search?.weight === 'number' ? searchOptions.search.weight : null +} + +const getHiddenWeight = (schemaType: SchemaType) => { + const hidden = getTypeChain(schemaType).some((type) => type.hidden) + return hidden ? 0 : null +} + +const getDefaultWeights = (schemaType: SchemaType) => { + // if there is neither a user-set weight nor a `0` weight from the field being hidden, + // we can return the default weight of `1` + const result = getUserSetWeight(schemaType) ?? getHiddenWeight(schemaType) + return typeof result === 'number' ? null : 1 +} + +const getPreviewWeights = ( + schemaType: SchemaType | CrossDatasetType | undefined, + maxDepth: number, + isCrossDataset?: boolean, +): Record<string, SearchWeightEntry> | null => { + const select = schemaType?.preview?.select + if (!select) return null + + const selectionKeysBySelectionPath = Object.fromEntries( + Object.entries(select).map(([selectionKey, selectionPath]) => [ + // replace indexed paths with `[]` + // e.g.
`arrayOfObjects.0.myField` becomes `arrayOfObjects[].myField` + selectionPath.replace(/\.\d+/g, '[]'), + selectionKey, + ]), + ) + + const defaultWeights = getLeafWeights(schemaType, maxDepth, getDefaultWeights) + const nestedWeightsBySelectionPath = Object.fromEntries( + Object.entries(defaultWeights) + .map(([path, {type}]) => ({path, type})) + .filter(({path}) => selectionKeysBySelectionPath[path]) + .map(({path, type}) => [ + path, + { + type, + weight: + PREVIEW_FIELD_WEIGHT_MAP[ + selectionKeysBySelectionPath[path] as keyof typeof PREVIEW_FIELD_WEIGHT_MAP + ], + }, + ]), + ) + + if (isCrossDataset) { + return Object.fromEntries( + Object.entries(selectionKeysBySelectionPath).map(([path, previewFieldName]) => { + return [ + path, + { + path, + type: 'string', + weight: + PREVIEW_FIELD_WEIGHT_MAP[previewFieldName as keyof typeof PREVIEW_FIELD_WEIGHT_MAP], + }, + ] + }), + ) + } + + return getLeafWeights(schemaType, maxDepth, (type, path) => { + const nested = nestedWeightsBySelectionPath[getFullyQualifiedPath(type, path)] + return nested ? nested.weight : null + }) +} + +interface DeriveSearchWeightsFromTypeOptions { + schemaType: SchemaType | CrossDatasetType + maxDepth: number + isCrossDataset?: boolean + processPaths?: (paths: SearchPath[]) => SearchPath[] +} + +export function deriveSearchWeightsFromType2024({ + schemaType, + maxDepth, + isCrossDataset, + processPaths = (paths) => paths, +}: DeriveSearchWeightsFromTypeOptions): SearchSpec { + const cached = CACHE.get(schemaType) + if (cached) return cached + + const userSetWeights = getLeafWeights(schemaType, maxDepth, getUserSetWeight) + const hiddenWeights = getLeafWeights(schemaType, maxDepth, getHiddenWeight) + const defaultWeights = getLeafWeights(schemaType, maxDepth, getDefaultWeights) + const previewWeights = getPreviewWeights(schemaType, maxDepth, isCrossDataset) + + const weights: Record<string, Omit<SearchWeightEntry, 'path'>> = { + ...BASE_WEIGHTS, + ...defaultWeights, + ...hiddenWeights, + ...previewWeights, + ...userSetWeights, + } + + const result = { + typeName: isSchemaType(schemaType) ?
schemaType.name : schemaType.type, + paths: processPaths( + Object.entries(weights).map(([path, {weight}]) => ({ + path, + weight, + })), + ), + } + + CACHE.set(schemaType, result) + return result +} diff --git a/packages/sanity/src/core/search/common/index.ts b/packages/sanity/src/core/search/common/index.ts index c8242bfd8a4..927a375afc1 100644 --- a/packages/sanity/src/core/search/common/index.ts +++ b/packages/sanity/src/core/search/common/index.ts @@ -1,3 +1,4 @@ export * from './deriveSearchWeightsFromType' +export * from './deriveSearchWeightsFromType2024' export * from './getSearchableTypes' export * from './types' diff --git a/packages/sanity/src/core/search/common/token.ts b/packages/sanity/src/core/search/common/token.ts new file mode 100644 index 00000000000..4a8ecd01215 --- /dev/null +++ b/packages/sanity/src/core/search/common/token.ts @@ -0,0 +1,49 @@ +const WILDCARD_TOKEN = '*' +const NEGATION_TOKEN = '-' +const TOKEN_REGEX = /(?:[^\s"]+|"[^"]*")+/g + +/** + * @internal + */ +export function isNegationToken(token: string | undefined): boolean { + return typeof token !== 'undefined' && token.trim().at(0) === NEGATION_TOKEN +} + +/** + * @internal + */ +export function isPrefixToken(token: string | undefined): boolean { + return typeof token !== 'undefined' && token.trim().at(-1) === WILDCARD_TOKEN +} + +/** + * @internal + */ +export function isExactMatchToken(token: string | undefined): boolean { + return [token?.at(0), token?.at(-1)].every((character) => character === '"') +} + +/** + * @internal + */ +export function prefixLast(query: string): string { + const tokens = (query.match(TOKEN_REGEX) ?? []).map((token) => token.trim()) + + const finalIncrementalTokenIndex = tokens.findLastIndex( + (token) => !isNegationToken(token) && !isExactMatchToken(token), + ) + + const finalIncrementalToken = tokens[finalIncrementalTokenIndex] + + if (tokens.length === 0) { + return WILDCARD_TOKEN + } + + if (isPrefixToken(finalIncrementalToken) || typeof finalIncrementalToken === 'undefined') { + return tokens.join(' ') + } + + const prefixedTokens = [...tokens] + prefixedTokens.splice(finalIncrementalTokenIndex, 1, `${finalIncrementalToken}${WILDCARD_TOKEN}`) + return prefixedTokens.join(' ') +} diff --git a/packages/sanity/src/core/search/common/types.ts b/packages/sanity/src/core/search/common/types.ts index 38b35536d86..c5ec8a3f44c 100644 --- a/packages/sanity/src/core/search/common/types.ts +++ b/packages/sanity/src/core/search/common/types.ts @@ -93,7 +93,18 @@ export interface WeightedSearchResults { /** * @internal */ -export type SearchStrategyFactory = ( +export interface Groq2024SearchResults { + type: 'groq2024' + hits: SearchHit[] + nextCursor?: string +} + +/** + * @internal + */ +export type SearchStrategyFactory< + TResult extends TextSearchResults | WeightedSearchResults | Groq2024SearchResults, +> = ( types: (SchemaType | CrossDatasetType)[], client: SanityClient, commonOpts: SearchFactoryOptions, diff --git a/packages/sanity/src/core/search/groq2024/createGroq2024Search.ts b/packages/sanity/src/core/search/groq2024/createGroq2024Search.ts new file mode 100644 index 00000000000..05c11c6c194 --- /dev/null +++ b/packages/sanity/src/core/search/groq2024/createGroq2024Search.ts @@ -0,0 +1,74 @@ +import {type CrossDatasetType, type SanityDocumentLike, type SchemaType} from '@sanity/types' +import {map} from 'rxjs' + +import { + type Groq2024SearchResults, + type SearchStrategyFactory, + type SearchTerms, +} from '../common/types' +import {createSearchQuery} from 
'./createSearchQuery' +import {getNextCursor} from './getNextCursor' + +function getSearchTerms( + searchParams: string | SearchTerms, + types: (SchemaType | CrossDatasetType)[], +) { + if (typeof searchParams === 'string') { + return { + query: searchParams, + types: types, + } + } + return searchParams.types.length ? searchParams : {...searchParams, types} +} + +/** + * @internal + */ +export const createGroq2024Search: SearchStrategyFactory = ( + typesFromFactory, + client, + factoryOptions, +) => { + return function search(searchParams, searchOptions = {}) { + const searchTerms = getSearchTerms(searchParams, typesFromFactory) + + const mergedOptions = { + ...factoryOptions, + ...searchOptions, + } + + const {query, params, options, sortOrder} = createSearchQuery( + searchTerms, + searchParams, + mergedOptions, + ) + + return client.observable + .withConfig({ + // The GROQ functions that power `groq2024` are currently only available using API `vX`. + // + // TODO: Switch to stable API version before `groq2024` general availability. + apiVersion: 'vX', + }) + .fetch(query, params, options) + .pipe( + map((hits) => { + const hasNextPage = + typeof searchOptions.limit !== 'undefined' && hits.length > searchOptions.limit + + // Search overfetches by 1 to determine whether there is another page to fetch. Therefore, + // the penultimate result must be used to determine the start of the next page. + const lastResult = hasNextPage ? hits.at(-2) : hits.at(-1) + + return { + type: 'groq2024', + // Search overfetches by 1 to determine whether there is another page to fetch. Therefore, + // exclude the final result if it's beyond the limit. + hits: hits.map((hit) => ({hit})).slice(0, searchOptions.limit), + nextCursor: hasNextPage ? getNextCursor({lastResult, sortOrder}) : undefined, + } + }), + ) + } +} diff --git a/packages/sanity/src/core/search/groq2024/createSearchQuery.test.ts b/packages/sanity/src/core/search/groq2024/createSearchQuery.test.ts new file mode 100644 index 00000000000..1d1943db29f --- /dev/null +++ b/packages/sanity/src/core/search/groq2024/createSearchQuery.test.ts @@ -0,0 +1,285 @@ +import {Schema} from '@sanity/schema' +import {defineArrayMember, defineField, defineType} from '@sanity/types' +import {describe, expect, it} from 'vitest' + +import {DEFAULT_LIMIT} from '../weighted/createSearchQuery' +import {createSearchQuery} from './createSearchQuery' + +const testType = Schema.compile({ + types: [ + defineType({ + name: 'basic-schema-test', + type: 'document', + preview: { + select: { + title: 'title', + }, + }, + fields: [ + defineField({ + name: 'title', + type: 'string', + options: { + search: { + weight: 10, + }, + }, + }), + ], + }), + ], +}).get('basic-schema-test') + +describe('createSearchQuery', () => { + describe('searchTerms', () => { + it('should create query for basic type', () => { + const {query, params} = createSearchQuery( + { + query: 'test', + types: [testType], + }, + '', + ) + + expect(query).toMatchInlineSnapshot( + ` + "// findability-mvi:5 + *[_type in $__types && !(_id in path("versions.**"))] | score(boost(_type in ["basic-schema-test"] && title match text::query($__query), 10), @ match text::query($__query)) | order(_score desc) [_score > 0] [0...$__limit] {_score, _type, _id}" + `, + ) + + expect(params).toEqual({ + __query: '*', + __types: ['basic-schema-test'], + __limit: DEFAULT_LIMIT + 1, + }) + }) + }) + + describe('searchOptions', () => { + it('should include drafts by default', () => { + const {options} = createSearchQuery( + { + query: 
'term0', + types: [testType], + }, + '', + ) + + expect(options.perspective).toBe('previewDrafts') + }) + + it('should exclude drafts when configured', () => { + const {options} = createSearchQuery( + { + query: 'term0', + types: [testType], + }, + '', + {includeDrafts: false}, + ) + + expect(options.perspective).toBe('published') + }) + + it('should use provided limit (plus one to determine existence of next page)', () => { + const {params} = createSearchQuery( + { + query: 'term0', + types: [testType], + }, + '', + { + limit: 30, + }, + ) + + expect(params.__limit).toEqual(31) + }) + + it('should add configured filter and params', () => { + const {query, params} = createSearchQuery( + { + query: 'term', + types: [testType], + }, + '', + {filter: 'randomCondition == $customParam', params: {customParam: 'custom'}}, + ) + + expect(query).toContain( + '*[_type in $__types && (randomCondition == $customParam) && !(_id in path("versions.**"))]', + ) + expect(params.customParam).toEqual('custom') + }) + + it('should use configured tag', () => { + const {options} = createSearchQuery( + { + query: 'term', + types: [testType], + }, + '', + {tag: 'customTag'}, + ) + + expect(options.tag).toEqual('customTag') + }) + + it('should use configured sort field and direction', () => { + const {query} = createSearchQuery( + { + query: 'test', + types: [testType], + }, + '', + { + sort: [ + { + direction: 'desc', + field: 'exampleField', + }, + ], + }, + ) + + expect(query).toMatchInlineSnapshot(` + "// findability-mvi:5 + *[_type in $__types && @ match text::query($__query) && !(_id in path("versions.**"))] | order(exampleField desc) [0...$__limit] {exampleField, _type, _id}" + `) + + expect(query).toContain('| order(exampleField desc)') + }) + + it('should use multiple sort fields and directions', () => { + const {query} = createSearchQuery( + { + query: 'test', + types: [testType], + }, + '', + { + sort: [ + { + direction: 'desc', + field: 'exampleField', + }, + { + direction: 'asc', + field: 'anotherExampleField', + }, + { + direction: 'asc', + field: 'mapWithField', + mapWith: 'lower', + }, + ], + }, + ) + + expect(query).toMatchInlineSnapshot(` + "// findability-mvi:5 + *[_type in $__types && @ match text::query($__query) && !(_id in path("versions.**"))] | order(exampleField desc,anotherExampleField asc,lower(mapWithField) asc) [0...$__limit] {exampleField, anotherExampleField, mapWithField, _type, _id}" + `) + + expect(query).toContain( + '| order(exampleField desc,anotherExampleField asc,lower(mapWithField) asc)', + ) + }) + + it('should order results by _score desc if no sort field and direction is configured', () => { + const {query} = createSearchQuery( + { + query: 'test', + types: [testType], + }, + '', + ) + + expect(query).toMatchInlineSnapshot(` + "// findability-mvi:5 + *[_type in $__types && !(_id in path("versions.**"))] | score(boost(_type in ["basic-schema-test"] && title match text::query($__query), 10), @ match text::query($__query)) | order(_score desc) [_score > 0] [0...$__limit] {_score, _type, _id}" + `) + + expect(query).toContain('| order(_score desc)') + }) + + it('should prepend comments (with new lines) if comments is configured', () => { + const {query} = createSearchQuery( + { + query: 'test', + types: [testType], + }, + '', + { + comments: ['foo=1', 'bar'], + }, + ) + const lines = query.split('\n') + expect(lines[0]).toEqual(`// findability-mvi:5`) + expect(lines[1]).toEqual('// foo=1') + expect(lines[2]).toEqual('// bar') + }) + }) + + describe('search config', () => { + 
it('should handle indexed array fields in an optimized manner', () => { + const {query} = createSearchQuery( + { + query: 'term0 term1', + types: [ + Schema.compile({ + types: [ + defineType({ + name: 'numbers-in-path', + type: 'document', + fields: [ + defineField({ + name: 'cover', + type: 'array', + of: [ + defineArrayMember({ + type: 'object', + fields: [ + defineField({ + name: 'cards', + type: 'array', + of: [ + defineArrayMember({ + type: 'object', + fields: [ + defineField({ + name: 'title', + type: 'string', + options: { + search: { + weight: 5, + }, + }, + }), + ], + }), + ], + }), + ], + }), + ], + }), + ], + }), + ], + }).get('numbers-in-path'), + ], + }, + '', + ) + + expect(query).toMatchInlineSnapshot(` + "// findability-mvi:5 + *[_type in $__types && !(_id in path("versions.**"))] | score(boost(_type in ["numbers-in-path"] && cover[].cards[].title match text::query($__query), 5), @ match text::query($__query)) | order(_score desc) [_score > 0] [0...$__limit] {_score, _type, _id}" + `) + + expect(query).toContain('cover[].cards[].title match text::query($__query), 5)') + }) + }) +}) diff --git a/packages/sanity/src/core/search/groq2024/createSearchQuery.ts b/packages/sanity/src/core/search/groq2024/createSearchQuery.ts new file mode 100644 index 00000000000..04fea50bb77 --- /dev/null +++ b/packages/sanity/src/core/search/groq2024/createSearchQuery.ts @@ -0,0 +1,139 @@ +import {DEFAULT_MAX_FIELD_DEPTH} from '@sanity/schema/_internal' +import {type CrossDatasetType, type SchemaType} from '@sanity/types' +import {groupBy} from 'lodash' + +import {deriveSearchWeightsFromType2024} from '../common/deriveSearchWeightsFromType2024' +import {prefixLast} from '../common/token' +import { + type SearchFactoryOptions, + type SearchOptions, + type SearchSort, + type SearchTerms, +} from '../common/types' + +interface SearchParams extends Record { + __types: string[] + __limit: number + __query: string +} + +const FINDABILITY_MVI = 5 +const DEFAULT_LIMIT = 1_000 + +interface SearchQuery { + query: string + params: SearchParams + options: Record + sortOrder: SearchSort[] +} + +function isSchemaType( + maybeSchemaType: SchemaType | CrossDatasetType | undefined, +): maybeSchemaType is SchemaType { + return typeof maybeSchemaType !== 'undefined' && 'name' in maybeSchemaType +} + +function toOrderClause(orderBy: SearchSort[]): string { + function wrapFieldWithFn(ordering: SearchSort): string { + return ordering.mapWith ? `${ordering.mapWith}(${ordering.field})` : ordering.field + } + + return (orderBy || []) + .map((ordering) => + [wrapFieldWithFn(ordering), (ordering.direction || '').toLowerCase()] + .map((str) => str.trim()) + .filter(Boolean) + .join(' '), + ) + .join(',') +} + +/** + * @internal + */ +export function createSearchQuery( + searchTerms: SearchTerms, + searchParams: string | SearchTerms, + {includeDrafts = true, ...options}: SearchOptions & SearchFactoryOptions = {}, +): SearchQuery { + const specs = searchTerms.types + .map((schemaType) => + deriveSearchWeightsFromType2024({ + schemaType, + maxDepth: options.maxDepth || DEFAULT_MAX_FIELD_DEPTH, + isCrossDataset: options.isCrossDataset, + processPaths: (paths) => paths.filter(({weight}) => weight !== 1), + }), + ) + .filter(({paths}) => paths.length !== 0) + + // Note: Computing this is unnecessary when `!isScored`. + const flattenedSpecs = specs + .map(({typeName, paths}) => paths.map((path) => ({...path, typeName}))) + .flat() + + // Note: Computing this is unnecessary when `!isScored`. 
+ const groupedSpecs = groupBy(flattenedSpecs, (entry) => [entry.path, entry.weight].join(':')) + + const baseMatch = '@ match text::query($__query)' + + // Note: Computing this is unnecessary when `!isScored`. + const score = Object.entries(groupedSpecs) + .flatMap(([, entries]) => { + if (entries.some(({weight}) => weight === 0)) { + return [] + } + return `boost(_type in ${JSON.stringify(entries.map((entry) => entry.typeName))} && ${entries[0].path} match text::query($__query), ${entries[0].weight})` + }) + .concat(baseMatch) + + const sortOrder = options?.sort ?? [{field: '_score', direction: 'desc'}] + const isScored = sortOrder.some(({field}) => field === '_score') + + const filters: string[] = [ + '_type in $__types', + // If the search request doesn't use scoring, directly filter documents. + isScored ? [] : baseMatch, + options.filter ? `(${options.filter})` : [], + searchTerms.filter ? `(${searchTerms.filter})` : [], + '!(_id in path("versions.**"))', + options.cursor ?? [], + ].flat() + + const projectionFields = sortOrder.map(({field}) => field).concat('_type', '_id') + const projection = projectionFields.join(', ') + + const query = [ + `*[${filters.join(' && ')}]`, + isScored ? ['|', `score(${score.join(', ')})`] : [], + ['|', `order(${toOrderClause(sortOrder)})`], + isScored ? `[_score > 0]` : [], + `[0...$__limit]`, + `{${projection}}`, + ] + .flat() + .join(' ') + + const params: SearchParams = { + __types: searchTerms.types.map((type) => (isSchemaType(type) ? type.name : type.type)), + // Overfetch by 1 to determine whether there is another page to fetch. + __limit: (options?.limit ?? DEFAULT_LIMIT) + 1, + __query: prefixLast(typeof searchParams === 'string' ? searchParams : searchParams.query), + ...options.params, + } + + const pragma = [`findability-mvi:${FINDABILITY_MVI}`] + .concat(options?.comments || []) + .map((s) => `// ${s}`) + .join('\n') + + return { + query: [pragma, query].join('\n'), + options: { + tag: options.tag, + perspective: includeDrafts ? 
'previewDrafts' : 'published', + }, + params, + sortOrder, + } +} diff --git a/packages/sanity/src/core/search/groq2024/getNextCursor.test.ts b/packages/sanity/src/core/search/groq2024/getNextCursor.test.ts new file mode 100644 index 00000000000..dbcf27331d2 --- /dev/null +++ b/packages/sanity/src/core/search/groq2024/getNextCursor.test.ts @@ -0,0 +1,194 @@ +import {describe, expect, it} from 'vitest' + +import {getCursorPredicate, getNextCursor} from './getNextCursor' + +describe('getNextCursor', () => { + it('returns `undefined` if there is no `lastResult`', () => { + expect( + getNextCursor({ + sortOrder: [ + { + direction: 'asc', + field: 'a', + }, + ], + }), + ).toBeUndefined() + }) + + it('produces the correct cursor for a single sort order', () => { + expect( + getNextCursor({ + lastResult: { + _type: 'value:_type', + _id: 'value:_id', + a: 'value:a', + }, + sortOrder: [ + { + direction: 'asc', + field: 'a', + }, + ], + }), + ).toBe('(a > "value:a") || (a == "value:a" && _id > "value:_id")') + }) + + it('produces the correct cursor for multiple sort orders', () => { + expect( + getNextCursor({ + lastResult: { + _type: 'value:_type', + _id: 'value:_id', + a: 'value:a', + b: 'value:b', + c: 'value:c', + }, + sortOrder: [ + { + direction: 'asc', + field: 'a', + }, + { + direction: 'asc', + field: 'b', + }, + { + direction: 'desc', + field: 'c', + }, + ], + }), + ).toBe( + '(a > "value:a") || (a == "value:a" && b > "value:b") || (a == "value:a" && b == "value:b" && c < "value:c") || (a == "value:a" && b == "value:b" && c == "value:c" && _id > "value:_id")', + ) + }) + + it('uses `_id` as a tiebreaker', () => { + expect( + getNextCursor({ + lastResult: { + _type: 'value:_type', + _id: 'value:_id', + a: 'value:a', + }, + sortOrder: [ + { + direction: 'asc', + field: 'a', + }, + ], + }), + ).toBe('(a > "value:a") || (a == "value:a" && _id > "value:_id")') + }) +}) + +it('does not uses `_id` as a tiebreaker if it appears in the user-provided sort orders', () => { + expect( + getNextCursor({ + lastResult: { + _type: 'value:_type', + _id: 'value:_id', + a: 'value:a', + }, + sortOrder: [ + { + direction: 'desc', + field: '_id', + }, + ], + }), + ).toBe('(_id < "value:_id")') + + expect( + getNextCursor({ + lastResult: { + _type: 'value:_type', + _id: 'value:_id', + a: 'value:a', + b: 'value:b', + }, + sortOrder: [ + { + direction: 'asc', + field: '_id', + }, + { + direction: 'desc', + field: 'a', + }, + { + direction: 'asc', + field: 'b', + }, + ], + }), + ).toBe('(_id > "value:_id") || (a < "value:a") || (a == "value:a" && b > "value:b")') +}) + +describe('getCursorPredicate', () => { + it('uses the `>` comparator when sort is `asc`', () => { + expect( + getCursorPredicate( + { + direction: 'asc', + field: 'a', + }, + { + _type: 'value:_type', + _id: 'value:_id', + a: 'value:a', + }, + ), + ).toBe(`a > "value:a"`) + }) + + it('uses the `<` comparator when sort is `desc`', () => { + expect( + getCursorPredicate( + { + direction: 'desc', + field: 'a', + }, + { + _type: 'value:_type', + _id: 'value:_id', + a: 'value:a', + }, + ), + ).toBe(`a < "value:a"`) + }) + + it('allows the comparator to be overridden', () => { + expect( + getCursorPredicate( + { + direction: 'asc', + field: 'a', + }, + { + _type: 'value:_type', + _id: 'value:_id', + a: 'value:a', + }, + '==', + ), + ).toBe(`a == "value:a"`) + }) + + it('returns `undefined` when comparing equality of unique field', () => { + expect( + getCursorPredicate( + { + direction: 'asc', + field: '_id', + }, + { + _type: 'value:_type', + _id: 
'value:_id', + }, + '==', + ), + ).toBeUndefined() + }) +}) diff --git a/packages/sanity/src/core/search/groq2024/getNextCursor.ts b/packages/sanity/src/core/search/groq2024/getNextCursor.ts new file mode 100644 index 00000000000..42ad5793dec --- /dev/null +++ b/packages/sanity/src/core/search/groq2024/getNextCursor.ts @@ -0,0 +1,57 @@ +import {type SanityDocumentLike} from '@sanity/types' + +import {type SearchSort, type SortDirection} from '../common/types' + +/** + * @internal + */ +export function getNextCursor({ + lastResult, + sortOrder, +}: { + lastResult?: SanityDocumentLike + sortOrder: SearchSort[] +}): string | undefined { + if (!lastResult) { + return undefined + } + + const hasIdSort = sortOrder.some(({field}) => field === '_id') + + return ( + sortOrder + // Content Lake always orders by `_id asc` as a tiebreaker. + .concat(hasIdSort ? [] : {field: '_id', direction: 'asc'}) + .reduce<string | undefined>((cursor, sortEntry, index) => { + const nextPredicate = sortOrder + .slice(0, index) + .map((previousSortEntry) => getCursorPredicate(previousSortEntry, lastResult, '==')) + .concat(getCursorPredicate(sortEntry, lastResult)) + .filter((predicate) => typeof predicate !== 'undefined') + .join(' && ') + + return [cursor, `(${nextPredicate})`] + .filter((segment) => typeof segment !== 'undefined') + .join(' || ') + }, undefined) + ) +} + +const sortComparators: Record<SortDirection, '>' | '<'> = { + asc: '>', + desc: '<', +} + +/** + * @internal + */ +export function getCursorPredicate( + sort: SearchSort, + lastEntry: SanityDocumentLike, + comparator: '>' | '<' | '==' = sortComparators[sort.direction], +): string | undefined { + if (sort.field == '_id' && comparator == '==') { + return undefined + } + return [sort.field, comparator, JSON.stringify(lastEntry[sort.field])].join(' ') +} diff --git a/packages/sanity/src/core/search/groq2024/index.ts b/packages/sanity/src/core/search/groq2024/index.ts new file mode 100644 index 00000000000..63755a3c0b6 --- /dev/null +++ b/packages/sanity/src/core/search/groq2024/index.ts @@ -0,0 +1 @@ +export {createGroq2024Search} from './createGroq2024Search' diff --git a/packages/sanity/src/core/search/search.ts b/packages/sanity/src/core/search/search.ts index a877c4c20e3..bfdb6731c48 100644 --- a/packages/sanity/src/core/search/search.ts +++ b/packages/sanity/src/core/search/search.ts @@ -1,26 +1,30 @@ import {type SearchStrategy} from '@sanity/types' import { + type Groq2024SearchResults, type SearchStrategyFactory, type TextSearchResults, type WeightedSearchResults, } from './common' +import {createGroq2024Search} from './groq2024' import {createTextSearch} from './text-search' import {createWeightedSearch} from './weighted' const searchStrategies = { groqLegacy: createWeightedSearch, textSearch: createTextSearch, -} satisfies Record<SearchStrategy, SearchStrategyFactory<TextSearchResults | WeightedSearchResults>> + groq2024: createGroq2024Search, +} satisfies Record< + SearchStrategy, + SearchStrategyFactory<TextSearchResults | WeightedSearchResults | Groq2024SearchResults> +> const DEFAULT_SEARCH_STRATEGY: SearchStrategy = 'groqLegacy' /** @internal */ -export const createSearch: SearchStrategyFactory<TextSearchResults | WeightedSearchResults> = ( - searchableTypes, - client, - options, -) => { +export const createSearch: SearchStrategyFactory< + TextSearchResults | WeightedSearchResults | Groq2024SearchResults +> = (searchableTypes, client, options) => { const factory = searchStrategies[options.strategy ??
DEFAULT_SEARCH_STRATEGY] return factory(searchableTypes, client, options) } diff --git a/packages/sanity/src/core/search/text-search/createTextSearch.test.ts b/packages/sanity/src/core/search/text-search/createTextSearch.test.ts index c169a4fc9fb..6f807f478a8 100644 --- a/packages/sanity/src/core/search/text-search/createTextSearch.test.ts +++ b/packages/sanity/src/core/search/text-search/createTextSearch.test.ts @@ -2,15 +2,7 @@ import {Schema} from '@sanity/schema' import {defineField, defineType} from '@sanity/types' import {describe, expect, it} from 'vitest' -import { - getDocumentTypeConfiguration, - getOrder, - getQueryString, - isExactMatchToken, - isNegationToken, - isPrefixToken, - prefixLast, -} from './createTextSearch' +import {getDocumentTypeConfiguration, getOrder, getQueryString} from './createTextSearch' const testType = Schema.compile({ types: [ @@ -243,65 +235,3 @@ describe('getQueryString', () => { expect(getQueryString('', {queryType: 'prefixNone'})).toEqual('') }) }) - -describe('isNegationToken', () => { - it('identifies negation tokens', () => { - expect(isNegationToken('-test')).toBe(true) - expect(isNegationToken('--')).toBe(true) - expect(isNegationToken('test')).toBe(false) - expect(isNegationToken('test-')).toBe(false) - expect(isNegationToken(undefined)).toBe(false) - }) -}) - -describe('isPrefixToken', () => { - it('identifies prefix tokens', () => { - expect(isPrefixToken('test*')).toBe(true) - expect(isPrefixToken('test')).toBe(false) - expect(isPrefixToken('*test')).toBe(false) - expect(isPrefixToken(undefined)).toBe(false) - }) -}) - -describe('prefixLast', () => { - it('transforms the final non-negation token into a wildcard prefix', () => { - expect(prefixLast('a')).toBe('a*') - expect(prefixLast('a b')).toBe('a b*') - expect(prefixLast('a -b')).toBe('a* -b') - expect(prefixLast('a "bc" d')).toBe('a "bc" d*') - expect(prefixLast('ab "cd"')).toBe('ab* "cd"') - expect(prefixLast('a --')).toBe('a* --') - }) - - it('does not transform the final non-negation token if it is already a wildcard prefix', () => { - expect(prefixLast('a*')).toBe('a*') - expect(prefixLast('a* -b')).toBe('a* -b') - }) - - it('does not transform any tokens if only negation tokens are present', () => { - expect(prefixLast('-a -b')).toBe('-a -b') - expect(prefixLast('--')).toBe('--') - }) - - it('trims tokens', () => { - expect(prefixLast('a "ab c" d')).toBe('a "ab c" d*') - }) - - it('preserves quoted tokens', () => { - expect(prefixLast('"a b" c d')).toBe('"a b" c d*') - expect(prefixLast('"a b" c d "ef" "g "')).toBe('"a b" c d* "ef" "g "') - expect(prefixLast('"a " b" c d')).toBe('"a " b c d*') - }) -}) - -describe('isExactMatchToken', () => { - it('recognises that a token is encased in quote marks', () => { - expect(isExactMatchToken(undefined)).toBe(false) - expect(isExactMatchToken('"a"')).toBe(true) - expect(isExactMatchToken('"a b"')).toBe(true) - expect(isExactMatchToken('"a')).toBe(false) - expect(isExactMatchToken('a"')).toBe(false) - expect(isExactMatchToken('"a b')).toBe(false) - expect(isExactMatchToken('a b"')).toBe(false) - }) -}) diff --git a/packages/sanity/src/core/search/text-search/createTextSearch.ts b/packages/sanity/src/core/search/text-search/createTextSearch.ts index a6f7d6ab166..8a4756f1953 100644 --- a/packages/sanity/src/core/search/text-search/createTextSearch.ts +++ b/packages/sanity/src/core/search/text-search/createTextSearch.ts @@ -16,11 +16,9 @@ import { type TextSearchResponse, type TextSearchResults, } from '../common' +import {prefixLast} from 
'../common/token' const DEFAULT_LIMIT = 1000 -const WILDCARD_TOKEN = '*' -const NEGATION_TOKEN = '-' -const TOKEN_REGEX = /(?:[^\s"]+|"[^"]*")+/g function normalizeSearchTerms( searchParams: string | SearchTerms, @@ -86,40 +84,6 @@ export function getOrder(sort: SearchSort[] = []): TextSearchOrder[] { ) } -export function isNegationToken(token: string | undefined): boolean { - return typeof token !== 'undefined' && token.trim().at(0) === NEGATION_TOKEN -} - -export function isPrefixToken(token: string | undefined): boolean { - return typeof token !== 'undefined' && token.trim().at(-1) === WILDCARD_TOKEN -} - -export function isExactMatchToken(token: string | undefined): boolean { - return [token?.at(0), token?.at(-1)].every((character) => character === '"') -} - -export function prefixLast(query: string): string { - const tokens = (query.match(TOKEN_REGEX) ?? []).map((token) => token.trim()) - - const finalIncrementalTokenIndex = tokens.findLastIndex( - (token) => !isNegationToken(token) && !isExactMatchToken(token), - ) - - const finalIncrementalToken = tokens[finalIncrementalTokenIndex] - - if (tokens.length === 0) { - return WILDCARD_TOKEN - } - - if (isPrefixToken(finalIncrementalToken) || typeof finalIncrementalToken === 'undefined') { - return tokens.join(' ') - } - - const prefixedTokens = [...tokens] - prefixedTokens.splice(finalIncrementalTokenIndex, 1, `${finalIncrementalToken}${WILDCARD_TOKEN}`) - return prefixedTokens.join(' ') -} - export function getQueryString( query: string, {queryType = 'prefixLast'}: Pick, diff --git a/packages/sanity/src/core/studio/components/navbar/search/components/searchResults/SearchResults.tsx b/packages/sanity/src/core/studio/components/navbar/search/components/searchResults/SearchResults.tsx index b94726dd501..695cc7c1d41 100644 --- a/packages/sanity/src/core/studio/components/navbar/search/components/searchResults/SearchResults.tsx +++ b/packages/sanity/src/core/studio/components/navbar/search/components/searchResults/SearchResults.tsx @@ -17,8 +17,8 @@ import {type ItemSelectHandler, SearchResultItem} from './item/SearchResultItem' const VIRTUAL_LIST_SEARCH_RESULT_ITEM_HEIGHT = 57 // px const VIRTUAL_LIST_OVERSCAN = 4 -const SearchResultsInnerFlex = styled(Flex)<{$loading: boolean}>` - opacity: ${({$loading}) => ($loading ? 0.5 : 1)}; +const SearchResultsInnerFlex = styled(Flex)<{$loadingFirstPage: boolean}>` + opacity: ${({$loadingFirstPage}) => ($loadingFirstPage ? 0.5 : 1)}; overflow-x: hidden; overflow-y: auto; position: relative; @@ -37,7 +37,7 @@ export function SearchResults({disableIntentLink, inputElement, onItemSelect}: S dispatch, onClose, setSearchCommandList, - state: {debug, filters, fullscreen, lastActiveIndex, result, terms}, + state: {debug, filters, fullscreen, lastActiveIndex, result, terms, cursor}, } = useSearchState() const {t} = useTranslation() const recentSearchesStore = useRecentSearchesStore() @@ -90,7 +90,11 @@ export function SearchResults({disableIntentLink, inputElement, onItemSelect}: S {hasSearchResults && } {/* Results */} - + {hasError ? 
( ) : ( diff --git a/packages/sanity/src/core/studio/components/navbar/search/contexts/search/SearchProvider.tsx b/packages/sanity/src/core/studio/components/navbar/search/contexts/search/SearchProvider.tsx index e0bc413acb8..ff40f92eeb6 100644 --- a/packages/sanity/src/core/studio/components/navbar/search/contexts/search/SearchProvider.tsx +++ b/packages/sanity/src/core/studio/components/navbar/search/contexts/search/SearchProvider.tsx @@ -129,7 +129,8 @@ export function SearchProvider({children, fullscreen}: SearchProviderProps) { `findability-source: global`, `findability-filter-count:${completeFilters.length}`, ], - limit: SEARCH_LIMIT, + // `groq2024` supports pagination. Therefore, fetch fewer results. + limit: strategy === 'groq2024' ? 25 : SEARCH_LIMIT, skipSortByScore: ordering.ignoreScore, ...(ordering.sort ? {sort: [ordering.sort]} : {}), cursor: cursor || undefined, @@ -142,7 +143,9 @@ export function SearchProvider({children, fullscreen}: SearchProviderProps) { }) // Update previousCursorRef snapshot only on a valid search request - previousCursorRef.current = cursor + if (cursorChanged) { + previousCursorRef.current = cursor + } } // Update snapshots, even if no search request was executed @@ -158,6 +161,7 @@ export function SearchProvider({children, fullscreen}: SearchProviderProps) { searchState.terms, terms, cursor, + strategy, ]) /** diff --git a/packages/sanity/src/core/studio/components/navbar/search/contexts/search/reducer.test.ts b/packages/sanity/src/core/studio/components/navbar/search/contexts/search/reducer.test.ts index 1911f45ae01..16c73916b67 100644 --- a/packages/sanity/src/core/studio/components/navbar/search/contexts/search/reducer.test.ts +++ b/packages/sanity/src/core/studio/components/navbar/search/contexts/search/reducer.test.ts @@ -169,7 +169,81 @@ describe('searchReducer', () => { `) }) - it('should merge results after fetching an additional page', () => { + it('should merge results after fetching an additional page when using `textSearch` strategy', () => { + const {result} = renderHook(() => + useReducer(searchReducer, {...initialState, strategy: 'textSearch'}), + ) + const [, dispatch] = result.current + + act(() => + dispatch({ + type: 'SEARCH_REQUEST_COMPLETE', + nextCursor: 'cursorA', + hits: [ + { + hit: { + _type: 'person', + _id: 'personA', + }, + }, + { + hit: { + _type: 'person', + _id: 'personB', + }, + }, + ], + }), + ) + + act(() => + dispatch({ + type: 'SEARCH_REQUEST_COMPLETE', + nextCursor: undefined, + hits: [ + { + hit: { + _type: 'person', + _id: 'personB', + }, + }, + { + hit: { + _type: 'person', + _id: 'personC', + }, + }, + ], + }), + ) + + const [state] = result.current + + expect(state.result.hits).toMatchInlineSnapshot(` + [ + { + "hit": { + "_id": "personA", + "_type": "person", + }, + }, + { + "hit": { + "_id": "personB", + "_type": "person", + }, + }, + { + "hit": { + "_id": "personC", + "_type": "person", + }, + }, + ] + `) + }) + + it('should not merge results after fetching an additional page when not using `textSearch` strategy', () => { const {result} = renderHook(() => useReducer(searchReducer, initialState)) const [, dispatch] = result.current @@ -231,6 +305,12 @@ describe('searchReducer', () => { "_type": "person", }, }, + { + "hit": { + "_id": "personB", + "_type": "person", + }, + }, { "hit": { "_id": "personC", diff --git a/packages/sanity/src/core/studio/components/navbar/search/contexts/search/reducer.ts b/packages/sanity/src/core/studio/components/navbar/search/contexts/search/reducer.ts index 
6786d5fd710..06a3b33972c 100644 --- a/packages/sanity/src/core/studio/components/navbar/search/contexts/search/reducer.ts +++ b/packages/sanity/src/core/studio/components/navbar/search/contexts/search/reducer.ts @@ -225,7 +225,7 @@ export function searchReducer(state: SearchReducerState, action: SearchAction): error: null, hasLocal: true, hits: state.result.hasLocal - ? deduplicate([...state.result.hits, ...action.hits]) + ? deduplicate([...state.result.hits, ...action.hits], state) : action.hits, loaded: true, loading: false, @@ -597,7 +597,11 @@ function stripRecent(terms: RecentSearch | SearchTerms) { * * Note that should any result appear again in subsequent pages, its first instance will be removed. */ -function deduplicate(hits: SearchHit[]): SearchHit[] { +function deduplicate(hits: SearchHit[], {strategy}: {strategy?: SearchStrategy}): SearchHit[] { + if (strategy !== 'textSearch') { + return hits + } + const hitsById = hits.reduce((map, hit) => { const id = getPublishedId(hit.hit._id)
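
The examples below are minimal usage sketches for the modules this diff adds; they are not part of the diff itself, and any identifier not visible above (project IDs, datasets, document field values) is a placeholder.

Opting a studio into the new strategy is a one-line configuration change. A sketch, assuming `search.strategy` is accepted at the workspace level just as it is in the plugin settings changed in `dev/test-studio/sanity.config.ts`:

```ts
import {defineConfig} from 'sanity'

export default defineConfig({
  name: 'default',
  title: 'groq2024 test',
  projectId: 'my-project-id', // placeholder
  dataset: 'production', // placeholder
  schema: {types: []},
  search: {
    // 'groqLegacy' (default) | 'textSearch' (deprecated) | 'groq2024' (experimental)
    strategy: 'groq2024',
  },
})
```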
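The token helpers that previously lived in `createTextSearch.ts` now sit in `common/token.ts` so `groq2024` can reuse them. A small sketch of the behaviour they encode, with expected values taken from `tokens.test.ts` above (the relative import path assumes the internal module layout added here):

```ts
import {isExactMatchToken, isNegationToken, isPrefixToken, prefixLast} from './common/token'

// The last token that is neither a negation ('-foo') nor quoted becomes a
// wildcard prefix, so partially typed words still match while the user types.
prefixLast('a b') // => 'a b*'
prefixLast('a -b') // => 'a* -b'   (negations are never prefixed)
prefixLast('ab "cd"') // => 'ab* "cd"' (quoted tokens stay exact)
prefixLast('-a -b') // => '-a -b'   (nothing to prefix)
prefixLast('') // => '*'       (an empty query matches everything)

isNegationToken('-draft') // => true
isPrefixToken('draf*') // => true
isExactMatchToken('"a b"') // => true
```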
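`deriveSearchWeightsFromType2024` turns schema metadata (explicit `options.search.weight`, hidden fields, preview selections) into the weighted paths the query builder boosts. A sketch of inspecting the derived spec, reusing the `basic-schema-test` type from `createSearchQuery.test.ts`; the `weight !== 1` filter mirrors the `processPaths` callback that `createSearchQuery.ts` passes:

```ts
import {Schema} from '@sanity/schema'
import {DEFAULT_MAX_FIELD_DEPTH} from '@sanity/schema/_internal'
import {defineField, defineType} from '@sanity/types'

import {deriveSearchWeightsFromType2024} from './common'

const testType = Schema.compile({
  types: [
    defineType({
      name: 'basic-schema-test',
      type: 'document',
      preview: {select: {title: 'title'}},
      fields: [defineField({name: 'title', type: 'string', options: {search: {weight: 10}}})],
    }),
  ],
}).get('basic-schema-test')

const spec = deriveSearchWeightsFromType2024({
  schemaType: testType,
  maxDepth: DEFAULT_MAX_FIELD_DEPTH,
  // Default-weight (1) paths are dropped, so only boosted (or hidden, weight 0) paths remain.
  processPaths: (paths) => paths.filter(({weight}) => weight !== 1),
})

// spec => {typeName: 'basic-schema-test', paths: [{path: 'title', weight: 10}]}
// which createSearchQuery turns into:
//   boost(_type in ["basic-schema-test"] && title match text::query($__query), 10)
```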
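`createSearchQuery` assembles the filters, the `score()`/`boost()` pipeline, ordering, projection, and the overfetch-by-one limit. A sketch of its output for the same type (compare with the inline snapshots in `createSearchQuery.test.ts`); `testType` is the compiled type from the previous sketch:

```ts
import {createSearchQuery} from './groq2024/createSearchQuery'

const {query, params, options} = createSearchQuery({query: 'test', types: [testType]}, 'test', {
  limit: 25,
})

// query (pragma comment elided):
//   *[_type in $__types && !(_id in path("versions.**"))]
//     | score(boost(_type in ["basic-schema-test"] && title match text::query($__query), 10),
//             @ match text::query($__query))
//     | order(_score desc) [_score > 0] [0...$__limit] {_score, _type, _id}

// params  => {__types: ['basic-schema-test'], __query: 'test*', __limit: 26}
// options => {tag: undefined, perspective: 'previewDrafts'}
// __limit is limit + 1: the extra row only signals that another page exists and is
// sliced off before hits are returned.
```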
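Pagination works by turning the last document of a page into a GROQ predicate rather than using offsets. A sketch with hypothetical values (`name`, `'Beatrix'`, `'person-b'` are illustrative, not from the PR); the shape matches the expectations in `getNextCursor.test.ts`:

```ts
import {getNextCursor} from './groq2024/getNextCursor'

const nextCursor = getNextCursor({
  lastResult: {_id: 'person-b', _type: 'person', name: 'Beatrix'},
  sortOrder: [{field: 'name', direction: 'asc'}],
})

// => '(name > "Beatrix") || (name == "Beatrix" && _id > "person-b")'
// `_id asc` is appended as a tiebreaker because Content Lake always breaks ties by `_id`.
// The caller passes this string back as the `cursor` search option, and createSearchQuery
// appends it to the filter list, so the next page starts right after this document.
```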
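Putting it together, `createGroq2024Search` is the factory `createSearch` now selects when `strategy: 'groq2024'` is configured. A sketch of calling it directly, assuming a configured `@sanity/client` instance, the compiled `testType` from earlier, and that the factory options object may be left empty; the factory pins requests to API `vX` internally because `text::query`/`text::matchQuery` are not yet available on a stable API version:

```ts
import {createClient} from '@sanity/client'

import {createGroq2024Search} from './groq2024'

const client = createClient({
  projectId: 'my-project-id', // placeholder
  dataset: 'production', // placeholder
  apiVersion: '2024-01-01',
  useCdn: false,
})

const search = createGroq2024Search([testType], client, {})

search('sani', {limit: 25, tag: 'studio.search'}).subscribe(({type, hits, nextCursor}) => {
  // type is 'groq2024'; hits holds at most 25 items (the overfetched 26th row is dropped);
  // nextCursor is defined only when that extra row came back, and can be fed into the
  // next call via the `cursor` search option.
  console.log(type, hits.length, nextCursor)
})
```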