feat: add groq2024 search strategy #7838

Merged 5 commits on Dec 10, 2024
Changes from all commits
4 changes: 3 additions & 1 deletion dev/test-studio/sanity.config.ts
@@ -78,7 +78,9 @@ const sharedSettings = definePlugin({
enabled: true,
},
},

search: {
strategy: 'groq2024',
},
document: {
actions: documentActions,
inspectors: (prev, ctx) => {
2 changes: 1 addition & 1 deletion packages/@sanity/types/src/search/types.ts
@@ -1,7 +1,7 @@
/**
* @public
*/
-export const searchStrategies = ['groqLegacy', 'textSearch'] as const
+export const searchStrategies = ['groqLegacy', 'textSearch', 'groq2024'] as const

/**
* @public
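Note: the `SearchStrategy` type used elsewhere in this PR is presumably the union derived from this array; its actual definition is collapsed out of the diff. A minimal sketch of that pattern, assuming the conventional `typeof … [number]` derivation:

```ts
// Assumed derivation, not shown in the diff above.
export type SearchStrategy = (typeof searchStrategies)[number]
// => 'groqLegacy' | 'textSearch' | 'groq2024'
```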
2 changes: 2 additions & 0 deletions packages/sanity/src/core/config/types.ts
@@ -396,6 +396,8 @@ export interface PluginOptions {
* - `"groqLegacy"` (default): Use client-side tokenization and schema introspection to search
* using the GROQ Query API.
* - `"textSearch"` (deprecated): Perform full text searching using the Text Search API.
* - `"groq2024"`: (experimental) Perform full text searching using the GROQ Query API and its
* new `text::matchQuery` function.
*/
strategy?: SearchStrategy

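As the `dev/test-studio` change above illustrates, the strategy is opted into via studio configuration. A minimal sketch for a standalone studio (project ID, dataset, and schema are placeholders):

```ts
import {defineConfig} from 'sanity'

export default defineConfig({
  projectId: 'your-project-id', // placeholder
  dataset: 'production', // placeholder
  schema: {types: []}, // add your schema types here
  search: {
    // Experimental: full text search via the GROQ Query API's text::matchQuery function.
    strategy: 'groq2024',
  },
})
```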
65 changes: 65 additions & 0 deletions packages/sanity/src/core/search/common/__tests__/tokens.test.ts
@@ -0,0 +1,65 @@
import {describe, expect, it} from 'vitest'

import {isExactMatchToken, isNegationToken, isPrefixToken, prefixLast} from '../token'

describe('isNegationToken', () => {
it('identifies negation tokens', () => {
expect(isNegationToken('-test')).toBe(true)
expect(isNegationToken('--')).toBe(true)
expect(isNegationToken('test')).toBe(false)
expect(isNegationToken('test-')).toBe(false)
expect(isNegationToken(undefined)).toBe(false)
})
})

describe('isPrefixToken', () => {
it('identifies prefix tokens', () => {
expect(isPrefixToken('test*')).toBe(true)
expect(isPrefixToken('test')).toBe(false)
expect(isPrefixToken('*test')).toBe(false)
expect(isPrefixToken(undefined)).toBe(false)
})
})

describe('prefixLast', () => {
it('transforms the final non-negation token into a wildcard prefix', () => {
expect(prefixLast('a')).toBe('a*')
expect(prefixLast('a b')).toBe('a b*')
expect(prefixLast('a -b')).toBe('a* -b')
expect(prefixLast('a "bc" d')).toBe('a "bc" d*')
expect(prefixLast('ab "cd"')).toBe('ab* "cd"')
expect(prefixLast('a --')).toBe('a* --')
})

it('does not transform the final non-negation token if it is already a wildcard prefix', () => {
expect(prefixLast('a*')).toBe('a*')
expect(prefixLast('a* -b')).toBe('a* -b')
})

it('does not transform any tokens if only negation tokens are present', () => {
expect(prefixLast('-a -b')).toBe('-a -b')
expect(prefixLast('--')).toBe('--')
})

it('trims tokens', () => {
expect(prefixLast('a "ab c" d')).toBe('a "ab c" d*')
})

it('preserves quoted tokens', () => {
expect(prefixLast('"a b" c d')).toBe('"a b" c d*')
expect(prefixLast('"a b" c d "ef" "g "')).toBe('"a b" c d* "ef" "g "')
expect(prefixLast('"a " b" c d')).toBe('"a " b c d*')
})
})

describe('isExactMatchToken', () => {
it('recognises that a token is encased in quote marks', () => {
expect(isExactMatchToken(undefined)).toBe(false)
expect(isExactMatchToken('"a"')).toBe(true)
expect(isExactMatchToken('"a b"')).toBe(true)
expect(isExactMatchToken('"a')).toBe(false)
expect(isExactMatchToken('a"')).toBe(false)
expect(isExactMatchToken('"a b')).toBe(false)
expect(isExactMatchToken('a b"')).toBe(false)
})
})
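The predicates exercised above are small string checks. Implementations consistent with these tests might look roughly like the sketch below (inferred from the tests, not copied from `../token`; `prefixLast` is omitted because it additionally has to tokenise the input while respecting quoted phrases):

```ts
// Sketch only: behaviour inferred from the tests above.

/** A token that negates a term, e.g. `-draft`. */
export const isNegationToken = (token: string | undefined): boolean =>
  typeof token !== 'undefined' && token.startsWith('-')

/** A token already expressed as a wildcard prefix, e.g. `foo*`. */
export const isPrefixToken = (token: string | undefined): boolean =>
  typeof token !== 'undefined' && token.endsWith('*')

/** A token wrapped in quote marks, e.g. `"exact phrase"`. */
export const isExactMatchToken = (token: string | undefined): boolean =>
  typeof token !== 'undefined' && token.length > 1 && token.startsWith('"') && token.endsWith('"')
```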
257 changes: 257 additions & 0 deletions packages/sanity/src/core/search/common/deriveSearchWeightsFromType2024.ts
@@ -0,0 +1,257 @@
import {
type CrossDatasetType,
type SchemaType,
type SearchConfiguration,
type SlugSchemaType,
} from '@sanity/types'
import {toString as pathToString} from '@sanity/util/paths'

import {isRecord} from '../../util'
import {type SearchPath, type SearchSpec} from './types'

interface SearchWeightEntry {
path: string
weight: number
type?: 'string' | 'pt'
}

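// Memoise the derived search spec for each schema (or cross dataset) type.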
const CACHE = new WeakMap<SchemaType | CrossDatasetType, SearchSpec>()

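// Weights applied to the fields selected as `title`, `subtitle`, and `description` in a type's
// preview configuration.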
const PREVIEW_FIELD_WEIGHT_MAP = {
title: 10,
subtitle: 5,
description: 1.5,
}
const BASE_WEIGHTS: Record<string, Omit<SearchWeightEntry, 'path'>> = {
_id: {weight: 1},
_type: {weight: 1},
}

// Any object type whose fields should not be considered for custom weighting.
//
// Search may still match on their fields, but will not traverse their schema to find custom
// weights.
//
// Some types, such as `slug`, may instead determine weights using a specialised implementation.
const ignoredBuiltInObjectTypes = ['reference', 'crossDatasetReference', 'slug']

const getTypeChain = (type: SchemaType | undefined): SchemaType[] =>
type ? [type, ...getTypeChain(type.type)] : []

const isPtField = (type: SchemaType | undefined) =>
type?.jsonType === 'array' &&
type.of.some((arrType) => getTypeChain(arrType).some(({name}) => name === 'block'))

const isStringField = (schemaType: SchemaType | undefined): boolean =>
schemaType ? schemaType?.jsonType === 'string' : false

const isSlugField = (schemaType: SchemaType | undefined): schemaType is SlugSchemaType => {
const typeChain = getTypeChain(schemaType)
return typeChain.some(({jsonType, name}) => jsonType === 'object' && name === 'slug')
}

const isSearchConfiguration = (options: unknown): options is SearchConfiguration =>
isRecord(options) && 'search' in options && isRecord(options.search)

function isSchemaType(input: SchemaType | CrossDatasetType | undefined): input is SchemaType {
return typeof input !== 'undefined' && 'name' in input
}

function getFullyQualifiedPath(schemaType: SchemaType, path: string): string {
// Slug field weights should be applied to the object's `current` field.
if (isSlugField(schemaType)) {
return [path, 'current'].join('.')
}

return path
}

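// Recursively walk the schema type (up to `maxDepth`), collecting a weight for every string,
// Portable Text, and slug leaf field. The `getWeight` callback decides each leaf's weight, or
// returns `null` to skip it.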
function getLeafWeights(
schemaType: SchemaType | CrossDatasetType | undefined,
maxDepth: number,
getWeight: (schemaType: SchemaType, path: string) => number | null,
): Record<string, SearchWeightEntry> {
function traverse(
type: SchemaType | undefined,
path: string,
depth: number,
): SearchWeightEntry[] {
if (!type) return []
if (depth > maxDepth) return []

const typeChain = getTypeChain(type)

if (isStringField(type) || isPtField(type)) {
const weight = getWeight(type, path)

if (typeof weight !== 'number') return []
return [{path, weight}]
}

if (isSlugField(type)) {
const weight = getWeight(type, path)
if (typeof weight !== 'number') return []
return [{path: getFullyQualifiedPath(type, path), weight}]
}

const results: SearchWeightEntry[] = []

const objectTypes = typeChain.filter(
(t): t is Extract<SchemaType, {jsonType: 'object'}> =>
t.jsonType === 'object' &&
!!t.fields?.length &&
!ignoredBuiltInObjectTypes.includes(t.name),
)
for (const objectType of objectTypes) {
for (const field of objectType.fields) {
const nextPath = pathToString([path, field.name].filter(Boolean))
results.push(...traverse(field.type, nextPath, depth + 1))
}
}

const arrayTypes = typeChain.filter(
(t): t is Extract<SchemaType, {jsonType: 'array'}> =>
t.jsonType === 'array' && !!t.of?.length,
)
for (const arrayType of arrayTypes) {
for (const arrayItemType of arrayType.of) {
const nextPath = `${path}[]`
results.push(...traverse(arrayItemType, nextPath, depth + 1))
}
}

return results
}

// Cross Dataset References are not part of the schema, so we should not attempt to reconcile them.
if (!isSchemaType(schemaType)) {
return {}
}

return traverse(schemaType, '', 0).reduce<Record<string, SearchWeightEntry>>(
(acc, {path, weight, type}) => {
acc[path] = {weight, type, path}
return acc
},
{},
)
}

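// A weight the user has set explicitly via the type's `options.search.weight`.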
const getUserSetWeight = (schemaType: SchemaType) => {
const searchOptions = getTypeChain(schemaType)
.map((type) => type.options)
.find(isSearchConfiguration)

return typeof searchOptions?.search?.weight === 'number' ? searchOptions.search.weight : null
}

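// Fields hidden anywhere in their type chain are weighted `0`.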
const getHiddenWeight = (schemaType: SchemaType) => {
const hidden = getTypeChain(schemaType).some((type) => type.hidden)
return hidden ? 0 : null
}

const getDefaultWeights = (schemaType: SchemaType) => {
// If there is neither a user-set weight nor a `0` weight due to the field being hidden,
// we can return the default weight of `1`.
const result = getUserSetWeight(schemaType) ?? getHiddenWeight(schemaType)
return typeof result === 'number' ? null : 1
}

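// Derive weights for the fields referenced by the type's preview `select` configuration, using
// the weights in `PREVIEW_FIELD_WEIGHT_MAP`.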
const getPreviewWeights = (
schemaType: SchemaType | CrossDatasetType | undefined,
maxDepth: number,
isCrossDataset?: boolean,
): Record<string, SearchWeightEntry> | null => {
const select = schemaType?.preview?.select
if (!select) return null

const selectionKeysBySelectionPath = Object.fromEntries(
Object.entries(select).map(([selectionKey, selectionPath]) => [
// replace indexed paths with `[]`
// e.g. `arrayOfObjects.0.myField` becomes `arrayOfObjects[].myField`
selectionPath.replace(/\.\d+/g, '[]'),
selectionKey,
]),
)

const defaultWeights = getLeafWeights(schemaType, maxDepth, getDefaultWeights)
const nestedWeightsBySelectionPath = Object.fromEntries(
Object.entries(defaultWeights)
.map(([path, {type}]) => ({path, type}))
.filter(({path}) => selectionKeysBySelectionPath[path])
.map(({path, type}) => [
path,
{
type,
weight:
PREVIEW_FIELD_WEIGHT_MAP[
selectionKeysBySelectionPath[path] as keyof typeof PREVIEW_FIELD_WEIGHT_MAP
],
},
]),
)

if (isCrossDataset) {
return Object.fromEntries(
Object.entries(selectionKeysBySelectionPath).map(([path, previewFieldName]) => {
return [
path,
{
path,
type: 'string',
weight:
PREVIEW_FIELD_WEIGHT_MAP[previewFieldName as keyof typeof PREVIEW_FIELD_WEIGHT_MAP],
},
]
}),
)
}

return getLeafWeights(schemaType, maxDepth, (type, path) => {
const nested = nestedWeightsBySelectionPath[getFullyQualifiedPath(type, path)]
return nested ? nested.weight : null
})
}

interface DeriveSearchWeightsFromTypeOptions {
schemaType: SchemaType | CrossDatasetType
maxDepth: number
isCrossDataset?: boolean
processPaths?: (paths: SearchPath[]) => SearchPath[]
}

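/**
 * Derive a search spec for the given schema type, combining default, hidden, preview, and
 * user-configured weights for every searchable path. Results are memoised per schema type.
 */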
export function deriveSearchWeightsFromType2024({
schemaType,
maxDepth,
isCrossDataset,
processPaths = (paths) => paths,
}: DeriveSearchWeightsFromTypeOptions): SearchSpec {
const cached = CACHE.get(schemaType)
if (cached) return cached

const userSetWeights = getLeafWeights(schemaType, maxDepth, getUserSetWeight)
const hiddenWeights = getLeafWeights(schemaType, maxDepth, getHiddenWeight)
const defaultWeights = getLeafWeights(schemaType, maxDepth, getDefaultWeights)
const previewWeights = getPreviewWeights(schemaType, maxDepth, isCrossDataset)

const weights: Record<string, Omit<SearchWeightEntry, 'path'>> = {
...BASE_WEIGHTS,
...defaultWeights,
...hiddenWeights,
...previewWeights,
...userSetWeights,
}

const result = {
typeName: isSchemaType(schemaType) ? schemaType.name : schemaType.type,
paths: processPaths(
Object.entries(weights).map(([path, {weight}]) => ({
path,
weight,
})),
),
}

CACHE.set(schemaType, result)
return result
}
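For illustration, a rough sketch of how this derivation could be exercised against a compiled schema (the schema and expected output here are hypothetical):

```ts
import {Schema} from '@sanity/schema'

import {deriveSearchWeightsFromType2024} from './deriveSearchWeightsFromType2024'

// Hypothetical type: `title` is boosted explicitly, `body` falls back to the default weight of 1.
const schema = Schema.compile({
  name: 'default',
  types: [
    {
      name: 'article',
      type: 'document',
      fields: [
        {name: 'title', type: 'string', options: {search: {weight: 10}}},
        {name: 'body', type: 'string'},
      ],
    },
  ],
})

const articleType = schema.get('article')

if (articleType) {
  const spec = deriveSearchWeightsFromType2024({schemaType: articleType, maxDepth: 5})
  // spec.paths should contain (in some order) entries along the lines of:
  // {path: '_id', weight: 1}, {path: '_type', weight: 1},
  // {path: 'title', weight: 10}, {path: 'body', weight: 1}
}
```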
1 change: 1 addition & 0 deletions packages/sanity/src/core/search/common/index.ts
@@ -1,3 +1,4 @@
export * from './deriveSearchWeightsFromType'
export * from './deriveSearchWeightsFromType2024'
export * from './getSearchableTypes'
export * from './types'