feat: add groq2024 search strategy #7838

Merged 5 commits on Dec 10, 2024
Changes from all commits
4 changes: 3 additions & 1 deletion dev/test-studio/sanity.config.ts
@@ -78,7 +78,9 @@ const sharedSettings = definePlugin({
enabled: true,
},
},

search: {
strategy: 'groq2024',
},
document: {
actions: documentActions,
inspectors: (prev, ctx) => {
2 changes: 1 addition & 1 deletion packages/@sanity/types/src/search/types.ts
@@ -1,7 +1,7 @@
/**
* @public
*/
-export const searchStrategies = ['groqLegacy', 'textSearch'] as const
+export const searchStrategies = ['groqLegacy', 'textSearch', 'groq2024'] as const

/**
* @public
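Note: the `SearchStrategy` type used elsewhere in this PR is presumably the union derived from this array; its actual definition is collapsed out of the diff. A minimal sketch of that pattern, assuming the conventional `typeof … [number]` derivation:

```ts
// Assumed derivation, not shown in the diff above.
export type SearchStrategy = (typeof searchStrategies)[number]
// => 'groqLegacy' | 'textSearch' | 'groq2024'
```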
2 changes: 2 additions & 0 deletions packages/sanity/src/core/config/types.ts
@@ -396,6 +396,8 @@ export interface PluginOptions {
* - `"groqLegacy"` (default): Use client-side tokenization and schema introspection to search
* using the GROQ Query API.
* - `"textSearch"` (deprecated): Perform full text searching using the Text Search API.
* - `"groq2024"`: (experimental) Perform full text searching using the GROQ Query API and its
* new `text::matchQuery` function.
*/
strategy?: SearchStrategy

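As the `dev/test-studio` change above illustrates, the strategy is opted into via studio configuration. A minimal sketch for a standalone studio (project ID, dataset, and schema are placeholders):

```ts
import {defineConfig} from 'sanity'

export default defineConfig({
  projectId: 'your-project-id', // placeholder
  dataset: 'production', // placeholder
  schema: {types: []}, // add your schema types here
  search: {
    // Experimental: full text search via the GROQ Query API's text::matchQuery function.
    strategy: 'groq2024',
  },
})
```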
65 changes: 65 additions & 0 deletions packages/sanity/src/core/search/common/__tests__/tokens.test.ts
@@ -0,0 +1,65 @@
import {describe, expect, it} from 'vitest'

import {isExactMatchToken, isNegationToken, isPrefixToken, prefixLast} from '../token'

describe('isNegationToken', () => {
it('identifies negation tokens', () => {
expect(isNegationToken('-test')).toBe(true)
expect(isNegationToken('--')).toBe(true)
expect(isNegationToken('test')).toBe(false)
expect(isNegationToken('test-')).toBe(false)
expect(isNegationToken(undefined)).toBe(false)
})
})

describe('isPrefixToken', () => {
it('identifies prefix tokens', () => {
expect(isPrefixToken('test*')).toBe(true)
expect(isPrefixToken('test')).toBe(false)
expect(isPrefixToken('*test')).toBe(false)
expect(isPrefixToken(undefined)).toBe(false)
})
})

describe('prefixLast', () => {
it('transforms the final non-negation token into a wildcard prefix', () => {
expect(prefixLast('a')).toBe('a*')
expect(prefixLast('a b')).toBe('a b*')
expect(prefixLast('a -b')).toBe('a* -b')
expect(prefixLast('a "bc" d')).toBe('a "bc" d*')
expect(prefixLast('ab "cd"')).toBe('ab* "cd"')
expect(prefixLast('a --')).toBe('a* --')
})

it('does not transform the final non-negation token if it is already a wildcard prefix', () => {
expect(prefixLast('a*')).toBe('a*')
expect(prefixLast('a* -b')).toBe('a* -b')
})

it('does not transform any tokens if only negation tokens are present', () => {
expect(prefixLast('-a -b')).toBe('-a -b')
expect(prefixLast('--')).toBe('--')
})

it('trims tokens', () => {
expect(prefixLast('a "ab c" d')).toBe('a "ab c" d*')
})

it('preserves quoted tokens', () => {
expect(prefixLast('"a b" c d')).toBe('"a b" c d*')
expect(prefixLast('"a b" c d "ef" "g "')).toBe('"a b" c d* "ef" "g "')
expect(prefixLast('"a " b" c d')).toBe('"a " b c d*')
})
})

describe('isExactMatchToken', () => {
it('recognises that a token is encased in quote marks', () => {
expect(isExactMatchToken(undefined)).toBe(false)
expect(isExactMatchToken('"a"')).toBe(true)
expect(isExactMatchToken('"a b"')).toBe(true)
expect(isExactMatchToken('"a')).toBe(false)
expect(isExactMatchToken('a"')).toBe(false)
expect(isExactMatchToken('"a b')).toBe(false)
expect(isExactMatchToken('a b"')).toBe(false)
})
})
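The predicates exercised above are small string checks. Implementations consistent with these tests might look roughly like the sketch below (inferred from the tests, not copied from `../token`; `prefixLast` is omitted because it additionally has to tokenise the input while respecting quoted phrases):

```ts
// Sketch only: behaviour inferred from the tests above.

/** A token that negates a term, e.g. `-draft`. */
export const isNegationToken = (token: string | undefined): boolean =>
  typeof token !== 'undefined' && token.startsWith('-')

/** A token already expressed as a wildcard prefix, e.g. `foo*`. */
export const isPrefixToken = (token: string | undefined): boolean =>
  typeof token !== 'undefined' && token.endsWith('*')

/** A token wrapped in quote marks, e.g. `"exact phrase"`. */
export const isExactMatchToken = (token: string | undefined): boolean =>
  typeof token !== 'undefined' && token.length > 1 && token.startsWith('"') && token.endsWith('"')
```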
257 changes: 257 additions & 0 deletions packages/sanity/src/core/search/common/deriveSearchWeightsFromType2024.ts
@@ -0,0 +1,257 @@
import {
type CrossDatasetType,
type SchemaType,
type SearchConfiguration,
type SlugSchemaType,
} from '@sanity/types'
import {toString as pathToString} from '@sanity/util/paths'

import {isRecord} from '../../util'
import {type SearchPath, type SearchSpec} from './types'

interface SearchWeightEntry {
path: string
weight: number
type?: 'string' | 'pt'
}

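// Memoise the derived search spec for each schema (or cross dataset) type.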
const CACHE = new WeakMap<SchemaType | CrossDatasetType, SearchSpec>()

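// Weights applied to the fields selected as `title`, `subtitle`, and `description` in a type's
// preview configuration.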
const PREVIEW_FIELD_WEIGHT_MAP = {
title: 10,
subtitle: 5,
description: 1.5,
}
const BASE_WEIGHTS: Record<string, Omit<SearchWeightEntry, 'path'>> = {
_id: {weight: 1},
_type: {weight: 1},
}

// Any object type whose fields should not be considered for custom weighting.
//
// Search may still match on their fields, but will not traverse their schema to find custom
// weights.
//
// Some types, such as `slug`, may instead determine weights using a specialised implementation.
const ignoredBuiltInObjectTypes = ['reference', 'crossDatasetReference', 'slug']

const getTypeChain = (type: SchemaType | undefined): SchemaType[] =>
type ? [type, ...getTypeChain(type.type)] : []

const isPtField = (type: SchemaType | undefined) =>
type?.jsonType === 'array' &&
type.of.some((arrType) => getTypeChain(arrType).some(({name}) => name === 'block'))

const isStringField = (schemaType: SchemaType | undefined): boolean =>
schemaType ? schemaType?.jsonType === 'string' : false

const isSlugField = (schemaType: SchemaType | undefined): schemaType is SlugSchemaType => {
const typeChain = getTypeChain(schemaType)
return typeChain.some(({jsonType, name}) => jsonType === 'object' && name === 'slug')
}

const isSearchConfiguration = (options: unknown): options is SearchConfiguration =>
isRecord(options) && 'search' in options && isRecord(options.search)

function isSchemaType(input: SchemaType | CrossDatasetType | undefined): input is SchemaType {
return typeof input !== 'undefined' && 'name' in input
}

function getFullyQualifiedPath(schemaType: SchemaType, path: string): string {
// Slug field weights should be applied to the object's `current` field.
if (isSlugField(schemaType)) {
return [path, 'current'].join('.')
}

return path
}

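// Recursively walk the schema type (up to `maxDepth`), collecting a weight for every string,
// Portable Text, and slug leaf field. The `getWeight` callback decides each leaf's weight, or
// returns `null` to skip it.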
function getLeafWeights(
schemaType: SchemaType | CrossDatasetType | undefined,
maxDepth: number,
getWeight: (schemaType: SchemaType, path: string) => number | null,
): Record<string, SearchWeightEntry> {
function traverse(
type: SchemaType | undefined,
path: string,
depth: number,
): SearchWeightEntry[] {
if (!type) return []
if (depth > maxDepth) return []

const typeChain = getTypeChain(type)

if (isStringField(type) || isPtField(type)) {
const weight = getWeight(type, path)

if (typeof weight !== 'number') return []
return [{path, weight}]
}

if (isSlugField(type)) {
const weight = getWeight(type, path)
if (typeof weight !== 'number') return []
return [{path: getFullyQualifiedPath(type, path), weight}]
}

const results: SearchWeightEntry[] = []

const objectTypes = typeChain.filter(
(t): t is Extract<SchemaType, {jsonType: 'object'}> =>
t.jsonType === 'object' &&
!!t.fields?.length &&
!ignoredBuiltInObjectTypes.includes(t.name),
)
for (const objectType of objectTypes) {
for (const field of objectType.fields) {
const nextPath = pathToString([path, field.name].filter(Boolean))
results.push(...traverse(field.type, nextPath, depth + 1))
}
}

const arrayTypes = typeChain.filter(
(t): t is Extract<SchemaType, {jsonType: 'array'}> =>
t.jsonType === 'array' && !!t.of?.length,
)
for (const arrayType of arrayTypes) {
for (const arrayItemType of arrayType.of) {
const nextPath = `${path}[]`
results.push(...traverse(arrayItemType, nextPath, depth + 1))
}
}

return results
}

// Cross Dataset References are not part of the schema, so we should not attempt to reconcile them.
if (!isSchemaType(schemaType)) {
return {}
}

return traverse(schemaType, '', 0).reduce<Record<string, SearchWeightEntry>>(
(acc, {path, weight, type}) => {
acc[path] = {weight, type, path}
return acc
},
{},
)
}

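// A weight the user has set explicitly via the type's `options.search.weight`.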
const getUserSetWeight = (schemaType: SchemaType) => {
const searchOptions = getTypeChain(schemaType)
.map((type) => type.options)
.find(isSearchConfiguration)

return typeof searchOptions?.search?.weight === 'number' ? searchOptions.search.weight : null
}

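// Fields hidden anywhere in their type chain are weighted `0`.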
const getHiddenWeight = (schemaType: SchemaType) => {
const hidden = getTypeChain(schemaType).some((type) => type.hidden)
return hidden ? 0 : null
}

const getDefaultWeights = (schemaType: SchemaType) => {
// If there is neither a user-set weight nor a `0` weight due to the field being hidden,
// we can return the default weight of `1`.
const result = getUserSetWeight(schemaType) ?? getHiddenWeight(schemaType)
return typeof result === 'number' ? null : 1
}

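// Derive weights for the fields referenced by the type's preview `select` configuration, using
// the weights in `PREVIEW_FIELD_WEIGHT_MAP`.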
const getPreviewWeights = (
schemaType: SchemaType | CrossDatasetType | undefined,
maxDepth: number,
isCrossDataset?: boolean,
): Record<string, SearchWeightEntry> | null => {
const select = schemaType?.preview?.select
if (!select) return null

const selectionKeysBySelectionPath = Object.fromEntries(
Object.entries(select).map(([selectionKey, selectionPath]) => [
// replace indexed paths with `[]`
// e.g. `arrayOfObjects.0.myField` becomes `arrayOfObjects[].myField`
selectionPath.replace(/\.\d+/g, '[]'),
selectionKey,
]),
)

const defaultWeights = getLeafWeights(schemaType, maxDepth, getDefaultWeights)
const nestedWeightsBySelectionPath = Object.fromEntries(
Object.entries(defaultWeights)
.map(([path, {type}]) => ({path, type}))
.filter(({path}) => selectionKeysBySelectionPath[path])
.map(({path, type}) => [
path,
{
type,
weight:
PREVIEW_FIELD_WEIGHT_MAP[
selectionKeysBySelectionPath[path] as keyof typeof PREVIEW_FIELD_WEIGHT_MAP
],
},
]),
)

if (isCrossDataset) {
return Object.fromEntries(
Object.entries(selectionKeysBySelectionPath).map(([path, previewFieldName]) => {
return [
path,
{
path,
type: 'string',
weight:
PREVIEW_FIELD_WEIGHT_MAP[previewFieldName as keyof typeof PREVIEW_FIELD_WEIGHT_MAP],
},
]
}),
)
}

return getLeafWeights(schemaType, maxDepth, (type, path) => {
const nested = nestedWeightsBySelectionPath[getFullyQualifiedPath(type, path)]
return nested ? nested.weight : null
})
}

interface DeriveSearchWeightsFromTypeOptions {
schemaType: SchemaType | CrossDatasetType
maxDepth: number
isCrossDataset?: boolean
processPaths?: (paths: SearchPath[]) => SearchPath[]
}

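/**
 * Derive a search spec for the given schema type, combining default, hidden, preview, and
 * user-configured weights for every searchable path. Results are memoised per schema type.
 */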
export function deriveSearchWeightsFromType2024({
schemaType,
maxDepth,
isCrossDataset,
processPaths = (paths) => paths,
}: DeriveSearchWeightsFromTypeOptions): SearchSpec {
const cached = CACHE.get(schemaType)
if (cached) return cached

const userSetWeights = getLeafWeights(schemaType, maxDepth, getUserSetWeight)
const hiddenWeights = getLeafWeights(schemaType, maxDepth, getHiddenWeight)
const defaultWeights = getLeafWeights(schemaType, maxDepth, getDefaultWeights)
const previewWeights = getPreviewWeights(schemaType, maxDepth, isCrossDataset)

const weights: Record<string, Omit<SearchWeightEntry, 'path'>> = {
...BASE_WEIGHTS,
...defaultWeights,
...hiddenWeights,
...previewWeights,
...userSetWeights,
}

const result = {
typeName: isSchemaType(schemaType) ? schemaType.name : schemaType.type,
paths: processPaths(
Object.entries(weights).map(([path, {weight}]) => ({
path,
weight,
})),
),
}

CACHE.set(schemaType, result)
return result
}
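For illustration, a rough sketch of how this derivation could be exercised against a compiled schema (the schema and expected output here are hypothetical):

```ts
import {Schema} from '@sanity/schema'

import {deriveSearchWeightsFromType2024} from './deriveSearchWeightsFromType2024'

// Hypothetical type: `title` is boosted explicitly, `body` falls back to the default weight of 1.
const schema = Schema.compile({
  name: 'default',
  types: [
    {
      name: 'article',
      type: 'document',
      fields: [
        {name: 'title', type: 'string', options: {search: {weight: 10}}},
        {name: 'body', type: 'string'},
      ],
    },
  ],
})

const articleType = schema.get('article')

if (articleType) {
  const spec = deriveSearchWeightsFromType2024({schemaType: articleType, maxDepth: 5})
  // spec.paths should contain (in some order) entries along the lines of:
  // {path: '_id', weight: 1}, {path: '_type', weight: 1},
  // {path: 'title', weight: 10}, {path: 'body', weight: 1}
}
```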
1 change: 1 addition & 0 deletions packages/sanity/src/core/search/common/index.ts
@@ -1,3 +1,4 @@
export * from './deriveSearchWeightsFromType'
export * from './deriveSearchWeightsFromType2024'
export * from './getSearchableTypes'
export * from './types'