diff --git a/.changeset/popular-shirts-rescue.md b/.changeset/popular-shirts-rescue.md new file mode 100644 index 00000000000..3e7b8ed68bb --- /dev/null +++ b/.changeset/popular-shirts-rescue.md @@ -0,0 +1,5 @@ +--- +"@atproto/lexicon": patch +--- + +Various performance improvements diff --git a/packages/lexicon/src/lexicons.ts b/packages/lexicon/src/lexicons.ts index 382c8e898c7..979503d14e7 100644 --- a/packages/lexicon/src/lexicons.ts +++ b/packages/lexicon/src/lexicons.ts @@ -7,7 +7,6 @@ import { ValidationResult, ValidationError, isObj, - hasProp, } from './types' import { assertValidRecord, @@ -17,7 +16,7 @@ import { assertValidXrpcMessage, } from './validation' import { toLexUri } from './util' -import * as ComplexValidators from './validators/complex' +import { object as validateObject } from './validators/complex' /** * A collection of compiled lexicons. @@ -127,15 +126,17 @@ export class Lexicons implements Iterable { * Validate a record or object. */ validate(lexUri: string, value: unknown): ValidationResult { - lexUri = toLexUri(lexUri) - const def = this.getDefOrThrow(lexUri, ['record', 'object']) if (!isObj(value)) { throw new ValidationError(`Value must be an object`) } + + const lexUriNormalized = toLexUri(lexUri) + const def = this.getDefOrThrow(lexUriNormalized, ['record', 'object']) + if (def.type === 'record') { - return ComplexValidators.object(this, 'Record', def.record, value) + return validateObject(this, 'Record', def.record, value) } else if (def.type === 'object') { - return ComplexValidators.object(this, 'Object', def, value) + return validateObject(this, 'Object', def, value) } else { // shouldn't happen throw new InvalidLexiconError('Definition must be a record or object') @@ -146,20 +147,25 @@ export class Lexicons implements Iterable { * Validate a record and throw on any error. */ assertValidRecord(lexUri: string, value: unknown) { - lexUri = toLexUri(lexUri) - const def = this.getDefOrThrow(lexUri, ['record']) if (!isObj(value)) { throw new ValidationError(`Record must be an object`) } - if (!hasProp(value, '$type') || typeof value.$type !== 'string') { + if (!('$type' in value)) { throw new ValidationError(`Record/$type must be a string`) } - const $type = (value as Record).$type || '' - if (toLexUri($type) !== lexUri) { + const { $type } = value + if (typeof $type !== 'string') { + throw new ValidationError(`Record/$type must be a string`) + } + + const lexUriNormalized = toLexUri(lexUri) + if (toLexUri($type) !== lexUriNormalized) { throw new ValidationError( - `Invalid $type: must be ${lexUri}, got ${$type}`, + `Invalid $type: must be ${lexUriNormalized}, got ${$type}`, ) } + + const def = this.getDefOrThrow(lexUriNormalized, ['record']) return assertValidRecord(this, def as LexRecord, value) } diff --git a/packages/lexicon/src/types.ts b/packages/lexicon/src/types.ts index 5cc7d655f50..a686826a9ac 100644 --- a/packages/lexicon/src/types.ts +++ b/packages/lexicon/src/types.ts @@ -447,23 +447,13 @@ export function isValidLexiconDoc(v: unknown): v is LexiconDoc { return lexiconDoc.safeParse(v).success } -export function isObj(obj: unknown): obj is Record { - return obj !== null && typeof obj === 'object' +export function isObj(v: V): v is V & object { + return v != null && typeof v === 'object' } -export function hasProp( - data: object, - prop: K, -): data is Record { - return prop in data -} - -export const discriminatedObject = z.object({ $type: z.string() }) -export type DiscriminatedObject = z.infer -export function isDiscriminatedObject( - value: unknown, -): value is DiscriminatedObject { - return discriminatedObject.safeParse(value).success +export type DiscriminatedObject = { $type: string } +export function isDiscriminatedObject(v: unknown): v is DiscriminatedObject { + return isObj(v) && '$type' in v && typeof v.$type === 'string' } export function parseLexiconDoc(v: unknown): LexiconDoc { diff --git a/packages/lexicon/src/util.ts b/packages/lexicon/src/util.ts index beae2922d59..5a1c4d49c4b 100644 --- a/packages/lexicon/src/util.ts +++ b/packages/lexicon/src/util.ts @@ -1,6 +1,4 @@ import { z } from 'zod' -import { Lexicons } from './lexicons' -import { LexRefVariant, LexUserType } from './types' export function toLexUri(str: string, baseUri?: string): string { if (str.split('#').length > 2) { @@ -19,19 +17,6 @@ export function toLexUri(str: string, baseUri?: string): string { return `lex:${str}` } -export function toConcreteTypes( - lexicons: Lexicons, - def: LexRefVariant | LexUserType, -): LexUserType[] { - if (def.type === 'ref') { - return [lexicons.getDefOrThrow(def.ref)] - } else if (def.type === 'union') { - return def.refs.map((ref) => lexicons.getDefOrThrow(ref)).flat() - } else { - return [def] - } -} - export function requiredPropertiesRefinement< ObjectType extends { required?: string[] diff --git a/packages/lexicon/src/validators/complex.ts b/packages/lexicon/src/validators/complex.ts index 8e54eba66ac..fcca9e80720 100644 --- a/packages/lexicon/src/validators/complex.ts +++ b/packages/lexicon/src/validators/complex.ts @@ -1,14 +1,14 @@ import { Lexicons } from '../lexicons' import { LexArray, - LexObject, LexRefVariant, LexUserType, ValidationError, ValidationResult, isDiscriminatedObject, + isObj, } from '../types' -import { toConcreteTypes, toLexUri } from '../util' +import { toLexUri } from '../util' import { blob } from './blob' import { boolean, integer, string, bytes, cidLink, unknown } from './primitives' @@ -19,25 +19,46 @@ export function validate( def: LexUserType, value: unknown, ): ValidationResult { + // Order based on the number of occurrences in the Bluesky official lexicons: + + // grep -oh -R -E '"type": "[^"]+"' lexicons | sort | uniq -c | sort -r + // 873 "type": "string" + // 386 "type": "object" + // 256 "type": "ref" + // 198 "type": "array" + // 136 "type": "integer" + // 103 "type": "query" + // 96 "type": "boolean" + // 93 "type": "params" + // 76 "type": "procedure" + // 52 "type": "union" + // 31 "type": "unknown" + // 29 "type": "token" + // 15 "type": "record" + // 10 "type": "blob" + // 4 "type": "cid-link" + // 2 "type": "subscription" + // 2 "type": "bytes" + switch (def.type) { - case 'boolean': - return boolean(lexicons, path, def, value) - case 'integer': - return integer(lexicons, path, def, value) case 'string': return string(lexicons, path, def, value) - case 'bytes': - return bytes(lexicons, path, def, value) - case 'cid-link': - return cidLink(lexicons, path, def, value) - case 'unknown': - return unknown(lexicons, path, def, value) case 'object': return object(lexicons, path, def, value) case 'array': return array(lexicons, path, def, value) + case 'integer': + return integer(lexicons, path, def, value) + case 'boolean': + return boolean(lexicons, path, def, value) + case 'unknown': + return unknown(lexicons, path, def, value) case 'blob': return blob(lexicons, path, def, value) + case 'bytes': + return bytes(lexicons, path, def, value) + case 'cid-link': + return cidLink(lexicons, path, def, value) default: return { success: false, @@ -104,35 +125,31 @@ export function object( def: LexUserType, value: unknown, ): ValidationResult { - def = def as LexObject - // type - if (!value || typeof value !== 'object') { + if (!isObj(value)) { return { success: false, error: new ValidationError(`${path} must be an object`), } } - const requiredProps = new Set(def.required) - const nullableProps = new Set(def.nullable) - // properties let resultValue = value - if (typeof def.properties === 'object') { + if ('properties' in def && def.properties != null) { for (const key in def.properties) { - if (value[key] === null && nullableProps.has(key)) { + const keyValue = value[key] + if (keyValue === null && def.nullable?.includes(key)) { continue } const propDef = def.properties[key] - if (typeof value[key] === 'undefined' && !requiredProps.has(key)) { + if (keyValue === undefined && !def.required?.includes(key)) { // Fast path for non-required undefined props. if ( propDef.type === 'integer' || propDef.type === 'boolean' || propDef.type === 'string' ) { - if (typeof propDef.default === 'undefined') { + if (propDef.default === undefined) { continue } } else { @@ -141,20 +158,27 @@ export function object( } } const propPath = `${path}/${key}` - const validated = validateOneOf(lexicons, propPath, propDef, value[key]) - const propValue = validated.success ? validated.value : value[key] - const propIsUndefined = typeof propValue === 'undefined' + const validated = validateOneOf(lexicons, propPath, propDef, keyValue) + const propValue = validated.success ? validated.value : keyValue + // Return error for bad validation, giving required rule precedence - if (propIsUndefined && requiredProps.has(key)) { - return { - success: false, - error: new ValidationError(`${path} must have the property "${key}"`), + if (propValue === undefined) { + if (def.required?.includes(key)) { + return { + success: false, + error: new ValidationError( + `${path} must have the property "${key}"`, + ), + } + } + } else { + if (!validated.success) { + return validated } - } else if (!propIsUndefined && !validated.success) { - return validated } + // Adjust value based on e.g. applied defaults, cloning shallowly if there was a changed value - if (propValue !== value[key]) { + if (propValue !== keyValue) { if (resultValue === value) { // Lazy shallow clone resultValue = { ...value } @@ -174,9 +198,8 @@ export function validateOneOf( value: unknown, mustBeObj = false, // this is the only type constraint we need currently (used by xrpc body schema validators) ): ValidationResult { - let error + let concreteDef: LexUserType - let concreteDefs if (def.type === 'union') { if (!isDiscriminatedObject(value)) { return { @@ -197,33 +220,17 @@ export function validateOneOf( } return { success: true, value } } else { - concreteDefs = toConcreteTypes(lexicons, { - type: 'ref', - ref: value.$type, - }) + concreteDef = lexicons.getDefOrThrow(value.$type) } + } else if (def.type === 'ref') { + concreteDef = lexicons.getDefOrThrow(def.ref) } else { - concreteDefs = toConcreteTypes(lexicons, def) + concreteDef = def } - for (const concreteDef of concreteDefs) { - const result = mustBeObj - ? object(lexicons, path, concreteDef, value) - : validate(lexicons, path, concreteDef, value) - if (result.success) { - return result - } - error ??= result.error - } - if (concreteDefs.length > 1) { - return { - success: false, - error: new ValidationError( - `${path} did not match any of the expected definitions`, - ), - } - } - return { success: false, error } + return mustBeObj + ? object(lexicons, path, concreteDef, value) + : validate(lexicons, path, concreteDef, value) } // to avoid bugs like #0189 this needs to handle both @@ -235,7 +242,7 @@ const refsContainType = (refs: string[], type: string) => { } if (lexUri.endsWith('#main')) { - return refs.includes(lexUri.replace('#main', '')) + return refs.includes(lexUri.slice(0, -5)) } else { return refs.includes(lexUri + '#main') } diff --git a/packages/lexicon/src/validators/formats.ts b/packages/lexicon/src/validators/formats.ts index 42dd2ee1a16..7af80089b74 100644 --- a/packages/lexicon/src/validators/formats.ts +++ b/packages/lexicon/src/validators/formats.ts @@ -6,8 +6,8 @@ import { ensureValidHandle, ensureValidNsid, ensureValidAtUri, - ensureValidTid, ensureValidRecordKey, + isValidTid, } from '@atproto/syntax' import { validateLanguage } from '@atproto/common-web' @@ -131,17 +131,14 @@ export function language(path: string, value: string): ValidationResult { } export function tid(path: string, value: string): ValidationResult { - try { - ensureValidTid(value) - } catch { - return { - success: false, - error: new ValidationError( - `${path} must be a valid TID (timestamp identifier)`, - ), - } + if (isValidTid(value)) { + return { success: true, value } + } + + return { + success: false, + error: new ValidationError(`${path} must be a valid TID`), } - return { success: true, value } } export function recordKey(path: string, value: string): ValidationResult {