Skip to content

Commit

Permalink
Merge branch 'main' into notif-filter-impl
Browse files Browse the repository at this point in the history
  • Loading branch information
dholms committed Dec 11, 2024
2 parents c30c60f + 207728d commit c566ca7
Show file tree
Hide file tree
Showing 4 changed files with 251 additions and 22 deletions.
5 changes: 5 additions & 0 deletions .changeset/perfect-dodos-prove.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@atproto/lexicon": patch
---

Add fast paths that skip UTF8 encoding
59 changes: 42 additions & 17 deletions packages/lexicon/src/validators/primitives.ts
Original file line number Diff line number Diff line change
Expand Up @@ -198,27 +198,52 @@ export function string(
}

// maxLength and minLength
if (typeof def.maxLength === 'number' || typeof def.minLength === 'number') {
const len = utf8Len(value)
if (typeof def.minLength === 'number' || typeof def.maxLength === 'number') {
// If the JavaScript string length * 3 is below the minimum limit,
// its UTF8 length (which is <= .length * 3) will also be below.
if (typeof def.minLength === 'number' && value.length * 3 < def.minLength) {
return {
success: false,
error: new ValidationError(
`${path} must not be shorter than ${def.minLength} characters`,
),
}
}

if (typeof def.maxLength === 'number') {
if (len > def.maxLength) {
return {
success: false,
error: new ValidationError(
`${path} must not be longer than ${def.maxLength} characters`,
),
// If the JavaScript string length * 3 is within the maximum limit,
// its UTF8 length (which is <= .length * 3) will also be within.
// When there's no minimum length, this lets us skip the UTF8 length check.
let canSkipUtf8LenChecks = false
if (
typeof def.minLength === 'undefined' &&
typeof def.maxLength === 'number' &&
value.length * 3 <= def.maxLength
) {
canSkipUtf8LenChecks = true
}

if (!canSkipUtf8LenChecks) {
const len = utf8Len(value)

if (typeof def.maxLength === 'number') {
if (len > def.maxLength) {
return {
success: false,
error: new ValidationError(
`${path} must not be longer than ${def.maxLength} characters`,
),
}
}
}
}

if (typeof def.minLength === 'number') {
if (len < def.minLength) {
return {
success: false,
error: new ValidationError(
`${path} must not be shorter than ${def.minLength} characters`,
),
if (typeof def.minLength === 'number') {
if (len < def.minLength) {
return {
success: false,
error: new ValidationError(
`${path} must not be shorter than ${def.minLength} characters`,
),
}
}
}
}
Expand Down
18 changes: 18 additions & 0 deletions packages/lexicon/tests/_scaffolds/lexicons.ts
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,24 @@ const lexicons: LexiconDoc[] = [
},
},
},
{
lexicon: 1,
id: 'com.example.stringLengthNoMinLength',
defs: {
main: {
type: 'record',
record: {
type: 'object',
properties: {
string: {
type: 'string',
maxLength: 4,
},
},
},
},
},
},
{
lexicon: 1,
id: 'com.example.stringLengthGrapheme',
Expand Down
191 changes: 186 additions & 5 deletions packages/lexicon/tests/general.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -567,26 +567,207 @@ describe('Record validation', () => {
})

it('Applies string length constraint', () => {
// Shorter than two UTF-8 bytes
expect(() =>
lex.assertValidRecord('com.example.stringLength', {
$type: 'com.example.stringLength',
string: '',
}),
).toThrow('Record/string must not be shorter than 2 characters')
expect(() =>
lex.assertValidRecord('com.example.stringLength', {
$type: 'com.example.stringLength',
string: 'a',
}),
).toThrow('Record/string must not be shorter than 2 characters')

// Two to four UTF-8 bytes
lex.assertValidRecord('com.example.stringLength', {
$type: 'com.example.stringLength',
string: 'ab',
})
lex.assertValidRecord('com.example.stringLength', {
$type: 'com.example.stringLength',
string: '\u0301', // Combining acute accent (2 bytes)
})
lex.assertValidRecord('com.example.stringLength', {
$type: 'com.example.stringLength',
string: 'a\u0301', // 'a' + combining acute accent (1 + 2 bytes = 3 bytes)
})
lex.assertValidRecord('com.example.stringLength', {
$type: 'com.example.stringLength',
string: 'aé', // 'a' (1 byte) + 'é' (2 bytes) = 3 bytes
})
lex.assertValidRecord('com.example.stringLength', {
$type: 'com.example.stringLength',
string: 'abc',
})
lex.assertValidRecord('com.example.stringLength', {
$type: 'com.example.stringLength',
string: '123',
string: '一', // CJK character (3 bytes)
})
lex.assertValidRecord('com.example.stringLength', {
$type: 'com.example.stringLength',
string: '\uD83D', // Unpaired high surrogate (3 bytes)
})
lex.assertValidRecord('com.example.stringLength', {
$type: 'com.example.stringLength',
string: 'abcd',
})
lex.assertValidRecord('com.example.stringLength', {
$type: 'com.example.stringLength',
string: 'éé', // 'é' + 'é' (2 + 2 bytes = 4 bytes)
})
lex.assertValidRecord('com.example.stringLength', {
$type: 'com.example.stringLength',
string: 'aaé', // 1 + 1 + 2 = 4 bytes
})
lex.assertValidRecord('com.example.stringLength', {
$type: 'com.example.stringLength',
string: '👋', // 4 bytes
})

expect(() =>
lex.assertValidRecord('com.example.stringLength', {
$type: 'com.example.stringLength',
string: '1',
string: 'abcde',
}),
).toThrow('Record/string must not be shorter than 2 characters')
).toThrow('Record/string must not be longer than 4 characters')
expect(() =>
lex.assertValidRecord('com.example.stringLength', {
$type: 'com.example.stringLength',
string: '12345',
string: 'a\u0301\u0301', // 1 + (2 * 2) = 5 bytes
}),
).toThrow('Record/string must not be longer than 4 characters')
expect(() =>
lex.assertValidRecord('com.example.stringLength', {
$type: 'com.example.stringLength',
string: '👨‍👩‍👧‍👧',
string: '\uD83D\uD83D', // Two unpaired high surrogates (3 * 2 = 6 bytes)
}),
).toThrow('Record/string must not be longer than 4 characters')
expect(() =>
lex.assertValidRecord('com.example.stringLength', {
$type: 'com.example.stringLength',
string: 'ééé', // 2 + 2 + 2 bytes = 6 bytes
}),
).toThrow('Record/string must not be longer than 4 characters')
expect(() =>
lex.assertValidRecord('com.example.stringLength', {
$type: 'com.example.stringLength',
string: '👋a', // 4 + 1 bytes = 5 bytes
}),
).toThrow('Record/string must not be longer than 4 characters')
expect(() =>
lex.assertValidRecord('com.example.stringLength', {
$type: 'com.example.stringLength',
string: '👨👨', // 4 + 4 = 8 bytes
}),
).toThrow('Record/string must not be longer than 4 characters')
expect(() =>
lex.assertValidRecord('com.example.stringLength', {
$type: 'com.example.stringLength',
string: '👨‍👩‍👧‍👧', // 4 emojis × 4 bytes + 3 ZWJs × 3 bytes = 25 bytes
}),
).toThrow('Record/string must not be longer than 4 characters')
})

it('Applies string length constraint (no minLength)', () => {
  // Fixture under test declares only `maxLength: 4` for its `string` property.
  const lexUri = 'com.example.stringLengthNoMinLength'

  // Wraps a candidate value in a record of the fixture's type.
  const recordWith = (text: string) => ({ $type: lexUri, string: text })

  // Values that must validate, ordered by growing UTF-8 byte length.
  const accepted = [
    // Fewer than two UTF-8 bytes
    '',
    'a',

    // Two to four UTF-8 bytes
    'ab',
    '\u0301', // Combining acute accent (2 bytes)
    'a\u0301', // 'a' + combining acute accent (1 + 2 bytes = 3 bytes)
    'aé', // 'a' (1 byte) + 'é' (2 bytes) = 3 bytes
    'abc',
    '一', // CJK character (3 bytes)
    '\uD83D', // Unpaired high surrogate (3 bytes)
    'abcd',
    'éé', // 'é' + 'é' (2 + 2 bytes = 4 bytes)
    'aaé', // 1 + 1 + 2 = 4 bytes
    '👋', // 4 bytes
  ]
  for (const text of accepted) {
    lex.assertValidRecord(lexUri, recordWith(text))
  }

  // Values that must be rejected: each encodes to more than four UTF-8 bytes.
  const rejected = [
    'abcde',
    'a\u0301\u0301', // 1 + (2 * 2) = 5 bytes
    '\uD83D\uD83D', // Two unpaired high surrogates (3 * 2 = 6 bytes)
    'ééé', // 2 + 2 + 2 bytes = 6 bytes
    '👋a', // 4 + 1 bytes = 5 bytes
    '👨👨', // 4 + 4 = 8 bytes
    '👨‍👩‍👧‍👧', // 4 emojis × 4 bytes + 3 ZWJs × 3 bytes = 25 bytes
  ]
  for (const text of rejected) {
    expect(() => lex.assertValidRecord(lexUri, recordWith(text))).toThrow(
      'Record/string must not be longer than 4 characters',
    )
  }
})
Expand Down

0 comments on commit c566ca7

Please sign in to comment.