Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

harden datetime verification #1702

Merged
merged 12 commits into from
Dec 1, 2023
7 changes: 7 additions & 0 deletions interop-test-files/syntax/datetime_parse_invalid.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# superficial syntax parses ok, but are not valid datetimes for semantic reasons (eg, "month zero")
1985-00-12T23:20:50.123Z
1985-04-00T23:20:50.123Z
1985-13-12T23:20:50.123Z
1985-04-12T25:20:50.123Z
1985-04-12T23:99:50.123Z
1985-04-12T23:20:61.123Z
68 changes: 68 additions & 0 deletions interop-test-files/syntax/datetime_syntax_invalid.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@

# subtle changes to: 1985-04-12T23:20:50.123Z
1985-04-12T23:20:50.123z
01985-04-12T23:20:50.123Z
985-04-12T23:20:50.123Z
1985-04-12T23:20:50.Z
1985-04-32T23;20:50.123Z
1985-04-32T23;20:50.123Z

# en-dash and em-dash
1985—04-32T23;20:50.123Z
1985–04-32T23;20:50.123Z

# whitespace
1985-04-12T23:20:50.123Z
1985-04-12T23:20:50.123Z
1985-04-12T 23:20:50.123Z

# not enough zero padding
1985-4-12T23:20:50.123Z
1985-04-2T23:20:50.123Z
1985-04-12T3:20:50.123Z
1985-04-12T23:0:50.123Z
1985-04-12T23:20:5.123Z

# too much zero padding
01985-04-12T23:20:50.123Z
1985-004-12T23:20:50.123Z
1985-04-012T23:20:50.123Z
1985-04-12T023:20:50.123Z
1985-04-12T23:020:50.123Z
1985-04-12T23:20:050.123Z

# strict capitalization (ISO-8601)
1985-04-12t23:20:50.123Z
1985-04-12T23:20:50.123z

# RFC-3339, but not ISO-8601
1985-04-12T23:20:50.123-00:00
1985-04-12_23:20:50.123Z
1985-04-12 23:20:50.123Z

# ISO-8601, but weird
1985-04-274T23:20:50.123Z

# timezone is required
1985-04-12T23:20:50.123
1985-04-12T23:20:50

1985-04-12
1985-04-12T23:20Z
1985-04-12T23:20:5Z
1985-04-12T23:20:50.123
+001985-04-12T23:20:50.123Z
23:20:50.123Z

1985-04-12T23:20:50.123+00
1985-04-12T23:20:50.123+00:0
1985-04-12T23:20:50.123+0:00
1985-04-12T23:20:50.123
1985-04-12T23:20:50.123+0000
1985-04-12T23:20:50.123+00
1985-04-12T23:20:50.123+
1985-04-12T23:20:50.123-

# ISO-8601, but normalizes to a negative time
0000-01-01T00:00:00+01:00
-000001-12-31T23:00:00.000Z
40 changes: 40 additions & 0 deletions interop-test-files/syntax/datetime_syntax_valid.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# "preferred"
1985-04-12T23:20:50.123Z
1985-04-12T23:20:50.000Z
2000-01-01T00:00:00.000Z
1985-04-12T23:20:50.123456Z
1985-04-12T23:20:50.120Z
1985-04-12T23:20:50.120000Z

# "supported"
1985-04-12T23:20:50.1235678912345Z
1985-04-12T23:20:50.100Z
1985-04-12T23:20:50Z
1985-04-12T23:20:50.0Z
1985-04-12T23:20:50.123+00:00
1985-04-12T23:20:50.123-07:00
1985-04-12T23:20:50.123+07:00
1985-04-12T23:20:50.123+01:45
0985-04-12T23:20:50.123-07:00
1985-04-12T23:20:50.123-07:00
0123-01-01T00:00:00.000Z

# various precisions, up through at least 12 digits
1985-04-12T23:20:50.1Z
1985-04-12T23:20:50.12Z
1985-04-12T23:20:50.123Z
1985-04-12T23:20:50.1234Z
1985-04-12T23:20:50.12345Z
1985-04-12T23:20:50.123456Z
1985-04-12T23:20:50.1234567Z
1985-04-12T23:20:50.12345678Z
1985-04-12T23:20:50.123456789Z
1985-04-12T23:20:50.1234567890Z
1985-04-12T23:20:50.12345678901Z
1985-04-12T23:20:50.123456789012Z

# extreme but currently allowed
0010-12-31T23:00:00.000Z
1000-12-31T23:00:00.000Z
1900-12-31T23:00:00.000Z
3001-12-31T23:00:00.000Z
5 changes: 2 additions & 3 deletions packages/bsky/src/services/indexing/plugins/block.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { Selectable } from 'kysely'
import { AtUri } from '@atproto/syntax'
import { toSimplifiedISOSafe } from '@atproto/common'
import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax'
import { CID } from 'multiformats/cid'
import * as Block from '../../../lexicon/types/app/bsky/graph/block'
import * as lex from '../../../lexicon/lexicons'
Expand All @@ -27,7 +26,7 @@ const insertFn = async (
cid: cid.toString(),
creator: uri.host,
subjectDid: obj.subject,
createdAt: toSimplifiedISOSafe(obj.createdAt),
createdAt: normalizeDatetimeAlways(obj.createdAt),
indexedAt: timestamp,
})
.onConflict((oc) => oc.doNothing())
Expand Down
5 changes: 2 additions & 3 deletions packages/bsky/src/services/indexing/plugins/feed-generator.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { Selectable } from 'kysely'
import { AtUri } from '@atproto/syntax'
import { toSimplifiedISOSafe } from '@atproto/common'
import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax'
import { CID } from 'multiformats/cid'
import * as FeedGenerator from '../../../lexicon/types/app/bsky/feed/generator'
import * as lex from '../../../lexicon/lexicons'
Expand Down Expand Up @@ -33,7 +32,7 @@ const insertFn = async (
? JSON.stringify(obj.descriptionFacets)
: undefined,
avatarCid: obj.avatar?.ref.toString(),
createdAt: toSimplifiedISOSafe(obj.createdAt),
createdAt: normalizeDatetimeAlways(obj.createdAt),
indexedAt: timestamp,
})
.onConflict((oc) => oc.doNothing())
Expand Down
5 changes: 2 additions & 3 deletions packages/bsky/src/services/indexing/plugins/follow.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { Selectable } from 'kysely'
import { AtUri } from '@atproto/syntax'
import { toSimplifiedISOSafe } from '@atproto/common'
import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax'
import { CID } from 'multiformats/cid'
import * as Follow from '../../../lexicon/types/app/bsky/graph/follow'
import * as lex from '../../../lexicon/lexicons'
Expand Down Expand Up @@ -28,7 +27,7 @@ const insertFn = async (
cid: cid.toString(),
creator: uri.host,
subjectDid: obj.subject,
createdAt: toSimplifiedISOSafe(obj.createdAt),
createdAt: normalizeDatetimeAlways(obj.createdAt),
indexedAt: timestamp,
})
.onConflict((oc) => oc.doNothing())
Expand Down
5 changes: 2 additions & 3 deletions packages/bsky/src/services/indexing/plugins/like.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { Selectable } from 'kysely'
import { AtUri } from '@atproto/syntax'
import { toSimplifiedISOSafe } from '@atproto/common'
import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax'
import { CID } from 'multiformats/cid'
import * as Like from '../../../lexicon/types/app/bsky/feed/like'
import * as lex from '../../../lexicon/lexicons'
Expand Down Expand Up @@ -29,7 +28,7 @@ const insertFn = async (
creator: uri.host,
subject: obj.subject.uri,
subjectCid: obj.subject.cid,
createdAt: toSimplifiedISOSafe(obj.createdAt),
createdAt: normalizeDatetimeAlways(obj.createdAt),
indexedAt: timestamp,
})
.onConflict((oc) => oc.doNothing())
Expand Down
5 changes: 2 additions & 3 deletions packages/bsky/src/services/indexing/plugins/list-block.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { Selectable } from 'kysely'
import { AtUri } from '@atproto/syntax'
import { toSimplifiedISOSafe } from '@atproto/common'
import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax'
import { CID } from 'multiformats/cid'
import * as ListBlock from '../../../lexicon/types/app/bsky/graph/listblock'
import * as lex from '../../../lexicon/lexicons'
Expand All @@ -27,7 +26,7 @@ const insertFn = async (
cid: cid.toString(),
creator: uri.host,
subjectUri: obj.subject,
createdAt: toSimplifiedISOSafe(obj.createdAt),
createdAt: normalizeDatetimeAlways(obj.createdAt),
indexedAt: timestamp,
})
.onConflict((oc) => oc.doNothing())
Expand Down
5 changes: 2 additions & 3 deletions packages/bsky/src/services/indexing/plugins/list-item.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { Selectable } from 'kysely'
import { AtUri } from '@atproto/syntax'
import { toSimplifiedISOSafe } from '@atproto/common'
import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax'
import { CID } from 'multiformats/cid'
import * as ListItem from '../../../lexicon/types/app/bsky/graph/listitem'
import * as lex from '../../../lexicon/lexicons'
Expand Down Expand Up @@ -35,7 +34,7 @@ const insertFn = async (
creator: uri.host,
subjectDid: obj.subject,
listUri: obj.list,
createdAt: toSimplifiedISOSafe(obj.createdAt),
createdAt: normalizeDatetimeAlways(obj.createdAt),
indexedAt: timestamp,
})
.onConflict((oc) => oc.doNothing())
Expand Down
5 changes: 2 additions & 3 deletions packages/bsky/src/services/indexing/plugins/list.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { Selectable } from 'kysely'
import { AtUri } from '@atproto/syntax'
import { toSimplifiedISOSafe } from '@atproto/common'
import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax'
import { CID } from 'multiformats/cid'
import * as List from '../../../lexicon/types/app/bsky/graph/list'
import * as lex from '../../../lexicon/lexicons'
Expand Down Expand Up @@ -33,7 +32,7 @@ const insertFn = async (
? JSON.stringify(obj.descriptionFacets)
: undefined,
avatarCid: obj.avatar?.ref.toString(),
createdAt: toSimplifiedISOSafe(obj.createdAt),
createdAt: normalizeDatetimeAlways(obj.createdAt),
indexedAt: timestamp,
})
.onConflict((oc) => oc.doNothing())
Expand Down
5 changes: 2 additions & 3 deletions packages/bsky/src/services/indexing/plugins/post.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import { Insertable, Selectable, sql } from 'kysely'
import { CID } from 'multiformats/cid'
import { AtUri } from '@atproto/syntax'
import { toSimplifiedISOSafe } from '@atproto/common'
import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax'
import { jsonStringToLex } from '@atproto/lexicon'
import {
Record as PostRecord,
Expand Down Expand Up @@ -68,7 +67,7 @@ const insertFn = async (
cid: cid.toString(),
creator: uri.host,
text: obj.text,
createdAt: toSimplifiedISOSafe(obj.createdAt),
createdAt: normalizeDatetimeAlways(obj.createdAt),
replyRoot: obj.reply?.root?.uri || null,
replyRootCid: obj.reply?.root?.cid || null,
replyParent: obj.reply?.parent?.uri || null,
Expand Down
5 changes: 2 additions & 3 deletions packages/bsky/src/services/indexing/plugins/repost.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import { Selectable } from 'kysely'
import { CID } from 'multiformats/cid'
import { AtUri } from '@atproto/syntax'
import { toSimplifiedISOSafe } from '@atproto/common'
import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax'
import * as Repost from '../../../lexicon/types/app/bsky/feed/repost'
import * as lex from '../../../lexicon/lexicons'
import { DatabaseSchema, DatabaseSchemaType } from '../../../db/database-schema'
Expand All @@ -27,7 +26,7 @@ const insertFn = async (
creator: uri.host,
subject: obj.subject.uri,
subjectCid: obj.subject.cid,
createdAt: toSimplifiedISOSafe(obj.createdAt),
createdAt: normalizeDatetimeAlways(obj.createdAt),
indexedAt: timestamp,
}
const [inserted] = await Promise.all([
Expand Down
5 changes: 2 additions & 3 deletions packages/bsky/src/services/indexing/plugins/thread-gate.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { AtUri } from '@atproto/syntax'
import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax'
import { InvalidRequestError } from '@atproto/xrpc-server'
import { toSimplifiedISOSafe } from '@atproto/common'
import { CID } from 'multiformats/cid'
import * as Threadgate from '../../../lexicon/types/app/bsky/feed/threadgate'
import * as lex from '../../../lexicon/lexicons'
Expand Down Expand Up @@ -33,7 +32,7 @@ const insertFn = async (
cid: cid.toString(),
creator: uri.host,
postUri: obj.post,
createdAt: toSimplifiedISOSafe(obj.createdAt),
createdAt: normalizeDatetimeAlways(obj.createdAt),
indexedAt: timestamp,
})
.onConflict((oc) => oc.doNothing())
Expand Down
5 changes: 2 additions & 3 deletions packages/bsky/src/services/label/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { sql } from 'kysely'
import { AtUri } from '@atproto/syntax'
import { toSimplifiedISOSafe } from '@atproto/common'
import { AtUri, normalizeDatetimeAlways } from '@atproto/syntax'
import { Database } from '../../db'
import { Label, isSelfLabels } from '../../lexicon/types/com/atproto/label/defs'
import { ids } from '../../lexicon/lexicons'
Expand Down Expand Up @@ -166,7 +165,7 @@ export function getSelfLabels(details: {
const src = new AtUri(uri).host // record creator
const cts =
typeof record.createdAt === 'string'
? toSimplifiedISOSafe(record.createdAt)
? normalizeDatetimeAlways(record.createdAt)
: new Date(0).toISOString()
return record.labels.values.map(({ val }) => {
return { src, uri, cid, val, cts, neg: false }
Expand Down
2 changes: 1 addition & 1 deletion packages/lexicon/src/validators/formats.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ export function datetime(path: string, value: string): ValidationResult {
return {
success: false,
error: new ValidationError(
`${path} must be an iso8601 formatted datetime`,
`${path} must be an valid atproto datetime (both RFC-3339 and ISO-8601)`,
),
}
}
Expand Down
4 changes: 3 additions & 1 deletion packages/lexicon/tests/general.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,9 @@ describe('Record validation', () => {
$type: 'com.example.datetime',
datetime: 'bad date',
}),
).toThrow('Record/datetime must be an iso8601 formatted datetime')
).toThrow(
'Record/datetime must be an valid atproto datetime (both RFC-3339 and ISO-8601)',
)
})

it('Applies uri formatting constraint', () => {
Expand Down
20 changes: 19 additions & 1 deletion packages/pds/src/repo/prepare.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import { CID } from 'multiformats/cid'
import { AtUri } from '@atproto/syntax'
import { AtUri, ensureValidDatetime } from '@atproto/syntax'
import { MINUTE, TID, dataToCborBlock } from '@atproto/common'
import {
LexiconDefNotFoundError,
RepoRecord,
ValidationError,
lexToIpld,
} from '@atproto/lexicon'
import {
Expand Down Expand Up @@ -115,6 +116,7 @@ export const assertValidRecord = (record: Record<string, unknown>) => {
}
try {
lex.lexicons.assertValidRecord(record.$type, record)
assertValidCreatedAt(record)
} catch (e) {
if (e instanceof LexiconDefNotFoundError) {
throw new InvalidRecordError(e.message)
Expand All @@ -127,6 +129,22 @@ export const assertValidRecord = (record: Record<string, unknown>) => {
}
}

// additional more rigorous check on datetimes
// this check will eventually be in the lex sdk, but this will stop the bleed until then
export const assertValidCreatedAt = (record: Record<string, unknown>) => {
const createdAt = record['createdAt']
if (typeof createdAt !== 'string') {
return
}
try {
ensureValidDatetime(createdAt)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added the extra check here

This means records with bad timestamps will still pass "lexicon validation" (which we want so that old records don't break application views), but they will no longer be able to be created on the pds

this specific approach only works as is because the only datetimes on our records are createdAt times & every createdAt property is a datetime. if we get other datetimes, we'll need to tweak this logic

} catch {
throw new ValidationError(
'createdAt must be an valid atproto datetime (both RFC-3339 and ISO-8601)',
)
}
}

export const setCollectionName = (
collection: string,
record: RepoRecord,
Expand Down
Loading
Loading