From f8f2d04c1d43917b98786ce3a8b577bc2238379a Mon Sep 17 00:00:00 2001 From: Daniel Holmgren Date: Fri, 8 Sep 2023 15:08:27 -0500 Subject: [PATCH] Move fuzzy matcher to appview (#1566) * impl on appview * tests + clean up pds * tidy cfg --- packages/bsky/src/auto-moderator/abyss.ts | 4 +- .../src/auto-moderator/fuzzy-matcher.ts} | 22 ++++- packages/bsky/src/auto-moderator/index.ts | 71 +++++++++++++-- packages/bsky/src/auto-moderator/util.ts | 79 +++++++++++----- packages/bsky/src/indexer/config.ts | 15 ++++ packages/bsky/src/services/indexing/index.ts | 4 + .../auto-moderator/fuzzy-matcher.test.ts} | 53 ++++++++--- .../tests/auto-moderator/takedowns.test.ts | 6 +- packages/dev-env/src/bsky.ts | 3 + packages/dev-env/src/types.ts | 2 + .../api/com/atproto/identity/updateHandle.ts | 2 - .../api/com/atproto/server/createAccount.ts | 2 - packages/pds/src/config.ts | 19 ---- packages/pds/src/content-reporter/index.ts | 89 ------------------- packages/pds/src/context.ts | 6 -- .../explicit-slurs.ts | 0 packages/pds/src/handle/index.ts | 2 +- packages/pds/src/index.ts | 21 ----- packages/pds/src/repo/prepare.ts | 2 +- packages/pds/src/services/index.ts | 4 - packages/pds/src/services/repo/index.ts | 6 -- packages/pds/tests/handle-validation.test.ts | 22 ----- 22 files changed, 216 insertions(+), 218 deletions(-) rename packages/{pds/src/content-reporter/validator.ts => bsky/src/auto-moderator/fuzzy-matcher.ts} (83%) rename packages/{pds/tests/content-reporter.test.ts => bsky/tests/auto-moderator/fuzzy-matcher.test.ts} (67%) delete mode 100644 packages/pds/src/content-reporter/index.ts rename packages/pds/src/{content-reporter => handle}/explicit-slurs.ts (100%) diff --git a/packages/bsky/src/auto-moderator/abyss.ts b/packages/bsky/src/auto-moderator/abyss.ts index 6d9cb6afa37..fb9ee2c4e98 100644 --- a/packages/bsky/src/auto-moderator/abyss.ts +++ b/packages/bsky/src/auto-moderator/abyss.ts @@ -7,11 +7,11 @@ import { PrimaryDatabase } from '../db' import { IdResolver } from '@atproto/identity' import { labelerLogger as log } from '../logger' -export interface TakedownFlagger { +export interface ImageFlagger { scanImage(did: string, cid: CID): Promise } -export class Abyss implements TakedownFlagger { +export class Abyss implements ImageFlagger { protected auth: string constructor( diff --git a/packages/pds/src/content-reporter/validator.ts b/packages/bsky/src/auto-moderator/fuzzy-matcher.ts similarity index 83% rename from packages/pds/src/content-reporter/validator.ts rename to packages/bsky/src/auto-moderator/fuzzy-matcher.ts index 9f5b5689e4a..07b5fb9a85e 100644 --- a/packages/pds/src/content-reporter/validator.ts +++ b/packages/bsky/src/auto-moderator/fuzzy-matcher.ts @@ -1,6 +1,11 @@ import { dedupeStrs } from '@atproto/common' +import * as ui8 from 'uint8arrays' -export class UnacceptableWordValidator { +export interface TextFlagger { + getMatches(string: string): string[] +} + +export class FuzzyMatcher implements TextFlagger { private bannedWords: Set private falsePositives: Set @@ -11,6 +16,13 @@ export class UnacceptableWordValidator { ) } + static fromB64(bannedB64: string, falsePositivesB64?: string) { + return new FuzzyMatcher( + decode(bannedB64), + falsePositivesB64 ? decode(falsePositivesB64) : undefined, + ) + } + private normalize(domain: string): string[] { const withoutSymbols = domain.replace(/[\W_]+/g, '') // Remove non-alphanumeric characters const lowercase = withoutSymbols.toLowerCase() @@ -104,3 +116,11 @@ export class UnacceptableWordValidator { return [] } } + +export const decode = (encoded: string): string[] => { + return ui8.toString(ui8.fromString(encoded, 'base64'), 'utf8').split(',') +} + +export const encode = (words: string[]): string => { + return ui8.toString(ui8.fromString(words.join(','), 'utf8'), 'base64') +} diff --git a/packages/bsky/src/auto-moderator/index.ts b/packages/bsky/src/auto-moderator/index.ts index a2d041d17f3..85cc529bce1 100644 --- a/packages/bsky/src/auto-moderator/index.ts +++ b/packages/bsky/src/auto-moderator/index.ts @@ -10,17 +10,20 @@ import { buildBasicAuth } from '../auth' import { CID } from 'multiformats/cid' import { LabelService } from '../services/label' import { ModerationService } from '../services/moderation' -import { TakedownFlagger } from './abyss' +import { ImageFlagger } from './abyss' import { HiveLabeler, ImgLabeler } from './hive' import { KeywordLabeler, TextLabeler } from './keyword' import { ids } from '../lexicon/lexicons' import { ImageUriBuilder } from '../image/uri' import { ImageInvalidator } from '../image/invalidator' import { Abyss } from './abyss' +import { FuzzyMatcher, TextFlagger } from './fuzzy-matcher' +import { REASONOTHER } from '../lexicon/types/com/atproto/moderation/defs' export class AutoModerator { public pushAgent?: AtpAgent - public takedownFlagger?: TakedownFlagger + public imageFlagger?: ImageFlagger + public textFlagger?: TextFlagger public imgLabeler?: ImgLabeler public textLabeler?: TextLabeler @@ -59,7 +62,7 @@ export class AutoModerator { this.imgLabeler = hiveApiKey ? new HiveLabeler(hiveApiKey, ctx) : undefined this.textLabeler = new KeywordLabeler(ctx.cfg.labelerKeywords) if (abyssEndpoint && abyssPassword) { - this.takedownFlagger = new Abyss(abyssEndpoint, abyssPassword, ctx) + this.imageFlagger = new Abyss(abyssEndpoint, abyssPassword, ctx) } else { log.error( { abyssEndpoint, abyssPassword }, @@ -67,6 +70,13 @@ export class AutoModerator { ) } + if (ctx.cfg.fuzzyMatchB64) { + this.textFlagger = FuzzyMatcher.fromB64( + ctx.cfg.fuzzyMatchB64, + ctx.cfg.fuzzyFalsePositiveB64, + ) + } + if (ctx.cfg.moderationPushUrl) { const url = new URL(ctx.cfg.moderationPushUrl) this.pushAgent = new AtpAgent({ service: url.origin }) @@ -79,7 +89,7 @@ export class AutoModerator { processRecord(uri: AtUri, cid: CID, obj: unknown) { this.ctx.backgroundQueue.add(async () => { - const { text, imgs } = getFieldsFromRecord(obj) + const { text, imgs } = getFieldsFromRecord(obj, uri) await Promise.all([ this.labelRecord(uri, cid, text, imgs).catch((err) => { log.error( @@ -87,6 +97,12 @@ export class AutoModerator { 'failed to label record', ) }), + this.flagRecordText(uri, cid, text).catch((err) => { + log.error( + { err, uri: uri.toString(), record: obj }, + 'failed to check record for text flagging', + ) + }), this.checkImgForTakedown(uri, cid, imgs).catch((err) => { log.error( { err, uri: uri.toString(), record: obj }, @@ -97,8 +113,16 @@ export class AutoModerator { }) } + processHandle(handle: string, did: string) { + this.ctx.backgroundQueue.add(async () => { + await this.flagSubjectText(handle, { did }).catch((err) => { + log.error({ err, handle, did }, 'failed to label handle') + }) + }) + } + async labelRecord(uri: AtUri, recordCid: CID, text: string[], imgs: CID[]) { - if (uri.collection === ids.AppBskyActorProfile) { + if (uri.collection !== ids.AppBskyFeedPost) { // @TODO label profiles return } @@ -110,10 +134,45 @@ export class AutoModerator { await this.storeLabels(uri, recordCid, labels) } + async flagRecordText(uri: AtUri, cid: CID, text: string[]) { + if ( + ![ + ids.AppBskyActorProfile, + ids.AppBskyGraphList, + ids.AppBskyFeedGenerator, + ].includes(uri.collection) + ) { + return + } + return this.flagSubjectText(text.join(' '), { uri, cid }) + } + + async flagSubjectText( + text: string, + subject: { did: string } | { uri: AtUri; cid: CID }, + ) { + if (!this.textFlagger) return + const matches = this.textFlagger.getMatches(text) + if (matches.length < 1) return + if (!this.services.moderation) { + log.error( + { subject, text, matches }, + 'no moderation service setup to flag record text', + ) + return + } + await this.services.moderation(this.ctx.db).report({ + reasonType: REASONOTHER, + reason: `Automatically flagged for possible slurs: ${matches.join(', ')}`, + subject, + reportedBy: this.ctx.cfg.labelerDid, + }) + } + async checkImgForTakedown(uri: AtUri, recordCid: CID, imgCids: CID[]) { if (imgCids.length < 0) return const results = await Promise.all( - imgCids.map((cid) => this.takedownFlagger?.scanImage(uri.host, cid)), + imgCids.map((cid) => this.imageFlagger?.scanImage(uri.host, cid)), ) const takedownCids: CID[] = [] for (let i = 0; i < results.length; i++) { diff --git a/packages/bsky/src/auto-moderator/util.ts b/packages/bsky/src/auto-moderator/util.ts index ba49eb2a9f3..ab1467a07f2 100644 --- a/packages/bsky/src/auto-moderator/util.ts +++ b/packages/bsky/src/auto-moderator/util.ts @@ -1,7 +1,22 @@ import { CID } from 'multiformats/cid' +import { AtUri } from '@atproto/syntax' import * as lex from '../lexicon/lexicons' -import { Record as PostRecord } from '../lexicon/types/app/bsky/feed/post' -import { Record as ProfileRecord } from '../lexicon/types/app/bsky/actor/profile' +import { + isRecord as isPost, + Record as PostRecord, +} from '../lexicon/types/app/bsky/feed/post' +import { + isRecord as isProfile, + Record as ProfileRecord, +} from '../lexicon/types/app/bsky/actor/profile' +import { + isRecord as isList, + Record as ListRecord, +} from '../lexicon/types/app/bsky/graph/list' +import { + isRecord as isGenerator, + Record as GeneratorRecord, +} from '../lexicon/types/app/bsky/feed/generator' import { isMain as isEmbedImage } from '../lexicon/types/app/bsky/embed/images' import { isMain as isEmbedExternal } from '../lexicon/types/app/bsky/embed/external' import { isMain as isEmbedRecordWithMedia } from '../lexicon/types/app/bsky/embed/recordWithMedia' @@ -11,11 +26,18 @@ type RecordFields = { imgs: CID[] } -export const getFieldsFromRecord = (record: unknown): RecordFields => { +export const getFieldsFromRecord = ( + record: unknown, + uri: AtUri, +): RecordFields => { if (isPost(record)) { return getFieldsFromPost(record) } else if (isProfile(record)) { return getFieldsFromProfile(record) + } else if (isList(record)) { + return getFieldsFromList(record) + } else if (isGenerator(record)) { + return getFieldsFromGenerator(record, uri) } else { return { text: [], imgs: [] } } @@ -61,6 +83,40 @@ export const getFieldsFromProfile = (record: ProfileRecord): RecordFields => { return { text, imgs } } +export const getFieldsFromList = (record: ListRecord): RecordFields => { + const text: string[] = [] + const imgs: CID[] = [] + if (record.name) { + text.push(record.name) + } + if (record.description) { + text.push(record.description) + } + if (record.avatar) { + imgs.push(record.avatar.ref) + } + return { text, imgs } +} + +export const getFieldsFromGenerator = ( + record: GeneratorRecord, + uri: AtUri, +): RecordFields => { + const text: string[] = [] + const imgs: CID[] = [] + text.push(uri.rkey) + if (record.displayName) { + text.push(record.displayName) + } + if (record.description) { + text.push(record.description) + } + if (record.avatar) { + imgs.push(record.avatar.ref) + } + return { text, imgs } +} + export const dedupe = (strs: (string | undefined)[]): string[] => { const set = new Set() for (const str of strs) { @@ -71,23 +127,6 @@ export const dedupe = (strs: (string | undefined)[]): string[] => { return [...set] } -export const isPost = (obj: unknown): obj is PostRecord => { - return isRecordType(obj, 'app.bsky.feed.post') -} - -export const isProfile = (obj: unknown): obj is ProfileRecord => { - return isRecordType(obj, 'app.bsky.actor.profile') -} - -export const isRecordType = (obj: unknown, lexId: string): boolean => { - try { - lex.lexicons.assertValidRecord(lexId, obj) - return true - } catch { - return false - } -} - const separateEmbeds = (embed: PostRecord['embed']) => { if (!embed) { return [] diff --git a/packages/bsky/src/indexer/config.ts b/packages/bsky/src/indexer/config.ts index 579cfd24432..dd8b9ab89d5 100644 --- a/packages/bsky/src/indexer/config.ts +++ b/packages/bsky/src/indexer/config.ts @@ -18,6 +18,8 @@ export interface IndexerConfigValues { abyssEndpoint?: string abyssPassword?: string imgUriEndpoint?: string + fuzzyMatchB64?: string + fuzzyFalsePositiveB64?: string labelerKeywords: Record moderationPushUrl?: string indexerConcurrency?: number @@ -90,6 +92,9 @@ export class IndexerConfig { const ingesterPartitionCount = maybeParseInt(process.env.INGESTER_PARTITION_COUNT) ?? 64 const labelerKeywords = {} + const fuzzyMatchB64 = process.env.FUZZY_MATCH_B64 || undefined + const fuzzyFalsePositiveB64 = + process.env.FUZZY_FALSE_POSITIVE_B64 || undefined const pushNotificationEndpoint = process.env.PUSH_NOTIFICATION_ENDPOINT assert(dbPostgresUrl) assert(redisHost || (redisSentinelName && redisSentinelHosts?.length)) @@ -120,6 +125,8 @@ export class IndexerConfig { indexerPort, ingesterPartitionCount, labelerKeywords, + fuzzyMatchB64, + fuzzyFalsePositiveB64, pushNotificationEndpoint, ...stripUndefineds(overrides ?? {}), }) @@ -225,6 +232,14 @@ export class IndexerConfig { return this.cfg.labelerKeywords } + get fuzzyMatchB64() { + return this.cfg.fuzzyMatchB64 + } + + get fuzzyFalsePositiveB64() { + return this.cfg.fuzzyFalsePositiveB64 + } + get pushNotificationEndpoint() { return this.cfg.pushNotificationEndpoint } diff --git a/packages/bsky/src/services/indexing/index.ts b/packages/bsky/src/services/indexing/index.ts index ff979ee034f..33f0a2577c2 100644 --- a/packages/bsky/src/services/indexing/index.ts +++ b/packages/bsky/src/services/indexing/index.ts @@ -165,6 +165,10 @@ export class IndexingService { .onConflict((oc) => oc.column('did').doUpdateSet(actorInfo)) .returning('did') .executeTakeFirst() + + if (handle) { + this.autoMod.processHandle(handle, did) + } } async indexRepo(did: string, commit?: string) { diff --git a/packages/pds/tests/content-reporter.test.ts b/packages/bsky/tests/auto-moderator/fuzzy-matcher.test.ts similarity index 67% rename from packages/pds/tests/content-reporter.test.ts rename to packages/bsky/tests/auto-moderator/fuzzy-matcher.test.ts index e4a25d68f05..f9fc320dcb9 100644 --- a/packages/pds/tests/content-reporter.test.ts +++ b/packages/bsky/tests/auto-moderator/fuzzy-matcher.test.ts @@ -1,23 +1,29 @@ -import { encode } from '../src/content-reporter' -import { TestNetworkNoAppView } from '@atproto/dev-env' -import { SeedClient } from './seeds/client' -import basicSeed from './seeds/basic' +import { FuzzyMatcher, encode } from '../../src/auto-moderator/fuzzy-matcher' +import { TestNetwork } from '@atproto/dev-env' +import { SeedClient } from '../seeds/client' +import basicSeed from '../seeds/basic' import { AtpAgent } from '@atproto/api' +import { ImageInvalidator } from '../../src/image/invalidator' -describe('content reporter', () => { - let network: TestNetworkNoAppView +describe('fuzzy matcher', () => { + let network: TestNetwork let agent: AtpAgent let sc: SeedClient + let fuzzyMatcher: FuzzyMatcher let alice: string beforeAll(async () => { - network = await TestNetworkNoAppView.create({ - dbPostgresSchema: 'content_reporter', - pds: { - unacceptableWordsB64: encode(['evil']), + network = await TestNetwork.create({ + dbPostgresSchema: 'fuzzy_matcher', + bsky: { + imgInvalidator: new NoopInvalidator(), + indexer: { + fuzzyMatchB64: encode(['evil']), + }, }, }) + fuzzyMatcher = new FuzzyMatcher(['evil', 'mean', 'bad'], ['baddie']) agent = network.pds.getClient() sc = new SeedClient(agent) await basicSeed(sc) @@ -30,13 +36,30 @@ describe('content reporter', () => { }) const getAllReports = () => { - return network.pds.ctx.db.db - .selectFrom('moderation_report') + return network.bsky.ctx.db + .getPrimary() + .db.selectFrom('moderation_report') .selectAll() .orderBy('id', 'asc') .execute() } + it('identifies fuzzy matches', () => { + expect(fuzzyMatcher.getMatches('evil.john.test')).toMatchObject(['evil']) + expect(fuzzyMatcher.getMatches('john.evil.test')).toMatchObject(['evil']) + expect(fuzzyMatcher.getMatches('john.test.evil')).toMatchObject(['evil']) + expect(fuzzyMatcher.getMatches('ev1l.test.john')).toMatchObject(['evil']) + expect(fuzzyMatcher.getMatches('ev-1l.test.john')).toMatchObject(['evil']) + expect(fuzzyMatcher.getMatches('ev-11.test.john')).toMatchObject(['evil']) + expect(fuzzyMatcher.getMatches('ev.-1.l-test.john')).toMatchObject(['evil']) + }) + + it('identifies fuzzy false positivies', () => { + expect(fuzzyMatcher.getMatches('john.test')).toHaveLength(0) + expect(fuzzyMatcher.getMatches('good.john.test')).toHaveLength(0) + expect(fuzzyMatcher.getMatches('john.baddie.test')).toHaveLength(0) + }) + it('doesnt label any of the content in the seed', async () => { const reports = await getAllReports() expect(reports.length).toBe(0) @@ -62,7 +85,7 @@ describe('content reporter', () => { }, { headers: sc.getHeaders(alice), encoding: 'application/json' }, ) - await network.pds.ctx.backgroundQueue.processAll() + await network.processAll() const reports = await getAllReports() expect(reports.length).toBe(2) @@ -136,3 +159,7 @@ describe('content reporter', () => { expect(reports.at(-1)?.subjectCid).toEqual(res.data.cid) }) }) + +class NoopInvalidator implements ImageInvalidator { + async invalidate() {} +} diff --git a/packages/bsky/tests/auto-moderator/takedowns.test.ts b/packages/bsky/tests/auto-moderator/takedowns.test.ts index 0fa4cbf730c..27b0c986115 100644 --- a/packages/bsky/tests/auto-moderator/takedowns.test.ts +++ b/packages/bsky/tests/auto-moderator/takedowns.test.ts @@ -7,7 +7,7 @@ import { TestNetwork } from '@atproto/dev-env' import { ImageRef, SeedClient } from '../seeds/client' import usersSeed from '../seeds/users' import { CID } from 'multiformats/cid' -import { TakedownFlagger } from '../../src/auto-moderator/abyss' +import { ImageFlagger } from '../../src/auto-moderator/abyss' import { ImageInvalidator } from '../../src/image/invalidator' import { sha256 } from '@atproto/crypto' import { ids } from '../../src/lexicon/lexicons' @@ -38,7 +38,7 @@ describe('takedowner', () => { }) ctx = network.bsky.indexer.ctx autoMod = ctx.autoMod - autoMod.takedownFlagger = new TestFlagger() + autoMod.imageFlagger = new TestFlagger() pdsAgent = new AtpAgent({ service: network.pds.url }) sc = new SeedClient(pdsAgent) await usersSeed(sc) @@ -156,7 +156,7 @@ class TestInvalidator implements ImageInvalidator { } } -class TestFlagger implements TakedownFlagger { +class TestFlagger implements ImageFlagger { async scanImage(_did: string, cid: CID): Promise { if (cid.equals(badCid1)) { return ['kill-it'] diff --git a/packages/dev-env/src/bsky.ts b/packages/dev-env/src/bsky.ts index 15ea03375e2..a99385b755b 100644 --- a/packages/dev-env/src/bsky.ts +++ b/packages/dev-env/src/bsky.ts @@ -95,6 +95,7 @@ export class TestBsky { labelerKeywords: { label_me: 'test-label', label_me_2: 'test-label-2' }, abyssEndpoint: '', abyssPassword: '', + imgUriEndpoint: 'img.example.com', moderationPushUrl: `http://admin:${config.adminPassword}@localhost:${cfg.pdsPort}`, indexerPartitionIds: [0], indexerNamespace: `ns${ns}`, @@ -102,6 +103,7 @@ export class TestBsky { indexerPort: await getPort(), ingesterPartitionCount: 1, pushNotificationEndpoint: 'https://push.bsky.app/api/push', + ...(cfg.indexer ?? {}), }) assert(indexerCfg.redisHost) const indexerRedis = new bsky.Redis({ @@ -124,6 +126,7 @@ export class TestBsky { ingesterNamespace: `ns${ns}`, ingesterSubLockId: uniqueLockId(), ingesterPartitionCount: 1, + ...(cfg.ingester ?? {}), }) assert(ingesterCfg.redisHost) const ingesterRedis = new bsky.Redis({ diff --git a/packages/dev-env/src/types.ts b/packages/dev-env/src/types.ts index ef8f357ee73..0aac4f3aa25 100644 --- a/packages/dev-env/src/types.ts +++ b/packages/dev-env/src/types.ts @@ -24,6 +24,8 @@ export type BskyConfig = Partial & { imgInvalidator?: ImageInvalidator migration?: string algos?: bsky.MountedAlgos + indexer?: Partial + ingester?: Partial } export type TestServerParams = { diff --git a/packages/pds/src/api/com/atproto/identity/updateHandle.ts b/packages/pds/src/api/com/atproto/identity/updateHandle.ts index ccf8e56b1bd..6db63fab0c0 100644 --- a/packages/pds/src/api/com/atproto/identity/updateHandle.ts +++ b/packages/pds/src/api/com/atproto/identity/updateHandle.ts @@ -73,8 +73,6 @@ export default function (server: Server, ctx: AppContext) { 'failed to sequence handle update', ) } - - ctx.contentReporter?.checkHandle({ handle, did: requester }) }, }) } diff --git a/packages/pds/src/api/com/atproto/server/createAccount.ts b/packages/pds/src/api/com/atproto/server/createAccount.ts index 313f7ab5cd8..5827ff6c658 100644 --- a/packages/pds/src/api/com/atproto/server/createAccount.ts +++ b/packages/pds/src/api/com/atproto/server/createAccount.ts @@ -111,8 +111,6 @@ export default function (server: Server, ctx: AppContext) { } }) - ctx.contentReporter?.checkHandle({ handle, did: result.did }) - return { encoding: 'application/json', body: { diff --git a/packages/pds/src/config.ts b/packages/pds/src/config.ts index 741fae22fde..c6a176bfe35 100644 --- a/packages/pds/src/config.ts +++ b/packages/pds/src/config.ts @@ -57,8 +57,6 @@ export interface ServerConfigValues { hiveApiKey?: string labelerDid: string labelerKeywords: Record - unacceptableWordsB64?: string - falsePositiveWordsB64?: string feedGenDid?: string @@ -189,13 +187,6 @@ export class ServerConfig { const labelerDid = process.env.LABELER_DID || 'did:example:labeler' const labelerKeywords = {} - const unacceptableWordsB64 = nonemptyString( - process.env.UNACCEPTABLE_WORDS_B64, - ) - const falsePositiveWordsB64 = nonemptyString( - process.env.FALSE_POSITIVE_WORDS_B64, - ) - const feedGenDid = process.env.FEED_GEN_DID const dbPostgresUrl = process.env.DB_POSTGRES_URL @@ -285,8 +276,6 @@ export class ServerConfig { hiveApiKey, labelerDid, labelerKeywords, - unacceptableWordsB64, - falsePositiveWordsB64, feedGenDid, maxSubscriptionBuffer, repoBackfillLimitMs, @@ -505,14 +494,6 @@ export class ServerConfig { return this.cfg.labelerKeywords } - get unacceptableWordsB64() { - return this.cfg.unacceptableWordsB64 - } - - get falsePositiveWordsB64() { - return this.cfg.falsePositiveWordsB64 - } - get feedGenDid() { return this.cfg.feedGenDid } diff --git a/packages/pds/src/content-reporter/index.ts b/packages/pds/src/content-reporter/index.ts deleted file mode 100644 index 0f21fa6986f..00000000000 --- a/packages/pds/src/content-reporter/index.ts +++ /dev/null @@ -1,89 +0,0 @@ -import { AtUri } from '@atproto/syntax' -import { RepoRecord } from '@atproto/lexicon' -import { CID } from 'multiformats/cid' -import * as ui8 from 'uint8arrays' -import { UnacceptableWordValidator } from './validator' -import { REASONOTHER } from '../lexicon/types/com/atproto/moderation/defs' -import { isRecord as isList } from '../lexicon/types/app/bsky/graph/list' -import { isRecord as isProfile } from '../lexicon/types/app/bsky/actor/profile' -import { isRecord as isFeedGenerator } from '../lexicon/types/app/bsky/feed/generator' -import { BackgroundQueue } from '../event-stream/background-queue' -import { ModerationService } from '../services/moderation' - -export class ContentReporter { - backgroundQueue: BackgroundQueue - moderationService: ModerationService - reporterDid: string - validator: UnacceptableWordValidator - - constructor(opts: { - backgroundQueue: BackgroundQueue - moderationService: ModerationService - reporterDid: string - unacceptableB64: string - falsePositivesB64?: string - }) { - this.backgroundQueue = opts.backgroundQueue - this.moderationService = opts.moderationService - this.reporterDid = opts.reporterDid - this.validator = new UnacceptableWordValidator( - decode(opts.unacceptableB64), - opts.falsePositivesB64 ? decode(opts.falsePositivesB64) : undefined, - ) - } - - checkHandle(opts: { handle: string; did: string }) { - const { handle, did } = opts - return this.checkContent({ - content: handle, - subject: { did }, - }) - } - - checkRecord(opts: { record: RepoRecord; uri: AtUri; cid: CID }) { - const { record, uri, cid } = opts - let content = '' - if (isProfile(record)) { - content += ' ' + record.displayName - } else if (isList(record)) { - content += ' ' + record.name - } else if (isFeedGenerator(record)) { - content += ' ' + uri.rkey - content += ' ' + record.displayName - } - - return this.checkContent({ - content, - subject: { uri, cid }, - }) - } - - checkContent(opts: { - content: string - subject: { did: string } | { uri: AtUri; cid?: CID } - }) { - const { content, subject } = opts - const possibleSlurs = this.validator.getMatches(content) - if (possibleSlurs.length < 1) { - return - } - this.backgroundQueue.add(async () => { - await this.moderationService.report({ - reasonType: REASONOTHER, - reason: `Automatically flagged for possible slurs: ${possibleSlurs.join( - ', ', - )}`, - subject, - reportedBy: this.reporterDid, - }) - }) - } -} - -export const decode = (encoded: string): string[] => { - return ui8.toString(ui8.fromString(encoded, 'base64'), 'utf8').split(',') -} - -export const encode = (words: string[]): string => { - return ui8.toString(ui8.fromString(words.join(','), 'utf8'), 'base64') -} diff --git a/packages/pds/src/context.ts b/packages/pds/src/context.ts index 06740ea41e5..ca57730504b 100644 --- a/packages/pds/src/context.ts +++ b/packages/pds/src/context.ts @@ -21,7 +21,6 @@ import DidSqlCache from './did-cache' import { MountedAlgos } from './feed-gen/types' import { Crawlers } from './crawlers' import { LabelCache } from './label-cache' -import { ContentReporter } from './content-reporter' import { RuntimeFlags } from './runtime-flags' export class AppContext { @@ -46,7 +45,6 @@ export class AppContext { labeler: Labeler labelCache: LabelCache runtimeFlags: RuntimeFlags - contentReporter?: ContentReporter backgroundQueue: BackgroundQueue appviewAgent?: AtpAgent crawlers: Crawlers @@ -150,10 +148,6 @@ export class AppContext { return this.opts.runtimeFlags } - get contentReporter(): ContentReporter | undefined { - return this.opts.contentReporter - } - get backgroundQueue(): BackgroundQueue { return this.opts.backgroundQueue } diff --git a/packages/pds/src/content-reporter/explicit-slurs.ts b/packages/pds/src/handle/explicit-slurs.ts similarity index 100% rename from packages/pds/src/content-reporter/explicit-slurs.ts rename to packages/pds/src/handle/explicit-slurs.ts diff --git a/packages/pds/src/handle/index.ts b/packages/pds/src/handle/index.ts index f9f05c28c48..deae5409945 100644 --- a/packages/pds/src/handle/index.ts +++ b/packages/pds/src/handle/index.ts @@ -1,7 +1,7 @@ import * as ident from '@atproto/syntax' import { InvalidRequestError } from '@atproto/xrpc-server' import { reservedSubdomains } from './reserved' -import { hasExplicitSlur } from '../content-reporter/explicit-slurs' +import { hasExplicitSlur } from './explicit-slurs' import AppContext from '../context' export const normalizeAndValidateHandle = async (opts: { diff --git a/packages/pds/src/index.ts b/packages/pds/src/index.ts index 67552df8e3d..32abb30056d 100644 --- a/packages/pds/src/index.ts +++ b/packages/pds/src/index.ts @@ -52,8 +52,6 @@ import DidSqlCache from './did-cache' import { MountedAlgos } from './feed-gen/types' import { Crawlers } from './crawlers' import { LabelCache } from './label-cache' -import { ContentReporter } from './content-reporter' -import { ModerationService } from './services/moderation' import { getRedisClient } from './redis' import { RuntimeFlags } from './runtime-flags' @@ -200,23 +198,6 @@ export class PDS { const labelCache = new LabelCache(db) - let contentReporter: ContentReporter | undefined = undefined - if (config.unacceptableWordsB64) { - contentReporter = new ContentReporter({ - backgroundQueue, - moderationService: new ModerationService( - db, - messageDispatcher, - blobstore, - imgUriBuilder, - imgInvalidator, - ), - reporterDid: config.labelerDid, - unacceptableB64: config.unacceptableWordsB64, - falsePositivesB64: config.falsePositiveWordsB64, - }) - } - const appviewAgent = config.bskyAppViewEndpoint ? new AtpAgent({ service: config.bskyAppViewEndpoint }) : undefined @@ -229,7 +210,6 @@ export class PDS { imgInvalidator, labeler, labelCache, - contentReporter, appviewAgent, appviewDid: config.bskyAppViewDid, appviewCdnUrlPattern: config.bskyAppViewCdnUrlPattern, @@ -263,7 +243,6 @@ export class PDS { labeler, labelCache, runtimeFlags, - contentReporter, services, mailer, moderationMailer, diff --git a/packages/pds/src/repo/prepare.ts b/packages/pds/src/repo/prepare.ts index c93030d1e6f..60fbe2d81cd 100644 --- a/packages/pds/src/repo/prepare.ts +++ b/packages/pds/src/repo/prepare.ts @@ -33,7 +33,7 @@ import { } from '../lexicon/types/app/bsky/feed/post' import { isRecord as isList } from '../lexicon/types/app/bsky/graph/list' import { isRecord as isProfile } from '../lexicon/types/app/bsky/actor/profile' -import { hasExplicitSlur } from '../content-reporter/explicit-slurs' +import { hasExplicitSlur } from '../handle/explicit-slurs' import { InvalidRequestError } from '@atproto/xrpc-server' // @TODO do this dynamically off of schemas diff --git a/packages/pds/src/services/index.ts b/packages/pds/src/services/index.ts index 6767b1c535e..b49693cc8cf 100644 --- a/packages/pds/src/services/index.ts +++ b/packages/pds/src/services/index.ts @@ -19,7 +19,6 @@ import { LabelService } from '../app-view/services/label' import { BackgroundQueue } from '../event-stream/background-queue' import { Crawlers } from '../crawlers' import { LabelCache } from '../label-cache' -import { ContentReporter } from '../content-reporter' import { LocalService } from './local' export function createServices(resources: { @@ -30,7 +29,6 @@ export function createServices(resources: { imgInvalidator: ImageInvalidator labeler: Labeler labelCache: LabelCache - contentReporter?: ContentReporter appviewAgent?: AtpAgent appviewDid?: string appviewCdnUrlPattern?: string @@ -45,7 +43,6 @@ export function createServices(resources: { imgInvalidator, labeler, labelCache, - contentReporter, appviewAgent, appviewDid, appviewCdnUrlPattern, @@ -63,7 +60,6 @@ export function createServices(resources: { backgroundQueue, crawlers, labeler, - contentReporter, ), local: LocalService.creator( repoSigningKey, diff --git a/packages/pds/src/services/repo/index.ts b/packages/pds/src/services/repo/index.ts index 759d1c50c37..8b8db8eb6be 100644 --- a/packages/pds/src/services/repo/index.ts +++ b/packages/pds/src/services/repo/index.ts @@ -20,7 +20,6 @@ import { Labeler } from '../../labeler' import { wait } from '@atproto/common' import { BackgroundQueue } from '../../event-stream/background-queue' import { Crawlers } from '../../crawlers' -import { ContentReporter } from '../../content-reporter' export class RepoService { blobs: RepoBlobs @@ -33,7 +32,6 @@ export class RepoService { public backgroundQueue: BackgroundQueue, public crawlers: Crawlers, public labeler: Labeler, - public contentReporter?: ContentReporter, ) { this.blobs = new RepoBlobs(db, blobstore, backgroundQueue) } @@ -45,7 +43,6 @@ export class RepoService { backgroundQueue: BackgroundQueue, crawlers: Crawlers, labeler: Labeler, - contentReporter?: ContentReporter, ) { return (db: Database) => new RepoService( @@ -56,7 +53,6 @@ export class RepoService { backgroundQueue, crawlers, labeler, - contentReporter, ) } @@ -77,7 +73,6 @@ export class RepoService { this.backgroundQueue, this.crawlers, this.labeler, - this.contentReporter, ) return fn(srvc) }) @@ -306,7 +301,6 @@ export class RepoService { ) { // @TODO move to appview this.labeler.processRecord(write.uri, write.record) - this.contentReporter?.checkRecord(write) } }) }) diff --git a/packages/pds/tests/handle-validation.test.ts b/packages/pds/tests/handle-validation.test.ts index bf55d61e49f..c39f7db18de 100644 --- a/packages/pds/tests/handle-validation.test.ts +++ b/packages/pds/tests/handle-validation.test.ts @@ -1,6 +1,5 @@ import { isValidTld } from '@atproto/syntax' import { ensureHandleServiceConstraints } from '../src/handle' -import { UnacceptableWordValidator } from '../src/content-reporter/validator' describe('handle validation', () => { it('validates service constraints', () => { @@ -26,25 +25,4 @@ describe('handle validation', () => { expect(isValidTld('atproto.onion')).toBe(false) expect(isValidTld('atproto.internal')).toBe(false) }) - - const validator = new UnacceptableWordValidator( - ['evil', 'mean', 'bad'], - ['baddie'], - ) - - it('identifies offensive handles', () => { - expect(validator.getMatches('evil.john.test')).toMatchObject(['evil']) - expect(validator.getMatches('john.evil.test')).toMatchObject(['evil']) - expect(validator.getMatches('john.test.evil')).toMatchObject(['evil']) - expect(validator.getMatches('ev1l.test.john')).toMatchObject(['evil']) - expect(validator.getMatches('ev-1l.test.john')).toMatchObject(['evil']) - expect(validator.getMatches('ev-11.test.john')).toMatchObject(['evil']) - expect(validator.getMatches('ev.-1.l-test.john')).toMatchObject(['evil']) - }) - - it('identifies non-offensive handles', () => { - expect(validator.getMatches('john.test')).toHaveLength(0) - expect(validator.getMatches('good.john.test')).toHaveLength(0) - expect(validator.getMatches('john.baddie.test')).toHaveLength(0) - }) })