ozone: replace ModerationLangService with a pluggable TagService

Moves language tagging out of mod-service/lang.ts into tag-service/ as
LanguageTagger, adds the ContentTagger base class and an EmbedTagger, and
switches emitEvent/createReport to TagService.evaluateForSubject().

Notes for whoever applies this patch:
  * Line breaks and indentation were reconstructed from a whitespace-collapsed
    copy. Unchanged context lines follow the project's Prettier style, but
    confirm with `git apply --check` before relying on them.
  * content-tagger.ts, embed-tagger.ts and index.ts carry added TSDoc, and
    embed-tagger.ts logs 'Error getting record embeds' instead of the
    copy-pasted 'langs' message, so the blob ids on their index lines refer
    to the originally committed contents, not to the contents below.

diff --git a/packages/ozone/src/api/moderation/emitEvent.ts b/packages/ozone/src/api/moderation/emitEvent.ts
index 8b3a2814b3c..7b538150e1c 100644
--- a/packages/ozone/src/api/moderation/emitEvent.ts
+++ b/packages/ozone/src/api/moderation/emitEvent.ts
@@ -14,7 +14,7 @@ import {
 } from '../../lexicon/types/tools/ozone/moderation/defs'
 import { HandlerInput } from '../../lexicon/types/tools/ozone/moderation/emitEvent'
 import { subjectFromInput } from '../../mod-service/subject'
-import { ModerationLangService } from '../../mod-service/lang'
+import { TagService } from '../../tag-service'
 import { retryHttp } from '../../util'
 import { ModeratorOutput, AdminTokenOutput } from '../../auth-verifier'
 
@@ -137,12 +137,13 @@ const handleModerationEvent = async ({
       createdBy,
     })
 
-    const moderationLangService = new ModerationLangService(moderationTxn)
-    await moderationLangService.tagSubjectWithLang({
+    const tagService = new TagService(
       subject,
-      createdBy: ctx.cfg.service.did,
-      subjectStatus: result.subjectStatus,
-    })
+      result.subjectStatus,
+      ctx.cfg.service.did,
+      moderationTxn,
+    )
+    await tagService.evaluateForSubject()
 
     if (subject.isRepo()) {
       if (isTakedownEvent) {
diff --git a/packages/ozone/src/api/report/createReport.ts b/packages/ozone/src/api/report/createReport.ts
index 4d939b0983e..dc76b22763a 100644
--- a/packages/ozone/src/api/report/createReport.ts
+++ b/packages/ozone/src/api/report/createReport.ts
@@ -4,7 +4,7 @@ import { getReasonType } from '../util'
 import { subjectFromInput } from '../../mod-service/subject'
 import { REASONAPPEAL } from '../../lexicon/types/com/atproto/moderation/defs'
 import { ForbiddenError } from '@atproto/xrpc-server'
-import { ModerationLangService } from '../../mod-service/lang'
+import { TagService } from '../../tag-service'
 
 export default function (server: Server, ctx: AppContext) {
   server.com.atproto.moderation.createReport({
@@ -31,12 +31,13 @@ export default function (server: Server, ctx: AppContext) {
             reportedBy: requester || ctx.cfg.service.did,
           })
 
-        const moderationLangService = new ModerationLangService(moderationTxn)
-        await moderationLangService.tagSubjectWithLang({
+        const tagService = new TagService(
           subject,
           subjectStatus,
-          createdBy: ctx.cfg.service.did,
-        })
+          ctx.cfg.service.did,
+          moderationTxn,
+        )
+        await tagService.evaluateForSubject()
 
         return reportEvent
       })
diff --git a/packages/ozone/src/tag-service/content-tagger.ts b/packages/ozone/src/tag-service/content-tagger.ts
new file mode 100644
index 00000000000..3e6b1b1a0a1
--- /dev/null
+++ b/packages/ozone/src/tag-service/content-tagger.ts
@@ -0,0 +1,39 @@
+import { ModerationService } from '../mod-service'
+import { ModSubject } from '../mod-service/subject'
+import { ModerationSubjectStatusRow } from '../mod-service/types'
+
+/**
+ * Base class for taggers that derive moderation tags for a single subject.
+ * Subclasses supply a tag prefix, an applicability check and the tag builder.
+ */
+export abstract class ContentTagger {
+  constructor(
+    protected subject: ModSubject,
+    protected subjectStatus: ModerationSubjectStatusRow | null,
+    protected moderationService: ModerationService,
+  ) {}
+
+  /** Prefix shared by every tag this tagger emits, e.g. `lang:`. */
+  protected abstract tagPrefix: string
+
+  /** Whether this tagger should run for the current subject. */
+  protected abstract isApplicable(): boolean
+  /** Builds the tags to add; `getTags()` calls it only when applicable. */
+  protected abstract buildTags(): Promise<string[]>
+
+  /** Returns the tags to add, or an empty list when not applicable. */
+  async getTags(): Promise<string[]> {
+    if (!this.isApplicable()) {
+      return []
+    }
+
+    return this.buildTags()
+  }
+
+  /** True when the subject status already carries a tag with this prefix. */
+  protected tagAlreadyExists(): boolean {
+    return Boolean(
+      this.subjectStatus?.tags?.some((tag) => tag.startsWith(this.tagPrefix)),
+    )
+  }
+}
diff --git a/packages/ozone/src/tag-service/embed-tagger.ts b/packages/ozone/src/tag-service/embed-tagger.ts
new file mode 100644
index 00000000000..11c4e07af21
--- /dev/null
+++ b/packages/ozone/src/tag-service/embed-tagger.ts
@@ -0,0 +1,81 @@
+import {
+  AppBskyEmbedImages,
+  AppBskyEmbedRecordWithMedia,
+  AppBskyFeedPost,
+  AppBskyEmbedVideo,
+  AppBskyEmbedExternal,
+} from '@atproto/api'
+import { langLogger as log } from '../logger'
+import { ContentTagger } from './content-tagger'
+import { ids } from '../lexicon/lexicons'
+
+/**
+ * Tags a post with the kind of embed it carries: `embed:image`,
+ * `embed:video` or `embed:external`.
+ */
+export class EmbedTagger extends ContentTagger {
+  tagPrefix = 'embed:'
+
+  /**
+   * Runs only when a subject status exists, it has no `embed:` tag yet, and
+   * the subject is a record in the `ids.AppBskyFeedPost` collection.
+   */
+  isApplicable(): boolean {
+    return (
+      !!this.subjectStatus &&
+      !this.tagAlreadyExists() &&
+      this.subject.isRecord() &&
+      this.subject.parsedUri.collection === ids.AppBskyFeedPost
+    )
+  }
+
+  /**
+   * Inspects the post's embed (using the media part of a record-with-media
+   * embed) and returns the matching tags. Failures are logged and yield `[]`.
+   */
+  async buildTags(): Promise<string[]> {
+    try {
+      const recordValue = await this.getRecordValue()
+      if (!recordValue) {
+        return []
+      }
+      const tags: string[] = []
+      if (AppBskyFeedPost.isRecord(recordValue)) {
+        const embedContent = AppBskyEmbedRecordWithMedia.isMain(
+          recordValue.embed,
+        )
+          ? recordValue.embed.media
+          : recordValue.embed
+
+        if (AppBskyEmbedImages.isMain(embedContent)) {
+          tags.push(`${this.tagPrefix}image`)
+        }
+
+        if (AppBskyEmbedVideo.isMain(embedContent)) {
+          tags.push(`${this.tagPrefix}video`)
+        }
+
+        if (AppBskyEmbedExternal.isMain(embedContent)) {
+          tags.push(`${this.tagPrefix}external`)
+        }
+      }
+      return tags
+    } catch (err) {
+      log.error({ subject: this.subject, err }, 'Error getting record embeds')
+      return []
+    }
+  }
+
+  /** Record value of the subject, or `undefined` when unavailable. */
+  async getRecordValue(): Promise<Record<string, unknown> | undefined> {
+    if (!this.subject.isRecord()) {
+      return undefined
+    }
+    const recordByUri = await this.moderationService.views.fetchRecords([
+      this.subject,
+    ])
+
+    const record = recordByUri.get(this.subject.uri)
+    return record?.value
+  }
+}
diff --git a/packages/ozone/src/tag-service/index.ts b/packages/ozone/src/tag-service/index.ts
new file mode 100644
index 00000000000..def3f6193f7
--- /dev/null
+++ b/packages/ozone/src/tag-service/index.ts
@@ -0,0 +1,67 @@
+import { ModerationService } from '../mod-service'
+import { ModSubject } from '../mod-service/subject'
+import { langLogger as log } from '../logger'
+import { ContentTagger } from './content-tagger'
+import { LanguageTagger } from './language-tagger'
+import { EmbedTagger } from './embed-tagger'
+import { ModerationSubjectStatusRow } from '../mod-service/types'
+
+/**
+ * Runs every registered content tagger for one subject and records the
+ * resulting tags as a single `modEventTag` event.
+ */
+export class TagService {
+  private taggers: ContentTagger[]
+
+  constructor(
+    private subject: ModSubject,
+    protected subjectStatus: ModerationSubjectStatusRow | null,
+    private taggerDid: string,
+    private moderationService: ModerationService,
+  ) {
+    this.taggers = [
+      new LanguageTagger(subject, subjectStatus, moderationService),
+      new EmbedTagger(subject, subjectStatus, moderationService),
+      // Add more taggers as needed
+    ]
+  }
+
+  /**
+   * Collects tags from all taggers and, if any were produced, logs them as an
+   * add-tags event created by `taggerDid`. Errors are caught and logged.
+   */
+  async evaluateForSubject() {
+    try {
+      const tags: string[] = []
+
+      await Promise.all(
+        this.taggers.map(async (tagger) => {
+          try {
+            const newTags = await tagger.getTags()
+            if (newTags.length) tags.push(...newTags)
+          } catch (e) {
+            // Don't let one tagger error stop the rest from running
+            log.error(
+              { subject: this.subject, err: e },
+              'Error applying tagger',
+            )
+          }
+        }),
+      )
+
+      if (tags.length > 0) {
+        await this.moderationService.logEvent({
+          event: {
+            $type: 'tools.ozone.moderation.defs#modEventTag',
+            add: tags,
+            remove: [],
+          },
+          subject: this.subject,
+          createdBy: this.taggerDid,
+        })
+      }
+    } catch (err) {
+      log.error({ subject: this.subject, err }, 'Error tagging subject')
+    }
+  }
+}
diff --git a/packages/ozone/src/mod-service/lang-data.ts b/packages/ozone/src/tag-service/language-data.ts
similarity index 100%
rename from packages/ozone/src/mod-service/lang-data.ts
rename to packages/ozone/src/tag-service/language-data.ts
diff --git a/packages/ozone/src/mod-service/lang.ts b/packages/ozone/src/tag-service/language-tagger.ts
similarity index 57%
rename from packages/ozone/src/mod-service/lang.ts
rename to packages/ozone/src/tag-service/language-tagger.ts
index 6c17f11ad1e..e11ea3ec93e 100644
--- a/packages/ozone/src/mod-service/lang.ts
+++ b/packages/ozone/src/tag-service/language-tagger.ts
@@ -5,46 +5,26 @@ import {
   AppBskyGraphList,
 } from '@atproto/api'
 
-import { ModerationService } from '.'
-import { ModSubject } from './subject'
-import { ModerationSubjectStatusRow } from './types'
 import { langLogger as log } from '../logger'
-import { code3ToCode2 } from './lang-data'
+import { code3ToCode2 } from './language-data'
+import { ContentTagger } from './content-tagger'
 
-export class ModerationLangService {
-  constructor(private moderationService: ModerationService) {}
+export class LanguageTagger extends ContentTagger {
+  tagPrefix = 'lang:'
 
-  async tagSubjectWithLang({
-    subject,
-    subjectStatus,
-    createdBy,
-  }: {
-    subject: ModSubject
-    createdBy: string
-    subjectStatus: ModerationSubjectStatusRow | null
-  }) {
-    if (
-      subjectStatus &&
-      !subjectStatus.tags?.find((tag) => tag.includes('lang:'))
-    ) {
-      try {
-        const recordLangs = await this.getRecordLang({
-          subject,
-        })
-        await this.moderationService.logEvent({
-          event: {
-            $type: 'tools.ozone.moderation.defs#modEventTag',
-            add: recordLangs
-              ? recordLangs.map((lang) => `lang:${lang}`)
-              : ['lang:und'],
-            remove: [],
-          },
-          subject,
-          createdBy,
-        })
-      } catch (err) {
-        log.error({ subject, err }, 'Error getting record langs')
-      }
+  isApplicable(): boolean {
+    return !!this.subjectStatus && !this.tagAlreadyExists()
+  }
+
+  async buildTags(): Promise<string[]> {
+    try {
+      const recordLangs = await this.getRecordLang()
+      return recordLangs
+        ? recordLangs.map((lang) => `${this.tagPrefix}${lang}`)
+        : [`${this.tagPrefix}und`]
+    } catch (err) {
+      log.error({ subject: this.subject, err }, 'Error getting record langs')
+      return []
     }
   }
 
@@ -65,20 +45,16 @@ export class ModerationLangService {
     return text?.trim()
   }
 
-  async getRecordLang({
-    subject,
-  }: {
-    subject: ModSubject
-  }): Promise<string[] | null> {
-    const isRecord = subject.isRecord()
+  async getRecordLang(): Promise<string[] | null> {
     const langs = new Set<string>()
 
     if (
-      subject.isRepo() ||
-      (isRecord && subject.uri.endsWith('/app.bsky.actor.profile/self'))
+      this.subject.isRepo() ||
+      (this.subject.isRecord() &&
+        this.subject.uri.endsWith('/app.bsky.actor.profile/self'))
     ) {
       const feed = await this.moderationService.views.fetchAuthorFeed(
-        subject.did,
+        this.subject.did,
       )
       feed.forEach((item) => {
         const itemLangs = item.post.record['langs'] as string[] | null
@@ -89,11 +65,11 @@ export class ModerationLangService {
       })
     }
 
-    if (isRecord) {
+    if (this.subject.isRecord()) {
       const recordByUri = await this.moderationService.views.fetchRecords([
-        subject,
+        this.subject,
       ])
-      const record = recordByUri.get(subject.uri)
+      const record = recordByUri.get(this.subject.uri)
       const recordLang = record?.value.langs as string[] | null
       const recordText = this.getTextFromRecord(record?.value)
       if (recordLang?.length) {
diff --git a/packages/ozone/tests/content-tagger.test.ts b/packages/ozone/tests/content-tagger.test.ts
new file mode 100644
index 00000000000..1bb21e2f05d
--- /dev/null
+++ b/packages/ozone/tests/content-tagger.test.ts
@@ -0,0 +1,141 @@
+import {
+  ModeratorClient,
+  SeedClient,
+  TestNetwork,
+  basicSeed,
+} from '@atproto/dev-env'
+import { REASONSPAM } from '../src/lexicon/types/com/atproto/moderation/defs'
+
+describe('moderation subject content tagging', () => {
+  let network: TestNetwork
+  let sc: SeedClient
+  let modClient: ModeratorClient
+
+  beforeAll(async () => {
+    network = await TestNetwork.create({
+      dbPostgresSchema: 'ozone_content_tagger_test',
+    })
+    sc = network.getSeedClient()
+    modClient = network.ozone.getModClient()
+    await basicSeed(sc)
+    await network.processAll()
+  })
+
+  afterAll(async () => {
+    await network.close()
+  })
+
+  const getStatus = async (subject: string) => {
+    const { subjectStatuses } = await modClient.queryStatuses({
+      subject,
+    })
+
+    return subjectStatuses[0]
+  }
+
+  describe('lang tagger', () => {
+    it('Adds language tag to post from text', async () => {
+      const createPostAndReport = async (text: string) => {
+        const post = await sc.post(sc.dids.carol, text)
+        await network.processAll()
+        const report = await sc.createReport({
+          reasonType: REASONSPAM,
+          subject: {
+            $type: 'com.atproto.repo.strongRef',
+            uri: post.ref.uriStr,
+            cid: post.ref.cidStr,
+          },
+          reportedBy: sc.dids.alice,
+        })
+
+        return { post, report }
+      }
+      const [japanesePost, greekPost] = await Promise.all([
+        createPostAndReport('Xで有名な人達+反AIや絵描きによくない'),
+        createPostAndReport(
+          'Λορεμ ιπσθμ δολορ σιτ αμετ, μει θτ vιδιτ νοστρθμ προπριαε',
+        ),
+      ])
+
+      const [japanesePostStatus, greekPostStatus] = await Promise.all([
+        getStatus(japanesePost.post.ref.uriStr),
+        getStatus(greekPost.post.ref.uriStr),
+      ])
+
+      expect(japanesePostStatus.tags).toContain('lang:ja')
+      expect(greekPostStatus.tags).toContain('lang:el')
+    })
+
+    it('Uses name/description text for language tag for list', async () => {
+      const createListAndReport = async (
+        name: string,
+        description?: string,
+      ) => {
+        const list = await sc.createList(sc.dids.carol, name, 'mod', {
+          description,
+        })
+        await network.processAll()
+        const report = await sc.createReport({
+          reasonType: REASONSPAM,
+          subject: {
+            $type: 'com.atproto.repo.strongRef',
+            uri: list.uriStr,
+            cid: list.cidStr,
+          },
+          reportedBy: sc.dids.alice,
+        })
+        return { list, report }
+      }
+
+      const [listWithDescription, listWithoutDescription] = await Promise.all([
+        createListAndReport(
+          'よくない',
+          'Xで有名な人達+反AIや絵描きによくない感情を持つ人達+絵描き詐称',
+        ),
+        createListAndReport('人達+反AIや絵描きによくない感情'),
+      ])
+
+      const [japaneseListStatus, chineseListStatus] = await Promise.all([
+        getStatus(listWithDescription.list.uriStr),
+        getStatus(listWithoutDescription.list.uriStr),
+      ])
+
+      expect(japaneseListStatus.tags).toContain('lang:ja')
+      expect(chineseListStatus.tags).toContain('lang:ja')
+    })
+  })
+
+  describe('embed tagger', () => {
+    it('Adds image tag to post with image', async () => {
+      const postWithImageMediaEmbed = sc.posts[sc.dids.carol][0]
+      const postWithImageEmbed = sc.replies[sc.dids.bob][0]
+      await Promise.all([
+        sc.createReport({
+          reasonType: REASONSPAM,
+          subject: {
+            $type: 'com.atproto.repo.strongRef',
+            uri: postWithImageMediaEmbed.ref.uriStr,
+            cid: postWithImageMediaEmbed.ref.cidStr,
+          },
+          reportedBy: sc.dids.alice,
+        }),
+        sc.createReport({
+          reasonType: REASONSPAM,
+          subject: {
+            $type: 'com.atproto.repo.strongRef',
+            uri: postWithImageEmbed.ref.uriStr,
+            cid: postWithImageEmbed.ref.cidStr,
+          },
+          reportedBy: sc.dids.alice,
+        }),
+      ])
+
+      const [mediaImagePostStatus, imagePostStatus] = await Promise.all([
+        getStatus(postWithImageMediaEmbed.ref.uriStr),
+        getStatus(postWithImageEmbed.ref.uriStr),
+      ])
+      expect(mediaImagePostStatus.tags).toContain('embed:image')
+      expect(imagePostStatus.tags).toContain('embed:image')
+    })
+  })
+})
diff --git a/packages/ozone/tests/lang.test.ts b/packages/ozone/tests/lang.test.ts
deleted file mode 100644
index ed4664b3dbd..00000000000
--- a/packages/ozone/tests/lang.test.ts
+++ /dev/null
@@ -1,106 +0,0 @@
-import {
-  ModeratorClient,
-  SeedClient,
-  TestNetwork,
-  basicSeed,
-} from '@atproto/dev-env'
-import { REASONSPAM } from '../src/lexicon/types/com/atproto/moderation/defs'
-
-describe('moderation status language tagging', () => {
-  let network: TestNetwork
-  let sc: SeedClient
-  let modClient: ModeratorClient
-
-  beforeAll(async () => {
-    network = await TestNetwork.create({
-      dbPostgresSchema: 'ozone_lang_test',
-      ozone: {
-        blobDivertUrl: `https://blob-report.com`,
-        blobDivertAdminPassword: 'test-auth-token',
-      },
-    })
-    sc = network.getSeedClient()
-    modClient = network.ozone.getModClient()
-    await basicSeed(sc)
-    await network.processAll()
-  })
-
-  afterAll(async () => {
-    await network.close()
-  })
-
-  const getStatus = async (subject: string) => {
-    const { subjectStatuses } = await modClient.queryStatuses({
-      subject,
-    })
-
-    return subjectStatuses[0]
-  }
-
-  it('Adds language tag to post from text', async () => {
-    const createPostAndReport = async (text: string) => {
-      const post = await sc.post(sc.dids.carol, text)
-      await network.processAll()
-      const report = await sc.createReport({
-        reasonType: REASONSPAM,
-        subject: {
-          $type: 'com.atproto.repo.strongRef',
-          uri: post.ref.uriStr,
-          cid: post.ref.cidStr,
-        },
-        reportedBy: sc.dids.alice,
-      })
-
-      return { post, report }
-    }
-    const [japanesePost, greekPost] = await Promise.all([
-      createPostAndReport('Xで有名な人達+反AIや絵描きによくない'),
-      createPostAndReport(
-        'Λορεμ ιπσθμ δολορ σιτ αμετ, μει θτ vιδιτ νοστρθμ προπριαε',
-      ),
-    ])
-
-    const [japanesePostStatus, greekPostStatus] = await Promise.all([
-      getStatus(japanesePost.post.ref.uriStr),
-      getStatus(greekPost.post.ref.uriStr),
-    ])
-
-    expect(japanesePostStatus.tags).toContain('lang:ja')
-    expect(greekPostStatus.tags).toContain('lang:el')
-  })
-
-  it('Uses name/description text for language tag for list', async () => {
-    const createListAndReport = async (name: string, description?: string) => {
-      const list = await sc.createList(sc.dids.carol, name, 'mod', {
-        description,
-      })
-      await network.processAll()
-      const report = await sc.createReport({
-        reasonType: REASONSPAM,
-        subject: {
-          $type: 'com.atproto.repo.strongRef',
-          uri: list.uriStr,
-          cid: list.cidStr,
-        },
-        reportedBy: sc.dids.alice,
-      })
-      return { list, report }
-    }
-
-    const [listWithDescription, listWithoutDescription] = await Promise.all([
-      createListAndReport(
-        'よくない',
-        'Xで有名な人達+反AIや絵描きによくない感情を持つ人達+絵描き詐称',
-      ),
-      createListAndReport('人達+反AIや絵描きによくない感情'),
-    ])
-
-    const [japaneseListStatus, chineseListStatus] = await Promise.all([
-      getStatus(listWithDescription.list.uriStr),
-      getStatus(listWithoutDescription.list.uriStr),
-    ])
-
-    expect(japaneseListStatus.tags).toContain('lang:ja')
-    expect(chineseListStatus.tags).toContain('lang:ja')
-  })
-})