From abc6f82da38abef2b1bbe8d9e41a0534a5418c9e Mon Sep 17 00:00:00 2001 From: Eric Bailey Date: Tue, 19 Mar 2024 15:59:20 -0500 Subject: [PATCH] Handle apostrophes and other punctuation when muting words (#2344) Support muted words with apostrophes/punct --- .changeset/big-houses-talk.md | 5 ++ packages/api/src/moderation/mutewords.ts | 38 ++------- packages/api/tests/bsky-agent.test.ts | 7 ++ .../api/tests/moderation-mutewords.test.ts | 82 ++++++++++++++++++- 4 files changed, 100 insertions(+), 32 deletions(-) create mode 100644 .changeset/big-houses-talk.md diff --git a/.changeset/big-houses-talk.md b/.changeset/big-houses-talk.md new file mode 100644 index 00000000000..8508ce90a21 --- /dev/null +++ b/.changeset/big-houses-talk.md @@ -0,0 +1,5 @@ +--- +'@atproto/api': patch +--- + +Support muting words that contain apostrophes and other punctuation diff --git a/packages/api/src/moderation/mutewords.ts b/packages/api/src/moderation/mutewords.ts index ef776b09460..8988f3dc2b6 100644 --- a/packages/api/src/moderation/mutewords.ts +++ b/packages/api/src/moderation/mutewords.ts @@ -82,38 +82,16 @@ export function hasMutedWord({ if (mutedWord === wordTrimmedPunctuation) return true if (mutedWord.length > wordTrimmedPunctuation.length) continue - // handle hyphenated, slash separated words, etc - if (REGEX.SEPARATORS.test(wordTrimmedPunctuation)) { - // check against full normalized phrase - const wordNormalizedSeparators = wordTrimmedPunctuation.replace( - REGEX.SEPARATORS, - ' ', - ) - const mutedWordNormalizedSeparators = mutedWord.replace( - REGEX.SEPARATORS, - ' ', - ) - // hyphenated (or other sep) to spaced words - if (wordNormalizedSeparators === mutedWordNormalizedSeparators) - return true + if (/\p{P}+/u.test(wordTrimmedPunctuation)) { + const spacedWord = wordTrimmedPunctuation.replace(/\p{P}+/gu, ' ') + if (spacedWord === mutedWord) return true - /* Disabled for now e.g. `super-cool` to `supercool` - const wordNormalizedCompressed = wordNormalizedSeparators.replace( - REGEX.WORD_BOUNDARY, - '', - ) - const mutedWordNormalizedCompressed = - mutedWordNormalizedSeparators.replace(/\s+?/g, '') - // hyphenated (or other sep) to non-hyphenated contiguous word - if (mutedWordNormalizedCompressed === wordNormalizedCompressed) - return true - */ + const contiguousWord = spacedWord.replace(/\s/gu, '') + if (contiguousWord === mutedWord) return true - // then individual parts of separated phrases/words - const wordParts = wordTrimmedPunctuation.split(REGEX.SEPARATORS) - for (const wp of wordParts) { - // still retain internal punctuation - if (wp === mutedWord) return true + const wordParts = wordTrimmedPunctuation.split(/\p{P}+/u) + for (const wordPart of wordParts) { + if (wordPart === mutedWord) return true } } } diff --git a/packages/api/tests/bsky-agent.test.ts b/packages/api/tests/bsky-agent.test.ts index 2946dafa3b7..e0713597a3d 100644 --- a/packages/api/tests/bsky-agent.test.ts +++ b/packages/api/tests/bsky-agent.test.ts @@ -1582,6 +1582,13 @@ describe('agent', () => { expect(end.mutedWords.find((m) => m.value === '##️⃣')).toBeFalsy() }) + it(`apostrophe: Bluesky's`, async () => { + await agent.upsertMutedWords([{ value: `Bluesky's`, targets: [] }]) + const { mutedWords } = (await agent.getPreferences()).moderationPrefs + + expect(mutedWords.find((m) => m.value === `Bluesky's`)).toBeTruthy() + }) + describe(`invalid characters`, () => { it('zero width space', async () => { const prev = (await agent.getPreferences()).moderationPrefs diff --git a/packages/api/tests/moderation-mutewords.test.ts b/packages/api/tests/moderation-mutewords.test.ts index 18a2f556887..5416152aecb 100644 --- a/packages/api/tests/moderation-mutewords.test.ts +++ b/packages/api/tests/moderation-mutewords.test.ts @@ -89,6 +89,22 @@ describe(`hasMutedWord`, () => { expect(match).toBe(true) }) + it(`match: single char with length > 1 ☠︎`, () => { + const rt = new RichText({ + text: `Idk why ☠︎ but maybe`, + }) + rt.detectFacetsWithoutResolution() + + const match = hasMutedWord({ + mutedWords: [{ value: '☠︎', targets: ['content'] }], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) + + expect(match).toBe(true) + }) + it(`no match: long muted word, short post`, () => { const rt = new RichText({ text: `hey`, @@ -248,6 +264,57 @@ describe(`hasMutedWord`, () => { }) }) + describe(`apostrophes: Bluesky's`, () => { + const rt = new RichText({ + text: `Yay, Bluesky's mutewords work`, + }) + rt.detectFacetsWithoutResolution() + + it(`match: Bluesky's`, () => { + const match = hasMutedWord({ + mutedWords: [{ value: `Bluesky's`, targets: ['content'] }], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) + + expect(match).toBe(true) + }) + + it(`match: Bluesky`, () => { + const match = hasMutedWord({ + mutedWords: [{ value: 'Bluesky', targets: ['content'] }], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) + + expect(match).toBe(true) + }) + + it(`match: bluesky`, () => { + const match = hasMutedWord({ + mutedWords: [{ value: 'bluesky', targets: ['content'] }], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) + + expect(match).toBe(true) + }) + + it(`match: blueskys`, () => { + const match = hasMutedWord({ + mutedWords: [{ value: 'blueskys', targets: ['content'] }], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) + + expect(match).toBe(true) + }) + }) + describe(`Why so S@assy?`, () => { const rt = new RichText({ text: `Why so S@assy?`, @@ -398,6 +465,17 @@ describe(`hasMutedWord`, () => { expect(match).toBe(true) }) + it(`match: bad`, () => { + const match = hasMutedWord({ + mutedWords: [{ value: `bad`, targets: ['content'] }], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) + + expect(match).toBe(true) + }) + it(`match: super bad`, () => { const match = hasMutedWord({ mutedWords: [{ value: `super bad`, targets: ['content'] }], @@ -417,7 +495,7 @@ describe(`hasMutedWord`, () => { outlineTags: [], }) - expect(match).toBe(false) + expect(match).toBe(true) }) }) @@ -474,7 +552,7 @@ describe(`hasMutedWord`, () => { outlineTags: [], }) - expect(match).toBe(false) + expect(match).toBe(true) }) })