Skip to content

Commit

Permalink
Add hashtag detection to richtext (#1651)
Browse files Browse the repository at this point in the history
* add tag detection to richtext

* fix duplicate tag index error

* add utils

* fix leading space index failures, test for them

* add changeset
  • Loading branch information
estrattonbailey authored Sep 25, 2023
1 parent 11bf4d3 commit 2ce8a11
Show file tree
Hide file tree
Showing 4 changed files with 149 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .changeset/moody-wombats-live.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@atproto/api': patch
---

Adds support for hashtags in the `RichText.detectFacets` method.
27 changes: 27 additions & 0 deletions packages/api/src/rich-text/detection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,33 @@ export function detectFacets(text: UnicodeString): Facet[] | undefined {
})
}
}
{
  // Hashtag detection. A tag is a '#' at the very start of the text or
  // directly after one whitespace character, followed by a character that is
  // neither a digit nor whitespace, then any run of non-whitespace characters.
  const tagRe = /(?:^|\s)(#[^\d\s]\S*)/g
  // Hoisted so they are not rebuilt on every iteration.
  const leadingSpaceRe = /^\s/
  const trailingPunctuationRe = /\p{P}+$/u
  // Longest accepted tag, in UTF-16 code units, counting the leading '#'.
  const maxTagLength = 66

  while ((match = tagRe.exec(text.utf16))) {
    const matched = match[0]
    // The full match includes the whitespace character that preceded the '#',
    // if there was one; the facet must start at the '#', not at that space.
    const leadingOffset = leadingSpaceRe.test(matched) ? 1 : 0

    // Strip trailing punctuation, e.g. "#tag," -> "#tag".
    const tag = matched.trim().replace(trailingPunctuationRe, '')

    // '#' is itself punctuation, so a candidate made only of punctuation
    // (e.g. "#..." or "#_") strips down to nothing. Never emit an empty facet.
    if (tag.length === 0) continue

    if (tag.length > maxTagLength) continue

    const index = match.index + leadingOffset

    facets.push({
      index: {
        byteStart: text.utf16IndexToUtf8Index(index),
        // End offset is exclusive: it points just past the tag's last character.
        byteEnd: text.utf16IndexToUtf8Index(index + tag.length),
      },
      features: [
        {
          $type: 'app.bsky.richtext.facet#tag',
          tag,
        },
      ],
    })
  }
}
return facets.length > 0 ? facets : undefined
}

Expand Down
13 changes: 13 additions & 0 deletions packages/api/src/rich-text/rich-text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ import { detectFacets } from './detection'
// Short aliases for types from the AppBskyRichtextFacet and AppBskyFeedPost
// namespaces.
export type Facet = AppBskyRichtextFacet.Main
export type FacetLink = AppBskyRichtextFacet.Link
export type FacetMention = AppBskyRichtextFacet.Mention
// Tag feature of a facet; this is what RichTextSegment's `tag` getter returns.
export type FacetTag = AppBskyRichtextFacet.Tag
export type Entity = AppBskyFeedPost.Entity

export interface RichTextProps {
Expand Down Expand Up @@ -141,6 +142,18 @@ export class RichTextSegment {
/** Reports whether this segment's `mention` value is truthy. */
isMention() {
  const mention = this.mention
  return Boolean(mention)
}

/**
 * The first feature on this segment's facet that passes
 * `AppBskyRichtextFacet.isTag`, or `undefined` when the segment has no facet
 * or the facet has no such feature.
 */
get tag(): FacetTag | undefined {
  const candidate = this.facet?.features.find(AppBskyRichtextFacet.isTag)
  // Re-checking with the guard narrows the `find` result to FacetTag.
  return AppBskyRichtextFacet.isTag(candidate) ? candidate : undefined
}

/** Reports whether this segment carries a tag feature (see the `tag` getter). */
isTag() {
  const found = this.tag
  return Boolean(found)
}
}

export class RichText {
Expand Down
104 changes: 104 additions & 0 deletions packages/api/tests/rich-text-detection.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { AtpAgent, RichText, RichTextSegment } from '../src'
import { isTag } from '../src/client/types/app/bsky/richtext/facet'

describe('detectFacets', () => {
const agent = new AtpAgent({ service: 'http://localhost' })
Expand Down Expand Up @@ -208,6 +209,109 @@ describe('detectFacets', () => {
expect(Array.from(rt.segments(), segmentToOutput)).toEqual(outputs[i])
}
})

it('correctly detects tags inline', async () => {
  // Each case is: input text, expected tag values (including the leading '#'),
  // and the expected facet byte ranges, in order of appearance.
  const inputs: [
    string,
    string[],
    { byteStart: number; byteEnd: number }[],
  ][] = [
    ['#a', ['#a'], [{ byteStart: 0, byteEnd: 2 }]],
    [
      '#a #b',
      ['#a', '#b'],
      [
        { byteStart: 0, byteEnd: 2 },
        { byteStart: 3, byteEnd: 5 },
      ],
    ],
    ['#1', [], []],
    ['#tag', ['#tag'], [{ byteStart: 0, byteEnd: 4 }]],
    ['body #tag', ['#tag'], [{ byteStart: 5, byteEnd: 9 }]],
    ['#tag body', ['#tag'], [{ byteStart: 0, byteEnd: 4 }]],
    ['body #tag body', ['#tag'], [{ byteStart: 5, byteEnd: 9 }]],
    ['body #1', [], []],
    ['body #a1', ['#a1'], [{ byteStart: 5, byteEnd: 8 }]],
    ['#', [], []],
    ['text #', [], []],
    ['text # text', [], []],
    // Longest tag that is still detected (byte range 5..71).
    [
      'body #thisisa64characterstring_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
      ['#thisisa64characterstring_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'],
      [{ byteStart: 5, byteEnd: 71 }],
    ],
    // One character longer: rejected entirely.
    [
      'body #thisisa65characterstring_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab',
      [],
      [],
    ],
    [
      'its a #double#rainbow',
      ['#double#rainbow'],
      [{ byteStart: 6, byteEnd: 21 }],
    ],
    ['##hashash', ['##hashash'], [{ byteStart: 0, byteEnd: 9 }]],
    ['some #n0n3s@n5e!', ['#n0n3s@n5e'], [{ byteStart: 5, byteEnd: 15 }]],
    [
      'works #with,punctuation',
      ['#with,punctuation'],
      [{ byteStart: 6, byteEnd: 23 }],
    ],
    [
      'strips trailing #punctuation, #like. #this!',
      ['#punctuation', '#like', '#this'],
      [
        { byteStart: 16, byteEnd: 28 },
        { byteStart: 30, byteEnd: 35 },
        { byteStart: 37, byteEnd: 42 },
      ],
    ],
    [
      'strips #multi_trailing___...',
      ['#multi_trailing'],
      [{ byteStart: 7, byteEnd: 22 }],
    ],
    // Multi-byte characters: indices are UTF-8 byte offsets.
    [
      'works with #🦋 emoji, and #butter🦋fly',
      ['#🦋', '#butter🦋fly'],
      [
        { byteStart: 11, byteEnd: 16 },
        { byteStart: 28, byteEnd: 42 },
      ],
    ],
    // Repeated tags must each get their own, distinct index.
    [
      '#same #same #but #diff',
      ['#same', '#same', '#but', '#diff'],
      [
        { byteStart: 0, byteEnd: 5 },
        { byteStart: 6, byteEnd: 11 },
        { byteStart: 12, byteEnd: 16 },
        { byteStart: 17, byteEnd: 22 },
      ],
    ],
  ]

  for (const [input, tags, indices] of inputs) {
    const rt = new RichText({ text: input })
    await rt.detectFacets(agent)

    // Only mutated via push, never reassigned, so `const` is sufficient.
    const detectedTags: string[] = []
    const detectedIndices: { byteStart: number; byteEnd: number }[] = []

    for (const { facet } of rt.segments()) {
      if (!facet) continue
      for (const feature of facet.features) {
        if (isTag(feature)) {
          detectedTags.push(feature.tag)
        }
      }
      detectedIndices.push(facet.index)
    }

    expect(detectedTags).toEqual(tags)
    expect(detectedIndices).toEqual(indices)
  }
})
})

function segmentToOutput(segment: RichTextSegment): string[] {
Expand Down

0 comments on commit 2ce8a11

Please sign in to comment.