diff --git a/package.json b/package.json index 571eb63..37d547f 100644 --- a/package.json +++ b/package.json @@ -21,7 +21,7 @@ }, "dependencies": { "cheerio": "1.0.0-rc.12", - "marked": "^12.0.2", + "marked": "^14.1.3", "memize": "^2.1.0", "sanitize-html": "^2.13.0", "smartypants": "^0.2.2" diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 1f4a6b7..473d433 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -12,8 +12,8 @@ importers: specifier: 1.0.0-rc.12 version: 1.0.0-rc.12 marked: - specifier: ^12.0.2 - version: 12.0.2 + specifier: ^14.1.3 + version: 14.1.3 memize: specifier: ^2.1.0 version: 2.1.0 @@ -767,8 +767,8 @@ packages: magic-string@0.30.10: resolution: {integrity: sha512-iIRwTIf0QKV3UAnYK4PU8uiEc4SRh5jX0mwpIwETPpHdhVM4f53RSwS/vXvN1JhGX+Cs7B8qIq3d6AH49O5fAQ==} - marked@12.0.2: - resolution: {integrity: sha512-qXUm7e/YKFoqFPYPa3Ukg9xlI5cyAtGmyEIzMfW//m6kXwCy2Ps9DYf5ioijFKQ8qyuscrHoY04iJGctu2Kg0Q==} + marked@14.1.3: + resolution: {integrity: sha512-ZibJqTULGlt9g5k4VMARAktMAjXoVnnr+Y3aCqW1oDftcV4BA3UmrBifzXoZyenHRk75csiPu9iwsTj4VNBT0g==} engines: {node: '>= 18'} hasBin: true @@ -1841,7 +1841,7 @@ snapshots: dependencies: '@jridgewell/sourcemap-codec': 1.4.15 - marked@12.0.2: {} + marked@14.1.3: {} memize@2.1.0: {} diff --git a/src/lib/applyIdsToElements.spec.ts b/src/lib/applyIdsToElements.spec.ts deleted file mode 100644 index 2035cbe..0000000 --- a/src/lib/applyIdsToElements.spec.ts +++ /dev/null @@ -1,17 +0,0 @@ -import applyIdsToElements from "./applyIdsToElements.js"; -import { describe, it, expect } from "vitest"; - -describe("applyIdsToElements", () => { - it("works", () => { - const r = applyIdsToElements("

{#foo}bar

"); - expect(r).toBe('

bar

'); - }); - - it("preserves HTML entities", () => { - const r = applyIdsToElements( - '{#example} <a id="foo1" href="#foo">[N]</a>', - ); - - expect(r).toContain('<a id="foo1" href="#foo">[N]</a>'); - }); -}); diff --git a/src/lib/applyIdsToElements.ts b/src/lib/applyIdsToElements.ts deleted file mode 100644 index 271f638..0000000 --- a/src/lib/applyIdsToElements.ts +++ /dev/null @@ -1,31 +0,0 @@ -import getDom from "./getDom.js"; - -function apply(html: string) { - const $ = getDom(html); - const matches = $("*").not("script").not("noscript").not("style"); - - matches.each((_, el) => { - if ($(el).children().length) return; - if (!$(el).text().length) return; - - const idText = $(el) - .text() - .match(/\{#(.*)\}/)?.[1]; - - if (!idText) return; - - const newTextContent = $(el) - .text() - .replace(/\{#([^}]*?)\}/g, "") - .trim(); - - $(el).attr("id", idText); - $(el).text(newTextContent); - }); - - return $.html(); -} - -export default function applyIdsToElements(html: string): string { - return /\{#.*?\}/.test(html) ? apply(html) : html; -} diff --git a/src/lib/marked/extentions/ids.spec.ts b/src/lib/marked/extentions/ids.spec.ts new file mode 100644 index 0000000..1131c06 --- /dev/null +++ b/src/lib/marked/extentions/ids.spec.ts @@ -0,0 +1,63 @@ +import { describe, it, expect } from "vitest"; +import { marked } from "marked"; +import ids from "./ids.js"; + +marked.use(ids); + +describe("marked ids extension", () => { + it("works", () => { + const r = marked.parse("{#foo}bar"); + expect(r).toContain('

bar

'); + }); + + it("works in combination with italics", () => { + const r = marked.parse("(words *foo*) {#bar}"); + expect(r).toContain('

(words foo)

'); + }); + + it("leaves comments intact", () => { + const r = marked.parse("\nbar"); + + expect(r).toContain(""); + }); + + it("leaves multiline comments intact", () => { + const r = marked.parse("\nbaz"); + + expect(r).toContain(""); + }); + + it("does not aggressively apply ids to parents", () => { + const r = marked.parse("> {#foo}bar"); + + expect(r).toBe('
\n

bar

\n
\n'); + }); + + it("does not decode entities", () => { + const r = marked.parse( + "Anyway, in terms of signals that we’re alive, we have" + ); + + expect(r).toContain( + "

Anyway, in terms of signals that we’re alive, we have

" + ); + }); + + it("does not concat words surrounding the id", () => { + const r = marked.parse("foo {#bar} baz"); + + expect(r).toContain('

foo baz

'); + }); + + it("handles code blocks", () => { + const r = marked.parse("```\n{#foo}\nbar\n```"); + + expect(r).toContain('
');
+  });
+
+  it("handles headings", () => {
+    const r = marked.parse("# heading {#foo}");
+
+    expect(r).toContain('

heading

'); + }); +}); diff --git a/src/lib/marked/extentions/ids.ts b/src/lib/marked/extentions/ids.ts new file mode 100644 index 0000000..4522648 --- /dev/null +++ b/src/lib/marked/extentions/ids.ts @@ -0,0 +1,43 @@ +import { type Tokens, type MarkedExtension, Renderer } from "marked"; + +function parseContent(token: Tokens.Generic, ctx: Renderer): string { + const c = token.tokens ? ctx.parser.parseInline(token.tokens) : token.text; + const r = new RegExp(`\\s?\\{#${token.id}\\}`, "g"); + return c.replace(r, ""); +} + +const ids: MarkedExtension = { + walkTokens(token: Tokens.Generic): void { + if (token.type === "text") return; + + const match = token.tokens?.find( + (t) => t.type === "text" && /\{#(.*?)\}/.test(t.raw) + ); + + if (!match && token.tokens?.length) return; + + const idMatch = token.raw.match(/\{#(.*?)\}/); + + if (!idMatch) return; + + token.id = idMatch[1]; + }, + renderer: { + paragraph(t: Tokens.Generic) { + if (!t.id) return false; + return `

${parseContent(t, this)}

\n`; + }, + code(t: Tokens.Generic) { + if (!t.id) return false; + return `
${parseContent(t, this)}
`; + }, + heading(t: Tokens.Generic) { + if (!t.id) return false; + return `${parseContent(t, this)}\n`; + }, + }, +}; + +export default ids; diff --git a/src/lib/marked/extentions/smartypants.spec.ts b/src/lib/marked/extentions/smartypants.spec.ts new file mode 100644 index 0000000..219e977 --- /dev/null +++ b/src/lib/marked/extentions/smartypants.spec.ts @@ -0,0 +1,13 @@ +import { describe, it, expect } from "vitest"; +import { marked } from "marked"; +import smartypants from "./smartypants.js"; + +marked.use(smartypants); + +describe("marked ids extension", () => { + it("handles arrows", () => { + const r = marked.parse("-> `a`"); + + expect(r).toContain("

-> a

"); + }); +}); diff --git a/src/lib/marked/extentions/smartypants.ts b/src/lib/marked/extentions/smartypants.ts new file mode 100644 index 0000000..2b5450a --- /dev/null +++ b/src/lib/marked/extentions/smartypants.ts @@ -0,0 +1,26 @@ +import type { MarkedExtension, Tokens } from "marked"; +import { smartypants } from "smartypants"; + +const extension: MarkedExtension = { + tokenizer: { + inlineText(src: string): false | Tokens.Text | undefined { + // don't escape inlineText + const cap = this.rules.inline.text.exec(src); + const raw = cap?.[0] ?? ""; + const text = raw.replace("<", "<").replace(">", ">"); + + return { + type: "text", + raw, + text: text, + }; + }, + }, + hooks: { + postprocess(html: string) { + return smartypants(html, "1"); + }, + }, +}; + +export default extension; diff --git a/src/lib/marked/extentions/urls.ts b/src/lib/marked/extentions/urls.ts new file mode 100644 index 0000000..333a345 --- /dev/null +++ b/src/lib/marked/extentions/urls.ts @@ -0,0 +1,38 @@ +import type { Tokens } from "marked"; + +export default { + tokenizer: { + url(src: string): Tokens.Link | false { + const urlRegex = /^https?:\/\/[^\s\]]+/; + const match = src.match(urlRegex); + + if (match) { + return { + type: "link", + raw: match[0], + href: match[0], + text: match[0], + tokens: [ + { + type: "text", + raw: match[0], + text: match[0], + }, + ], + }; + } + + return false; + }, + }, + renderer: { + link({ href, text }: Tokens.Link) { + const emailRegex = /^mailto:\S+@\S+\.\S+$/; + if (emailRegex.test(href)) { + return text; + } + + return false; + }, + }, +}; diff --git a/src/lib/marked/parse.spec.ts b/src/lib/marked/parse.spec.ts new file mode 100644 index 0000000..d2121b9 --- /dev/null +++ b/src/lib/marked/parse.spec.ts @@ -0,0 +1,18 @@ +import { describe, it, expect } from "vitest"; +import parse from "./parse.js"; + +describe("marked ids extension", () => { + it("preserves HTML entities", () => { + const r = parse( + '```\n{#example} <a id="foo1" href="#foo">[N]</a>\n```' + ); + + expect(r).toContain('<a id="foo1" href="#foo">[N]</a>'); + }); + + it("parses ~~ as strikethrough", () => { + const r = parse("~~foo~~"); + + expect(r).toContain("foo"); + }); +}); diff --git a/src/lib/marked/parse.ts b/src/lib/marked/parse.ts new file mode 100644 index 0000000..588e6af --- /dev/null +++ b/src/lib/marked/parse.ts @@ -0,0 +1,13 @@ +import { marked } from "marked"; +import smartypants from "./extentions/smartypants.js"; +import urls from "./extentions/urls.js"; +import ids from "./extentions/ids.js"; + +marked.use(urls, smartypants, ids); + +export default function parse(markdown: string): string { + // WORKAROUND: `marked.parse` shouldn't return a promise if + // the `async` option has not been set to `true` + // https://marked.js.org/using_pro#async + return marked.parse(markdown) as string; +} diff --git a/src/lib/parseMarkdown.spec.ts b/src/lib/parseMarkdown.spec.ts index c5ff985..fc043f0 100644 --- a/src/lib/parseMarkdown.spec.ts +++ b/src/lib/parseMarkdown.spec.ts @@ -7,7 +7,7 @@ describe("body", () => { const r = parseMarkdown( ether({ content: "foo -- bar", - }), + }) ); // https://www.codetable.net/name/em-dash @@ -28,8 +28,8 @@ describe("body", () => { null, ether({ content: "", - }), - ), + }) + ) ).toThrow(); }); @@ -39,8 +39,8 @@ describe("body", () => { null, ether({ content: "", - }), - ), + }) + ) ).toThrow(); }); @@ -49,12 +49,12 @@ describe("body", () => { parseMarkdown( ether({ content: "", - }), - ), + }) + ) ).toEqual( expect.stringContaining( - "<style>body {font-size: 2em;}</style>", - ), + "<style>body {font-size: 2em;}</style>" + ) ); }); @@ -64,8 +64,8 @@ describe("body", () => { null, ether({ content: ``, - }), - ), + }) + ) ).toThrow("Iframe src not allowed"); }); @@ -73,11 +73,11 @@ describe("body", () => { const r = parseMarkdown( ether({ content: ``, - }), + }) ); expect(r).toContain( - ``, + `` ); }); @@ -85,11 +85,11 @@ describe("body", () => { const r = parseMarkdown( ether({ content: `the_alt`, - }), + }) ); expect(r).toContain( - `the_alt`, + `the_alt` ); }); @@ -97,11 +97,11 @@ describe("body", () => { const r = parseMarkdown( ether({ content: `the_alt`, - }), + }) ); expect(r).toContain( - `the_alt`, + `the_alt` ); }); @@ -109,7 +109,7 @@ describe("body", () => { const r = parseMarkdown( ether({ content: `[2]`, - }), + }) ); expect(r).toContain('[2]'); @@ -119,7 +119,7 @@ describe("body", () => { const r = parseMarkdown( ether({ content: "foo@example.com", - }), + }) ); expect(r).toContain("

foo@example.com

"); }); @@ -128,11 +128,11 @@ describe("body", () => { const r = parseMarkdown( ether({ content: "[moved to http://example.com/foo]", - }), + }) ); expect(r).toContain( - '[moved to http://example.com/foo]', + '[moved to http://example.com/foo]' ); }); @@ -140,7 +140,7 @@ describe("body", () => { const r = parseMarkdown( ether({ content: "

\n\nfoo

", - }), + }) ); expect(r).toEqual("

foo

\n"); @@ -150,7 +150,7 @@ describe("body", () => { const r = parseMarkdown( ether({ content: '', - }), + }) ); expect(r).toContain(''); @@ -160,7 +160,7 @@ describe("body", () => { expect( parseMarkdown(["| foo |", "| --- |", "| ", @@ -173,7 +173,7 @@ describe("body", () => { "<PPR", "", "\n", - ].join("\n"), + ].join("\n") ); }); @@ -181,10 +181,20 @@ describe("body", () => { expect( parseMarkdown( 'For a lot of engineering problems, "almost right" isn\'t good enough.', - { strict: false }, - ), + { strict: false } + ) ).toBe( - "

For a lot of engineering problems, “almost right” isn’t good enough.

\n", + "

For a lot of engineering problems, “almost right” isn’t good enough.

\n" ); }); + + it("handles inline code blocks", () => { + const r = parseMarkdown( + ether({ + content: "foo `bar` baz", + }) + ); + + expect(r).toContain("

foo bar baz

"); + }); }); diff --git a/src/lib/parseMarkdown.ts b/src/lib/parseMarkdown.ts index 74ee54b..d0959cf 100644 --- a/src/lib/parseMarkdown.ts +++ b/src/lib/parseMarkdown.ts @@ -4,71 +4,14 @@ import linkFootnotes from "./linkFootnotes.js"; import expandRefs from "./expandRefs.js"; import spaceEMDashes from "./spaceEMDashes.js"; import flattenParagraphs from "./flattenParagraphs.js"; -import { marked, type Tokens } from "marked"; -import { smartypants } from "smartypants"; -import applyIdsToElements from "./applyIdsToElements.js"; import sanitizeHtml from "sanitize-html"; import { SANITIZE_HTML_OPTIONS } from "./parseMarkdown.options.js"; - -const tokenizer = { - url(src: string): Tokens.Link | false { - const urlRegex = /^https?:\/\/[^\s\]]+/; - const match = src.match(urlRegex); - - if (match) { - return { - type: "link", - raw: match[0], - href: match[0], - text: match[0], - tokens: [ - { - type: "text", - raw: match[0], - text: match[0], - }, - ], - }; - } - - return false; - }, -}; - -marked.use({ tokenizer }); - -marked.use({ - tokenizer: { - inlineText(src) { - // don't escape inlineText, unless it's < and > - const cap = this.rules.inline.text.exec(src); - const text = cap[0].replace("<", "<").replace(">", ">"); - - return { - type: "text", - raw: text, - text: text, - }; - }, - }, - hooks: { - postprocess(html) { - return smartypants(html, "1"); - }, - }, -}); - -marked.use({ - hooks: { - postprocess: applyIdsToElements, - // WORKAROUND: @types/marked incorrectly requires `preprocess` to be defined. - // eslint-disable-next-line @typescript-eslint/no-explicit-any - } as any, -}); +import parse from "./marked/parse.js"; +import trimComments from "./stripComments.js"; export function parseMarkdown( markdown: string, - { strict = true }: { strict?: boolean } = {}, + { strict = true }: { strict?: boolean } = {} ): string { if (strict) { if (!markdown.includes("BEGIN_MAGIC")) { @@ -81,35 +24,19 @@ export function parseMarkdown( if (/(? { + it("removes comments on their own line in cluding newline", () => { + const r = trimComments(` +hello + +world`); + + expect(r).toEqual("hello\nworld"); + }); + + it("removes comments embedded within lines of text", () => { + const r = trimComments(`hello `); + + expect(r).toEqual("hello"); + }); + + it("removes multi-line comments", () => { + const r = trimComments(`hello +world`); + + expect(r).toEqual("hello\nworld"); + }); + + it("removes multi-line comments with newlines", () => { + const r = trimComments(` +foo`); + + expect(r).toEqual("foo"); + }); +}); diff --git a/src/lib/stripComments.ts b/src/lib/stripComments.ts new file mode 100644 index 0000000..d703328 --- /dev/null +++ b/src/lib/stripComments.ts @@ -0,0 +1,9 @@ +export default function trimComments(markdown: string): string { + // Remove comments that are the only thing on a line, including the newline character + markdown = markdown.replace(/^\s*\s*[\r\n]/gm, ""); + + // Remove comments embedded within lines of text + markdown = markdown.replace(/ ?/g, ""); + + return markdown; +} diff --git a/src/types/smartypants.d.ts b/src/types/smartypants.d.ts new file mode 100644 index 0000000..fab0ec8 --- /dev/null +++ b/src/types/smartypants.d.ts @@ -0,0 +1,3 @@ +module "smartypants" { + export function smartypants(text: string, options: string): string; +}