From f5f8428474ce9fd4f3c8981e08db63e66d3b787e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nico=20G=C3=B6rlich?= <73133626+koilebeit@users.noreply.github.com> Date: Thu, 17 Oct 2024 10:55:15 +0200 Subject: [PATCH] fix parsing bug (#192) --- src/lib/downloadPostsPagesAssets.mjs | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/lib/downloadPostsPagesAssets.mjs b/src/lib/downloadPostsPagesAssets.mjs index 1bb277f9..ce83d098 100644 --- a/src/lib/downloadPostsPagesAssets.mjs +++ b/src/lib/downloadPostsPagesAssets.mjs @@ -115,6 +115,15 @@ const getAssetUrl = (elem, $) => { return url; }; +/** + * Replaces every backslash-escaped asterisk with a plain asterisk, wherever it occurs. + * @param {string} text + * @returns {string} + */ +function cleanEscapedAsterisks(text) { + return text.replace(/\\\*/gm, '*'); +} + /** * Process and sanitize HTML content. * @param {string} html - HTML content to process. @@ -237,15 +246,11 @@ async function fetchAndProcessType(type) { if (type === 'posts' && categoryIds.some((id) => excludedCategories.includes(id))) { continue; } - const title = turndownService.turndown(item.title.rendered); + const title = cleanEscapedAsterisks(turndownService.turndown(item.title.rendered)); const content = await processContent(item.content.rendered, outputDir, item.link, item.slug); const tagsToRemove = ['span', 'a']; - const excerpt = await processContent( - item.excerpt.rendered, - outputDir, - item.link, - item.slug, - tagsToRemove + const excerpt = cleanEscapedAsterisks( + await processContent(item.excerpt.rendered, outputDir, item.link, item.slug, tagsToRemove) ); const featuredImageUrl = item.featured_media ? await fetchFeaturedImage(item.featured_media)