Fix HTML to markdown parsing

Expensify · Nov 22, 2024 · 17c5562 · 17c5562
1 parent 4665020
commit 17c5562
Showing 1 changed file with 39 additions and 2 deletions.
diff --git a/lib/ExpensiMark.ts b/lib/ExpensiMark.ts
@@ -1097,8 +1097,8 @@ export default class ExpensiMark {
                 return;
             }
 
-            // Insert '\n' unless it ends with '\n' or '>' or it's the last element, or if it's a header ('# ') with a space.
-            if (text.match(/[\n|>][>]?[\s]?$/) || index === splitText.length - 1 || text === '# ') {
+            // Insert '\n' unless it ends with '\n' or it's the last element, or if it's a header ('# ') with a space.
+            if (text.match(/[\n][\s]?$/) || index === splitText.length - 1 || text === '# ') {
                 joinedText += text;
             } else {
                 joinedText += `${text}\n`;
@@ -1110,6 +1110,41 @@ export default class ExpensiMark {
         return joinedText;
     }
 
+    splitNestedQuotesIntoSeparateOnes(text: string): string { // Rewrites blockquote HTML segment by segment ('<br />'-delimited), closing and reopening quotes at segment boundaries; returns the rewritten HTML string.
+        let count = 0; // Running blockquote balance: raised by opening tags and lowered by closing tags counted in the callback below.
+        let parsedText = text.replace(/(<\/blockquote>)+/g, (match) => { // Put a '<br />' after every run of consecutive closing tags so the split below also breaks there.
+            return `${match.slice(0, match.lastIndexOf('</blockquote>'))}</blockquote><br />`; // Drops the last closer of the run and re-adds it, i.e. yields the matched run followed by '<br />'.
+        });
+        const splittedText = parsedText.split('<br />'); // One entry per '<br />'-delimited segment.
+        if (splittedText.length > 0 && splittedText[splittedText.length - 1] === '') { // A trailing empty entry appears when the text ends with '<br />'; discard it.
+            splittedText.pop();
+        }
+        parsedText = splittedText
+            .map((line, index, arr) => {
+                if (!line) return ''; // Empty segments contribute nothing; their '<br />' separator is not restored.
+
+                if (line.startsWith('<blockquote>')) { // Opening tags are only counted when the segment begins with one.
+                    count += (line.match(/<blockquote>/g) || []).length;
+                }
+
+                if (line.endsWith('</blockquote>')) { // Closing tags are only counted when the segment ends with one.
+                    count -= (line.match(/<\/blockquote>/g) || []).length;
+                    if (count > 0) { // Balance still positive after the closers: emit `count` opening tags after this segment.
+                        return `${line}${'<blockquote>'.repeat(count)}`;
+                    }
+                }
+
+                if (count > 0) { // Segment ends while the balance is positive: append one closing tag, then `count` opening tags.
+                    return `${line}${'</blockquote>'}${'<blockquote>'.repeat(count)}`;
+                }
+
+                return line + (index < arr.length - 1 ? '<br />' : ''); // Balance is not positive: restore the '<br />' separator, except after the final segment.
+            })
+            .join('');
+
+        return parsedText;
+    }
+
     /**
      * Replaces HTML with markdown
      */
@@ -1118,6 +1153,8 @@ export default class ExpensiMark {
         const body = /<(body)(?:"[^"]*"|'[^']*'|[^'"><])*>(?:\n|\r\n)?([\s\S]*?)(?:\n|\r\n)?<\/\1>(?![^<]*(<\/pre>|<\/code>))/im;
         const parseBodyTag = generatedMarkdown.match(body);
 
+        generatedMarkdown = this.splitNestedQuotesIntoSeparateOnes(generatedMarkdown);
+
         // If body tag is found then use the content of body rather than the whole HTML
         if (parseBodyTag) {
             generatedMarkdown = parseBodyTag[2];