|
| 1 | +import { unified } from 'unified'; |
| 2 | +import remarkParse from 'remark-parse'; |
| 3 | +import remarkGridTable from '@adobe/remark-gridtables'; |
| 4 | +import { toHast as mdast2hast, defaultHandlers } from 'mdast-util-to-hast'; |
| 5 | +import { raw } from 'hast-util-raw'; |
| 6 | +import { mdast2hastGridTablesHandler } from '@adobe/mdast-util-gridtables'; |
| 7 | +import { toHtml } from 'hast-util-to-html'; |
| 8 | + |
| 9 | +import { JSDOM } from 'jsdom'; |
| 10 | + |
/**
 * Derives CSS class names from the text of a block's name row.
 *
 * The text before the last `(` is treated as the block name and the
 * comma-separated remainder as additional names, e.g.
 * `Columns (wide, dark)` -> `['columns', 'wide', 'dark']`.
 * Each name is lower-cased, every run of characters outside `0-9a-z` becomes a
 * single `-`, leading/trailing dashes are trimmed, and names that end up empty
 * are dropped (which is also how the closing `)` disappears).
 *
 * @param {string} text - raw text of the name row
 * @returns {string[]} sanitized class names; an empty array for falsy input
 */
function toBlockCSSClassNames(text) {
  if (!text) {
    return [];
  }

  const parenAt = text.lastIndexOf('(');
  const rawNames = parenAt < 0
    ? [text]
    : [text.substring(0, parenAt), ...text.substring(parenAt + 1).split(',')];

  const classNames = [];
  for (const rawName of rawNames) {
    const cleaned = rawName
      .toLowerCase()
      .replace(/[^0-9a-z]+/g, '-')
      .replace(/^-+/, '')
      .replace(/-+$/, '');
    if (cleaned) {
      classNames.push(cleaned);
    }
  }
  return classNames;
}
| 29 | + |
/**
 * Replaces every top-level table (`body > table`) with a block `<div>`.
 *
 * The first row of the table supplies the block's class names (via
 * `toBlockCSSClassNames`); every following row becomes a `<div>` holding one
 * `<div>` per cell, each carrying a copy of the cell's inner HTML.
 *
 * A table without any rows is still replaced, by an empty `<div>` with no
 * class names, instead of throwing on the missing name row.
 *
 * @param {object} dom - JSDOM-like object exposing `window.document`; mutated in place
 * @returns {void}
 */
function convertBlocks(dom) {
  const { document } = dom.window;
  const tables = document.querySelectorAll('body > table');

  tables.forEach((table) => {
    // Only direct rows are considered, so tables nested inside cells are left alone.
    const [nameRow, ...contentRows] = table.querySelectorAll(':scope > tbody > tr, :scope > thead > tr');

    const rowDivs = contentRows.map((row) => {
      const cellDivs = [...row.querySelectorAll(':scope > td, :scope > th')].map((cell) => {
        const cellDiv = document.createElement('div');
        cellDiv.innerHTML = cell.innerHTML;
        return cellDiv;
      });
      const rowDiv = document.createElement('div');
      rowDiv.append(...cellDivs);
      return rowDiv;
    });

    const block = document.createElement('div');
    // nameRow is undefined for a row-less table; the helper maps that to no class names.
    block.className = toBlockCSSClassNames(nameRow?.textContent).join(' ');
    block.append(...rowDivs);
    table.parentElement.replaceChild(block, table);
  });
}
| 56 | + |
/**
 * Wraps every `<img>` in the document in a `<picture>` element.
 *
 * For each image a clone is made with `loading="lazy"` and an
 * `optimize=medium` query parameter added to its `src`. The picture holds two
 * `<source>` elements using that URL (the second one restricted to
 * `(min-width: 600px)`) followed by the clone. If the original image carries
 * an `href` attribute, the picture is wrapped in an `<a>` with that href and,
 * when present, the image's `title`.
 *
 * The picture (or link) replaces the image. When the image's parent is a `<p>`
 * that is the only element child of its own parent, the whole paragraph is
 * replaced instead.
 *
 * @param {object} dom - JSDOM-like object exposing `window.document`; mutated in place
 * @returns {void}
 */
function makePictures(dom) {
  const { document } = dom.window;

  // Adds the optimize parameter without corrupting URLs that already have a
  // query string (use `&`) or a fragment (the query must precede the `#`).
  const withOptimize = (src) => {
    const hashAt = src.indexOf('#');
    const beforeHash = hashAt < 0 ? src : src.slice(0, hashAt);
    const hash = hashAt < 0 ? '' : src.slice(hashAt);
    const separator = beforeHash.includes('?') ? '&' : '?';
    return `${beforeHash}${separator}optimize=medium${hash}`;
  };

  document.querySelectorAll('img').forEach((img) => {
    const clone = img.cloneNode(true);
    clone.setAttribute('loading', 'lazy');
    clone.src = withOptimize(clone.src);

    const picture = document.createElement('picture');

    const srcMobile = document.createElement('source');
    srcMobile.srcset = clone.src;

    const srcTablet = document.createElement('source');
    srcTablet.srcset = clone.src;
    srcTablet.media = '(min-width: 600px)';

    picture.append(srcMobile, srcTablet, clone);

    // NOTE(review): `href` is not a standard <img> attribute; presumably an
    // upstream step stores the link target there. Confirm against the producer.
    let replacement = picture;
    const hrefAttr = img.getAttribute('href');
    if (hrefAttr) {
      const link = document.createElement('a');
      link.href = hrefAttr;
      const titleAttr = img.getAttribute('title');
      if (titleAttr) {
        link.title = titleAttr;
      }
      link.append(picture);
      replacement = link;
    }

    // Decide whether to replace just the image or its wrapping paragraph.
    const imgParent = img.parentElement;
    const imgGrandparent = imgParent.parentElement;
    if (imgParent.nodeName === 'P' && imgGrandparent?.childElementCount === 1) {
      imgGrandparent.replaceChild(replacement, imgParent);
    } else {
      imgParent.replaceChild(replacement, img);
    }
  });
}
| 97 | + |
/**
 * Groups the body's direct children into section `<div>`s, splitting on `<hr>`.
 *
 * Walks the element children of `<body>` in order: every `<hr>` is removed and
 * starts a new (initially empty) section; every other element is moved into the
 * current section. All sections are then appended to the body, so a body
 * without any children still ends up with one empty section.
 *
 * @param {object} dom - JSDOM-like object exposing `window.document`; mutated in place
 * @returns {void}
 */
function makeSections(dom) {
  const { document } = dom.window;
  const topLevel = [...document.body.querySelectorAll(':scope > *')];

  let current = document.createElement('div');
  const sections = [current];

  for (const node of topLevel) {
    if (node.nodeName === 'HR') {
      // The rule itself is dropped; it only marks the section boundary.
      node.remove();
      current = document.createElement('div');
      sections.push(current);
    } else {
      current.append(node);
    }
  }

  document.body.append(...sections);
}
| 114 | + |
/**
 * Converts a generic document DOM into AEM-style HTML.
 *
 * Generic docs have table blocks and HRs, but not ProseMirror decorations, so
 * the conversion is: tables to block divs, images to pictures, then HR-based
 * sectioning, applied in that order.
 *
 * @param {object} dom - JSDOM-like object exposing `window.document`; mutated in place
 * @returns {string} the resulting `innerHTML` of the document body
 */
export function docDomToAemHtml(dom) {
  const steps = [convertBlocks, makePictures, makeSections];
  steps.forEach((step) => step(dom));

  const { body } = dom.window.document;
  return body.innerHTML;
}
| 123 | + |
/**
 * Converts an mdast tree to hast.
 *
 * Uses the default mdast-to-hast handlers plus the grid-table handler for
 * `gridTable` nodes, with `allowDangerousHtml` enabled, and passes the result
 * through `raw` from hast-util-raw.
 *
 * @param {object} mdast - markdown syntax tree
 * @returns {object} the tree returned by `raw`
 */
function makeHast(mdast) {
  const hast = mdast2hast(mdast, {
    handlers: { ...defaultHandlers, gridTable: mdast2hastGridTablesHandler() },
    allowDangerousHtml: true,
  });
  return raw(hast);
}
| 129 | + |
/**
 * Strips `#width…` suffixes from `src` values.
 *
 * Every element whose `src` attribute contains `#width` gets its `src`
 * attribute reset to the part of its `src` property before the first `#width`.
 *
 * @param {object} dom - JSDOM-like object exposing `window.document`; mutated in place
 * @returns {void}
 */
function removeImageSizeHash(dom) {
  const sized = dom.window.document.querySelectorAll('[src*="#width"]');
  for (const el of sized) {
    const [withoutSize] = el.src.split('#width');
    el.setAttribute('src', withoutSize);
  }
}
| 136 | + |
/**
 * Parses markdown into a JSDOM document.
 *
 * Steps: normalize all line breaks to `\n`, parse with remark (including grid
 * tables) into mdast, convert to hast and serialize to HTML, rewrite
 * `.hlx.page` / `.aem.page` to their `.live` counterparts everywhere in the
 * HTML text, load the result into JSDOM, and strip `#width…` suffixes from
 * `src` attributes.
 *
 * @param {string} md - markdown source
 * @returns {JSDOM} the DOM built from the converted markdown
 */
export function mdToDocDom(md) {
  // Normalize CRLF / CR line endings before parsing.
  const normalized = md.replace(/(\r\n|\n|\r)/gm, '\n');

  const parser = unified()
    .use(remarkParse)
    .use(remarkGridTable);
  const hast = makeHast(parser.parse(normalized));

  // Plain text replacement: applies to every occurrence in the serialized HTML.
  const htmlText = toHtml(hast)
    .replaceAll('.hlx.page', '.hlx.live')
    .replaceAll('.aem.page', '.aem.live');

  const dom = new JSDOM(htmlText);
  removeImageSizeHash(dom);
  return dom;
}
0 commit comments