Skip to content

Commit e1fea81

Browse files
committed
da rolling import
1 parent 8ab6603 commit e1fea81

7 files changed

+2073
-7
lines changed

.github/workflows/import/constants.js

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/** AEM admin origin. */
export const AEM_ORIGIN = 'https://admin.hlx.page';

/** Supported file extensions (keys) and the MIME type used for each (values). */
export const SUPPORTED_FILES = {
  html: 'text/html',
  jpeg: 'image/jpeg',
  json: 'application/json',
  jpg: 'image/jpeg',
  png: 'image/png',
  gif: 'image/gif',
  mp4: 'video/mp4',
  pdf: 'application/pdf',
  svg: 'image/svg+xml',
};

/** DA admin origin; prefixed to the `/source/...` paths that content is PUT to. */
export const DA_ORIGIN = 'https://admin.da.live';

.github/workflows/import/converters.js

+157
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
import { unified } from 'unified';
2+
import remarkParse from 'remark-parse';
3+
import remarkGridTable from '@adobe/remark-gridtables';
4+
import { toHast as mdast2hast, defaultHandlers } from 'mdast-util-to-hast';
5+
import { raw } from 'hast-util-raw';
6+
import { mdast2hastGridTablesHandler } from '@adobe/mdast-util-gridtables';
7+
import { toHtml } from 'hast-util-to-html';
8+
9+
import { JSDOM } from 'jsdom';
10+
11+
/**
 * Turns the text of a block's name row into a list of CSS class names.
 *
 * Everything before the last "(" is treated as one name; everything after it
 * is split on "," into further names. Each name is lower-cased, every run of
 * characters outside [0-9a-z] is collapsed to "-", leading/trailing dashes are
 * stripped, and names that end up empty are dropped.
 *
 * @param {string} text - Raw name-row text, e.g. "Hero (Dark, Full Width)".
 * @returns {string[]} Class names, e.g. ['hero', 'dark', 'full-width'];
 *   an empty array when `text` is falsy.
 */
function toBlockCSSClassNames(text) {
  if (!text) return [];

  const idx = text.lastIndexOf('(');
  const names = idx >= 0
    ? [text.substring(0, idx), ...text.substring(idx + 1).split(',')]
    : [text];

  return names
    .map((name) => name
      .toLowerCase()
      .replace(/[^0-9a-z]+/g, '-')
      .replace(/^-+/, '')
      .replace(/-+$/, ''))
    .filter((name) => !!name);
}
29+
30+
/**
 * Replaces every table that is a direct child of <body> with a block <div>.
 *
 * The table's first row supplies the block's class names (via
 * toBlockCSSClassNames). Each remaining row becomes a child <div>, and each
 * cell (td/th) of that row becomes a nested <div> carrying the cell's
 * innerHTML.
 *
 * @param {object} dom - Object exposing `window.document` (a JSDOM instance in this file).
 */
function convertBlocks(dom) {
  const { document } = dom.window;
  const tables = document.querySelectorAll('body > table');

  tables.forEach((table) => {
    const rows = [...table.querySelectorAll(':scope > tbody > tr, :scope > thead > tr')];
    const nameRow = rows.shift();

    const block = document.createElement('div');
    // A table without any rows has no name row; it becomes a div with an empty
    // class instead of throwing on `undefined.textContent`.
    block.className = toBlockCSSClassNames(nameRow?.textContent).join(' ');

    rows.forEach((row) => {
      const rowDiv = document.createElement('div');
      row.querySelectorAll(':scope > td, :scope > th').forEach((col) => {
        const cellDiv = document.createElement('div');
        cellDiv.innerHTML = col.innerHTML;
        rowDiv.append(cellDiv);
      });
      block.append(rowDiv);
    });

    table.parentElement.replaceChild(block, table);
  });
}
56+
57+
/**
 * Wraps every <img> in the document in a <picture> element.
 *
 * For each image a clone is made with `loading="lazy"` and an
 * `optimize=medium` query parameter. The <picture> holds two <source>
 * elements (the second with media "(min-width: 600px)") followed by the clone.
 * If the original image has an `href` attribute, the picture is wrapped in an
 * <a> with that href (and the image's `title`, when present).
 *
 * @param {object} dom - Object exposing `window.document` (a JSDOM instance in this file).
 */
function makePictures(dom) {
  const { document } = dom.window;

  document.querySelectorAll('img').forEach((img) => {
    const clone = img.cloneNode(true);
    clone.setAttribute('loading', 'lazy');
    // Use "&" when the URL already has a query string so it stays well-formed.
    const separator = clone.src.includes('?') ? '&' : '?';
    clone.src = `${clone.src}${separator}optimize=medium`;

    const srcMobile = document.createElement('source');
    srcMobile.srcset = clone.src;

    const srcTablet = document.createElement('source');
    srcTablet.srcset = clone.src;
    srcTablet.media = '(min-width: 600px)';

    let pic = document.createElement('picture');
    pic.append(srcMobile, srcTablet, clone);

    const hrefAttr = img.getAttribute('href');
    if (hrefAttr) {
      const a = document.createElement('a');
      a.href = hrefAttr;
      const titleAttr = img.getAttribute('title');
      if (titleAttr) {
        a.title = titleAttr;
      }
      a.append(pic);
      pic = a;
    }

    // Determine what to replace: when the image sits in a <p> that is the only
    // element child of its own parent, the whole <p> is swapped out; otherwise
    // just the <img>.
    const imgParent = img.parentElement;
    const imgGrandparent = imgParent.parentElement;
    if (imgParent.nodeName === 'P' && imgGrandparent?.childElementCount === 1) {
      imgGrandparent.replaceChild(pic, imgParent);
    } else {
      imgParent.replaceChild(pic, img);
    }
  });
}
97+
98+
/**
 * Groups the direct children of <body> into section <div>s.
 *
 * Children are moved, in order, into the current section. Every <hr> is
 * removed from the document and starts a new section. All sections are then
 * appended to <body>.
 *
 * @param {object} dom - Object exposing `window.document` (a JSDOM instance in this file).
 */
function makeSections(dom) {
  const { document } = dom.window;
  const children = document.body.querySelectorAll(':scope > *');

  const sections = [document.createElement('div')];
  for (const child of children) {
    if (child.nodeName === 'HR') {
      child.remove();
      sections.push(document.createElement('div'));
    } else {
      sections[sections.length - 1].append(child);
    }
  }

  document.body.append(...sections);
}
114+
115+
/**
 * Converts a document DOM into AEM-style HTML.
 *
 * Generic docs have table blocks and HRs, but not ProseMirror decorations.
 * The DOM is modified in place: tables become block divs, images become
 * pictures, and body children are grouped into sections.
 *
 * @param {object} dom - Object exposing `window.document` (a JSDOM instance in this file).
 * @returns {string} The resulting `innerHTML` of <body>.
 */
export function docDomToAemHtml(dom) {
  convertBlocks(dom);
  makePictures(dom);
  makeSections(dom);

  return dom.window.document.body.innerHTML;
}
123+
124+
/**
 * Converts an mdast tree to a hast tree.
 *
 * Uses the default mdast-to-hast handlers plus the grid-table handler, keeps
 * embedded HTML (`allowDangerousHtml`), and passes the result through `raw`.
 *
 * @param {object} mdast - Markdown syntax tree.
 * @returns {object} The hast tree returned by `raw`.
 */
function makeHast(mdast) {
  const handlers = { ...defaultHandlers, gridTable: mdast2hastGridTablesHandler() };
  const hast = mdast2hast(mdast, { handlers, allowDangerousHtml: true });
  return raw(hast);
}
129+
130+
/**
 * Strips the "#width…" suffix from the `src` attribute of every element whose
 * `src` contains "#width".
 *
 * The attribute is read with getAttribute rather than the `.src` property:
 * the selector matches any element with a `src` attribute, not only those
 * that expose a `.src` property, and the authored value is kept as written.
 *
 * @param {object} dom - Object exposing `window.document` (a JSDOM instance in this file).
 */
function removeImageSizeHash(dom) {
  const elements = dom.window.document.querySelectorAll('[src*="#width"]');
  elements.forEach((el) => {
    const src = el.getAttribute('src');
    el.setAttribute('src', src.split('#width')[0]);
  });
}
136+
137+
/**
 * Parses markdown into a JSDOM document.
 *
 * Steps: normalise line endings, parse to mdast (with grid-table support),
 * convert to hast and serialise to HTML, rewrite ".hlx.page" / ".aem.page" to
 * their ".live" counterparts, build a JSDOM from the HTML, and strip
 * "#width…" suffixes from `src` attributes.
 *
 * @param {string} md - Markdown source.
 * @returns {JSDOM} DOM built from the converted HTML.
 */
export function mdToDocDom(md) {
  // Convert CRLF and lone CR line breaks to LF.
  const converted = md.replace(/\r\n|\r/g, '\n');

  // Convert to mdast.
  const mdast = unified()
    .use(remarkParse)
    .use(remarkGridTable)
    .parse(converted);

  const hast = makeHast(mdast);

  const htmlText = toHtml(hast)
    .replaceAll('.hlx.page', '.hlx.live')
    .replaceAll('.aem.page', '.aem.live');

  const dom = new JSDOM(htmlText);
  removeImageSizeHash(dom);

  return dom;
}

.github/workflows/import/daFetch.js

+104
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import { DA_ORIGIN } from './constants.js';
2+
3+
// Holds the value stored by setImsDetails. Within this file it is only read
// by commented-out code.
let imsDetails;

/**
 * Stores an access token in the module-level `imsDetails`, shaped as
 * `{ accessToken: { token } }`.
 *
 * @param {string} token - Access token to store.
 */
export function setImsDetails(token) {
  imsDetails = { accessToken: { token } };
}
8+
9+
// export async function initIms() {
10+
// if (imsDetails) return imsDetails;
11+
// const { loadIms } = await import('./ims.js');
12+
// try {
13+
// imsDetails = await loadIms();
14+
// return imsDetails;
15+
// } catch {
16+
// return null;
17+
// }
18+
// }
19+
20+
/**
 * `fetch` wrapper that authenticates with the token in the DA_TOKEN
 * environment variable.
 *
 * The caller's `opts` object is left untouched; headers are copied into a new
 * `Headers` instance (so plain objects and `Headers` both work). The
 * Authorization header is only set when DA_TOKEN is defined, which avoids
 * sending a literal "Bearer undefined".
 *
 * @param {string|URL} url - Request URL.
 * @param {object} [opts={}] - `fetch` options.
 * @returns {Promise<Response>} The response from `fetch`.
 */
export const daFetch = async (url, opts = {}) => {
  const headers = new Headers(opts.headers);
  const token = process.env.DA_TOKEN;
  if (token) {
    headers.set('Authorization', `Bearer ${token}`);
  }
  return fetch(url, { ...opts, headers });
};
38+
39+
/**
 * Wraps HTML in a <body>/<header>/<main>/<footer> skeleton and, when both
 * `fromOrg` and `fromRepo` are given, makes media and root-relative links
 * absolute against `https://main--{fromRepo}--{fromOrg}.hlx.live`.
 *
 * @param {string} text - HTML to place inside <main>.
 * @param {string} [fromOrg] - Source org.
 * @param {string} [fromRepo] - Source repo.
 * @returns {string} The wrapped HTML document body.
 */
export function replaceHtml(text, fromOrg, fromRepo) {
  let inner = text;
  if (fromOrg && fromRepo) {
    const fromOrigin = `https://main--${fromRepo}--${fromOrg}.hlx.live`;
    inner = text
      .replaceAll('./media', `${fromOrigin}/media`)
      // Only root-relative links are rewritten; protocol-relative links
      // (href="//host/...") already name their host and are left alone.
      .replace(/href="\/(?!\/)/g, `href="${fromOrigin}/`);
  }

  return `
<body>
<header></header>
<main>${inner}</main>
<footer></footer>
</body>
`;
}
56+
57+
/**
 * Wraps `text` with replaceHtml and PUTs it to DA as
 * `{DA_ORIGIN}/source/{org}/{repo}{pathname}.html`.
 *
 * @param {string} text - HTML content to save.
 * @param {object} url - Object providing `org`, `repo` and `pathname`.
 * @returns {Promise<{daHref: string, daStatus: number, daResp: Response, ok: boolean}|null>}
 *   Result details, or `null` when the request throws.
 */
export async function saveToDa(text, url) {
  const { org, repo, pathname } = url;
  const daPath = `/${org}/${repo}${pathname}`;
  const daHref = `https://da.live/edit#${daPath}`;

  const body = replaceHtml(text, org, repo);

  const formData = new FormData();
  formData.append('data', new Blob([body], { type: 'text/html' }));
  const opts = { method: 'PUT', body: formData };

  try {
    const daResp = await daFetch(`${DA_ORIGIN}/source${daPath}.html`, opts);
    return { daHref, daStatus: daResp.status, daResp, ok: daResp.ok };
  } catch (e) {
    // `url.daUrl` is not set anywhere in this file, so fall back to the DA
    // path to keep the message meaningful; include the error for diagnosis.
    console.log(`Couldn't save ${url.daUrl ?? daPath}`, e);
    return null;
  }
}
76+
77+
/**
 * Builds the Blob to upload for `url`.
 *
 * JSON content is used as-is with type "application/json"; anything else is
 * wrapped via replaceHtml and typed "text/html".
 *
 * @param {object} url - Object providing `type`, `fromOrg` and `fromRepo`.
 * @param {string} content - Content to upload.
 * @returns {Blob} Blob ready to append to the request's FormData.
 */
function getBlob(url, content) {
  const isJson = url.type === 'json';
  const body = isJson ? content : replaceHtml(content, url.fromOrg, url.fromRepo);
  const type = isJson ? 'application/json' : 'text/html';

  return new Blob([body], { type });
}
85+
86+
/**
 * PUTs `content` to `{DA_ORIGIN}/source/{toOrg}/{toRepo}{destPath}` and
 * records the matching da.live link on `url.daHref` ("/sheet" route for JSON,
 * "/edit" otherwise).
 *
 * @param {object} url - Object providing `toOrg`, `toRepo`, `destPath`,
 *   `editPath` and `type`; `daHref` is written to it.
 * @param {string} content - Content to upload.
 * @returns {Promise<number>} The response status, or 500 when the request throws.
 */
export async function saveAllToDa(url, content) {
  const { toOrg, toRepo, destPath, editPath, type } = url;

  const route = type === 'json' ? '/sheet' : '/edit';
  url.daHref = `https://da.live${route}#/${toOrg}/${toRepo}${editPath}`;

  const body = new FormData();
  body.append('data', getBlob(url, content));
  const opts = { method: 'PUT', body };

  try {
    const resp = await daFetch(`${DA_ORIGIN}/source/${toOrg}/${toRepo}${destPath}`, opts);
    return resp.status;
  } catch (e) {
    // Include the error so the cause of the failure is not lost.
    console.log(`Couldn't save ${destPath}`, e);
    return 500;
  }
}

.github/workflows/import/index.js

+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
import { DA_ORIGIN } from './constants.js';
2+
import { replaceHtml, daFetch } from './daFetch.js';
3+
import { mdToDocDom, docDomToAemHtml } from './converters.js';
4+
5+
// Extensions fetched as-is (as a blob) instead of being fetched as `.md` and
// converted to HTML.
const EXTS = ['json', 'svg', 'png', 'jpg', 'jpeg', 'gif', 'mp4', 'pdf'];

// Destination org and repo used in the DA source path and the da.live link.
const toOrg = 'adobecom';
const toRepo = 'da-playground';
9+
10+
/**
 * Returns the minutes elapsed since `startTime` as a short string.
 *
 * The value is truncated (not rounded) to at most four characters, e.g.
 * "1.23". A dot left dangling by the truncation ("100." for 100.5 minutes) is
 * removed.
 *
 * @param {number} startTime - Start timestamp in milliseconds, as from Date.now().
 * @returns {string} Elapsed minutes.
 */
export function calculateTime(startTime) {
  const totalTime = Date.now() - startTime;
  const minutes = (totalTime / 1000) / 60;
  return String(minutes).substring(0, 4).replace(/\.$/, '');
}
14+
15+
/**
 * PUTs `blob` to `{DA_ORIGIN}/source/{toOrg}/{toRepo}{destPath}` and records
 * the matching da.live link on `url.daHref`.
 *
 * @param {object} url - Object providing `destPath`, `editPath` and `route`;
 *   `daHref` is written to it.
 * @param {Blob} blob - Content to upload.
 * @returns {Promise<number>} The response status, or 500 when the request throws.
 */
async function saveAllToDa(url, blob) {
  const { destPath, editPath, route } = url;

  url.daHref = `https://da.live${route}#/${toOrg}/${toRepo}${editPath}`;

  const body = new FormData();
  body.append('data', blob);
  const opts = { method: 'PUT', body };

  try {
    const resp = await daFetch(`${DA_ORIGIN}/source/${toOrg}/${toRepo}${destPath}`, opts);
    return resp.status;
  } catch (e) {
    // Include the error so the cause of the failure is not lost.
    console.log(`Couldn't save ${destPath}`, e);
    return 500;
  }
}
32+
33+
/**
 * Imports one AEM resource into DA.
 *
 * The repo and org are taken from the last two "--"-separated parts of the
 * first hostname label (e.g. "main--repo--org"). Paths ending in one of EXTS
 * are fetched and uploaded as-is; any other path is fetched as `.md`,
 * converted to AEM HTML and uploaded as `.html`. Progress and results are
 * recorded on `url` itself (`status`, `error`, `fromRepo`, `fromOrg`,
 * `destPath`, `editPath`, `route`).
 *
 * @param {URL} url - Source URL; extra properties are written onto it.
 * @returns {Promise<void>}
 */
async function importUrl(url) {
  const [fromRepo, fromOrg] = url.hostname.split('.')[0].split('--').slice(1).slice(-2);
  // Both parts are required; a host that yields only one is not an AEM URL.
  if (!(fromRepo && fromOrg)) {
    url.status = '403';
    url.error = 'URL is not from AEM.';
    return;
  }

  url.fromRepo ??= fromRepo;
  url.fromOrg ??= fromOrg;

  // Extension checks use the pathname so a query string or hash on the URL
  // cannot hide (or fake) the file extension.
  const { pathname } = url;
  if (pathname.endsWith('.xml') || pathname.endsWith('.html')) {
    url.status = 'error';
    url.error = 'DA does not support XML or raw HTML.';
    return;
  }

  const isExt = EXTS.some((ext) => pathname.endsWith(`.${ext}`));
  const path = pathname.endsWith('/') ? `${pathname}index` : pathname;
  const srcPath = isExt ? path : `${path}.md`;
  url.destPath = isExt ? path : `${path}.html`;
  // Strip only a trailing ".json", never an earlier occurrence in the path.
  url.editPath = path.endsWith('.json') ? path.slice(0, -'.json'.length) : path;

  if (isExt) {
    url.route = url.destPath.endsWith('.json') ? '/sheet' : '/media';
  } else {
    url.route = '/edit';
  }

  try {
    const srcUrl = `${url.origin}${srcPath}`;
    const resp = await fetch(srcUrl);
    console.log('fetched resource from AEM at: ', srcUrl);
    if (resp.redirected && !srcPath.endsWith('.mp4')) {
      url.status = 'redir';
      throw new Error('redir');
    }
    if (!resp.ok) {
      url.status = 'error';
      throw new Error('error');
    }

    let content;
    if (isExt) {
      content = await resp.blob();
    } else {
      const aemHtml = docDomToAemHtml(mdToDocDom(await resp.text()));
      const html = replaceHtml(aemHtml, url.fromOrg, url.fromRepo);
      content = new Blob([html], { type: 'text/html' });
    }

    url.status = await saveAllToDa(url, content);
    console.log(`imported resource ${url.destPath}`);

    console.log('TODO - preview and publish.');
  } catch (e) {
    console.log(e);
    // Keep a status set before the throw ('redir' / 'error'); otherwise mark
    // the URL as failed.
    if (!url.status) url.status = 'error';
  }
}
91+
92+
// Entry point: import a single page. The URL is built from a path and a base
// rather than by string concatenation, and the import is awaited so the
// promise is not left floating.
await importUrl(new URL('/customer-success-stories', 'https://main--bacom--adobecom.hlx.live'));

0 commit comments

Comments
 (0)