forked from craigbox/docs-2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
crawler.js
78 lines (66 loc) · 1.53 KB
/
crawler.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import frontmatter from '@github-docs/frontmatter'
import fs from 'fs'
import {remark} from 'remark'
import strip from 'strip-markdown'
// Search-index records collected from every processed page; serialized by finish().
const results = [];
/**
 * Reads every entry of a directory as UTF-8 text and hands each one to a callback.
 *
 * All reads are started at once, so callbacks arrive in completion order,
 * not directory order.
 *
 * @param {string} dirname - Directory to scan. It is concatenated directly with
 *   each entry name, so it must end with a path separator.
 * @param {function(string, string, function(): void, boolean): void} onFileContent -
 *   Called as `(name, content, onFinish, final)` for each entry that was read,
 *   where `name` is the entry name with its last extension removed and `final`
 *   is true only for the last read to complete. The callback is responsible for
 *   calling `onFinish` when `final` is true.
 * @param {function(Error): void} onError - Called when listing the directory or
 *   reading an entry fails.
 * @param {function(): void} onFinish - Completion callback. Called directly by
 *   this function only when no `onFileContent` call can carry the final flag:
 *   the directory is empty, or the last read to complete failed.
 */
function readFiles(dirname, onFileContent, onError, onFinish) {
  fs.readdir(dirname, (listErr, filenames) => {
    if (listErr) {
      onError(listErr);
      return;
    }

    // Nothing to read: without this, no callback would ever signal completion.
    if (filenames.length === 0) {
      onFinish();
      return;
    }

    let completed = 0;
    filenames.forEach((filename) => {
      fs.readFile(dirname + filename, 'utf-8', (readErr, fileContent) => {
        // Count failures too; otherwise one unreadable entry (for example a
        // sub-directory) would keep the counter from ever reaching the total.
        completed++;
        const final = completed === filenames.length;

        if (readErr) {
          onError(readErr);
          if (final) {
            onFinish();
          }
          return;
        }

        onFileContent(filename.replace(/\.[^/.]+$/, ""), fileContent, onFinish, final);
      });
    });
  });
}
/**
 * Parses one page's frontmatter, strips the Markdown from its body, and appends
 * a search-index record to the module-level `results` array.
 *
 * `onFinish` is invoked whenever `final` is true, including when this page
 * fails to parse or process, so one bad page cannot suppress the final output.
 *
 * @param {string} url - Page identifier; used for both `url` and `url_without_anchor`.
 * @param {string} fileContent - Raw file text, including the frontmatter block.
 * @param {function(): void} onFinish - Called once this page is handled, if `final` is true.
 * @param {boolean} final - Whether this is the last page of the run.
 */
function runFrontMatter(url, fileContent, onFinish, final) {
  const { data, content, errors } = frontmatter(fileContent);

  if (errors.length > 0) {
    logError(errors);
    // Still signal completion; otherwise a broken last page hangs the run silently.
    if (final) {
      onFinish();
    }
    return;
  }

  remark()
    .use(strip)
    .process(content)
    .then((file) => {
      // Collapse the stripped text onto a single line.
      const text = String(file).replace(/\n/g, " ");
      results.push({
        url,
        url_without_anchor: url,
        anchor: null,
        type: "content",
        title: data.title,
        description: data.description,
        content: text,
        hierarchy: {
          lvl0: "Pages",
          lvl1: data.title,
        },
      });
    })
    // Report processing failures instead of leaving the rejection unhandled.
    .catch(logError)
    .then(() => {
      if (final) {
        onFinish();
      }
    });
}
/**
 * Reports a failure on standard error.
 *
 * @param {*} err - The error (or list of errors) to report.
 */
function logError(err) {
  console.error(err);
}
/**
 * Writes the collected `results` array to standard output as one JSON string.
 */
function finish() {
  const serialized = JSON.stringify(results);
  console.log(serialized);
}
// Entry point: read every file in src/pages/en/, pass each to runFrontMatter,
// report failures through logError, and hand finish over as the completion callback.
readFiles("src/pages/en/", runFrontMatter, logError, finish)