-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclean-json.js
151 lines (143 loc) · 4.6 KB
/
clean-json.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
/**
 * Characters that must appear in the cleaned output as JSON Unicode escapes
 * instead of literal characters. Keys are the literal characters (written as
 * JavaScript escapes because the glyphs are easily confused on screen); values
 * are the six-character escape text to emit.
 */
const JSON_ESCAPES = {
	"\u2014": "\\u2014", // Em dash
	"\u2013": "\\u2013", // En dash
	"\u2212": "\\u2212", // Minus sign
};

/**
 * Characters replaced in the raw JSON text before it is parsed. Because the
 * substitution happens on unparsed text, the curly double quotes map to an
 * escaped straight quote (backslash + quote) so the surrounding JSON string
 * stays valid.
 *
 * Every key is spelled as an explicit code point: several of these characters
 * are invisible (non-breaking space) or render identically to their ASCII
 * expansion (ligatures, digraphs), and a literal key that gets normalised to
 * ASCII silently turns its entry into a no-op.
 */
const JSON_REPLACEMENTS = {
	"\u2011": "-", // Non-breaking hyphen --> hyphen
	"\u201C": '\\"', // Left curly double quote --> escaped straight quote
	"\u201D": '\\"', // Right curly double quote --> escaped straight quote
	"\u2019": "'", // Right curly single quote --> apostrophe
	"\u2018": "'", // Left curly single quote --> apostrophe
	"\u2026": "...", // Horizontal ellipsis --> three full stops
	"\u00A0": " ", // Non-breaking space --> space
	"\t": " ", // Tab --> space
	"\uFB00": "ff", // Latin small ligature ff
	"\uFB03": "ffi", // Latin small ligature ffi
	"\uFB04": "ffl", // Latin small ligature ffl
	"\uFB01": "fi", // Latin small ligature fi
	"\uFB02": "fl", // Latin small ligature fl
	"\u0132": "IJ", // Latin capital ligature IJ
	"\u0133": "ij", // Latin small ligature ij
	"\u01C7": "LJ", // Latin capital letter LJ
	"\u01C8": "Lj", // Latin capital letter L with small letter j
	"\u01C9": "lj", // Latin small letter lj
	"\u01CA": "NJ", // Latin capital letter NJ
	"\u01CB": "Nj", // Latin capital letter N with small letter j
	"\u01CC": "nj", // Latin small letter nj
	// NOTE(review): U+FB05 is formally "long s + t"; the "ft" expansion is kept from the existing table — confirm it is intended.
	"\uFB05": "ft",
};

/**
 * Builds a global regex that matches any key of `table` literally.
 * Keys are regex-escaped so that a future key containing a metacharacter
 * (".", "|", "(", ...) cannot corrupt the pattern.
 */
const toAlternationRegex = (table) =>
	new RegExp(
		Object.keys(table)
			.map((key) => key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
			.join("|"),
		"g",
	);

const JSON_ESCAPES_REGEX = toAlternationRegex(JSON_ESCAPES);
const JSON_REPLACEMENTS_REGEX = toAlternationRegex(JSON_REPLACEMENTS);
/**
 * Produces the cleaned, canonical text for the contents of one JSON file.
 *
 * Steps, in order:
 *  1. Drop a leading byte-order mark (JSON.parse() rejects it).
 *  2. Collapse every run of two or more whitespace characters to one space.
 *  3. Apply JSON_REPLACEMENTS to the raw text.
 *  4. Parse, which also turns "\uXXXX" escapes into literal characters.
 *  5. Re-serialise with tab indentation, trimming every string value.
 *  6. Re-escape the characters listed in JSON_ESCAPES and end with a newline.
 *
 * @param {string} file - Raw text of a JSON file.
 * @returns {string} The cleaned JSON text, terminated by a single newline.
 * @throws {SyntaxError} If the text is not valid JSON after steps 1-3.
 */
function getCleanFile(file) {
	const rawText = file
		.replace(/^\uFEFF/, "") // Strip BOM so files saved by BOM-writing editors can be parsed
		.replace(/\s{2,}/g, " ") // Remove consecutive whitespace
		.replace(JSON_REPLACEMENTS_REGEX, (match) => JSON_REPLACEMENTS[match]); // Replace unwanted characters

	// JSON.parse() automatically converts "\u####" to string literals
	const data = JSON.parse(rawText);

	// Remove leading and trailing whitespace in strings; leave other values alone
	const trimStrings = (key, value) => (typeof value === "string" ? value.trim() : value);
	const serialised = JSON.stringify(data, trimStrings, "\t");

	// Re-escape chosen characters
	const escaped = serialised.replace(JSON_ESCAPES_REGEX, (match) => JSON_ESCAPES[match]);
	return `${escaped}\n`;
}
// Generic everything else below
import chalk from "chalk";
import { Command } from "commander";
import * as fs from "fs";
import * as path from "path";
// Define CLI
// The help text describes what this script does (cleaning), not schema validation.
const program = new Command()
	.summary("clean data for Pf2ools")
	.description(
		[
			"Sanitises and regularises a file or directory of files for Pf2ools. Only JSON files will be cleaned.",
			"",
			"Changes made:",
			"\t- Ligatures and non-breaking variants are replaced with their standard characters",
			"\t- Curly quotes and ellipses are replaced with straight quotes and full stops",
			'\t- Dashes and minus signs are replaced with Unicode escape codes ("\\u####")',
			"\t- All other Unicode escape codes are converted into literal characters",
			"\t- Strings are trimmed of leading and trailing whitespace",
			"\t- All consecutive whitespace is collapsed to a single space",
		].join("\n"),
	)
	.argument("<paths...>", "File or directory paths to clean")
	.option("-c, --changes", "Suppress printing of unchanged files")
	.option("-d, --dry-run", "Prevent any modifications being written to the disk")
	.option("-r, --recurse", "Recursively clean files in directories")
	.option("-s, --summary", "Suppress printing of cleaning status for all files and only summarise results")
	.parse(process.argv); // Parses immediately, so options and arguments are available to the code below
/**
 * Walks a directory tree and collects the paths of every JSON file in it.
 * Entries are visited in the order fs.readdirSync() returns them; the contents
 * of a subdirectory appear at that subdirectory's position in the result.
 *
 * @param {string} targetPath - Directory to search.
 * @returns {string[]} Paths (each prefixed with `targetPath`) accepted by isJSON().
 */
function getJSONsRecursively(targetPath) {
	return fs.readdirSync(targetPath).flatMap((entryName) => {
		const entryPath = path.join(targetPath, entryName);
		// Directories contribute everything found beneath them
		if (fs.statSync(entryPath).isDirectory()) {
			return getJSONsRecursively(entryPath);
		}
		// Files contribute themselves, but only when they look like JSON
		return isJSON(entryPath) ? [entryPath] : [];
	});
}
/**
 * Reports whether a path names a JSON file, judged solely by its extension.
 * The comparison ignores case, so "DATA.JSON" is accepted as well as "data.json".
 *
 * @param {string} filename - File name or path to inspect.
 * @returns {boolean} True when the extension is ".json" in any letter case.
 */
function isJSON(filename) {
	return path.extname(filename).toLowerCase() === ".json";
}
// Load and validate arguments
const opts = program.opts();
let filePaths = [];
for (const arg of program.args) {
	// Tidy the path with path.normalize(), which (unlike splitting on the separator and
	// re-joining) keeps the root of an absolute path: path.join() ignores empty segments,
	// so "/tmp/data" split on "/" would be re-joined as the relative path "tmp/data".
	const normalised = path.normalize(String(arg));
	const { root } = path.parse(normalised);
	// Drop a trailing separator (but never the root itself) so "data/" and "data" are equivalent
	const hasTrailingSep = normalised.length > root.length && normalised.endsWith(path.sep);
	const argClean = hasTrailingSep ? normalised.slice(0, -path.sep.length) : normalised;

	let filePoint;
	try {
		filePoint = fs.statSync(argClean);
	} catch {
		// program.error() prints the message and exits, so execution does not continue past it
		program.error(chalk.red(`"${argClean}" not found`), {
			exitCode: 1,
			code: "invalid.path",
		});
	}

	if (filePoint.isDirectory()) {
		if (opts.recurse) {
			filePaths = filePaths.concat(getJSONsRecursively(argClean));
		} else {
			// Without --recurse only the directory's immediate children are considered
			filePaths = filePaths.concat(
				fs
					.readdirSync(argClean)
					.filter((file) => isJSON(file))
					.map((file) => path.join(argClean, file)),
			);
		}
	} else if (!isJSON(argClean)) {
		program.error(chalk.red(`"${argClean}" is not a JSON file`), {
			exitCode: 1,
			code: "invalid.file",
		});
	} else {
		filePaths.push(argClean);
	}
}

// Nothing to do: report it and exit successfully
if (!filePaths.length) {
	console.log(chalk.blue("No JSON files to clean"));
	process.exit();
}
// Clean each file, rewriting it on disk only when its contents actually change
const unchanged = `\t${chalk.blue("[Unchanged]")} `;
const cleaned = `\t${chalk.green("[Cleaned]")} `;
let cleanCount = 0;
for (const filePath of filePaths) {
	const file = fs.readFileSync(filePath, { encoding: "utf-8" });

	let cleanFile;
	try {
		cleanFile = getCleanFile(file);
	} catch (err) {
		// A malformed file would otherwise surface as a bare SyntaxError that does not say
		// which file is at fault; report it the same way as the other argument errors.
		program.error(chalk.red(`"${filePath}" could not be cleaned: ${err.message}`), {
			exitCode: 1,
			code: "invalid.json",
		});
	}

	if (cleanFile === file) {
		// --changes and --summary both silence the per-file "unchanged" line
		if (!opts.changes && !opts.summary) console.log(chalk.dim(unchanged + filePath));
	} else {
		// With --dry-run the file still counts as cleaned but nothing is written
		if (!opts.dryRun) fs.writeFileSync(filePath, cleanFile, { encoding: "utf-8" });
		if (!opts.summary) console.log(cleaned + filePath);
		cleanCount++;
	}
}
// Summarise: printed when --summary is given or when more than one file was processed
if (opts.summary || filePaths.length > 1) {
	// Green when anything was cleaned, blue when every file was already clean
	const colour = cleanCount ? "green" : "blue";
	const plural = cleanCount !== 1 ? "s" : "";
	// Share of processed files that needed cleaning, rounded to one decimal place
	const percentage = Math.round((1000 * cleanCount) / filePaths.length) / 10;

	let verb;
	if (opts.dryRun) {
		verb = "would be";
	} else if (cleanCount === 1) {
		verb = "was";
	} else {
		verb = "were";
	}

	const message = `${chalk.bold(cleanCount)} file${plural} (${percentage}%) ${verb} cleaned.`;
	console.log(chalk[colour](message));
}