-
-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathwords.ts
76 lines (67 loc) · 1.64 KB
/
words.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import { readFileSync } from "fs";
import path from "path";
import { Bench } from "tinybench";
import { countWords } from "../src";
// @ts-ignore
import markdownTable from "markdown-table";
/**
 * Reads one benchmark corpus from the `data` directory that sits next to this
 * file and returns its contents decoded as UTF-8.
 */
const loadCorpus = (fileName: string): string =>
  readFileSync(path.join(__dirname, "data", fileName), "utf-8");

// Text of data/art-of-war.txt, loaded once at module start-up.
const artOfWar = loadCorpus("art-of-war.txt");

// Text of data/gulliver.txt, loaded once at module start-up.
const gulliverTravels = loadCorpus("gulliver.txt");
/**
 * Benchmarks `countWords` against a regex-split baseline on `text`, then
 * prints corpus statistics and a markdown throughput table to stdout.
 *
 * @param name - Heading printed above the report (a ":" is appended to it).
 * @param text - Corpus that both benchmark tasks process.
 * @returns A promise that resolves once the benchmark has run and the report
 *   has been printed.
 */
async function runBenchmark(name: string, text: string): Promise<void> {
  // Baseline splitter: a run of whitespace, or any single Han / Katakana /
  // Hiragana character or code point in U+3000–U+303F, acts as a separator.
  const REGEX =
    /\s+|[\p{Script=Han}\p{Script=Katakana}\p{Script=Hiragana}\u3000-\u303f]/u;
  // Divisor matching the original `/ 1024 / 1024 / 1024` used for the GB/s column.
  const BYTES_PER_GIB = 1024 * 1024 * 1024;

  const bench = new Bench({
    warmupIterations: 100,
  });
  bench
    .add("alfaaz", () => {
      countWords(text);
    })
    .add("regex", () => {
      text.split(REGEX).length;
    });
  await bench.run();

  // These values do not depend on the task, so compute them once instead of
  // once per table row. Buffer.byteLength yields the UTF-8 size without
  // allocating a copy of the whole corpus.
  const totalWords = countWords(text);
  const sizeInBytes = Buffer.byteLength(text, "utf-8");

  console.log(
    `**${name}:**\n\n`,
    "Total words:",
    totalWords,
    " \n",
    "File size (bytes):",
    sizeInBytes,
    " \n",
    "File length (chars):",
    text.length,
    " "
  );

  const rows: string[][] = [["Task name", "ops/s", "GB/s", "Words/s"]];
  for (const task of bench.tasks) {
    const { result } = task;
    // A task without a result has nothing to report; skipping it avoids
    // emitting an empty row into the table.
    if (!result) continue;
    rows.push([
      task.name,
      result.hz.toString(),
      // Use the unrounded rate so throughput agrees with the ops/s column and
      // does not collapse to 0 for tasks slower than 0.5 ops/s.
      ((sizeInBytes * result.hz) / BYTES_PER_GIB).toString(),
      // Every row uses the countWords total, so the rows stay comparable.
      Math.round(totalWords * result.hz).toString(),
    ]);
  }
  const results = markdownTable(rows);

  console.log();
  console.log(results);
  console.log();
}
/**
 * Script entry point: runs the benchmark once per corpus, one after another.
 *
 * @returns A promise that resolves after both reports have been printed.
 */
async function main(): Promise<void> {
  // Titles carry no trailing colon: runBenchmark appends one when it prints
  // the heading, so a colon here would render as "::".
  await runBenchmark("Count words (no CJK, only English)", gulliverTravels);
  await runBenchmark("Count words (CJK + English)", artOfWar);
}

// Handle rejection explicitly rather than leaving a floating promise: report
// the failure and signal it through the process exit code.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});