#!/bin/bash
# Automated benchmarks for the ugrep project, https://github.com/Genivia/ugrep
#
# Usage: <this script> [UGREP]
#   UGREP  ugrep command to benchmark, e.g. to use a specific version
#          (defaults to "ugrep")

# --- tools under test ---
# ugrep, or the first script argument when one is given
UG="${1:-ugrep}"
# ripgrep
RG="rg"
# silver searcher
AG="ag"
# GNU grep -E (egrep)
GG="egrep"

# --- corpora ---
# source code repositories to search, also as .zip, .tar and .tgz files
REPO1="corpi/openssl-3.1.2"
REPO2="corpi/swift-swift-5.8.1-RELEASE"
# large text file to search, also as .bz2, .gz, .lz4, .xz and .zst files
TEXT="corpi/enwik8"
# large JSON file with long lines to search
JSON="corpi/large.json"
# directory with word/string files for option -f
WORD="words"
# Report header, then detection of the tools to benchmark.
# Each tool must be an executable found by `command -v` and must identify
# itself in its --version output. For every tool found, a "found ..." line
# and its version banner (fenced by ~~~) are printed. Optional tools that are
# missing are replaced by "-".
# NOTE: `stat -f FORMAT` below is the BSD/macOS stat syntax.
echo "# performance report $(arch)"
echo

# ugrep is mandatory: stop with a failure status when it cannot be used
tool_path=$(command -v "$UG")
if [ -x "$tool_path" ] && $UG --version 2>&1 | grep -qw ugrep ; then
  # -L follows symlinks, consistent with the other tools below
  stat -L -f "found $UG %z byte executable located at %N" "$tool_path"
  echo '~~~'
  $UG --version
  echo '~~~'
  echo
else
  echo "$UG not found"
  exit 1
fi

# ripgrep (optional)
tool_path=$(command -v "$RG")
if [ -x "$tool_path" ] && $RG --version 2>&1 | grep -qw ripgrep ; then
  stat -L -f "found $RG %z byte executable located at %N" "$tool_path"
  echo '~~~'
  $RG --version
  echo '~~~'
  echo
else
  RG="-"
fi

# silver searcher (optional)
tool_path=$(command -v "$AG")
if [ -x "$tool_path" ] && $AG --version 2>&1 | grep -qw ag ; then
  stat -L -f "found $AG %z byte executable located at %N" "$tool_path"
  echo '~~~'
  $AG --version
  echo '~~~'
  echo
else
  AG="-"
fi

# GNU grep (optional): accept $GG only when it is GNU grep, otherwise fall
# back to ggrep, used as 'ggrep -E'
tool_path=$(command -v "$GG")
if [ -x "$tool_path" ] && $GG --version 2>&1 | grep -qw 'GNU grep' ; then
  stat -L -f "found $GG %z byte executable located at %N" "$tool_path"
  echo '~~~'
  $GG --version
  echo '~~~'
  echo
else
  tool_path=$(command -v ggrep)
  if [ -x "$tool_path" ] && ggrep --version 2>&1 | grep -qw 'GNU grep' ; then
    stat -L -f "found ggrep %z byte executable located at %N" "$tool_path"
    GG='ggrep -E'
    echo '~~~'
    # unquoted on purpose: $GG now holds a command plus its -E option
    $GG --version
    echo '~~~'
    echo
  else
    GG='-'
  fi
fi
# run search benchmark
#
# Prints the command line as a markdown code span, runs the command once
# untimed (cold run) to check that it succeeds, then runs it again (warm run)
# under `time -p` and prints the timing report. The command's own output is
# discarded in both runs.
#
# Arguments: $1   - command to run, or "-" to skip the benchmark silently
#            $2.. - arguments for the command; an empty argument is shown
#                   as ''
# Outputs:   the command line, then either the `time -p` report or
#            **ERROR!** when the cold run exits non-zero, then an empty line
function run {
  # loop helpers are local so they do not leak into the calling script
  local sep arg
  if [ "$1" != "-" ] ; then
    # normalize arguments printout
    sep='`'
    for arg in "$@" ; do
      if [[ -z "$arg" ]] ; then
        arg="''"
      fi
      printf '%s%s' "$sep" "$arg"
      sep=' '
    done
    echo '`'
    # cold run and check if OK, then warm run to time
    # NOTE(review): output goes through cat rather than straight to
    # /dev/null, presumably so the tools cannot detect /dev/null and skip
    # work -- confirm
    "$@" 2>&1 | cat > /dev/null
    # PIPESTATUS[0] is the exit status of the command itself, not of cat
    if [ "${PIPESTATUS[0]}" == 0 ] ; then
      # the timing report is written to stderr of the subshell, so it is
      # redirected to stdout to end up in the report
      ( time -p "$@" 2>&1 | cat > /dev/null ) 2>&1
    else
      echo '**ERROR!**'
    fi
    echo
  fi
}
# search benchmark provably fails, report it
#
# Prints the command line as a markdown code span followed by **ERROR!**,
# without running the command.
#
# Arguments: $1   - command, or "-" to print nothing at all
#            $2.. - arguments of the command; an empty argument is shown
#                   as ''
# Outputs:   the command line, **ERROR!** and an empty line
function fail {
  # loop helpers are local so they do not leak into the calling script
  local sep arg
  if [ "$1" != "-" ] ; then
    # normalize arguments printout
    sep='`'
    for arg in "$@" ; do
      if [[ -z "$arg" ]] ; then
        arg="''"
      fi
      printf '%s%s' "$sep" "$arg"
      sep=' '
    done
    echo '`'
    echo '**ERROR!**'
    echo
  fi
}
echo
echo "## large text file search"
echo
# patterns timed against $TEXT; the last two are the empty pattern and the
# empty-line pattern
text_patterns=(
  'rol'
  'the'
  'cycles|semigroups'
  'ro[a-z]*ds'
  'r[a-z]*st'
  '[A-Z][a-z]+ny'
  '[A-Z][a-z]{11}ny'
  '\w+ny'
  'ab(cd?)?'
  'x*y*z*'
  '(19|20)[0-9]{2}/(0[1-9]|1[012])|(0[1-9]|1[012])/(19|20)[0-9]{2}'
  '(https?://|www\.)[-a-zA-Z0-9@:%._+~#=]{1,253}\.[-a-zA-Z0-9]{2,}\.[][a-zA-Z0-9()@:%_+.~#?&/=\-]+'
  '[a-z]+-[a-z]+'
  ''
  '^$'
)
# option combinations applied to every pattern
text_options=('' '-n' '-no' '-wn' '-win' '-wino' '-c' '-wic' '-l' '-wil')
for pattern in "${text_patterns[@]}" ; do
  for opts in "${text_options[@]}" ; do
    printf '### grepping `%s`\n\n' "$opts '$pattern'"
    # $tool and $opts stay unquoted on purpose: a tool may carry its own
    # option (e.g. 'ggrep -E') and an empty option set must disappear
    for tool in "$UG" "$RG" "$AG" "$GG" ; do
      run $tool $opts "$pattern" $TEXT
    done
  done
done
echo
echo "## large text file search for words from files"
echo
for wordfile in $WORD/* ; do
  # GNU grep takes an hour on 4.txt, so for that file it is reported as
  # failed instead of being run
  case "$wordfile" in
    "$WORD/4.txt") gnu_grep_action=fail ;;
    *)             gnu_grep_action=run ;;
  esac
  for opts in '' '-n' '-no' '-wn' '-win' '-wino' '-c' '-wic' '-l' '-wil' ; do
    printf '### grepping `%s`\n\n' "$opts '-f$wordfile'"
    # $opts stays unquoted so that an empty option set disappears
    run $UG $opts -f"$wordfile" $TEXT
    run $RG $opts -f"$wordfile" $TEXT
    $gnu_grep_action $GG $opts -f"$wordfile" $TEXT
  done
done
echo
echo "## large text file search with formatted output"
echo
# a single pattern, timed once per output format option
pattern='Sherlock|Holmes'
for format_opt in '--json' '--csv' '--xml' '--hex' ; do
  printf '### grepping `%s`\n\n' "$format_opt '$pattern'"
  for tool in "$UG" "$RG" "$AG" ; do
    # $tool stays unquoted, matching how the tool variables are used elsewhere
    run $tool $format_opt "$pattern" $TEXT
  done
done
echo
echo "## large text file search with replaced output"
echo
printf '%s\n\n' '### grepping `--replace=flip flop`'
# search for "flop" with --replace=flip, using ugrep and ripgrep only
for tool in "$UG" "$RG" ; do
  run $tool "--replace=flip" flop $TEXT
done
echo
echo "## large text file search with context"
echo
# context options: 9 and 999 lines after (A), before (B) and around (C)
context_options=('-nA9' '-nB9' '-nC9' '-winA999' '-winB999' '-winC999')
for pattern in '^$' 'begin|end' ; do
  for opts in "${context_options[@]}" ; do
    printf '### grepping `%s`\n\n' "$opts '$pattern'"
    # $tool stays unquoted: a tool may carry its own option (e.g. 'ggrep -E')
    for tool in "$UG" "$RG" "$AG" "$GG" ; do
      run $tool $opts "$pattern" $TEXT
    done
  done
done
echo
echo "## large long lines JSON file search"
echo
# a single pattern, timed against $JSON with every option combination
pattern='abc[a-z0-9]+'
for opts in '' '-n' '-no' '-wn' '-win' '-wino' '-c' '-wic' '-l' '-wil' ; do
  printf '### grepping `%s`\n\n' "$opts '$pattern'"
  # $tool and $opts stay unquoted on purpose: a tool may carry its own
  # option (e.g. 'ggrep -E') and an empty option set must disappear
  for tool in "$UG" "$RG" "$AG" "$GG" ; do
    run $tool $opts "$pattern" $JSON
  done
done
echo
echo "## OpenSSL source code repo directory search"
echo
# The searches below take no path (or "."), so they act on the current
# directory: only run them when the repo directory could be entered.
# Otherwise they would be timed against the wrong directory and the popd
# would fail.
if pushd "$REPO1" ; then
  echo
  for REGEX in 'FIXME|TODO' 'char|int|long|size_t|void' 'ssl-?3(\.[0-9]+)?' ; do
    for OPTS in '-n' '-wn' '-win' '-wino' '-wic' '-wil' ; do
      echo '### grepping `'"$OPTS '$REGEX'"'`'
      echo
      run $UG -I $OPTS "$REGEX"
      run $RG $OPTS "$REGEX"
      # ag and GNU grep get the same options with "r" appended, presumably
      # to request a recursive search; kept in its own variable so the loop
      # variable is not modified
      ROPTS="${OPTS}r"
      run $AG $ROPTS "$REGEX"
      run $GG -I $ROPTS "$REGEX" .
    done
  done
  popd
else
  echo "cannot enter $REPO1, skipping its benchmarks" >&2
fi
echo
echo
echo "## Swift source code repo directory search"
echo
# The searches below take no path (or "."), so they act on the current
# directory: only run them when the repo directory could be entered.
# Otherwise they would be timed against the wrong directory and the popd
# would fail.
if pushd "$REPO2" ; then
  echo
  for REGEX in '_(RUN|LIB|NAM)[A-Z_]+' 'String|Int|Double|Array|Dictionary' '(class|struct)\sS[a-z]+T' 'for\s[a-z]+\sin' ; do
    for OPTS in '-n' '-wn' '-win' '-wino' '-wic' '-wil' ; do
      echo '### grepping `'"$OPTS '$REGEX'"'`'
      echo
      run $UG $OPTS "$REGEX"
      run $RG $OPTS "$REGEX"
      # ag and GNU grep get the same options with "r" appended, presumably
      # to request a recursive search; kept in its own variable so the loop
      # variable is not modified
      ROPTS="${OPTS}r"
      run $AG $ROPTS "$REGEX"
      run $GG $ROPTS "$REGEX" .
    done
  done
  popd
else
  echo "cannot enter $REPO2, skipping its benchmarks" >&2
fi
echo
# One report section per compression format of the large text file.
# Each entry is "name used in the section title:file extension".
for format in 'bz2:bz2' 'gz:gz' 'lz4:lz4' 'xz:xz' 'zstd:zst' ; do
  title=${format%%:*}
  ext=${format##*:}
  echo
  echo "## $title compressed large text file search"
  echo
  pattern='landsnail'
  for opts in '-z' '-zwin' '-zc' '-zwic' '-zl' '-zwil' ; do
    printf '### grepping `%s`\n\n' "$opts '$pattern'"
    # $tool stays unquoted, matching how the tool variables are used elsewhere
    for tool in "$UG" "$RG" "$AG" ; do
      run $tool $opts "$pattern" $TEXT.$ext
    done
  done
done
echo
echo "## zip archived repo search"
echo
# a single pattern, timed against the zip archive of the first repo
pattern='FIXME|TODO'
for opts in '-z' '-zwin' '-zc' '-zwic' '-zl' '-zwil' ; do
  printf '### grepping `%s`\n\n' "$opts '$pattern'"
  # $tool stays unquoted, matching how the tool variables are used elsewhere
  for tool in "$UG" "$RG" "$AG" ; do
    run $tool $opts "$pattern" $REPO1.zip
  done
done
echo
echo "## tar archived repo search"
echo
# a single pattern, timed against the tar archive of the first repo
pattern='FIXME|TODO'
for opts in '-z' '-zwin' '-zc' '-zwic' '-zl' '-zwil' ; do
  printf '### grepping `%s`\n\n' "$opts '$pattern'"
  run $UG $opts "$pattern" $REPO1.tar
  # ripgrep is not timed here but reported as failed: per the original
  # author's note, it searches the tar file as if it were a binary file
  # without exiting with an error
  fail $RG $opts "$pattern" $REPO1.tar
  run $AG $opts "$pattern" $REPO1.tar
done
echo
echo "## compressed tarball search"
echo
# a single pattern, timed against the compressed tarball of the first repo
pattern='FIXME|TODO'
for opts in '-z' '-zwin' '-zc' '-zwic' '-zl' '-zwil' ; do
  printf '### grepping `%s`\n\n' "$opts '$pattern'"
  run $UG $opts "$pattern" $REPO1.tgz
  # ripgrep is not timed here but reported as failed: per the original
  # author's note, it searches tar content as if it were a binary file
  # without exiting with an error
  fail $RG $opts "$pattern" $REPO1.tgz
  run $AG $opts "$pattern" $REPO1.tgz
done
# the report must end with a final ## section
printf '\n%s\n\n' '## end of report'