-
Notifications
You must be signed in to change notification settings - Fork 0
/
ansi2html.sh
executable file
·332 lines (293 loc) · 11.6 KB
/
ansi2html.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
#!/bin/sh
# Convert ANSI (terminal) colours and attributes to HTML
# Author:
# http://www.pixelbeat.org/docs/terminal_colours/
# Examples:
# ls -l --color=always | ansi2html.sh > ls.html
# git show --color | ansi2html.sh > last_change.html
# Generally one can use the `script` util to capture full terminal output.
# Changes:
# V0.1, 24 Apr 2008, Initial release
# V0.2, 01 Jan 2009, Phil Harnish <[email protected]>
# Support `git diff --color` output by
# matching ANSI codes that specify only
# bold or background colour.
# Support `ls --color` output by stripping
# redundant leading 0s from ANSI codes.
# Support `grep --color=always` by stripping
# unhandled ANSI codes (specifically ^[[K).
# V0.3, 20 Mar 2009, http://eexpress.blog.ubuntu.org.cn/
# Remove cat -v usage which mangled non ascii input.
# Cleanup regular expressions used.
# Support other attributes like reverse, ...
# Correctly nest <span> tags (even across lines).
# Add a command line option to use a dark background.
# Strip more terminal control codes.
# V0.4, 17 Sep 2009, [email protected]
# Handle codes with combined attributes and color.
# Handle isolated <bold> attributes with css.
# Strip more terminal control codes.
# V0.13, 05 Mar 2012
# http://github.com/pixelb/scripts/commits/master/scripts/ansi2html.sh
if [ "$1" = "--version" ]; then
printf '0.13\n' && exit
fi
if [ "$1" = "--help" ]; then
printf '%s\n' \
'This utility converts ANSI codes in data passed to stdin
It has 2 optional parameters:
--bg=dark --palette=linux|solarized|tango|xterm
E.g.: ls -l --color=always | ansi2html.sh --bg=dark > ls.html' >&2
exit
fi
[ "$1" = "--bg=dark" ] && { dark_bg=yes; shift; }
if [ "$1" = "--palette=solarized" ]; then
# See http://ethanschoonover.com/solarized
P0=073642; P1=D30102; P2=859900; P3=B58900;
P4=268BD2; P5=D33682; P6=2AA198; P7=EEE8D5;
P8=002B36; P9=CB4B16; P10=586E75; P11=657B83;
P12=839496; P13=6C71C4; P14=93A1A1; P15=FDF6E3;
shift;
elif [ "$1" = "--palette=solarized-xterm" ]; then
# Above mapped onto the xterm 256 color palette
P0=262626; P1=AF0000; P2=5F8700; P3=AF8700;
P4=0087FF; P5=AF005F; P6=00AFAF; P7=E4E4E4;
P8=1C1C1C; P9=D75F00; P10=585858; P11=626262;
P12=808080; P13=5F5FAF; P14=8A8A8A; P15=FFFFD7;
shift;
elif [ "$1" = "--palette=tango" ]; then
# Gnome default
P0=000000; P1=CC0000; P2=4E9A06; P3=C4A000;
P4=3465A4; P5=75507B; P6=06989A; P7=D3D7CF;
P8=555753; P9=EF2929; P10=8AE234; P11=FCE94F;
P12=729FCF; P13=AD7FA8; P14=34E2E2; P15=EEEEEC;
shift;
elif [ "$1" = "--palette=xterm" ]; then
P0=000000; P1=CD0000; P2=00CD00; P3=CDCD00;
P4=0000EE; P5=CD00CD; P6=00CDCD; P7=E5E5E5;
P8=7F7F7F; P9=FF0000; P10=00FF00; P11=FFFF00;
P12=5C5CFF; P13=FF00FF; P14=00FFFF; P15=FFFFFF;
shift;
else # linux console
P0=000000; P1=AA0000; P2=00AA00; P3=AA5500;
P4=0000AA; P5=AA00AA; P6=00AAAA; P7=AAAAAA;
P8=555555; P9=FF5555; P10=55FF55; P11=FFFF55;
P12=5555FF; P13=FF55FF; P14=55FFFF; P15=FFFFFF;
[ "$1" = "--palette=linux" ] && shift
fi
[ "$1" = "--bg=dark" ] && { dark_bg=yes; shift; }
printf '%s' "<html>
<head>
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/>
<style type=\"text/css\">
.ef0,.f0 { color: #$P0; } .eb0,.b0 { background-color: #$P0; }
.ef1,.f1 { color: #$P1; } .eb1,.b1 { background-color: #$P1; }
.ef2,.f2 { color: #$P2; } .eb2,.b2 { background-color: #$P2; }
.ef3,.f3 { color: #$P3; } .eb3,.b3 { background-color: #$P3; }
.ef4,.f4 { color: #$P4; } .eb4,.b4 { background-color: #$P4; }
.ef5,.f5 { color: #$P5; } .eb5,.b5 { background-color: #$P5; }
.ef6,.f6 { color: #$P6; } .eb6,.b6 { background-color: #$P6; }
.ef7,.f7 { color: #$P7; } .eb7,.b7 { background-color: #$P7; }
.ef8, .f0 > .bold,.bold > .f0 { color: #$P8; font-weight: normal; }
.ef9, .f1 > .bold,.bold > .f1 { color: #$P9; font-weight: normal; }
.ef10,.f2 > .bold,.bold > .f2 { color: #$P10; font-weight: normal; }
.ef11,.f3 > .bold,.bold > .f3 { color: #$P11; font-weight: normal; }
.ef12,.f4 > .bold,.bold > .f4 { color: #$P12; font-weight: normal; }
.ef13,.f5 > .bold,.bold > .f5 { color: #$P13; font-weight: normal; }
.ef14,.f6 > .bold,.bold > .f6 { color: #$P14; font-weight: normal; }
.ef15,.f7 > .bold,.bold > .f7 { color: #$P15; font-weight: normal; }
.eb8 { background-color: #$P8; }
.eb9 { background-color: #$P9; }
.eb10 { background-color: #$P10; }
.eb11 { background-color: #$P11; }
.eb12 { background-color: #$P12; }
.eb13 { background-color: #$P13; }
.eb14 { background-color: #$P14; }
.eb15 { background-color: #$P15; }
"
# The default xterm 256 colour palette
for red in $(seq 0 5); do
for green in $(seq 0 5); do
for blue in $(seq 0 5); do
c=$((16 + ($red * 36) + ($green * 6) + $blue))
r=$((($red * 40 + 55) * ($red > 0)))
g=$((($green * 40 + 55) * ($green > 0)))
b=$((($blue * 40 + 55) * ($blue > 0)))
printf ".ef%d { color: #%2.2x%2.2x%2.2x; } " $c $r $g $b
printf ".eb%d { background-color: #%2.2x%2.2x%2.2x; }\n" $c $r $g $b
done
done
done
for gray in $(seq 0 23); do
c=$(($gray+232))
l=$(($gray*10 + 8))
printf ".ef%d { color: #%2.2x%2.2x%2.2x; } " $c $l $l $l
printf ".eb%d { background-color: #%2.2x%2.2x%2.2x; }\n" $c $l $l $l
done
printf '%s' '
.f9 { color: '`[ "$dark_bg" ] && printf "#$P7;" || printf "#$P0;"`' }
.b9 { background-color: #'`[ "$dark_bg" ] && printf $P0 || printf $P15`'; }
.f9 > .bold,.bold > .f9, body.f9 > pre > .bold {
/* Bold is heavy black on white, or bright white
depending on the default background */
color: '`[ "$dark_bg" ] && printf "#$P15;" || printf "#$P0;"`'
font-weight: '`[ "$dark_bg" ] && printf 'normal;' || printf 'bold;'`'
}
.reverse {
/* CSS doesnt support swapping fg and bg colours unfortunately,
so just hardcode something that will look OK on all backgrounds. */
'"color: #$P0; background-color: #$P7;"'
}
.underline { text-decoration: underline; }
.line-through { text-decoration: line-through; }
.blink { text-decoration: blink; }
</style>
</head>
<body class="f9 b9">
<pre>
'
p='\x1b\[' #shortcut to match escape codes
P="\(^[^°]*\)¡$p" #expression to match prepended codes below
# Handle various xterm control sequences.
# See /usr/share/doc/xterm-*/ctlseqs.txt
sed "
s#\x1b[^\x1b]*\x1b\\\##g # strip anything between \e and ST
s#\x1b][0-9]*;[^\a]*\a##g # strip any OSC (xterm title etc.)
#handle carriage returns
s#^.*\r\{1,\}\([^$]\)#\1#
s#\r\$## # strip trailing \r
# strip other non SGR escape sequences
s#[\x07]##g
s#\x1b[]>=\][0-9;]*##g
s#\x1bP+.\{5\}##g
s#${p}[0-9;?]*[^0-9;?m]##g
#remove backspace chars and what they're backspacing over
:rm_bs
s#[^\x08]\x08##g; t rm_bs
" |
# Normalize the input before transformation
sed "
# escape HTML
s#\&#\&#g; s#>#\>#g; s#<#\<#g; s#\"#\"#g
# normalize SGR codes a little
# split 256 colors out and mark so that they're not
# recognised by the following 'split combined' line
:e
s#${p}\([0-9;]\{1,\}\);\([34]8;5;[0-9]\{1,3\}\)m#${p}\1m${p}¬\2m#g; t e
s#${p}\([34]8;5;[0-9]\{1,3\}\)m#${p}¬\1m#g;
:c
s#${p}\([0-9]\{1,\}\);\([0-9;]\{1,\}\)m#${p}\1m${p}\2m#g; t c # split combined
s#${p}0\([0-7]\)#${p}\1#g #strip leading 0
s#${p}1m\(\(${p}[4579]m\)*\)#\1${p}1m#g #bold last (with clr)
s#${p}m#${p}0m#g #add leading 0 to norm
# undo any 256 color marking
s#${p}¬\([34]8;5;[0-9]\{1,3\}\)m#${p}\1m#g;
# map 16 color codes to color + bold
s#${p}9\([0-7]\)m#${p}3\1m${p}1m#g;
s#${p}10\([0-7]\)m#${p}4\1m${p}1m#g;
# change 'reset' code to a single char, and prepend a single char to
# other codes so that we can easily do negative matching, as sed
# does not support look behind expressions etc.
s#°#\°#g; s#${p}0m#°#g
s#¡#\¡#g; s#${p}[0-9;]*m#¡&#g
" |
# Convert SGR sequences to HTML
sed "
:ansi_to_span # replace ANSI codes with CSS classes
t ansi_to_span # hack so t commands below only apply to preceeding s cmd
/^[^¡]*°/ { b span_end } # replace 'reset code' if no preceeding code
# common combinations to minimise html (optional)
s#${P}3\([0-7]\)m¡${p}4\([0-7]\)m#\1<span class=\"f\2 b\3\">#;t span_count
s#${P}4\([0-7]\)m¡${p}3\([0-7]\)m#\1<span class=\"f\3 b\2\">#;t span_count
s#${P}1m#\1<span class=\"bold\">#; t span_count
s#${P}4m#\1<span class=\"underline\">#; t span_count
s#${P}5m#\1<span class=\"blink\">#; t span_count
s#${P}7m#\1<span class=\"reverse\">#; t span_count
s#${P}9m#\1<span class=\"line-through\">#; t span_count
s#${P}3\([0-9]\)m#\1<span class=\"f\2\">#; t span_count
s#${P}4\([0-9]\)m#\1<span class=\"b\2\">#; t span_count
s#${P}38;5;\([0-9]\{1,3\}\)m#\1<span class=\"ef\2\">#; t span_count
s#${P}48;5;\([0-9]\{1,3\}\)m#\1<span class=\"eb\2\">#; t span_count
s#${P}[0-9;]*m#\1#g; t ansi_to_span # strip unhandled codes
b # next line of input
# add a corresponding span end flag
:span_count
x; s/^/s/; x
b ansi_to_span
# replace 'reset code' with correct number of </span> tags
:span_end
x
/^s/ {
s/^.//
x
s#°#</span>°#
b span_end
}
x
s#°##
b ansi_to_span
" |
# Convert alternative character set
# Note we convert here, as if we do at start we have to worry about avoiding
# conversion of SGR codes etc., whereas doing here we only have to
# avoid conversions of stuff between &...; or <...>
#
# Note we could use sed to do this based around:
# sed 'y/abcdefghijklmnopqrstuvwxyz{}`~/▒␉␌␍␊°±␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·/'
# However that would be very awkward as we need to only conv some input.
# The basic scheme that we do in the python script below is:
# 1. enable transliterate once ¡ char seen
# 2. disable once µ char seen (may be on diff line to ¡)
# 3. never transliterate between &; or <> chars
sed "
# change 'smacs' and 'rmacs' to a single char so that we can easily do
# negative matching, as sed does not support look behind expressions etc.
# Note we don't use ° like above as that's part of the alternate charset.
s#\x1b(0#¡#g;
s#µ#\µ#g; s#\x1b(B#µ#g
" |
(
python -c "
# vim:fileencoding=utf8
import sys
import locale
encoding=locale.getpreferredencoding()
old='abcdefghijklmnopqrstuvwxyz{}\`~'
new='▒␉␌␍␊°±␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·'
new=unicode(new, 'utf-8')
table=range(128)
for o,n in zip(old, new): table[ord(o)]=n
(STANDARD, ALTERNATIVE, HTML_TAG, HTML_ENTITY) = (0, 1, 2, 3)
state = STANDARD
last_mode = STANDARD
for c in unicode(sys.stdin.read(), encoding):
if state == HTML_TAG:
if c == '>':
state = last_mode
elif state == HTML_ENTITY:
if c == ';':
state = last_mode
else:
if c == '<':
state = HTML_TAG
elif c == '&':
state = HTML_ENTITY
elif c == u'¡' and state == STANDARD:
state = ALTERNATIVE
last_mode = ALTERNATIVE
continue
elif c == u'µ' and state == ALTERNATIVE:
state = STANDARD
last_mode = STANDARD
continue
elif state == ALTERNATIVE:
c = c.translate(table)
sys.stdout.write(c.encode(encoding))
" 2>/dev/null ||
sed 's/[¡µ]//g' # just strip aternative flag chars
)
printf '</pre>
</body>
</html>\n'