-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathdecode.go
303 lines (271 loc) · 8.61 KB
/
decode.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
// Copyright 2014 Google Inc. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ansi
import (
"bytes"
"errors"
"strings"
)
// An S contains a single decoded item: either a run of plain text or one
// escape sequence. The Type field identifies which:
//
//	"C0"  - Single Control Set 0 character 0x00 - 0x1f
//	"C1"  - Single Control Set 1 character ESC 0x40 - 0x5f / 0x80 - 0x9f
//	"ICF" - Independent Control Function ESC 0x60 - 0x7e
//	"ESC" - Other escape sequence
//	"CSI" - Standard ANSI Escape Sequence ESC [ ...
//	"CS"  - Control String
//	""    - String of regular text (no ESC or C1 characters)
//
// CSI escape sequences may contain some number of parameters. The parsed
// parameters are provided in Params. Control Strings, such as OSC, have the
// string stored in Params[0]. Some C1 sequences take additional bytes of
// data following the sequence, such as SS2 and SS3. In this case the
// additional data is in Params[0].
type S struct {
// Code is the escape sequence without its parameters. For plain text
// (Type == "") it holds the text itself.
Code Name
// Type is one of the type strings listed above.
Type string
// Params holds the parameters, control string, or trailing data of the
// sequence. It is nil when the sequence carries none.
Params []string
}
// String reconstructs the textual form of s.
//
// Plain text (an empty Type) and sequences for which s.Code.S() returns nil
// are rendered as the raw bytes of s.Code. Otherwise the escape sequence is
// rebuilt from the definition returned by s.Code.S(), so a sequence decoded
// from its single byte form is rendered in its multi-byte form. Parameters
// are joined with ";" and positioned according to the kind of sequence.
func (s *S) String() string {
	if s.Type == "" {
		return string(s.Code)
	}
	def := s.Code.S()
	if def == nil {
		return string(s.Code)
	}

	prefix := string(def.Type)
	code := string(def.Code)
	args := strings.Join(s.Params, ";")

	// C1 sequences carry their data after the sequence itself.
	if s.Type == "C1" {
		return prefix + code + args
	}
	// Control strings carry their payload after the introducer and are
	// closed with ST.
	if len(s.Code) > 1 && lookup[s.Code[1]]&sos == sos {
		return prefix + code + args + string(ST)
	}
	// All remaining sequences place the parameters between the prefix and
	// the code.
	return prefix + args + code
}
// Attribute bits stored in the lookup table. Each constant occupies its own
// bit so attributes can be tested with a mask.
const (
	// sos marks a byte that, when it follows ESC, starts a control string.
	sos = 1 << iota
	// st marks the byte that, when it follows ESC, terminates a control
	// string.
	st
)
// lookup maps a byte to its attribute bits (sos, st). Bytes that are not
// listed have no attributes. The entries are keyed by the byte that follows
// ESC in the two byte form of each function, in ascending byte order.
var lookup = [256]byte{
	'P':  sos, // DCS: ESC P
	'X':  sos, // SOS: ESC X
	'\\': st,  // ST:  ESC \
	']':  sos, // OSC: ESC ]
	'^':  sos, // PM:  ESC ^
	'_':  sos, // APC: ESC _
}
// Errors reported by Decode while scanning the bytes of a sequence.
var (
	// LoneEscape is returned when the input ends with a bare ESC.
	LoneEscape = errors.New("escape at end of input")
	// UnknownEscape is returned for a sequence Decode cannot classify.
	UnknownEscape = errors.New("unknown escape sequence")
	// NoST is returned when a control string is not closed by ST.
	NoST = errors.New("control string missing string terminator")
	// FoundSOS is returned when a control string contains a start of string.
	FoundSOS = errors.New("start of string encountered in control string")
	// IncompleteCSI is returned when a control sequence has no final byte.
	IncompleteCSI = errors.New("incomplete control sequence")
)

// Errors reported by Decode while validating the parameter count of a
// recognized control sequence.
var (
	// ExtraParameters is returned when more parameters are supplied than
	// the function accepts.
	ExtraParameters = errors.New("too many parameters for function")
	// MissingParameters is returned when fewer parameters are supplied
	// than the function requires.
	MissingParameters = errors.New("not enough parameters for function")
)
// ms builds a Name out of the bytes passed as arguments.
func ms(in ...byte) Name {
	return Name(in)
}
// Decode decodes the next sequence in in, returning the bytes following the
// sequence, the sequence s, and any possible error.
//
// If in is empty, Decode returns nil for all three results. Otherwise s is
// never nil, even when err is not nil. Leading text that contains neither an
// ESC byte nor a byte in the range 0x80 - 0x9f is returned as a single S with
// an empty Type. Single byte C1 sequences are expanded to two byte sequences.
//
// The error, when not nil, is one of the fixed error values declared by this
// package (LoneEscape, UnknownEscape, NoST, FoundSOS, IncompleteCSI,
// ExtraParameters or MissingParameters).
func Decode(in []byte) (out []byte, s *S, err error) {
// Empty input is the only case in which s is nil.
if len(in) == 0 {
return nil, nil, nil
}
// TODO(borman): should we split out C0 characters?
// If the first byte is not an ESC or C1 code then return everything
// up to the first ESC or C1 code. The test c&0xe0 == 0x80 matches the
// single byte C1 range 0x80 - 0x9f.
for x, c := range in {
if c == '\033' || (c&0xe0 == 0x80) {
if x > 0 {
return in[x:], &S{Code: Name(in[:x])}, nil
}
goto EscapeSequence
}
}
// If we get here the entire string has no escape sequences.
return nil, &S{Code: Name(in)}, nil
EscapeSequence:
// Some codes require additional post-processing. For example,
// SS2 and SS3 also gobble up the next character, which we
// return as a parameter.
//
// Deferred functions run in reverse order, so this one runs after the
// parameter processing deferred below. Both operate on the named
// results, which every return statement below has already assigned.
defer func() {
if s == nil {
return
}
if f := Specials[s.Code]; f != nil {
out = f(out, s)
}
}()
// Any parameters (or control string) found is stored in params.
// Upon exit we convert it to a slice of parameters. In the case
// of a control string there will be at most one parameter.
// If params is nil then there are no parameters. If params is
// empty then there is exactly one empty parameter.
var params []byte
defer func() {
if s == nil {
return
}
if params != nil {
// 5.4.1: The Parameter Bytes are bit combinations from
// 03/00 to 03/15. The parameter string is interpreted
// as follows:
//
// a) If the first bit combination of the parameter
// string is in the range 03/00 to 03/11, the parameter
// string is interpreted according to the format
// described in 5.4.2.
//
// b) If the first bit combination of the parameter
// string is in the range 03/12 to 03/15, the parameter
// string is available for private (or experimental)
// use. Its format and meaning are not defined by this
// Standard.
//
// 5.4.2:
//
// b) Each parameter sub-string consists of one or more
// bit combinations from 03/00 to 03/10; the bit
// combinations from 03/00 to 03/09 represent the digits
// ZERO to NINE; bit combination 03/10 may be used as a
// separator in a parameter sub-string, for example, to
// separate the fractional part of a decimal number from
// the integer part of that number.
//
// c) Parameter sub-strings are separated by one bit
// combination 03/11.
//
// Only a parameter string that is not a control string and
// whose first byte is at most 0x3b (03/11) is split on ";".
// Anything else is kept whole as a single parameter.
if len(params) > 0 && s.Type != "CS" && params[0] <= 0x3b {
s.Params = strings.Split(string(params), ";")
} else {
s.Params = []string{string(params)}
}
}
// Only CSI sequences require additional processing.
// If our code is the bare CSI (no final byte was appended) then we
// don't know how to validate the parameters (and there are no
// defaults).
if s.Type != "CSI" || s.Code == CSI {
return
}
// Look up the definition of this sequence; sequences without a
// Table entry are returned as decoded, without validation.
c := Table[s.Code]
if c == nil {
return
}
// Validate the number of parameters we were given. An error set
// here replaces the error supplied by the return statement. The
// empty NParam == -1 case skips the upper bound check.
switch {
case c.NParam == 0 && len(s.Params) > 0:
err = ExtraParameters
case len(s.Params) < c.MinParam:
err = MissingParameters
case c.NParam == -1:
case len(s.Params) > c.NParam:
err = ExtraParameters
}
// Now fill in default parameters: a missing trailing parameter is
// appended and an empty one is replaced by its default. Because a
// missing parameter is appended on each pass, len(s.Params) is
// never less than x and s.Params[x] is always in range.
for x, d := range c.Defaults {
if len(s.Params) == x {
s.Params = append(s.Params, d)
} else if s.Params[x] == "" {
s.Params[x] = d
}
}
// Add in empty missing parameters
for len(s.Params) < c.MinParam {
s.Params = append(s.Params, "")
}
}()
// Classify the sequence by its leading bytes. Every case other than the
// two CSI cases returns directly; the CSI cases strip the introducer
// and fall out of the switch into the CSI parser below.
switch {
// Single byte CSI ('['+0x40 is 0x9b)
case in[0] == '['+0x40:
in = in[1:]
// Single byte C1 (not an escape); rewritten as ESC followed by the
// byte less 0x40.
case in[0] != '\033':
return in[1:], &S{Code: ms('\033', in[0]-0x40), Type: "C1"}, nil
// Lone escape
case len(in) == 1:
return in[1:], &S{Code: ms('\033'), Type: "C0"}, LoneEscape
// Two byte CSI
case in[1] == '[':
in = in[2:]
// Start of control string
// Should terminate with ST
// Cannot contain SOS
case (lookup[in[1]] & sos) == sos:
// t is the offset, relative to in[2:], of the terminating ESC \.
t := bytes.Index(in[2:], []byte{'\033', '\\'})
// This err shadows the named result; it is handed back explicitly
// by the return statement at the end of this case.
var err error
if t < 0 {
// No terminator: the control string runs to the end of the input.
t = len(in) - 2
err = NoST
}
d := in[2 : 2+t]
// An embedded ESC X (SOS) truncates the control string at that point.
if i := bytes.Index(d, []byte{'\033', 'X'}); i >= 0 {
d = d[:i]
t = i
err = FoundSOS
}
params = d
// If we are not returning NoST then we need to consume
// the two bytes that terminated us
// Question: should we consume an SOS?
if err != NoST {
t += 2
}
return in[2+t:], &S{Code: ms(in[:2]...), Type: "CS"}, err
// Two byte C1 string
case in[1] >= 0x40 && in[1] <= 0x5f:
return in[2:], &S{Code: ms(in[:2]...), Type: "C1"}, nil
// Two byte independent control function
case in[1] >= 0x60 && in[1] <= 0x7e:
return in[2:], &S{Code: ms(in[:2]...), Type: "ICF"}, nil
// Unknown escape sequence type
default:
return in[2:], &S{Code: ms(in[:2]...), Type: "ESC"}, UnknownEscape
}
// a CSI sequence has been encountered. The sequence consists of
// 0 or more parameters bytes (0x30 - 0x3f) followed by
// 0 or more intermediate bytes (0x20 - 0x2f) followed by
// 1 final byte (0x40 - 0x7e)
s = &S{Type: "CSI", Code: CSI}
// collect up all the parameter bytes into params; params stays nil when
// there are none
for x, c := range in {
if c < 0x30 || c > 0x3f {
if x > 0 {
params = in[:x]
}
in = in[x:]
goto Next
}
}
// this sequence prematurely terminated (no intermediate or final bytes)
if len(in) > 0 {
params = in
}
return nil, s, IncompleteCSI
Next:
// collect up all the intermediate bytes (0x20 - 0x2f); they are appended
// to s.Code together with the byte that ends the sequence
for x, c := range in {
// Normal escape sequence
if c >= 0x40 && c <= 0x7e {
s.Code += Name(in[:x+1])
return in[x+1:], s, nil
}
// Neither a final byte nor an intermediate byte: the offending byte
// is consumed and reported as part of the code.
if c < 0x20 || c > 0x2f {
s.Code += Name(in[:x+1])
return in[x+1:], s, UnknownEscape
}
}
// There was nothing following the intermediate bytes
s.Code += Name(in)
return nil, s, IncompleteCSI
}