-
Notifications
You must be signed in to change notification settings - Fork 0
/
readers.go
226 lines (208 loc) · 6.91 KB
/
readers.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
package iohelper
import (
"bufio"
"bytes"
"encoding/csv"
"fmt"
"io"
"reflect"
)
// LineNumReportingCsvReader wraps std lib `*csv.Reader` and exposes the current line number.
type LineNumReportingCsvReader struct {
*csv.Reader
numLineFieldName string
}
// LineNum returns the current line number
func (r *LineNumReportingCsvReader) LineNum() int {
// Given the request to add line num report api on csv.Reader is denied
// per https://github.com/golang/go/issues/26679, let's use this hack to get
// the line number for our internal use.
value := reflect.ValueOf(*r).FieldByName(r.numLineFieldName)
if !value.IsValid() {
panic(fmt.Sprintf(
"unable to get '%s' from csv.Reader, has csv.Reader been changed/upgraded?", r.numLineFieldName))
}
return int(value.Int())
}
// NewLineNumReportingCsvReader creates a new `*LineNumReportingCsvReader`.
func NewLineNumReportingCsvReader(r io.Reader) *LineNumReportingCsvReader {
return &LineNumReportingCsvReader{csv.NewReader(r), "numLine"}
}
// BytesReplacingReader allows transparent replacement of a given token during read operation.
type BytesReplacingReader struct {
r io.Reader
search []byte
searchLen int
replace []byte
replaceLen int
lenDelta int // = replaceLen - searchLen. can be negative
err error
buf []byte
buf0, buf1 int // buf[0:buf0]: bytes already processed; buf[buf0:buf1] bytes read in but not yet processed.
max int // because we need to replace 'search' with 'replace', this marks the max bytes we can read into buf
}
const defaultBufSize = int(4096)
// NewBytesReplacingReader creates a new `*BytesReplacingReader`.
// `search` cannot be nil/empty. `replace` can.
func NewBytesReplacingReader(r io.Reader, search, replace []byte) *BytesReplacingReader {
return (&BytesReplacingReader{}).Reset(r, search, replace)
}
func max(a, b int) int {
if a > b {
return a
}
return b
}
// Reset allows reuse of a previous allocated `*BytesReplacingReader` for buf allocation optimization.
// `search` cannot be nil/empty. `replace` can.
func (r *BytesReplacingReader) Reset(r1 io.Reader, search1, replace1 []byte) *BytesReplacingReader {
if r1 == nil {
panic("io.Reader cannot be nil")
}
if len(search1) == 0 {
panic("search token cannot be nil/empty")
}
r.r = r1
r.search = search1
r.searchLen = len(search1)
r.replace = replace1
r.replaceLen = len(replace1)
r.lenDelta = r.replaceLen - r.searchLen // could be negative
r.err = nil
bufSize := max(defaultBufSize, max(r.searchLen, r.replaceLen))
if r.buf == nil || len(r.buf) < bufSize {
r.buf = make([]byte, bufSize)
}
r.buf0 = 0
r.buf1 = 0
r.max = len(r.buf)
if r.searchLen < r.replaceLen {
// If len(search) < len(replace), then we have to assume the worst case:
// what's the max bound value such that if we have consecutive 'search' filling up
// the buf up to buf[:max], and all of them are placed with 'replace', and the final
// result won't end up exceed the len(buf)?
r.max = (len(r.buf) / r.replaceLen) * r.searchLen
}
return r
}
// Read implements the `io.Reader` interface.
func (r *BytesReplacingReader) Read(p []byte) (int, error) {
n := 0
for {
if r.buf0 > 0 {
n = copy(p, r.buf[0:r.buf0])
r.buf0 -= n
r.buf1 -= n
if r.buf1 == 0 && r.err != nil {
return n, r.err
}
copy(r.buf, r.buf[n:r.buf1+n])
return n, nil
} else if r.err != nil {
return 0, r.err
}
n, r.err = r.r.Read(r.buf[r.buf1:r.max])
if n > 0 {
r.buf1 += n
for {
index := bytes.Index(r.buf[r.buf0:r.buf1], r.search)
if index < 0 {
r.buf0 = max(r.buf0, r.buf1-r.searchLen+1)
break
}
index += r.buf0
copy(r.buf[index+r.replaceLen:r.buf1+r.lenDelta], r.buf[index+r.searchLen:r.buf1])
copy(r.buf[index:index+r.replaceLen], r.replace)
r.buf0 = index + r.replaceLen
r.buf1 += r.lenDelta
}
}
if r.err != nil {
r.buf0 = r.buf1
}
}
}
// ReadLine reads in a single line from a bufio.Reader.
func ReadLine(r *bufio.Reader) (string, error) {
// Turns out even with various bufio.Reader.Read???() and bufio.Scanner, there is not simple clean
// way of reading a single text line in:
// - bufio.ReadSlice('\n') doesn't have '\r' dropping. We want a line returned without neither '\n' nor '\r'.
// - bufio.ReadLine() drops '\r' and '\n', but has a fixed buf so may be unable to read a whole line in one call.
// - bufio.ReadBytes no buf size issue, but doesn't offer '\r' and '\n' cleanup.
// - bufio.ReadString essentially the same as bufio.ReadBytes.
// - bufio.Scanner deals with '\r' and '\n' but has fixed buf issue.
// Oh, come on!!
//
// Also found net/textproto's Reader.ReadLine() which meets all the requirements. But to use it
// we need to create yet another type of Reader (net.textproto.Reader), as if the
// io.Reader -> bufio.Reader isn't enough for us. So decided instead, just shamelessly copy
// net.textproto.Reader.ReadLine() here, credit goes to
// https://github.com/golang/go/blob/master/src/net/textproto/reader.go. However its test code
// coverage is lacking, so create all the new test cases for this ReadLine implementation copy.
var line []byte
for {
l, more, err := r.ReadLine()
if err != nil {
return "", err
}
// Avoid the copy if the first call produced a full line.
if line == nil && !more {
return string(l), nil
}
line = append(line, l...)
if !more {
break
}
}
return string(line), nil
}
const bom = '\uFEFF'
// StripBOM returns a new io.Reader that, if needed, strips away the BOM (byte order marker) of
// the input io.Reader.
func StripBOM(reader io.Reader) (io.Reader, error) {
br := bufio.NewReader(reader)
r, _, err := br.ReadRune()
switch {
case err == io.EOF:
// This is to handle empty file, can't call UnreadRune(), will meet ErrInvalidUnreadRune as
// b.lastRuneSize is -1. So simply reset buffer io.
br.Reset(reader)
return br, nil
case err != nil:
return nil, err
case r == bom:
return br, nil
default:
// Here we shouldn't meet any error during unread rune.
_ = br.UnreadRune()
return br, nil
}
}
// LineCountingReader wraps an io.Reader and reports currently which line the reader is at. Note
// the LineAt starts a 1.
type LineCountingReader struct {
r io.Reader
line int
}
// Read implements the `io.Reader` interface.
func (r *LineCountingReader) Read(p []byte) (n int, err error) {
n, err = r.r.Read(p)
// We can do this byte search and count because '\n' < utf8.RuneSelf.
for i := 0; i < n; i++ {
// On Windows a line ends with "\r\n" on Linux/Unix/MacOS it ends
// with "\n". Either way, searching for "\n" for is good enough for
// line counting purpose.
if p[i] == '\n' {
r.line++
}
}
return n, err
}
// AtLine returns the current line number. Note it starts with 1.
func (r *LineCountingReader) AtLine() int {
return r.line
}
// NewLineCountingReader creates new LineCountingReader wrapping around an input io.Reader.
func NewLineCountingReader(r io.Reader) *LineCountingReader {
return &LineCountingReader{r: r, line: 1}
}