forked from CorentinB/warc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspooled.go
203 lines (163 loc) · 3.85 KB
/
spooled.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
package warc
// The following code is heavily inspired by: https://github.com/tgulacsi/go/blob/master/temp/memfile.go
import (
"bytes"
"errors"
"fmt"
"io"
"os"
"sync"
)
// MaxInMemorySize is the default in-memory threshold, in bytes
// (1,000,000 bytes, i.e. 1 MB). A spooled temp file whose buffered
// content would grow past this size is moved to a file on disk.
const MaxInMemorySize = 1000 * 1000
// spooledPool recycles the *bytes.Buffer values used as in-memory
// storage, so that short-lived spooled files do not allocate a fresh
// buffer each time. Buffers are expected to be Reset before being Put back.
var spooledPool = sync.Pool{
	New: func() interface{} {
		return new(bytes.Buffer)
	},
}
// ReaderAt is the interface wrapping the ReadAt method: it reads into p
// starting at byte offset off and reports the number of bytes read and
// any error. The method signature matches the standard io.ReaderAt.
type ReaderAt interface {
ReadAt(p []byte, off int64) (n int, err error)
}
// ReadSeekCloser groups io.Reader, io.Seeker and io.Closer with ReaderAt,
// and adds FileName, which reports the name of the backing file
// (implementations in this file return "" when there is none).
type ReadSeekCloser interface {
	io.ReadSeekCloser
	ReaderAt
	FileName() string
}
// spooledTempFile accumulates written data in memory and moves it to a
// temporary file once it would exceed maxInMemorySize (or immediately
// when fullOnDisk is set). The temporary file is removed on Close.
type spooledTempFile struct {
	// Configuration, fixed at construction time.
	filePrefix      string // prefix of the temporary file name
	tempDir         string // directory handed to os.CreateTemp
	maxInMemorySize int    // byte threshold above which data goes to disk
	fullOnDisk      bool   // when true, the first Write already goes to disk

	// Storage: at most one of buf/file holds the written data.
	buf  *bytes.Buffer // in-memory storage while writing; nil once spilled or closed
	mem  *bytes.Reader // reader over buf's bytes, created when reading starts
	file *os.File      // on-disk storage; nil while data still fits in memory

	// Lifecycle flags.
	reading bool // transitions at most once from false -> true
	closed  bool // set by Close
}
// ReadWriteSeekCloser is a ReadSeekCloser (Read, Seek, ReadAt, Close and
// FileName) that can additionally be written to through io.Writer.
type ReadWriteSeekCloser interface {
	io.Writer
	ReadSeekCloser
}
// NewSpooledTempFile returns a ReadWriteSeekCloser that keeps data in a
// pooled memory buffer and switches to a temporary file when needed.
//
// filePrefix is the prefix of the temporary file's name and tempDir the
// directory it is created in. When fullOnDisk is true, data is written to
// disk from the first Write instead of being buffered in memory.
//
// Important constraint: the value is write-then-read. As soon as Read,
// ReadAt or Seek has been called, Write must not be called anymore.
func NewSpooledTempFile(filePrefix string, tempDir string, fullOnDisk bool) ReadWriteSeekCloser {
	s := new(spooledTempFile)
	s.buf = spooledPool.Get().(*bytes.Buffer)
	s.maxInMemorySize = MaxInMemorySize
	s.filePrefix = filePrefix
	s.tempDir = tempDir
	s.fullOnDisk = fullOnDisk
	return s
}
// prepareRead switches s into read mode the first time it is called.
// It returns io.EOF if s is already closed. On the first call it either
// rewinds the backing file to its start or, when the data is still in
// memory, creates a reader over the buffered bytes. Later calls are no-ops.
func (s *spooledTempFile) prepareRead() error {
	if s.closed {
		return io.EOF
	}

	onDisk := s.file != nil
	if s.reading {
		// Already in read mode with a usable source: nothing to do.
		if onDisk || s.buf == nil || s.mem != nil {
			return nil
		}
	}
	s.reading = true

	if !onDisk {
		s.mem = bytes.NewReader(s.buf.Bytes())
		return nil
	}

	// Data lives on disk: start reading from the beginning of the file.
	if _, err := s.file.Seek(0, io.SeekStart); err != nil {
		return fmt.Errorf("file=%v: %w", s.file, err)
	}
	return nil
}
// Read implements io.Reader. It first makes sure s is in read mode
// (see prepareRead) and then reads from the backing file if there is
// one, otherwise from the in-memory reader.
func (s *spooledTempFile) Read(p []byte) (n int, err error) {
	if err = s.prepareRead(); err != nil {
		return 0, err
	}
	if s.file == nil {
		return s.mem.Read(p)
	}
	return s.file.Read(p)
}
// ReadAt implements ReaderAt. It first makes sure s is in read mode
// (see prepareRead) and then reads len(p) bytes at offset off from the
// backing file if there is one, otherwise from the in-memory reader.
func (s *spooledTempFile) ReadAt(p []byte, off int64) (n int, err error) {
	if err = s.prepareRead(); err != nil {
		return 0, err
	}
	switch {
	case s.file != nil:
		return s.file.ReadAt(p, off)
	default:
		return s.mem.ReadAt(p, off)
	}
}
// Seek implements io.Seeker. It first makes sure s is in read mode
// (see prepareRead) and then delegates to the backing file if there is
// one, otherwise to the in-memory reader.
func (s *spooledTempFile) Seek(offset int64, whence int) (int64, error) {
	prepErr := s.prepareRead()
	if prepErr != nil {
		return 0, prepErr
	}
	if s.file == nil {
		return s.mem.Seek(offset, whence)
	}
	return s.file.Seek(offset, whence)
}
// Write implements io.Writer.
//
// Data is appended to the in-memory buffer until the buffer would exceed
// maxInMemorySize (or immediately when fullOnDisk is set); at that point
// the buffered bytes are moved to a temporary file created in tempDir and
// all further writes go to that file.
//
// It returns io.EOF if s is closed, and an error if Read, ReadAt or Seek
// has already been called on s. If moving the buffer to disk fails, the
// partially written temporary file is removed and the buffered data is
// kept in memory, so s stays in a consistent state.
func (s *spooledTempFile) Write(p []byte) (n int, err error) {
	if s.closed {
		return 0, io.EOF
	}
	if s.reading {
		return 0, errors.New("write after read")
	}
	if s.file != nil {
		return s.file.Write(p)
	}

	if !s.fullOnDisk && s.buf.Len()+len(p) <= s.maxInMemorySize {
		return s.buf.Write(p)
	}

	f, createErr := os.CreateTemp(s.tempDir, s.filePrefix+"-")
	if createErr != nil {
		return 0, createErr
	}

	// Copy the buffered bytes without draining the buffer, so that a
	// failure here does not lose data that was already accepted.
	if _, err = f.Write(s.buf.Bytes()); err != nil {
		// Best-effort cleanup; the write error is the one worth reporting.
		_ = f.Close()
		_ = os.Remove(f.Name())
		return 0, err
	}

	// The file now owns the data: release the buffer back to the pool.
	s.file = f
	s.buf.Reset()
	spooledPool.Put(s.buf)
	s.buf = nil

	// On failure the file is kept: it still holds everything written so
	// far and will be removed by Close.
	return s.file.Write(p)
}
// Close marks s as closed, returns the in-memory buffer (if any) to the
// pool, and closes and deletes the temporary file (if any).
//
// The error from closing the file is discarded; only a failure to remove
// the file is reported, and a file that no longer exists is not treated
// as an error. When removal fails, the file handle is kept on s.
func (s *spooledTempFile) Close() error {
	s.closed = true
	s.mem = nil

	if b := s.buf; b != nil {
		s.buf = nil
		b.Reset()
		spooledPool.Put(b)
	}

	f := s.file
	if f == nil {
		return nil
	}

	f.Close()
	switch err := os.Remove(f.Name()); {
	case err == nil, errors.Is(err, os.ErrNotExist):
		s.file = nil
		return nil
	default:
		return err
	}
}
// FileName returns the name of the backing temporary file, or the empty
// string when no file is currently attached to s.
func (s *spooledTempFile) FileName() string {
	if s.file == nil {
		return ""
	}
	return s.file.Name()
}