-
Notifications
You must be signed in to change notification settings - Fork 6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add Image.Extent() interface #38
Changes from all commits
a4bc0dd
5864483
55fbff0
1982b2b
3358492
89056c6
f7d8676
c3792ed
82009e9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,20 @@ | ||
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow= | ||
github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4= | ||
github.com/cheggaaa/pb/v3 v3.1.5 h1:QuuUzeM2WsAqG2gMqtzaWithDJv0i+i6UlnwSCI4QLk= | ||
github.com/cheggaaa/pb/v3 v3.1.5/go.mod h1:CrxkeghYTXi1lQBEI7jSn+3svI3cuc19haAj6jM60XI= | ||
github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs= | ||
github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw= | ||
github.com/klauspost/compress v1.16.5 h1:IFV2oUNUzZaz+XyusxpLzpzS8Pt5rh0Z16For/djlyI= | ||
github.com/klauspost/compress v1.16.5/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= | ||
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= | ||
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= | ||
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= | ||
github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= | ||
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= | ||
github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= | ||
github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= | ||
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= | ||
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= | ||
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= | ||
golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ= | ||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
package main | ||
|
||
import ( | ||
"bufio" | ||
"encoding/json" | ||
"errors" | ||
"flag" | ||
"fmt" | ||
"os" | ||
|
||
"github.com/lima-vm/go-qcow2reader" | ||
"github.com/lima-vm/go-qcow2reader/log" | ||
) | ||
|
||
func cmdMap(args []string) error { | ||
var ( | ||
// Required | ||
filename string | ||
|
||
// Options | ||
debug bool | ||
) | ||
|
||
fs := flag.NewFlagSet("map", flag.ExitOnError) | ||
fs.Usage = func() { | ||
fmt.Fprintf(fs.Output(), "Usage: %s map [OPTIONS...] FILE\n", os.Args[0]) | ||
flag.PrintDefaults() | ||
} | ||
fs.BoolVar(&debug, "debug", false, "enable printing debug messages") | ||
if err := fs.Parse(args); err != nil { | ||
return err | ||
} | ||
|
||
if debug { | ||
log.SetDebugFunc(logDebug) | ||
} | ||
|
||
switch len(fs.Args()) { | ||
case 0: | ||
return errors.New("no file was specified") | ||
case 1: | ||
filename = fs.Arg(0) | ||
default: | ||
return errors.New("too many files were specified") | ||
} | ||
|
||
f, err := os.Open(filename) | ||
if err != nil { | ||
return err | ||
} | ||
defer f.Close() | ||
|
||
img, err := qcow2reader.Open(f) | ||
if err != nil { | ||
return err | ||
} | ||
defer img.Close() | ||
|
||
writer := bufio.NewWriter(os.Stdout) | ||
encoder := json.NewEncoder(writer) | ||
|
||
var start int64 | ||
end := img.Size() | ||
for start < end { | ||
extent, err := img.Extent(start, end-start) | ||
if err != nil { | ||
return err | ||
} | ||
encoder.Encode(extent) | ||
start += extent.Length | ||
} | ||
return writer.Flush() | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,18 +6,29 @@ import ( | |
"fmt" | ||
"io" | ||
"sync" | ||
|
||
"github.com/lima-vm/go-qcow2reader/image" | ||
) | ||
|
||
// The size of the buffer used to read data from non-zero extents of the image. | ||
// For best performance, the size should be aligned to the image cluster size or | ||
// the file system block size. | ||
const BufferSize = 1024 * 1024 | ||
|
||
// Smaller value may increase the overhead of synchornizing multiple works. | ||
// To maxmimze performance we use multiple goroutines to read data from the | ||
// source image, decompress data, and write data to the target image. To | ||
// schedule the work to multiple goroutines, the image is split to multiple | ||
// segments, each processed by a single worker goroutine. | ||
// | ||
// Smaller value may increase the overhead of synchornizing multiple workers. | ||
// Larger value may be less efficient for smaller images. The default value | ||
// gives good results for the lima default Ubuntu image. | ||
// gives good results for the lima default Ubuntu image. Must be aligned to | ||
// BufferSize. | ||
const SegmentSize = 32 * BufferSize | ||
|
||
// For best I/O throughput we want to have enough in-flight requests, regardless | ||
// of number of cores. For best decompression we want to use one worker per | ||
// core, but too many workers is less effective. The default value gives good | ||
// core, but too many workers are less effective. The default value gives good | ||
// results with lima default Ubuntu image. | ||
const Workers = 8 | ||
|
||
|
@@ -67,6 +78,14 @@ func (o *Options) Validate() error { | |
return nil | ||
} | ||
|
||
// Updater is an interface for tracking conversion progress. | ||
type Updater interface { | ||
// Called from multiple goroutines after a byte range of length was converted. | ||
// If the conversion is successfu, the total number of bytes will be the image | ||
// virtual size. | ||
Update(n int64) | ||
} | ||
|
||
type Converter struct { | ||
// Read only after starting. | ||
size int64 | ||
|
@@ -129,10 +148,13 @@ func (c *Converter) reset(size int64) { | |
c.offset = 0 | ||
} | ||
|
||
// Convert copy size bytes from io.ReaderAt to io.WriterAt. Unallocated areas or | ||
// areas full of zeros in the source are keep unallocated in the destination. | ||
// The destination must be new empty or full of zeroes. | ||
func (c *Converter) Convert(wa io.WriterAt, ra io.ReaderAt, size int64) error { | ||
// Convert copy size bytes from image to io.WriterAt. Unallocated extents in the | ||
// source image or read data which is all zeros are converted to unallocated | ||
// byte range in the target image. The target image must be new empty file or a | ||
// file full of zeroes. To get a sparse target image, the image must be a new | ||
// empty file, since Convert does not punch holes for zero ranges even if the | ||
// underlying file system supports hole punching. | ||
func (c *Converter) Convert(wa io.WriterAt, img image.Image, size int64, progress Updater) error { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is possible, but even if we do we accept now an image instead of a ReaderAt and size. Also progress scope is one convert, while convert instance can convert many images using the same options, so it is better to accept progress per convert operation. The options are not something you change per image, it may change depending on the machine (e.g number of cores). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Another way to accept options to convert(), and use the zero value of the converter: var c convert.Converter
c.Convert(target, src, convert.Options{Progress: bar}) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There other options that we may want to add in the future that may be per image options, so moving the options to Convert makes sense anyway. |
||
c.reset(size) | ||
|
||
zero := make([]byte, c.bufferSize) | ||
|
@@ -151,40 +173,64 @@ func (c *Converter) Convert(wa io.WriterAt, ra io.ReaderAt, size int64) error { | |
} | ||
|
||
for start < end { | ||
// The last read may be shorter. | ||
n := len(buf) | ||
if end-start < int64(len(buf)) { | ||
n = int(end - start) | ||
// Get next extent in this segment. | ||
nirs marked this conversation as resolved.
Show resolved
Hide resolved
|
||
extent, err := img.Extent(start, end-start) | ||
if err != nil { | ||
c.setError(err) | ||
return | ||
} | ||
if extent.Zero { | ||
start += extent.Length | ||
nirs marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if progress != nil { | ||
progress.Update(extent.Length) | ||
} | ||
continue | ||
} | ||
|
||
// Read more data. | ||
nr, err := ra.ReadAt(buf[:n], start) | ||
if err != nil { | ||
if !errors.Is(err, io.EOF) { | ||
c.setError(err) | ||
return | ||
// Consume data from this extent. | ||
for extent.Length > 0 { | ||
// The last read may be shorter. | ||
n := len(buf) | ||
if extent.Length < int64(len(buf)) { | ||
n = int(extent.Length) | ||
} | ||
|
||
// EOF for the last read of the last segment is expected, but since we | ||
// read exactly size bytes, we should never get a zero read. | ||
if nr == 0 { | ||
c.setError(errors.New("unexpected EOF")) | ||
return | ||
// Read more data. | ||
nr, err := img.ReadAt(buf[:n], start) | ||
if err != nil { | ||
if !errors.Is(err, io.EOF) { | ||
c.setError(err) | ||
return | ||
} | ||
|
||
// EOF for the last read of the last segment is expected, but since we | ||
// read exactly size bytes, we should never get a zero read. | ||
if nr == 0 { | ||
c.setError(errors.New("unexpected EOF")) | ||
return | ||
} | ||
} | ||
} | ||
|
||
// If the data is all zeros we skip it to create a hole. Otherwise | ||
// write the data. | ||
if !bytes.Equal(buf[:nr], zero[:nr]) { | ||
if nw, err := wa.WriteAt(buf[:nr], start); err != nil { | ||
c.setError(err) | ||
return | ||
} else if nw != nr { | ||
c.setError(fmt.Errorf("read %d, but wrote %d bytes", nr, nw)) | ||
return | ||
// If the data is all zeros we skip it to create a hole. Otherwise | ||
// write the data. | ||
if !bytes.Equal(buf[:nr], zero[:nr]) { | ||
if nw, err := wa.WriteAt(buf[:nr], start); err != nil { | ||
c.setError(err) | ||
return | ||
} else if nw != nr { | ||
c.setError(fmt.Errorf("read %d, but wrote %d bytes", nr, nw)) | ||
return | ||
} | ||
} | ||
|
||
if progress != nil { | ||
progress.Update(int64(nr)) | ||
} | ||
|
||
extent.Length -= int64(nr) | ||
extent.Start += int64(nr) | ||
start += int64(nr) | ||
nirs marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
start += int64(nr) | ||
} | ||
} | ||
}() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,10 +8,28 @@ import ( | |
// Type must be a "Backing file format name string" that appears in QCOW2. | ||
type Type string | ||
|
||
// Extent describes a byte range in the image with the same allocation, | ||
// compression, or zero status. Extents are aligned to the underlying file | ||
// system block size (e.g. 4k), or the image format cluster size (e.g. 64k). One | ||
// extent can describe one or more file system blocks or image clusters. | ||
type Extent struct { | ||
nirs marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Offset from start of the image in bytes. | ||
Start int64 `json:"start"` | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not strictly needed, since when calling Next(start, length) we always return the requested start value. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removing Start makes testing easier and eliminate the error of invalid extents list. When consuming the extents we keep current offset externally so we don't really need it. When printing extents having start is helpful. If we don't report it we have to add another type with Start for encoding json. |
||
// Length of this extent in bytes. | ||
Length int64 `json:"length"` | ||
// Set if this extent is allocated. | ||
Allocated bool `json:"allocated"` | ||
// Set if this extent is read as zeros. | ||
Zero bool `json:"zero"` | ||
// Set if this extent is compressed. | ||
Compressed bool `json:"compressed"` | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Allocated and Compressed are not needed for sparse copy, only Zero is needed. They can be useful for other purposes, for example dumping all extents like |
||
} | ||
|
||
// Image implements [io.ReaderAt] and [io.Closer]. | ||
type Image interface { | ||
io.ReaderAt | ||
io.Closer | ||
Extent(start, length int64) (Extent, error) | ||
Type() Type | ||
Size() int64 // -1 if unknown | ||
Readable() error | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can n be negative?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, but this is the type the go and the kernel use for file size (off_t). It is easier to use when we can use Extent.Length when updating.
We an add a check for n < 0 in the progress bar example implementation.