From 4dc623694f5c31a32758cb10765c1b1c6914b63f Mon Sep 17 00:00:00 2001 From: Noah Treuhaft Date: Fri, 16 Feb 2024 12:32:35 -0500 Subject: [PATCH] Remove zngio.Trailer and dependent "zed dev" subcommands (#5034) As of #4984, VNG no longer includes a zngio.Trailer. Remove it and the "zed dev dig section" and "zed dev dig trailer" commands, which depend on it. --- cmd/zed/dev/dig/section/command.go | 104 ------------------ cmd/zed/dev/dig/trailer/command.go | 86 --------------- cmd/zed/main.go | 2 - docs/formats/vng.md | 70 ------------ zio/zngio/trailer.go | 170 ----------------------------- 5 files changed, 432 deletions(-) delete mode 100644 cmd/zed/dev/dig/section/command.go delete mode 100644 cmd/zed/dev/dig/trailer/command.go delete mode 100644 zio/zngio/trailer.go diff --git a/cmd/zed/dev/dig/section/command.go b/cmd/zed/dev/dig/section/command.go deleted file mode 100644 index d3430b587a..0000000000 --- a/cmd/zed/dev/dig/section/command.go +++ /dev/null @@ -1,104 +0,0 @@ -package section - -import ( - "errors" - "flag" - "fmt" - "io" - "strconv" - - "github.com/brimdata/zed" - "github.com/brimdata/zed/cli/outputflags" - "github.com/brimdata/zed/cmd/zed/dev/dig" - "github.com/brimdata/zed/pkg/charm" - "github.com/brimdata/zed/pkg/storage" - "github.com/brimdata/zed/zio" - "github.com/brimdata/zed/zio/zngio" -) - -var Section = &charm.Spec{ - Name: "section", - Usage: "section [flags] number file", - Short: "extract a section of a sectioned Zed file", - Long: ` -The section command takes an integer section number and a file argument -(which must be a sectioned Zed file having a Zed trailer), -extracts the requested section of the file (where the section must be encoded -in the ZNG format) and outputs the section in any Zed format.`, - New: newCommand, -} - -func init() { - dig.Cmd.Add(Section) -} - -type Command struct { - *dig.Command - outputFlags outputflags.Flags -} - -func newCommand(parent charm.Command, f *flag.FlagSet) (charm.Command, error) { - c := &Command{Command: parent.(*dig.Command)} - c.outputFlags.SetFlags(f) - return c, nil -} - -func (c *Command) Run(args []string) error { - ctx, cleanup, err := c.Init(&c.outputFlags) - if err != nil { - return err - } - defer cleanup() - if len(args) != 2 { - return errors.New("two arguments required") - } - uri, err := storage.ParseURI(args[1]) - if err != nil { - return err - } - engine := storage.NewLocalEngine() - r, err := engine.Get(ctx, uri) - if err != nil { - return err - } - defer r.Close() - size, err := storage.Size(r) - if err != nil { - return err - } - trailer, err := zngio.ReadTrailer(r, size) - if err != nil { - return err - } - which, err := strconv.Atoi(args[0]) - if err != nil { - return fmt.Errorf("bad section number: %w", err) - } - reader, err := newSectionReader(r, which, trailer.Sections) - if err != nil { - return err - } - defer reader.Close() - writer, err := c.outputFlags.Open(ctx, engine) - if err != nil { - return err - } - if err := zio.Copy(writer, reader); err != nil { - writer.Close() - return err - } - return writer.Close() -} - -func newSectionReader(r io.ReaderAt, which int, sections []int64) (*zngio.Reader, error) { - if which >= len(sections) { - return nil, fmt.Errorf("section %d does not exist", which) - } - off := int64(0) - var k int - for ; k < which; k++ { - off += sections[k] - } - reader := io.NewSectionReader(r, off, sections[which]) - return zngio.NewReader(zed.NewContext(), reader), nil -} diff --git a/cmd/zed/dev/dig/trailer/command.go b/cmd/zed/dev/dig/trailer/command.go deleted file mode 100644 index dcad4ff5a1..0000000000 --- a/cmd/zed/dev/dig/trailer/command.go +++ /dev/null @@ -1,86 +0,0 @@ -package trailer - -import ( - "bytes" - "errors" - "flag" - - "github.com/brimdata/zed" - "github.com/brimdata/zed/cli/outputflags" - "github.com/brimdata/zed/cmd/zed/dev/dig" - "github.com/brimdata/zed/pkg/charm" - "github.com/brimdata/zed/pkg/storage" - "github.com/brimdata/zed/zio" - "github.com/brimdata/zed/zio/zngio" -) - -var Trailer = &charm.Spec{ - Name: "trailer", - Usage: "trailer file", - Short: "read a Zed trailer and output it as Zed", - Long: ` -The trailer command takes a file argument -(which must be a sectioned Zed file having a Zed trailer), -extracts the trailer from the sectioned file, and outputs the trailer in any Zed format. -`, - New: New, -} - -func init() { - dig.Cmd.Add(Trailer) -} - -type Command struct { - *dig.Command - outputFlags outputflags.Flags -} - -func MibToBytes(mib float64) int { - return int(mib * 1024 * 1024) -} - -func New(parent charm.Command, f *flag.FlagSet) (charm.Command, error) { - c := &Command{Command: parent.(*dig.Command)} - c.outputFlags.SetFlags(f) - return c, nil -} - -func (c *Command) Run(args []string) error { - ctx, cleanup, err := c.Init(&c.outputFlags) - if err != nil { - return err - } - defer cleanup() - if len(args) != 1 { - return errors.New("zed dev trailer: requires a single file argument") - } - uri, err := storage.ParseURI(args[0]) - if err != nil { - return err - } - engine := storage.NewLocalEngine() - r, err := engine.Get(ctx, uri) - if err != nil { - return err - } - defer r.Close() - size, err := storage.Size(r) - if err != nil { - return err - } - b, err := zngio.ReadTrailerAsBytes(r, size) - if err != nil { - return err - } - zr := zngio.NewReader(zed.NewContext(), bytes.NewReader(b)) - defer zr.Close() - writer, err := c.outputFlags.Open(ctx, engine) - if err != nil { - return err - } - err = zio.Copy(writer, zr) - if err2 := writer.Close(); err == nil { - err = err2 - } - return err -} diff --git a/cmd/zed/main.go b/cmd/zed/main.go index f04bd48da5..f02fea0410 100644 --- a/cmd/zed/main.go +++ b/cmd/zed/main.go @@ -12,9 +12,7 @@ import ( "github.com/brimdata/zed/cmd/zed/dev" _ "github.com/brimdata/zed/cmd/zed/dev/compile" _ "github.com/brimdata/zed/cmd/zed/dev/dig/frames" - _ "github.com/brimdata/zed/cmd/zed/dev/dig/section" _ "github.com/brimdata/zed/cmd/zed/dev/dig/slice" - _ "github.com/brimdata/zed/cmd/zed/dev/dig/trailer" _ "github.com/brimdata/zed/cmd/zed/dev/vector/agg" _ "github.com/brimdata/zed/cmd/zed/dev/vector/copy" _ "github.com/brimdata/zed/cmd/zed/dev/vector/project" diff --git a/docs/formats/vng.md b/docs/formats/vng.md index 09af044413..f358bbdcc7 100644 --- a/docs/formats/vng.md +++ b/docs/formats/vng.md @@ -397,73 +397,3 @@ gracie 0 === ``` -To see the detailed VNG structure described as ZSON, you can use the `vng` -command like this: -``` -zed dev dig section -Z 1 hello.vng -``` -which provides the Zed output (comments added with explanations): -``` -// First, all of the types of the encoded value sequence are declared -// with null values (just one here). - -null ({a:string,b:string}) - -// Then comes the root reassembly map. - -[ - { - offset: 29, - length: 2 (int32) - } -] - -// Finally comes the column assembly records. -// (Again, only one schema in this example, so only one such record.) - -{ - a: { - column: [ - { - offset: 0, - length: 16 (int32) - } - ], - presence: [] ([{offset:int64,length:int32}]) - }, - b: { - column: [ - { - offset: 16, - length: 13 (int32) - } - ], - presence: [] ([{offset:int64,length:int32}]) - } -} - -``` -The VNG trailer can be viewed with this command: -``` -zed dev dig trailer -Z hello.vng -``` -giving -``` -{ - magic: "ZNG Trailer", - type: "vng", - version: 2, - sections: [ - 31, - 95 - ], - meta: { - skew_thresh: 26214400, - segment_thresh: 5242880 - } (=vng.FileMeta) -} (=zngio.Trailer) -``` - -> Note finally, if there were 10MB of ZNG row data here, the reassembly section -> would be basically the same size, with perhaps a few segmaps. This emphasizes -> just how small this data structure is compared to the data section. diff --git a/zio/zngio/trailer.go b/zio/zngio/trailer.go deleted file mode 100644 index 99744102db..0000000000 --- a/zio/zngio/trailer.go +++ /dev/null @@ -1,170 +0,0 @@ -package zngio - -import ( - "bytes" - "encoding/binary" - "errors" - "fmt" - "io" - - "github.com/brimdata/zed" - "github.com/brimdata/zed/zson" -) - -var ErrTrailerNotFound = errors.New("trailer not found") - -const ( - Magic = "ZNG Trailer" - TrailerMaxSize = 4096 -) - -type Trailer struct { - Magic string `zed:"magic"` - Type string `zed:"type"` - Version int `zed:"version"` - Sections []int64 `zed:"sections"` - Meta zed.Value `zed:"meta"` -} - -func MarshalTrailer(typ string, version int, sections []int64, meta interface{}) (zed.Value, error) { - m := zson.NewZNGMarshaler() - m.Decorate(zson.StylePackage) - metaVal, err := m.Marshal(meta) - if err != nil { - return zed.Null, err - } - val, err := m.Marshal(&Trailer{ - Magic: Magic, - Type: typ, - Version: version, - Sections: sections, - Meta: metaVal, - }) - if err != nil { - return zed.Null, err - } - return val, nil -} - -func ReadTrailer(r io.ReaderAt, fileSize int64) (*Trailer, error) { - b, err := readTail(r, fileSize) - if err != nil { - return nil, err - } - trailer, _, err := findTrailer(b) - return trailer, err -} - -func ReadTrailerAsBytes(r io.ReaderAt, fileSize int64) ([]byte, error) { - b, err := readTail(r, fileSize) - if err != nil { - return nil, err - } - _, bytes, err := findTrailer(b) - return bytes, err -} - -func readTail(r io.ReaderAt, fileSize int64) ([]byte, error) { - n := fileSize - if n > TrailerMaxSize { - n = TrailerMaxSize - } - buf := make([]byte, n) - cc, err := r.ReadAt(buf, fileSize-n) - if err != nil { - return nil, err - } - if int64(cc) != n { - // This shouldn't happen but maybe could occur under a corner case - // or I/O problems. - return nil, fmt.Errorf("couldn't read trailer: expected %d bytes but read %d", n, cc) - } - return buf, nil -} - -// FindTrailer finds the last valid, EOS-terminated ZNG stream in the -// buffer provided. -func findTrailer(b []byte) (*Trailer, []byte, error) { - u := zson.NewZNGUnmarshaler() - err := ErrTrailerNotFound - off := len(b) - 1 - for { - off = findCandidate(b, off) - if off < 0 { - return nil, nil, err - } - if val := readTrailer(b[off:]); val != nil { - var trailer Trailer - uErr := u.Unmarshal(*val, &trailer) - if uErr == nil { - if trailer.Magic != Magic { - return nil, nil, errors.New("bad trailer magic") - } - return &trailer, b[off:], nil - } - // If unmarshal fails, keep looking for candidates but - // remember the error if we never succeed as we prefer this - // more specific unmarshaling error over ErrTrailerNotFound. - if err == ErrTrailerNotFound { - err = uErr - } - } - } -} - -func findCandidate(b []byte, off int) int { - for { - off-- - if off < 0 { - return -1 - } - if off == 0 || b[off-1] == EOS { - if ok := validStream(b, off); ok { - return off - } - } - } -} - -func readTrailer(b []byte) *zed.Value { - zr := NewReader(zed.NewContext(), bytes.NewReader(b)) - defer zr.Close() - val, _ := zr.Read() - return val -} - -func validStream(b []byte, off int) bool { - for off < len(b) { - code := b[off] - if code == EOS { - return true - } - if (code & 0x80) != 0 { - // Bad format - return false - } - typ := (code >> 4) & 3 - if typ == 3 { - // bad message block type - return false - } - len, ok := decodeLength(b[off:], code) - if !ok || len < 1 { - // len < 1 can loop forever or cause off < 0. - return false - } - off += len - } - return false -} - -func decodeLength(b []byte, code byte) (int, bool) { - if len(b) < 2 { - return 0, false - } - v, n := binary.Uvarint(b[1:]) - if n == 0 { - return 0, false - } - return ((int(v) << 4) | (int(code) & 0xf)) + n + 1, true -}