From 23295495f86485f00f12d89eaea864bb0a4d2dce Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Tue, 5 Sep 2023 07:56:50 -0500 Subject: [PATCH 1/2] Omit empty next slab ID in encoded array data slab Currently, we omit empty next slab ID in encoded root data slabs because next slab ID is always empty in root data slabs. However, next slab ID is also empty for non-root data slabs if the non-root data slab is the last data slab. This commit sets hasNextSlabID flag during encoding and only encodes non-empty next slab ID for array data slab. This change saves 16 bytes for the last non-root data slabs. Also, we don't special case the omission of next slab ID in root slabs. NOTE: omission of empty next slab ID doesn't affect slab size computation which is used for slab operations, such as splitting and merging. This commit is an optimization during slab encoding. --- array.go | 19 +++++++++---- array_debug.go | 75 ++++++++++++++++++++++++++++++++----------------- array_test.go | 4 +-- storage_test.go | 4 +-- 4 files changed, 64 insertions(+), 38 deletions(-) diff --git a/array.go b/array.go index d6c50294..4e546717 100644 --- a/array.go +++ b/array.go @@ -417,16 +417,17 @@ func newArrayDataSlabFromDataV1( var extraData *ArrayExtraData var next SlabID - // Decode header + // Decode extra data if h.isRoot() { - // Decode extra data extraData, data, err = newArrayExtraDataFromData(data, decMode, decodeTypeInfo) if err != nil { // err is categorized already by newArrayExtraDataFromData. return nil, err } - } else { - // Decode next slab ID + } + + // Decode next slab ID + if h.hasNextSlabID() { next, err = NewSlabIDFromRawBytes(data) if err != nil { // error returned from NewSlabIDFromRawBytes is categorized already. @@ -516,6 +517,10 @@ func (a *ArrayDataSlab) Encode(enc *Encoder) error { h.setHasPointers() } + if a.next != SlabIDUndefined { + h.setHasNextSlabID() + } + if a.extraData != nil { h.setRoot() } @@ -534,8 +539,10 @@ func (a *ArrayDataSlab) Encode(enc *Encoder) error { // err is already categorized by ArrayExtraData.Encode(). return err } - } else { - // Encode next slab ID to scratch + } + + // Encode next slab ID + if a.next != SlabIDUndefined { n, err := a.next.ToRawBytes(enc.Scratch[:]) if err != nil { // Don't need to wrap because err is already categorized by SlabID.ToRawBytes(). diff --git a/array_debug.go b/array_debug.go index dc4eeaa2..64cf0a07 100644 --- a/array_debug.go +++ b/array_debug.go @@ -446,17 +446,15 @@ func validArraySlabSerialization( } // Extra check: encoded data size == header.size - encodedExtraDataSize, err := getEncodedArrayExtraDataSize(slab.ExtraData(), cborEncMode) + encodedSlabSize, err := computeSlabSize(data) if err != nil { - // Don't need to wrap error as external error because err is already categorized by getEncodedArrayExtraDataSize(). + // Don't need to wrap error as external error because err is already categorized by computeSlabSize(). return err } - // Need to exclude extra data size from encoded data size. - encodedSlabSize := uint32(len(data) - encodedExtraDataSize) - if slab.Header().size != encodedSlabSize { - return NewFatalError(fmt.Errorf("slab %d encoded size %d != header.size %d (encoded extra data size %d)", - id, encodedSlabSize, slab.Header().size, encodedExtraDataSize)) + if slab.Header().size != uint32(encodedSlabSize) { + return NewFatalError(fmt.Errorf("slab %d encoded size %d != header.size %d", + id, encodedSlabSize, slab.Header().size)) } // Compare encoded data of original slab with encoded data of decoded slab @@ -640,25 +638,6 @@ func arrayExtraDataEqual(expected, actual *ArrayExtraData) error { return nil } -func getEncodedArrayExtraDataSize(extraData *ArrayExtraData, cborEncMode cbor.EncMode) (int, error) { - if extraData == nil { - return 0, nil - } - - var buf bytes.Buffer - enc := NewEncoder(&buf, cborEncMode) - - // Normally the flag shouldn't be 0. But in this case we just need the encoded data size - // so the content of the flag doesn't matter. - err := extraData.Encode(enc) - if err != nil { - // Don't need to wrap error as external error because err is already categorized by ArrayExtraData.Encode(). - return 0, err - } - - return len(buf.Bytes()), nil -} - func ValidValueSerialization( value Value, cborDecMode cbor.DecMode, @@ -690,3 +669,47 @@ func ValidValueSerialization( } return nil } + +func computeSlabSize(data []byte) (int, error) { + if len(data) < versionAndFlagSize { + return 0, NewDecodingError(fmt.Errorf("data is too short")) + } + + h, err := newHeadFromData(data[:versionAndFlagSize]) + if err != nil { + return 0, NewDecodingError(err) + } + + slabExtraDataSize, err := getExtraDataSize(h, data[versionAndFlagSize:]) + if err != nil { + return 0, err + } + + // Computed slab size (slab header size): + // - excludes slab extra data size + // - adds next slab ID for non-root data slab if not encoded + size := len(data) - slabExtraDataSize + + isDataSlab := h.getSlabArrayType() == slabArrayData || + h.getSlabMapType() == slabMapData || + h.getSlabMapType() == slabMapCollisionGroup + + if !h.isRoot() && isDataSlab && !h.hasNextSlabID() { + size += slabIDSize + } + + return size, nil +} + +func getExtraDataSize(h head, data []byte) (int, error) { + if h.isRoot() { + dec := cbor.NewStreamDecoder(bytes.NewBuffer(data)) + b, err := dec.DecodeRawBytes() + if err != nil { + return 0, NewDecodingError(err) + } + return len(b), nil + } + + return 0, nil +} diff --git a/array_test.go b/array_test.go index 88b261ef..cc3a44b2 100644 --- a/array_test.go +++ b/array_test.go @@ -1899,7 +1899,7 @@ func TestArrayEncodeDecode(t *testing.T) { // (data slab) next: 3, data: [aaaaaaaaaaaaaaaaaaaaaa ... aaaaaaaaaaaaaaaaaaaaaa] id2: { // version - 0x10, + 0x12, // array data slab flag 0x00, // next slab id @@ -1924,8 +1924,6 @@ func TestArrayEncodeDecode(t *testing.T) { 0x10, // array data slab flag 0x40, - // next slab id - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // CBOR encoded array head (fixed size 3 byte) 0x99, 0x00, 0x0b, // CBOR encoded array elements diff --git a/storage_test.go b/storage_test.go index 1e16141a..40a4e6c8 100644 --- a/storage_test.go +++ b/storage_test.go @@ -927,7 +927,7 @@ func TestPersistentStorageSlabIterator(t *testing.T) { // (data slab) next: 3, data: [aaaaaaaaaaaaaaaaaaaaaa ... aaaaaaaaaaaaaaaaaaaaaa] id2: { // version - 0x10, + 0x12, // array data slab flag 0x00, // next slab id @@ -952,8 +952,6 @@ func TestPersistentStorageSlabIterator(t *testing.T) { 0x10, // array data slab flag 0x40, - // next slab id - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // CBOR encoded array head (fixed size 3 byte) 0x99, 0x00, 0x0b, // CBOR encoded array elements From cbc820490d2d813e623ba8f3ed86df7dd0195552 Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Tue, 5 Sep 2023 08:21:27 -0500 Subject: [PATCH 2/2] Omit empty next slab ID in encoded map data slab Currently, we omit empty next slab ID in encoded root data slabs because next slab ID is always empty in root data slabs. However, next slab ID is also empty for non-root data slabs if the non-root data slab is the last data slab. This commit sets hasNextSlabID flag during encoding and only encodes non-empty next slab ID for map data slab. This change saves 16 bytes for the last non-root data slabs. Also, we don't special case the omission of next slab ID in root slabs. NOTE: omission of empty next slab ID doesn't affect slab size computation which is used for slab operations, such as splitting and merging. This commit is an optimization during slab encoding. --- map.go | 22 ++++++++++++++-------- map_debug.go | 29 +++++------------------------ map_test.go | 11 +++-------- 3 files changed, 22 insertions(+), 40 deletions(-) diff --git a/map.go b/map.go index 0821dc92..328b20c8 100644 --- a/map.go +++ b/map.go @@ -2200,20 +2200,21 @@ func newMapDataSlabFromDataV1( var extraData *MapExtraData var next SlabID - // Decode header + // Decode extra data if h.isRoot() { - // Decode extra data extraData, data, err = newMapExtraDataFromData(data, decMode, decodeTypeInfo) if err != nil { // Don't need to wrap error as external error because err is already categorized by newMapExtraDataFromData(). return nil, err } - } else { + } + + // Decode next slab ID + if h.hasNextSlabID() { if len(data) < slabIDSize { return nil, NewDecodingErrorf("data is too short for map data slab") } - // Decode next slab ID next, err = NewSlabIDFromRawBytes(data) if err != nil { // Don't need to wrap error as external error because err is already categorized by NewSlabIDFromRawBytes(). @@ -2291,6 +2292,10 @@ func (m *MapDataSlab) Encode(enc *Encoder) error { h.setHasPointers() } + if m.next != SlabIDUndefined { + h.setHasNextSlabID() + } + if m.anySize { h.setNoSizeLimit() } @@ -2305,16 +2310,17 @@ func (m *MapDataSlab) Encode(enc *Encoder) error { return NewEncodingError(err) } - // Encode header + // Encode extra data if m.extraData != nil { - // Encode extra data err = m.extraData.Encode(enc) if err != nil { // Don't need to wrap error as external error because err is already categorized by MapExtraData.Encode(). return err } - } else { - // Encode next slab ID to scratch + } + + // Encode next slab ID + if m.next != SlabIDUndefined { n, err := m.next.ToRawBytes(enc.Scratch[:]) if err != nil { // Don't need to wrap error as external error because err is already categorized by SlabID.ToRawBytes(). diff --git a/map_debug.go b/map_debug.go index 7954b403..051b7acb 100644 --- a/map_debug.go +++ b/map_debug.go @@ -849,18 +849,16 @@ func validMapSlabSerialization( } // Extra check: encoded data size == header.size - encodedExtraDataSize, err := getEncodedMapExtraDataSize(slab.ExtraData(), cborEncMode) + encodedSlabSize, err := computeSlabSize(data) if err != nil { - // Don't need to wrap error as external error because err is already categorized by getEncodedMapExtraDataSize(). + // Don't need to wrap error as external error because err is already categorized by computeSlabSize(). return err } - // Need to exclude extra data size from encoded data size. - encodedSlabSize := uint32(len(data) - encodedExtraDataSize) - if slab.Header().size != encodedSlabSize { + if slab.Header().size != uint32(encodedSlabSize) { return NewFatalError( - fmt.Errorf("slab %d encoded size %d != header.size %d (encoded extra data size %d)", - id, encodedSlabSize, slab.Header().size, encodedExtraDataSize)) + fmt.Errorf("slab %d encoded size %d != header.size %d", + id, encodedSlabSize, slab.Header().size)) } // Compare encoded data of original slab with encoded data of decoded slab @@ -1357,20 +1355,3 @@ func mapExtraDataEqual(expected, actual *MapExtraData) error { return nil } - -func getEncodedMapExtraDataSize(extraData *MapExtraData, cborEncMode cbor.EncMode) (int, error) { - if extraData == nil { - return 0, nil - } - - var buf bytes.Buffer - enc := NewEncoder(&buf, cborEncMode) - - err := extraData.Encode(enc) - if err != nil { - // Don't need to wrap error as external error because err is already categorized by MapExtraData.Encode(). - return 0, err - } - - return len(buf.Bytes()), nil -} diff --git a/map_test.go b/map_test.go index e325e1d3..ed4619b6 100644 --- a/map_test.go +++ b/map_test.go @@ -2737,7 +2737,7 @@ func TestMapEncodeDecode(t *testing.T) { // data slab id2: { // version - 0x10, + 0x12, // flag: map data 0x08, // next slab id @@ -2789,8 +2789,6 @@ func TestMapEncodeDecode(t *testing.T) { 0x10, // flag: has pointer + map data 0x48, - // next slab id - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // the following encoded data is valid CBOR @@ -2864,7 +2862,8 @@ func TestMapEncodeDecode(t *testing.T) { require.True(t, ok) require.Equal(t, 2, len(meta.childrenHeaders)) require.Equal(t, uint32(len(stored[id2])), meta.childrenHeaders[0].size) - require.Equal(t, uint32(len(stored[id3])), meta.childrenHeaders[1].size) + // Need to add slabIDSize to encoded data slab here because empty slab ID is omitted during encoding. + require.Equal(t, uint32(len(stored[id3])+slabIDSize), meta.childrenHeaders[1].size) // Decode data to new storage storage2 := newTestPersistentStorageWithData(t, stored) @@ -3392,8 +3391,6 @@ func TestMapEncodeDecode(t *testing.T) { 0x10, // flag: any size + collision group 0x2b, - // next slab id - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // the following encoded data is valid CBOR @@ -3457,8 +3454,6 @@ func TestMapEncodeDecode(t *testing.T) { 0x10, // flag: any size + collision group 0x2b, - // next slab id - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // the following encoded data is valid CBOR