From b1c93515efefc2e3395c3fcbba03c62912dcad39 Mon Sep 17 00:00:00 2001
From: Kevin Wang
Date: Sun, 3 Mar 2024 08:58:35 -0500
Subject: [PATCH] chore: delete protocol directory (#155)

* chore: delete protocol directory

Closes #153

* check no occurrences of old protocol in ts

* prettier

---------

Co-authored-by: Matthew <38759997+friendlymatthew@users.noreply.github.com>
---
 pkg/protocol/protocol.go | 189 ---------------------------------------
 src/cache.ts             |  23 ++---
 2 files changed, 12 insertions(+), 200 deletions(-)
 delete mode 100644 pkg/protocol/protocol.go

diff --git a/pkg/protocol/protocol.go b/pkg/protocol/protocol.go
deleted file mode 100644
index 0a9736ee..00000000
--- a/pkg/protocol/protocol.go
+++ /dev/null
@@ -1,189 +0,0 @@
-package protocol
-
-import (
-	"encoding/csv"
-	"encoding/json"
-	"fmt"
-	"io"
-	"log/slog"
-	"reflect"
-	"strconv"
-	"strings"
-)
-
-/*
-The overall index file for AppendableDB is structured as:
-
-+-----------------------+
-| Version               |
-+-----------------------+
-| IndexFileHeader       |
-+-----------------------+
-| IndexHeader           |
-+-----------------------+
-| ...                   |
-+-----------------------+
-| IndexHeader           |
-+-----------------------+
-| IndexRecord           |
-+-----------------------+
-| ...                   |
-+-----------------------+
-| IndexRecord           |
-+-----------------------+
-| EndByteOffset         |
-+-----------------------+
-| ...                   |
-+-----------------------+
-| EndByteOffset         |
-+-----------------------+
-| Checksum              |
-+-----------------------+
-| ...                   |
-+-----------------------+
-| Checksum              |
-+-----------------------+
-*/
-
-// Version is the version of AppendableDB this library is compatible with.
-type Version byte
-
-// IndexFileHeader is the header of the index file.
-type IndexFileHeader struct {
-	// IndexLength represents the number of bytes the IndexHeaders occupy.
-	IndexLength uint64
-
-	// DataCount represents the number of data records indexed by this index
-	// file.
-	DataCount uint64
-}
-
-// IndexHeader is the header of each index record. This represents the fields
-// available in the data file.
-type IndexHeader struct {
-	FieldName string
-
-	// FieldType represents the type of data stored in the field. Note that the
-	// field data doesn't need to follow this type, but it is used to determine
-	// the TypeScript typings for the field.
-	FieldType FieldType
-
-	IndexRecordCount uint64
-}
-
-// FieldType represents the type of data stored in the field, which follows
-// JSON types excluding Object and null. Object is broken down into subfields
-// and null is not stored.
-//
-// FieldType is left as a uint64 to avoid shooting ourselves in the foot if we
-// want to support more types in the future via other file formats.
-type FieldType uint64
-
-const (
-	FieldTypeString FieldType = (1 << iota)
-	FieldTypeNumber
-	FieldTypeObject
-	FieldTypeArray
-	FieldTypeBoolean
-	FieldTypeNull
-)
-
-func (t FieldType) TypescriptType() string {
-	components := []string{}
-	if t&FieldTypeString != 0 {
-		components = append(components, "string")
-	}
-	if t&FieldTypeNumber != 0 {
-		components = append(components, "number")
-	}
-	if t&FieldTypeObject != 0 {
-		components = append(components, "Record")
-	}
-	if t&FieldTypeArray != 0 {
-		components = append(components, "any[]")
-	}
-	if t&FieldTypeBoolean != 0 {
-		components = append(components, "boolean")
-	}
-	if t&FieldTypeNull != 0 {
-		components = append(components, "null")
-	}
-	if len(components) == 0 {
-		return "unknown"
-	}
-	return strings.Join(components, " | ")
-}
-
-type IndexRecord struct {
-	DataNumber uint64
-	// FieldByteOffset represents the byte offset of the field in the data
-	// file to fetch exactly the field value.
-	FieldStartByteOffset uint64
-	// FieldLength is pessimistic: it is an encoded value that is at least as
-	// long as the actual field value.
-	FieldLength int
-}
-
-func InferCSVField(fieldValue string) (interface{}, FieldType) {
-	if fieldValue == "" {
-		return nil, FieldTypeNull
-	}
-
-	if i, err := strconv.Atoi(fieldValue); err == nil {
-		return float64(i), FieldTypeNumber
-	}
-
-	if f, err := strconv.ParseFloat(fieldValue, 64); err == nil {
-		return float64(f), FieldTypeNumber
-	}
-
-	if b, err := strconv.ParseBool(fieldValue); err == nil {
-		return b, FieldTypeBoolean
-	}
-
-	return fieldValue, FieldTypeString
-}
-
-func (i IndexRecord) CSVField(r io.ReadSeeker) (any, error) {
-	offset, err := r.Seek(0, io.SeekCurrent)
-	if err != nil {
-		return nil, fmt.Errorf("failed to get current offset: %w", err)
-	}
-
-	if _, err := r.Seek(int64(i.FieldStartByteOffset), io.SeekStart); err != nil {
-		return nil, fmt.Errorf("failed to seek to field start byte offset: %w", err)
-	}
-
-	fields, err := csv.NewReader(io.LimitReader(r, int64(i.FieldLength))).Read()
-	if err != nil {
-		return nil, fmt.Errorf("failed to decode field: %w", err)
-	}
-
-	if _, err := r.Seek(offset, io.SeekStart); err != nil {
-		return nil, fmt.Errorf("failed to seek to original offset: %w", err)
-	}
-
-	slog.Debug("fields", slog.Any("F", fields), slog.Any("len", len(fields)))
-	slog.Debug("CSVField", slog.Any("fields", fields), slog.Any("firstField", fields[0]), slog.Any("firstFieldType", reflect.TypeOf(fields[0]).String()))
-
-	field, _ := InferCSVField(fields[0])
-	return field, nil
-}
-
-func (i IndexRecord) Token(r io.ReadSeeker) (json.Token, error) {
-	offset, err := r.Seek(0, io.SeekCurrent)
-	if err != nil {
-		return nil, fmt.Errorf("failed to get current offset: %w", err)
-	}
-	if _, err := r.Seek(int64(i.FieldStartByteOffset), io.SeekStart); err != nil {
-		return nil, fmt.Errorf("failed to seek to field start byte offset: %w", err)
-	}
-	token, err := json.NewDecoder(io.LimitReader(r, int64(i.FieldLength))).Token()
-	if err != nil {
-		return nil, fmt.Errorf("failed to decode field: %w", err)
-	}
-	if _, err := r.Seek(offset, io.SeekStart); err != nil {
-		return nil, fmt.Errorf("failed to seek to original offset: %w", err)
-	}
-	return token, nil
-}
diff --git a/src/cache.ts b/src/cache.ts
index eb8f493d..66151639 100644
--- a/src/cache.ts
+++ b/src/cache.ts
@@ -3,31 +3,32 @@ import { RangeResolver } from "./resolver";
 export function cache(resolver: RangeResolver): RangeResolver {
   const cache: [
     [number, number],
-    Promise<{ data: ArrayBuffer; totalLength: number }>,
+    Promise<{ data: ArrayBuffer; totalLength: number }[]>,
   ][] = [];
 
-  return async ({
-    start,
-    end,
-  }): Promise<{ data: ArrayBuffer; totalLength: number }> => {
+  return async ([{ start, end }]): Promise<
+    { data: ArrayBuffer; totalLength: number }[]
+  > => {
     // check if start-end is contained in any of the cached ranges
     const cached = cache.find(([[s, e]]) => s <= start && end <= e);
     if (cached) {
       return cached[1].then((cachedData) => {
-        const data = cachedData.data.slice(
+        const data = cachedData[0].data.slice(
           start - cached[0][0],
           end - cached[0][0],
        );
-        return {
-          data,
-          totalLength: cachedData.totalLength,
-        };
+        return [
+          {
+            data,
+            totalLength: cachedData[0].totalLength,
+          },
+        ];
       });
     }
 
     // TODO: check if start-end overlaps with any of the cached ranges
-    const promise = resolver({ start, end });
+    const promise = resolver([{ start, end }]);
     cache.push([[start, end], promise]);
     return promise;
   };
 }
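
For context on the call shape this change leaves behind: a RangeResolver now
takes an array of ranges and resolves to an array of results, and cache()
memoizes resolved ranges and serves contained requests by slicing the cached
buffer. Below is a minimal sketch of driving it, assuming RangeResolver is
exactly the array-in/array-out signature the diff above exercises; the
in-memory resolver and sample buffer are hypothetical stand-ins, not part of
this repository.

    import { cache } from "./cache";
    import { RangeResolver } from "./resolver";

    // Hypothetical backing store: an in-memory buffer standing in for a
    // remote file that would normally be fetched by byte range.
    const backing = new TextEncoder().encode("hello, appendable!").buffer;

    // Hypothetical resolver: answers each requested byte range by slicing
    // the buffer, using the batched shape that cache() expects.
    const memoryResolver: RangeResolver = async (ranges) =>
      ranges.map(({ start, end }) => ({
        data: backing.slice(start, end),
        totalLength: backing.byteLength,
      }));

    async function main() {
      const cached = cache(memoryResolver);

      // First request goes through memoryResolver; the promise is
      // memoized keyed on [start, end].
      const [first] = await cached([{ start: 0, end: 5 }]);

      // A contained range is served by slicing the cached ArrayBuffer
      // instead of calling the resolver again.
      const [sub] = await cached([{ start: 1, end: 4 }]);

      console.log(first.data.byteLength, sub.data.byteLength); // 5 3
    }

    main();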