Skip to content

Commit

Permalink
Merge pull request #31 from reproio/parquet-go-1.5.2
Browse files Browse the repository at this point in the history
Fix #30: upgrade parquet-go to 1.5.2
  • Loading branch information
syucream authored May 11, 2020
2 parents 2167f0b + ee00c83 commit 5622bf2
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 17 deletions.
4 changes: 3 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@ clean:
fmt:
gofmt -w **/*.go

# TODO: Re-enable -race once the data race in parquet-go is resolved.
# ref. https://github.com/xitongsys/parquet-go/issues/256
.PHONY: test
test:
go test -race -cover ./...
go test -cover ./...

.PHONY: it
it: build
Expand Down
16 changes: 16 additions & 0 deletions columnifier/parquet_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,22 @@ func TestWriteClose(t *testing.T) {
isErr: true,
},

// Schema/record mismatch: the input records do not conform to the schema, so an error is expected
{
st: schema.SchemaTypeAvro,
sf: "../testdata/mismatch.avsc",
rt: record.RecordTypeJsonl,
config: Config{
Parquet: Parquet{
PageSize: 8 * 1024,
RowGroupSize: 128 * 1024 * 1024,
CompressionCodec: parquet.CompressionCodec_SNAPPY,
},
},
input: "../testdata/primitives.jsonl",
isErr: true,
},

// Valid
{
st: schema.SchemaTypeAvro,
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@ require (
github.com/apache/arrow/go/arrow v0.0.0-20200504153628-d13e8f3ed647
github.com/linkedin/goavro/v2 v2.9.7
github.com/vmihailenco/msgpack/v4 v4.3.11
github.com/xitongsys/parquet-go v1.5.1
github.com/xitongsys/parquet-go v1.5.2
github.com/xitongsys/parquet-go-source v0.0.0-20200225073416-429277801fe4
)
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ github.com/vmihailenco/msgpack/v4 v4.3.11 h1:Q47CePddpNGNhk4GCnAx9DDtASi2rasatE0
github.com/vmihailenco/msgpack/v4 v4.3.11/go.mod h1:gborTTJjAo/GWTqqRjrLCn9pgNN+NXzzngzBKDPIqw4=
github.com/vmihailenco/tagparser v0.1.1 h1:quXMXlA39OCbd2wAdTsGDlK9RkOk6Wuw+x37wVyIuWY=
github.com/vmihailenco/tagparser v0.1.1/go.mod h1:OeAg3pn3UbLjkWt+rN9oFYB6u/cQgqMEUPoW2WPyhdI=
github.com/xitongsys/parquet-go v1.5.1 h1:GFjQXrFmqI2XvmAaj7k73QtW3eECFVwaLX2/Mv3Fnuo=
github.com/xitongsys/parquet-go v1.5.1/go.mod h1:xUxwM8ELydxh4edHGegYq1pA8NnMKDx0K/GyB0o2bww=
github.com/xitongsys/parquet-go v1.5.2 h1:t8kVBM+7jPIbM+9ptrpZajWV1lOyHHVIQkTRUTlbK84=
github.com/xitongsys/parquet-go v1.5.2/go.mod h1:90swTgY6VkNM4MkMDsNxq8h30m6Yj1Arv9UMEl5V5DM=
github.com/xitongsys/parquet-go-source v0.0.0-20190524061010-2b72cbee77d5/go.mod h1:xxCx7Wpym/3QCo6JhujJX51dzSXrwmb0oH6FQb39SEA=
github.com/xitongsys/parquet-go-source v0.0.0-20200225073416-429277801fe4 h1:KvGGKrTAA489Xkfw1xwz59bj3hH50hC6HjG3Sby+aa4=
github.com/xitongsys/parquet-go-source v0.0.0-20200225073416-429277801fe4/go.mod h1:xxCx7Wpym/3QCo6JhujJX51dzSXrwmb0oH6FQb39SEA=
Expand Down
14 changes: 1 addition & 13 deletions parquet/parquet.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (

"github.com/xitongsys/parquet-go/common"
"github.com/xitongsys/parquet-go/layout"
"github.com/xitongsys/parquet-go/parquet"
"github.com/xitongsys/parquet-go/schema"
)

Expand Down Expand Up @@ -42,23 +41,12 @@ func prepareTables(schemaHandler *schema.SchemaHandler) (map[string]*layout.Tabl
return nil, err
}

var tpe parquet.Type
if index, ok := schemaHandler.MapIndex[pathStr]; ok {
if int(index) < len(schemaHandler.SchemaElements) {
tpe = schemaHandler.SchemaElements[index].GetType()
} else {
return nil, fmt.Errorf("invalid index %v to schema elements %v: %w", index, schemaHandler.SchemaElements, ErrInvalidParquetSchema)
}
} else {
return nil, fmt.Errorf("invalid schema key %v: %w", pathStr, ErrInvalidParquetSchema)
}

tables[pathStr] = &layout.Table{
Path: path,
MaxDefinitionLevel: maxDefinitionLevel,
MaxRepetitionLevel: maxRepetitionLevel,
RepetitionType: e.GetRepetitionType(),
Type: tpe,
Schema: schemaHandler.SchemaElements[schemaHandler.MapIndex[pathStr]],
Info: schemaHandler.Infos[i],
}
}
Expand Down
7 changes: 7 additions & 0 deletions testdata/mismatch.avsc
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"type": "record",
"name": "Mismatch",
"fields" : [
{"name": "f", "type": "bytes"}
]
}

0 comments on commit 5622bf2

Please sign in to comment.