forked from ZJONSSON/parquetjs
-
Notifications
You must be signed in to change notification settings - Fork 25
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into fix-typelength
- Loading branch information
Showing
64 changed files
with
116 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
export * as PLAIN from './plain' | ||
export * as RLE from './rle' | ||
export * as PLAIN_DICTIONARY from './plain_dictionary' | ||
|
||
export * as RLE_DICTIONARY from './plain_dictionary' | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Reference Tests | ||
|
||
This is a set of tests that use the reference files from https://github.com/apache/parquet-testing/. | ||
|
||
## Updating the Reference Files | ||
|
||
This assumes that the parquetjs and parquet-testing clones are sibling directories (i.e. they share the same parent folder). | ||
|
||
1. `git clone git@github.com:apache/parquet-testing.git` | ||
1. `cd ../parquetjs` | ||
1. `cp ../parquet-testing/data/*.parquet ./test/reference-test/files/` | ||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added
BIN
+2.82 KB
test/reference-test/files/data_index_bloom_encoding_with_length.parquet
Binary file not shown.
Binary file not shown.
Binary file added
BIN
+3.3 KB
test/reference-test/files/datapage_v1-snappy-compressed-checksum.parquet
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added
BIN
+814 Bytes
test/reference-test/files/rle-dict-uncompressed-corrupt-checksum.parquet
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import { expect } from "chai"; | ||
import path from "node:path"; | ||
import fs from "node:fs"; | ||
|
||
import parquet from '../../parquet'; | ||
|
||
// Used for testing a single file. Example: | ||
// const onlyTest = 'single_nan.parquet'; | ||
// When left as null, every file that is not listed in `unsupported` is tested. | ||
const onlyTest = null; | ||
|
||
// Test files currently unsupported / needing separate test | ||
// Each entry is a file name that the read test filters out; the trailing | ||
// comment records why the file is skipped. | ||
const unsupported = [ | ||
'byte_stream_split.zstd.parquet', // ZSTD unsupported | ||
'hadoop_lz4_compressed.parquet', // LZ4 unsupported | ||
'hadoop_lz4_compressed_larger.parquet', // LZ4 unsupported | ||
'lz4_raw_compressed.parquet', // LZ4_RAW unsupported | ||
'lz4_raw_compressed_larger.parquet', // LZ4_RAW unsupported | ||
'nested_structs.rust.parquet', // ZSTD unsupported | ||
'non_hadoop_lz4_compressed.parquet', // LZ4 unsupported (per file name; NOTE(review): confirm codec) | ||
'rle_boolean_encoding.parquet', // BUG?: https://github.com/LibertyDSNP/parquetjs/issues/113 | ||
'datapage_v2.snappy.parquet', // DELTA_BINARY_PACKED unsupported | ||
'delta_binary_packed.parquet', // DELTA_BINARY_PACKED unsupported | ||
'delta_byte_array.parquet', // DELTA_BYTE_ARRAY unsupported | ||
'delta_encoding_optional_column.parquet', // DELTA_BINARY_PACKED unsupported | ||
'delta_encoding_required_column.parquet', // DELTA_BINARY_PACKED unsupported | ||
'delta_length_byte_array.parquet', // ZSTD unsupported, DELTA_BINARY_PACKED unsupported | ||
'float16_nonzeros_and_nans.parquet', // missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY) | ||
'float16_zeros_and_nans.parquet', // missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY) | ||
'large_string_map.brotli.parquet', // BUG? | ||
]; | ||
|
||
// Smoke test over the reference files: each supported file must open, expose a | ||
// non-empty schema, and yield a first record whose keys match the schema's | ||
// top-level fields. | ||
describe("Read Test for all files", function () { | ||
|
||
  // Every *.parquet file in ./files that is not explicitly listed as unsupported. | ||
  const listOfFiles = fs.readdirSync(path.join(__dirname, 'files')) | ||
    .filter(x => x.endsWith(".parquet") && !unsupported.includes(x)); | ||
|
||
  for (const filename of listOfFiles) { | ||
    // When onlyTest is set, register a test for that single file only. | ||
    if (onlyTest && onlyTest !== filename) continue; | ||
    it(`Reading ${filename}`, async function () { | ||
      const reader = await parquet.ParquetReader.openFile(path.join(__dirname, 'files', filename)); | ||
      try { | ||
        const schema = reader.getSchema(); | ||
        expect(schema.fieldList).to.have.length.greaterThan(0); | ||
        const cursor = reader.getCursor(); | ||
        const record = await cursor.next() as any; | ||
        // Fail with a readable message instead of a TypeError from Object.keys(null). | ||
        expect(record, `${filename} should contain at least one record`).to.not.equal(null); | ||
        // Expect the same keys as top-level fields | ||
        const expectedRecordKeys = schema.fieldList.filter(x => x.path.length === 1).map(x => x.name); | ||
        expect(Object.keys(record)).to.deep.equal(expectedRecordKeys); | ||
      } finally { | ||
        // Always release the underlying file handle, even when an assertion fails. | ||
        await reader.close(); | ||
      } | ||
    }) | ||
  } | ||
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
Binary file not shown.