-
Notifications
You must be signed in to change notification settings - Fork 25
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Reference Tests and Breaking Change: Optional nullable fields are now null instead of undefined #114
Reference Tests and Breaking Change: Optional nullable fields are now null instead of undefined #114
Changes from all commits
6345ab3
5ee911d
f3a5cd1
f7ab74b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -227,6 +227,8 @@ function materializeRecordField(record: Record<string, unknown>, branch: Array<P | |
const node = branch[0]; | ||
|
||
if (dLevel < node.dLevelMax) { | ||
// This ensures that nulls are correctly processed | ||
record[node.name] = value; | ||
Comment on lines
+230
to
+231
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the change that makes null values shred to null instead of undefined. |
||
return; | ||
} | ||
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixes for using null instead of undefined |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# References Tests | ||
|
||
This is a set of tests that use the reference files from https://github.com/apache/parquet-testing/. | ||
|
||
## Updating the Reference Files | ||
|
||
This assumes that parquetjs is in the same folder as the clone of parquet-testing. | ||
|
||
1. `git clone [email protected]:apache/parquet-testing.git` | ||
1. `cd ../parquetjs` | ||
1. `cp ../parquet-testing/data/*.parquet ./test/reference-test/files/` | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import { expect } from "chai"; | ||
import path from "node:path"; | ||
import fs from "node:fs"; | ||
|
||
import parquet from '../../parquet'; | ||
|
||
// Used for testing a single file. Example: | ||
// const onlyTest = 'single_nan.parquet'; | ||
const onlyTest = null; | ||
|
||
// Test files currently unsupported / needing separate test | ||
const unsupported = [ | ||
'byte_stream_split.zstd.parquet', // ZSTD unsupported | ||
'hadoop_lz4_compressed.parquet', // LZ4 unsupported | ||
'hadoop_lz4_compressed_larger.parquet', // LZ4 unsupported | ||
'lz4_raw_compressed.parquet', // LZ4_RAW unsupported | ||
'lz4_raw_compressed_larger.parquet', // LZ4_RAW unsupported | ||
'nested_structs.rust.parquet', // ZSTD unsupported | ||
'non_hadoop_lz4_compressed.parquet', // ZSTD unsupported | ||
'rle_boolean_encoding.parquet', // BUG?: https://github.com/LibertyDSNP/parquetjs/issues/113 | ||
'datapage_v2.snappy.parquet', // DELTA_BINARY_PACKED unsupported | ||
'delta_binary_packed.parquet', // DELTA_BINARY_PACKED unsupported | ||
'delta_byte_array.parquet', // DELTA_BYTE_ARRAY unsupported | ||
'delta_encoding_optional_column.parquet', // DELTA_BINARY_PACKED unsupported | ||
'delta_encoding_required_column.parquet', // DELTA_BINARY_PACKED unsupported | ||
'delta_length_byte_array.parquet', // ZSTD unsupported, DELTA_BINARY_PACKED unsupported | ||
'float16_nonzeros_and_nans.parquet', // missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY) | ||
'float16_zeros_and_nans.parquet', // missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY) | ||
Comment on lines
+27
to
+28
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These can be removed once #108 is rebased on it and will work as a test to prove it is working. |
||
'large_string_map.brotli.parquet', // BUG? | ||
]; | ||
|
||
describe("Read Test for all files", function () { | ||
|
||
const listOfFiles = fs.readdirSync(path.join(__dirname, 'files')) | ||
.filter(x => x.endsWith(".parquet") && !unsupported.includes(x)); | ||
|
||
for (const filename of listOfFiles) { | ||
if (onlyTest && onlyTest !== filename) continue; | ||
it(`Reading ${filename}`, async function () { | ||
const reader = await parquet.ParquetReader.openFile(path.join(__dirname, 'files', filename)); | ||
const schema = reader.getSchema(); | ||
expect(schema.fieldList).to.have.length.greaterThan(0); | ||
const cursor = reader.getCursor(); | ||
const record = await cursor.next() as any; | ||
// Expect the same keys as top-level fields | ||
const expectedRecordKeys = schema.fieldList.filter(x => x.path.length === 1).map(x => x.name); | ||
expect(Object.keys(record)).to.deep.equal(expectedRecordKeys); | ||
}) | ||
} | ||
}); |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixes for using null instead of undefined |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixes for using null instead of undefined |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Based in part around the information in https://issues.apache.org/jira/browse/PARQUET-1402