From 7be774f7576f7e65710907708eb95bae7b664853 Mon Sep 17 00:00:00 2001 From: David Rauschenbach Date: Mon, 4 Nov 2024 07:06:24 -0800 Subject: [PATCH 01/10] Don't use 64-bit test values on 32-bit platforms --- arrow-buffer/src/util/bit_util.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/arrow-buffer/src/util/bit_util.rs b/arrow-buffer/src/util/bit_util.rs index ed5d363d607f..ecb9caf66c38 100644 --- a/arrow-buffer/src/util/bit_util.rs +++ b/arrow-buffer/src/util/bit_util.rs @@ -257,7 +257,7 @@ mod tests { } #[test] - fn test_ceil() { + fn test_ceil_with_32_bits() { assert_eq!(ceil(0, 1), 0); assert_eq!(ceil(1, 1), 1); assert_eq!(ceil(1, 2), 1); @@ -266,8 +266,16 @@ mod tests { assert_eq!(ceil(8, 8), 1); assert_eq!(ceil(9, 8), 2); assert_eq!(ceil(9, 9), 1); - assert_eq!(ceil(10000000000, 10), 1000000000); - assert_eq!(ceil(10, 10000000000), 1); - assert_eq!(ceil(10000000000, 1000000000), 10); + assert_eq!(ceil(1_000_000_000, 10), 100_000_000); + assert_eq!(ceil(10, 1_000_000_000), 1); + assert_eq!(ceil(1_000_000_000, 100_000_000), 10); + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_ceil_with_64_bits() { + assert_eq!(ceil(10_000_000_000_000, 10), 1_000_000_000_000); + assert_eq!(ceil(10, 10_000_000_000_000), 1); + assert_eq!(ceil(10_000_000_000_000, 1_000_000_000_000), 10); } } From 3c830e5702c53870995dfc8949fb761e64499402 Mon Sep 17 00:00:00 2001 From: David Rauschenbach Date: Mon, 4 Nov 2024 07:18:19 -0800 Subject: [PATCH 02/10] Disable tests using 64-bit test values on 32-bit platforms --- arrow-json/src/reader/mod.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs index bcacf6f706b8..4b3cf289be46 100644 --- a/arrow-json/src/reader/mod.rs +++ b/arrow-json/src/reader/mod.rs @@ -1250,7 +1250,8 @@ mod tests { test_timestamp::(); } - fn test_time() { + #[cfg(target_pointer_width = "64")] + fn test_time_with_64_bits() { let buf = r#" {"a": 1, "b": "09:26:56.123 AM", "c": 38.30} {"a": 2, "b": "23:59:59", "c": 123.456} @@ -1323,11 +1324,12 @@ mod tests { } #[test] - fn test_times() { - test_time::(); - test_time::(); - test_time::(); - test_time::(); + #[cfg(target_pointer_width = "64")] + fn test_times_with_64_bits() { + test_time_with_64_bits::(); + test_time_with_64_bits::(); + test_time_with_64_bits::(); + test_time_with_64_bits::(); } #[test] From de42e82d1ea16a9a08fac3d908a494ad3adce51e Mon Sep 17 00:00:00 2001 From: David Rauschenbach Date: Mon, 4 Nov 2024 07:29:48 -0800 Subject: [PATCH 03/10] Don't test overflow behavior that is specific to 64-bits on 32-bit platforms --- arrow/tests/array_validation.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/arrow/tests/array_validation.rs b/arrow/tests/array_validation.rs index 1321f10f9438..3580d9b25545 100644 --- a/arrow/tests/array_validation.rs +++ b/arrow/tests/array_validation.rs @@ -54,6 +54,7 @@ fn test_bad_number_of_buffers() { #[should_panic( expected = "Need at least 18446744073709551615 bytes in buffers[0] in array of type Int64, but got 8" )] +#[cfg(target_pointer_width = "64")] fn test_fixed_width_overflow() { let buffer = Buffer::from_slice_ref([0i32, 2i32]); ArrayData::try_new(DataType::Int64, usize::MAX, None, 0, vec![buffer], vec![]).unwrap(); From 68987e18e85a23d38a84069979963d82f44adb99 Mon Sep 17 00:00:00 2001 From: David Rauschenbach Date: Mon, 4 Nov 2024 07:48:14 -0800 Subject: [PATCH 04/10] A test that is specific to counting memory size on 64-bit platforms should not run on 32-bit platforms --- arrow-array/src/record_batch.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/arrow-array/src/record_batch.rs b/arrow-array/src/record_batch.rs index 78108d441b05..c97ecfc619bd 100644 --- a/arrow-array/src/record_batch.rs +++ b/arrow-array/src/record_batch.rs @@ -678,6 +678,7 @@ mod tests { } #[test] + #[cfg(target_pointer_width = "64")] fn byte_size_should_not_regress() { let schema = Schema::new(vec![ Field::new("a", DataType::Int32, false), From 8228fc2d746dc442d91ddbb13c476c3c6f7a5086 Mon Sep 17 00:00:00 2001 From: David Rauschenbach Date: Mon, 4 Nov 2024 08:15:35 -0800 Subject: [PATCH 05/10] Fix a few assertions for 32-bit platforms --- arrow-array/src/builder/generic_bytes_view_builder.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arrow-array/src/builder/generic_bytes_view_builder.rs b/arrow-array/src/builder/generic_bytes_view_builder.rs index d12c2b7db468..7f2df352b4d6 100644 --- a/arrow-array/src/builder/generic_bytes_view_builder.rs +++ b/arrow-array/src/builder/generic_bytes_view_builder.rs @@ -665,9 +665,18 @@ mod tests { ); let err = v.try_append_view(0, u32::MAX, 1).unwrap_err(); + #[cfg(target_pointer_width = "32")] + assert_eq!(err.to_string(), "Invalid argument error: Range 4294967295..4294967295 out of bounds for block of length 17"); + #[cfg(target_pointer_width = "64")] assert_eq!(err.to_string(), "Invalid argument error: Range 4294967295..4294967296 out of bounds for block of length 17"); let err = v.try_append_view(0, 1, u32::MAX).unwrap_err(); + #[cfg(target_pointer_width = "32")] + assert_eq!( + err.to_string(), + "Invalid argument error: Range 1..4294967295 out of bounds for block of length 17" + ); + #[cfg(target_pointer_width = "64")] assert_eq!( err.to_string(), "Invalid argument error: Range 1..4294967296 out of bounds for block of length 17" From 62b49deb3d01048aa8db0308823fcd8ecfae187f Mon Sep 17 00:00:00 2001 From: David Rauschenbach Date: Mon, 4 Nov 2024 10:28:04 -0800 Subject: [PATCH 06/10] Don't test precision on a 32-bit platform that doesn't support it --- arrow-buffer/src/native.rs | 6 ++++++ parquet/src/arrow/arrow_reader/mod.rs | 14 +++++++++++--- parquet/src/bloom_filter/mod.rs | 1 + parquet/tests/arrow_reader/bad_data.rs | 1 + parquet_derive/src/parquet_field.rs | 2 ++ parquet_derive_test/src/lib.rs | 6 ++++-- 6 files changed, 25 insertions(+), 5 deletions(-) diff --git a/arrow-buffer/src/native.rs b/arrow-buffer/src/native.rs index c563f73cf5b9..986b7b1a6bab 100644 --- a/arrow-buffer/src/native.rs +++ b/arrow-buffer/src/native.rs @@ -345,10 +345,16 @@ mod tests { assert_eq!(IntervalDayTime::new(1, 0).as_usize(), 1); assert_eq!(IntervalMonthDayNano::new(1, 0, 0).as_usize(), 1); + #[cfg(target_pointer_width = "32")] + let a = IntervalDayTime::new(23, 0); + #[cfg(target_pointer_width = "64")] let a = IntervalDayTime::new(23, 53); let b = IntervalDayTime::usize_as(a.as_usize()); assert_eq!(a, b); + #[cfg(target_pointer_width = "32")] + let a = IntervalMonthDayNano::new(23, 0, 0); + #[cfg(target_pointer_width = "64")] let a = IntervalMonthDayNano::new(23, 53, 0); let b = IntervalMonthDayNano::usize_as(a.as_usize()); assert_eq!(a, b); diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index d3709c03e99a..a1a2d19728af 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -925,18 +925,23 @@ mod tests { use bytes::Bytes; use half::f16; + #[cfg(target_pointer_width = "64")] use num::PrimInt; use rand::{thread_rng, Rng, RngCore}; use tempfile::tempfile; use arrow_array::builder::*; use arrow_array::cast::AsArray; + #[cfg(target_pointer_width = "64")] + use arrow_array::types::DecimalType; use arrow_array::types::{ - Decimal128Type, Decimal256Type, DecimalType, Float16Type, Float32Type, Float64Type, - Time32MillisecondType, Time64MicrosecondType, + Decimal128Type, Float16Type, Float32Type, Float64Type, Time32MillisecondType, + Time64MicrosecondType, }; use arrow_array::*; - use arrow_buffer::{i256, ArrowNativeType, Buffer, IntervalDayTime}; + #[cfg(target_pointer_width = "64")] + use arrow_buffer::ArrowNativeType; + use arrow_buffer::{i256, Buffer, IntervalDayTime}; use arrow_data::ArrayDataBuilder; use arrow_schema::{ ArrowError, DataType as ArrowDataType, Field, Fields, Schema, SchemaRef, TimeUnit, @@ -3848,6 +3853,7 @@ mod tests { assert_eq!(out, batch.slice(2, 1)); } + #[cfg(target_pointer_width = "64")] fn test_decimal_roundtrip() { // Precision <= 9 -> INT32 // Precision <= 18 -> INT64 @@ -3896,7 +3902,9 @@ mod tests { } #[test] + #[cfg(target_pointer_width = "64")] fn test_decimal() { + use arrow_array::types::Decimal256Type; test_decimal_roundtrip::(); test_decimal_roundtrip::(); } diff --git a/parquet/src/bloom_filter/mod.rs b/parquet/src/bloom_filter/mod.rs index 7d6dccdd2378..86fac9723bb0 100644 --- a/parquet/src/bloom_filter/mod.rs +++ b/parquet/src/bloom_filter/mod.rs @@ -517,6 +517,7 @@ mod tests { (0.1, 1000000, 5772541), (0.01, 1000000, 9681526), (0.001, 1000000, 14607697), + #[cfg(target_pointer_width = "64")] (1e-50, 1_000_000_000_000, 14226231280773240832), ] { assert_eq!(*num_bits, num_of_bits_from_ndv_fpp(*ndv, *fpp) as u64); diff --git a/parquet/tests/arrow_reader/bad_data.rs b/parquet/tests/arrow_reader/bad_data.rs index 74342031432a..a9e761fe47c1 100644 --- a/parquet/tests/arrow_reader/bad_data.rs +++ b/parquet/tests/arrow_reader/bad_data.rs @@ -102,6 +102,7 @@ fn test_arrow_gh_41317() { } #[test] +#[cfg(target_pointer_width = "64")] fn test_arrow_rs_gh_6229_dict_header() { let err = read_file("ARROW-RS-GH-6229-DICTHEADER.parquet").unwrap_err(); assert_eq!( diff --git a/parquet_derive/src/parquet_field.rs b/parquet_derive/src/parquet_field.rs index f99ea3e0356c..97c4088e52fd 100644 --- a/parquet_derive/src/parquet_field.rs +++ b/parquet_derive/src/parquet_field.rs @@ -841,6 +841,7 @@ mod test { } #[test] + #[cfg(target_pointer_width = "64")] fn test_generating_a_simple_writer_snippet() { let snippet: proc_macro2::TokenStream = quote! { struct ABoringStruct { @@ -868,6 +869,7 @@ mod test { } #[test] + #[cfg(target_pointer_width = "64")] fn test_generating_a_simple_reader_snippet() { let snippet: proc_macro2::TokenStream = quote! { struct ABoringStruct { diff --git a/parquet_derive_test/src/lib.rs b/parquet_derive_test/src/lib.rs index 2cd69d03d731..9703c6f71337 100644 --- a/parquet_derive_test/src/lib.rs +++ b/parquet_derive_test/src/lib.rs @@ -106,16 +106,18 @@ mod tests { use super::*; use chrono::SubsecRound; - use std::{env, fs, io::Write, sync::Arc}; + use std::{env, fs, io::Write}; use parquet::{ file::writer::SerializedFileWriter, record::{RecordReader, RecordWriter}, - schema::parser::parse_message_type, }; #[test] + #[cfg(target_pointer_width = "64")] fn test_parquet_derive_hello() { + use parquet::schema::parser::parse_message_type; + use std::sync::Arc; let file = get_temp_file("test_parquet_derive_hello", &[]); // The schema is not required, but this tests that the generated From 5231e68e1c649f5c37e96483da855b42d9cd1ee8 Mon Sep 17 00:00:00 2001 From: David Rauschenbach Date: Mon, 4 Nov 2024 08:47:29 -0800 Subject: [PATCH 07/10] Skip a test that fails on 32-bit platforms where content size is not supported: IpcError("Invalid uncompressed length: 1751093230692204784") --- arrow-integration-testing/tests/ipc_writer.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arrow-integration-testing/tests/ipc_writer.rs b/arrow-integration-testing/tests/ipc_writer.rs index d780eb2ee0b5..8db83fc53581 100644 --- a/arrow-integration-testing/tests/ipc_writer.rs +++ b/arrow-integration-testing/tests/ipc_writer.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -use arrow::ipc; use arrow::ipc::reader::{FileReader, StreamReader}; use arrow::ipc::writer::{FileWriter, IpcWriteOptions, StreamWriter}; use arrow::util::test_util::arrow_test_data; @@ -91,7 +90,9 @@ fn write_1_0_0_littleendian() { } #[test] +#[cfg(target_pointer_width = "64")] fn write_2_0_0_compression() { + use arrow::ipc; let testdata = arrow_test_data(); let version = "2.0.0-compression"; let paths = ["generated_lz4", "generated_zstd"]; From 6245e948831a4f6cb44f5934d84d2549ddc7872c Mon Sep 17 00:00:00 2001 From: David Rauschenbach Date: Mon, 4 Nov 2024 08:52:29 -0800 Subject: [PATCH 08/10] Don't test Decimal128 on 32-bit platforms where it's unsupported --- arrow-ipc/src/writer.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs index b5c4dd95ed9f..25f6a8901248 100644 --- a/arrow-ipc/src/writer.rs +++ b/arrow-ipc/src/writer.rs @@ -2678,6 +2678,7 @@ mod tests { } #[test] + #[cfg(target_pointer_width = "64")] fn test_decimal128_alignment8_is_unaligned() { const IPC_ALIGNMENT: usize = 8; From e9dca8e39b89bab8bef616b54536eb2f599b4942 Mon Sep 17 00:00:00 2001 From: David Rauschenbach Date: Mon, 4 Nov 2024 08:59:11 -0800 Subject: [PATCH 09/10] A test that is specific to counting memory size on 64-bit platforms should not run on 32-bit platforms --- arrow-schema/src/datatype.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs index ff5832dfa68c..3a00870c9440 100644 --- a/arrow-schema/src/datatype.rs +++ b/arrow-schema/src/datatype.rs @@ -1058,6 +1058,7 @@ mod tests { } #[test] + #[cfg(target_pointer_width = "64")] fn size_should_not_regress() { assert_eq!(std::mem::size_of::(), 24); } From 0340592cccf24f8dc2ec7508057aae6bc2506996 Mon Sep 17 00:00:00 2001 From: David Rauschenbach Date: Mon, 4 Nov 2024 09:18:06 -0800 Subject: [PATCH 10/10] A test that is specific to counting memory size has different sizes on 64-bit vs 32-bit platforms --- parquet/src/file/metadata/mod.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/parquet/src/file/metadata/mod.rs b/parquet/src/file/metadata/mod.rs index 32b985710023..2c628051795e 100644 --- a/parquet/src/file/metadata/mod.rs +++ b/parquet/src/file/metadata/mod.rs @@ -1861,6 +1861,9 @@ mod tests { let parquet_meta = ParquetMetaDataBuilder::new(file_metadata.clone()) .set_row_groups(row_group_meta_with_stats) .build(); + #[cfg(target_pointer_width = "32")] + let base_expected_size = 1632; + #[cfg(target_pointer_width = "64")] let base_expected_size = 2312; assert_eq!(parquet_meta.memory_size(), base_expected_size); @@ -1888,6 +1891,9 @@ mod tests { ]])) .build(); + #[cfg(target_pointer_width = "32")] + let bigger_expected_size = 1972; + #[cfg(target_pointer_width = "64")] let bigger_expected_size = 2816; // more set fields means more memory usage assert!(bigger_expected_size > base_expected_size);