diff --git a/arrow-array/benches/fixed_size_list_array.rs b/arrow-array/benches/fixed_size_list_array.rs index 5f001a4f3d3a..5270a4a5def3 100644 --- a/arrow-array/benches/fixed_size_list_array.rs +++ b/arrow-array/benches/fixed_size_list_array.rs @@ -26,7 +26,7 @@ fn gen_fsl(len: usize, value_len: usize) -> FixedSizeListArray { let values = Arc::new(Int32Array::from( (0..len).map(|_| rng.gen::()).collect::>(), )); - let field = Arc::new(Field::new("item", values.data_type().clone(), true)); + let field = Arc::new(Field::new_list_field(values.data_type().clone(), true)); FixedSizeListArray::new(field, value_len as i32, values, None) } diff --git a/arrow-array/src/array/binary_array.rs b/arrow-array/src/array/binary_array.rs index 350661a85d1f..4c47bf136224 100644 --- a/arrow-array/src/array/binary_array.rs +++ b/arrow-array/src/array/binary_array.rs @@ -372,11 +372,9 @@ mod tests { .unwrap(); let binary_array1 = GenericBinaryArray::::from(array_data1); - let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new( - "item", - DataType::UInt8, - false, - ))); + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new( + Field::new_list_field(DataType::UInt8, false), + )); let array_data2 = ArrayData::builder(data_type) .len(3) @@ -421,11 +419,9 @@ mod tests { let offsets = [0, 5, 8, 15].map(|n| O::from_usize(n).unwrap()); let null_buffer = Buffer::from_slice_ref([0b101]); - let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new( - "item", - DataType::UInt8, - false, - ))); + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new( + Field::new_list_field(DataType::UInt8, false), + )); // [None, Some(b"Parquet")] let array_data = ArrayData::builder(data_type) @@ -466,11 +462,9 @@ mod tests { .unwrap(); let offsets = [0, 5, 10].map(|n| O::from_usize(n).unwrap()); - let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new( - "item", - DataType::UInt8, - true, - ))); + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new( + Field::new_list_field(DataType::UInt8, true), + )); // [None, Some(b"Parquet")] let array_data = ArrayData::builder(data_type) @@ -558,7 +552,7 @@ mod tests { .unwrap(); let offsets: [i32; 4] = [0, 5, 5, 12]; - let data_type = DataType::List(Arc::new(Field::new("item", DataType::UInt32, false))); + let data_type = DataType::List(Arc::new(Field::new_list_field(DataType::UInt32, false))); let array_data = ArrayData::builder(data_type) .len(3) .add_buffer(Buffer::from_slice_ref(offsets)) diff --git a/arrow-array/src/array/fixed_size_binary_array.rs b/arrow-array/src/array/fixed_size_binary_array.rs index 83e984459760..ee6cc8021bca 100644 --- a/arrow-array/src/array/fixed_size_binary_array.rs +++ b/arrow-array/src/array/fixed_size_binary_array.rs @@ -722,7 +722,7 @@ mod tests { // [null, [10, 11, 12, 13]] let array_data = unsafe { ArrayData::builder(DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::UInt8, false)), + Arc::new(Field::new_list_field(DataType::UInt8, false)), 4, )) .len(2) @@ -758,7 +758,7 @@ mod tests { let array_data = unsafe { ArrayData::builder(DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::Binary, false)), + Arc::new(Field::new_list_field(DataType::Binary, false)), 4, )) .len(3) @@ -782,7 +782,7 @@ mod tests { let array_data = unsafe { ArrayData::builder(DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::UInt8, false)), + Arc::new(Field::new_list_field(DataType::UInt8, false)), 4, )) .len(3) diff --git a/arrow-array/src/array/fixed_size_list_array.rs b/arrow-array/src/array/fixed_size_list_array.rs index 00a3144a87ad..863733484c1c 100644 --- a/arrow-array/src/array/fixed_size_list_array.rs +++ b/arrow-array/src/array/fixed_size_list_array.rs @@ -95,7 +95,7 @@ use std::sync::Arc; /// .build() /// .unwrap(); /// let list_data_type = DataType::FixedSizeList( -/// Arc::new(Field::new("item", DataType::Int32, false)), +/// Arc::new(Field::new_list_field(DataType::Int32, false)), /// 3, /// ); /// let list_data = ArrayData::builder(list_data_type.clone()) @@ -487,7 +487,7 @@ mod tests { // Construct a list array from the above two let list_data_type = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 3); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, false)), 3); let list_data = ArrayData::builder(list_data_type.clone()) .len(3) .add_child_data(value_data.clone()) @@ -540,7 +540,7 @@ mod tests { // Construct a list array from the above two let list_data_type = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 3); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, false)), 3); let list_data = unsafe { ArrayData::builder(list_data_type) .len(3) @@ -569,7 +569,7 @@ mod tests { // Construct a fixed size list array from the above two let list_data_type = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 2); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, false)), 2); let list_data = ArrayData::builder(list_data_type) .len(5) .add_child_data(value_data.clone()) @@ -627,7 +627,7 @@ mod tests { // Construct a fixed size list array from the above two let list_data_type = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 2); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, false)), 2); let list_data = ArrayData::builder(list_data_type) .len(5) .add_child_data(value_data) @@ -650,7 +650,7 @@ mod tests { Some(4), ])); - let field = Arc::new(Field::new("item", DataType::Int32, true)); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); let list = FixedSizeListArray::new(field.clone(), 2, values.clone(), None); assert_eq!(list.len(), 3); @@ -674,7 +674,7 @@ mod tests { let err = FixedSizeListArray::try_new(field, 2, values.clone(), Some(nulls)).unwrap_err(); assert_eq!(err.to_string(), "Invalid argument error: Incorrect length of null buffer for FixedSizeListArray, expected 3 got 2"); - let field = Arc::new(Field::new("item", DataType::Int32, false)); + let field = Arc::new(Field::new_list_field(DataType::Int32, false)); let err = FixedSizeListArray::try_new(field.clone(), 2, values.clone(), None).unwrap_err(); assert_eq!(err.to_string(), "Invalid argument error: Found unmasked nulls for non-nullable FixedSizeListArray field \"item\""); @@ -682,14 +682,14 @@ mod tests { let nulls = NullBuffer::new(BooleanBuffer::new(Buffer::from([0b0000101]), 0, 3)); FixedSizeListArray::new(field, 2, values.clone(), Some(nulls)); - let field = Arc::new(Field::new("item", DataType::Int64, true)); + let field = Arc::new(Field::new_list_field(DataType::Int64, true)); let err = FixedSizeListArray::try_new(field, 2, values, None).unwrap_err(); assert_eq!(err.to_string(), "Invalid argument error: FixedSizeListArray expected data type Int64 got Int32 for \"item\""); } #[test] fn empty_fixed_size_list() { - let field = Arc::new(Field::new("item", DataType::Int32, true)); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); let nulls = NullBuffer::new_null(2); let values = new_empty_array(&DataType::Int32); let list = FixedSizeListArray::new(field.clone(), 0, values, Some(nulls)); diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index 1fab0009f2cc..ebb285e2032b 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -565,7 +565,7 @@ mod tests { // [[0, 1, 2], [3, 4, 5], [6, 7]] let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]); let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 8])); - let field = Arc::new(Field::new("item", DataType::Int32, true)); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); ListArray::new(field, offsets, Arc::new(values), None) } @@ -595,7 +595,8 @@ mod tests { let value_offsets = Buffer::from([]); // Construct a list array from the above two - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let list_data_type = + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = ArrayData::builder(list_data_type) .len(0) .add_buffer(value_offsets) @@ -621,7 +622,8 @@ mod tests { let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]); // Construct a list array from the above two - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let list_data_type = + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = ArrayData::builder(list_data_type.clone()) .len(3) .add_buffer(value_offsets.clone()) @@ -766,7 +768,8 @@ mod tests { bit_util::set_bit(&mut null_bits, 8); // Construct a list array from the above two - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let list_data_type = + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = ArrayData::builder(list_data_type) .len(9) .add_buffer(value_offsets) @@ -917,7 +920,8 @@ mod tests { .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7])) .build_unchecked() }; - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let list_data_type = + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = unsafe { ArrayData::builder(list_data_type) .len(3) @@ -934,7 +938,8 @@ mod tests { #[cfg(not(feature = "force_validate"))] fn test_list_array_invalid_child_array_len() { let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]); - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let list_data_type = + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = unsafe { ArrayData::builder(list_data_type) .len(3) @@ -964,7 +969,8 @@ mod tests { let value_offsets = Buffer::from_slice_ref([2, 2, 5, 7]); - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let list_data_type = + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) @@ -1010,7 +1016,8 @@ mod tests { .build_unchecked() }; - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let list_data_type = + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = unsafe { ArrayData::builder(list_data_type) .add_buffer(buf2) diff --git a/arrow-array/src/array/list_view_array.rs b/arrow-array/src/array/list_view_array.rs index 4e949a642701..cd358e031e8f 100644 --- a/arrow-array/src/array/list_view_array.rs +++ b/arrow-array/src/array/list_view_array.rs @@ -490,7 +490,7 @@ mod tests { fn test_empty_list_view_array() { // Construct an empty value array let vec: Vec = vec![]; - let field = Arc::new(Field::new("item", DataType::Int32, true)); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); let sizes = ScalarBuffer::from(vec![]); let offsets = ScalarBuffer::from(vec![]); let values = Int32Array::from(vec); @@ -508,7 +508,7 @@ mod tests { .build() .unwrap(); - let field = Arc::new(Field::new("item", DataType::Int32, true)); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); let sizes = ScalarBuffer::from(vec![3i32, 3, 2]); let offsets = ScalarBuffer::from(vec![0i32, 3, 6]); let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]); @@ -544,7 +544,7 @@ mod tests { .build() .unwrap(); - let field = Arc::new(Field::new("item", DataType::Int32, true)); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); let sizes = ScalarBuffer::from(vec![3i64, 3, 2]); let offsets = ScalarBuffer::from(vec![0i64, 3, 6]); let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]); @@ -590,7 +590,7 @@ mod tests { let buffer = BooleanBuffer::new(Buffer::from(null_bits), 0, 9); let null_buffer = NullBuffer::new(buffer); - let field = Arc::new(Field::new("item", DataType::Int32, true)); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); let sizes = ScalarBuffer::from(vec![2, 0, 0, 2, 2, 0, 3, 0, 1]); let offsets = ScalarBuffer::from(vec![0, 2, 2, 2, 4, 6, 6, 9, 9]); let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); @@ -656,7 +656,7 @@ mod tests { let null_buffer = NullBuffer::new(buffer); // Construct a large list view array from the above two - let field = Arc::new(Field::new("item", DataType::Int32, true)); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); let sizes = ScalarBuffer::from(vec![2i64, 0, 0, 2, 2, 0, 3, 0, 1]); let offsets = ScalarBuffer::from(vec![0i64, 2, 2, 2, 4, 6, 6, 9, 9]); let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); @@ -718,7 +718,7 @@ mod tests { // Construct a buffer for value offsets, for the nested array: // [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]] // Construct a list array from the above two - let field = Arc::new(Field::new("item", DataType::Int32, true)); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); let sizes = ScalarBuffer::from(vec![2i32, 0, 0, 2, 2, 0, 3, 0, 1]); let offsets = ScalarBuffer::from(vec![0i32, 2, 2, 2, 4, 6, 6, 9, 9]); let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); @@ -741,7 +741,7 @@ mod tests { .build_unchecked() }; let list_data_type = - DataType::ListView(Arc::new(Field::new("item", DataType::Int32, false))); + DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = unsafe { ArrayData::builder(list_data_type) .len(3) @@ -759,7 +759,7 @@ mod tests { fn test_list_view_array_invalid_child_array_len() { let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]); let list_data_type = - DataType::ListView(Arc::new(Field::new("item", DataType::Int32, false))); + DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = unsafe { ArrayData::builder(list_data_type) .len(3) @@ -771,7 +771,7 @@ mod tests { #[test] fn test_list_view_array_offsets_need_not_start_at_zero() { - let field = Arc::new(Field::new("item", DataType::Int32, true)); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); let sizes = ScalarBuffer::from(vec![0i32, 0, 3]); let offsets = ScalarBuffer::from(vec![2i32, 2, 5]); let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]); @@ -800,7 +800,7 @@ mod tests { }; let list_data_type = - DataType::ListView(Arc::new(Field::new("item", DataType::Int32, false))); + DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = unsafe { ArrayData::builder(list_data_type) .add_buffer(offset_buf2) @@ -942,7 +942,7 @@ mod tests { .build_unchecked() }; let list_data_type = - DataType::ListView(Arc::new(Field::new("item", DataType::Int32, false))); + DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = unsafe { ArrayData::builder(list_data_type) .len(2) @@ -976,7 +976,7 @@ mod tests { .build_unchecked() }; let list_data_type = - DataType::ListView(Arc::new(Field::new("item", DataType::Int32, false))); + DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = unsafe { ArrayData::builder(list_data_type) .len(3) @@ -1015,7 +1015,7 @@ mod tests { .build_unchecked() }; let list_data_type = - DataType::ListView(Arc::new(Field::new("item", DataType::Int32, false))); + DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = unsafe { ArrayData::builder(list_data_type) .len(3) diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs index 87577166ea3d..e2ce49422978 100644 --- a/arrow-array/src/array/mod.rs +++ b/arrow-array/src/array/mod.rs @@ -911,7 +911,7 @@ mod tests { #[test] fn test_empty_list_primitive() { - let data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); let array = new_empty_array(&data_type); let a = array.as_any().downcast_ref::().unwrap(); assert_eq!(a.len(), 0); @@ -969,7 +969,7 @@ mod tests { #[test] fn test_null_list_primitive() { - let data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); let array = new_null_array(&data_type, 9); let a = array.as_any().downcast_ref::().unwrap(); assert_eq!(a.len(), 9); diff --git a/arrow-array/src/array/string_array.rs b/arrow-array/src/array/string_array.rs index c87c797bcf2b..a2c74e7c4961 100644 --- a/arrow-array/src/array/string_array.rs +++ b/arrow-array/src/array/string_array.rs @@ -388,11 +388,9 @@ mod tests { let offsets = [0, 5, 8, 15].map(|n| O::from_usize(n).unwrap()); let null_buffer = Buffer::from_slice_ref([0b101]); - let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new( - "item", - DataType::UInt8, - false, - ))); + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new( + Field::new_list_field(DataType::UInt8, false), + )); // [None, Some("Parquet")] let array_data = ArrayData::builder(data_type) @@ -436,11 +434,9 @@ mod tests { // It is possible to create a null struct containing a non-nullable child // see https://github.com/apache/arrow-rs/pull/3244 for details - let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new( - "item", - DataType::UInt8, - true, - ))); + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new( + Field::new_list_field(DataType::UInt8, true), + )); // [None, Some(b"Parquet")] let array_data = ArrayData::builder(data_type) @@ -474,11 +470,9 @@ mod tests { .unwrap(); let offsets = [0, 2, 3].map(|n| O::from_usize(n).unwrap()); - let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new( - "item", - DataType::UInt16, - false, - ))); + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new( + Field::new_list_field(DataType::UInt16, false), + )); let array_data = ArrayData::builder(data_type) .len(2) diff --git a/arrow-array/src/builder/fixed_size_list_builder.rs b/arrow-array/src/builder/fixed_size_list_builder.rs index 5dff67650687..5c142b277d14 100644 --- a/arrow-array/src/builder/fixed_size_list_builder.rs +++ b/arrow-array/src/builder/fixed_size_list_builder.rs @@ -182,7 +182,7 @@ where let field = self .field .clone() - .unwrap_or_else(|| Arc::new(Field::new("item", values.data_type().clone(), true))); + .unwrap_or_else(|| Arc::new(Field::new_list_field(values.data_type().clone(), true))); FixedSizeListArray::new(field, self.list_len, values, nulls) } @@ -204,7 +204,7 @@ where let field = self .field .clone() - .unwrap_or_else(|| Arc::new(Field::new("item", values.data_type().clone(), true))); + .unwrap_or_else(|| Arc::new(Field::new_list_field(values.data_type().clone(), true))); FixedSizeListArray::new(field, self.list_len, values, nulls) } diff --git a/arrow-array/src/builder/generic_list_builder.rs b/arrow-array/src/builder/generic_list_builder.rs index 14c3ba79cdf7..a9c88ec6c586 100644 --- a/arrow-array/src/builder/generic_list_builder.rs +++ b/arrow-array/src/builder/generic_list_builder.rs @@ -297,7 +297,7 @@ where let field = match &self.field { Some(f) => f.clone(), - None => Arc::new(Field::new("item", values.data_type().clone(), true)), + None => Arc::new(Field::new_list_field(values.data_type().clone(), true)), }; GenericListArray::new(field, offsets, values, nulls) @@ -314,7 +314,7 @@ where let field = match &self.field { Some(f) => f.clone(), - None => Arc::new(Field::new("item", values.data_type().clone(), true)), + None => Arc::new(Field::new_list_field(values.data_type().clone(), true)), }; GenericListArray::new(field, offsets, values, nulls) @@ -584,7 +584,7 @@ mod tests { fn test_boxed_list_list_array_builder() { // This test is same as `test_list_list_array_builder` but uses boxed builders. let values_builder = make_builder( - &DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), + &DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))), 10, ); test_boxed_generic_list_generic_list_array_builder::(values_builder); @@ -594,7 +594,7 @@ mod tests { fn test_boxed_large_list_large_list_array_builder() { // This test is same as `test_list_list_array_builder` but uses boxed builders. let values_builder = make_builder( - &DataType::LargeList(Arc::new(Field::new("item", DataType::Int32, true))), + &DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, true))), 10, ); test_boxed_generic_list_generic_list_array_builder::(values_builder); @@ -789,7 +789,7 @@ mod tests { #[test] #[should_panic(expected = "Non-nullable field of ListArray \\\"item\\\" cannot contain nulls")] fn test_checks_nullability() { - let field = Arc::new(Field::new("item", DataType::Int32, false)); + let field = Arc::new(Field::new_list_field(DataType::Int32, false)); let mut builder = ListBuilder::new(Int32Builder::new()).with_field(field.clone()); builder.append_value([Some(1), None]); builder.finish(); @@ -798,7 +798,7 @@ mod tests { #[test] #[should_panic(expected = "ListArray expected data type Int64 got Int32")] fn test_checks_data_type() { - let field = Arc::new(Field::new("item", DataType::Int64, false)); + let field = Arc::new(Field::new_list_field(DataType::Int64, false)); let mut builder = ListBuilder::new(Int32Builder::new()).with_field(field.clone()); builder.append_value([Some(1)]); builder.finish(); diff --git a/arrow-array/src/builder/mod.rs b/arrow-array/src/builder/mod.rs index dd1a5c3ae722..89a96280eb87 100644 --- a/arrow-array/src/builder/mod.rs +++ b/arrow-array/src/builder/mod.rs @@ -123,7 +123,7 @@ //! let string_field = Arc::new(Field::new("i32", DataType::Utf8, false)); //! //! let i32_list = Arc::new(self.i32_list.finish()) as ArrayRef; -//! let value_field = Arc::new(Field::new("item", DataType::Int32, true)); +//! let value_field = Arc::new(Field::new_list_field(DataType::Int32, true)); //! let i32_list_field = Arc::new(Field::new("i32_list", DataType::List(value_field), true)); //! //! StructArray::from(vec![ diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs index 396ab2fed851..f1ce5fa857d2 100644 --- a/arrow-array/src/builder/struct_builder.rs +++ b/arrow-array/src/builder/struct_builder.rs @@ -46,8 +46,7 @@ use std::sync::Arc; /// let mut example_col = ListBuilder::new(StructBuilder::from_fields( /// vec![Field::new( /// "value_list", -/// DataType::List(Arc::new(Field::new( -/// "item", +/// DataType::List(Arc::new(Field::new_list_field( /// DataType::Struct(Fields::from(vec![ /// Field::new("key", DataType::Utf8, true), /// Field::new("value", DataType::Utf8, true), diff --git a/arrow-array/src/ffi.rs b/arrow-array/src/ffi.rs index 7c1e04bc7e78..144f2a21afec 100644 --- a/arrow-array/src/ffi.rs +++ b/arrow-array/src/ffi.rs @@ -722,7 +722,7 @@ mod tests_to_then_from_ffi { // Construct a list array from the above two let list_data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new( - Field::new("item", DataType::Int32, false), + Field::new_list_field(DataType::Int32, false), )); let list_data = ArrayData::builder(list_data_type) @@ -1481,7 +1481,7 @@ mod tests_from_ffi { let offsets: Vec = vec![0, 2, 4, 6, 8, 10, 12, 14, 16]; let value_offsets = Buffer::from_slice_ref(offsets); let inner_list_data_type = - DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); let inner_list_data = ArrayData::builder(inner_list_data_type.clone()) .len(8) .add_buffer(value_offsets) diff --git a/arrow-array/src/record_batch.rs b/arrow-array/src/record_batch.rs index 7eaab24f9e83..8958ca6fae62 100644 --- a/arrow-array/src/record_batch.rs +++ b/arrow-array/src/record_batch.rs @@ -946,7 +946,7 @@ mod tests { fn create_record_batch_field_name_mismatch() { let fields = vec![ Field::new("a1", DataType::Int32, false), - Field::new_list("a2", Field::new("item", DataType::Int8, false), false), + Field::new_list("a2", Field::new_list_field(DataType::Int8, false), false), ]; let schema = Arc::new(Schema::new(vec![Field::new_struct("a", fields, true)])); diff --git a/arrow-avro/src/codec.rs b/arrow-avro/src/codec.rs index 1e2acd99d828..35fa0339d69d 100644 --- a/arrow-avro/src/codec.rs +++ b/arrow-avro/src/codec.rs @@ -139,7 +139,9 @@ impl Codec { } Self::Duration => DataType::Interval(IntervalUnit::MonthDayNano), Self::Fixed(size) => DataType::FixedSizeBinary(*size), - Self::List(f) => DataType::List(Arc::new(f.field_with_name("item"))), + Self::List(f) => { + DataType::List(Arc::new(f.field_with_name(Field::LIST_FIELD_DEFAULT_NAME))) + } Self::Struct(f) => DataType::Struct(f.iter().map(|x| x.field()).collect()), } } diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 78a702e8c174..e6363025f4ae 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -3636,7 +3636,7 @@ mod tests { let array = Int32Array::from(vec![5, 6, 7, 8, 9]); let b = cast( &array, - &DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), + &DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))), ) .unwrap(); assert_eq!(5, b.len()); @@ -3660,7 +3660,7 @@ mod tests { let array = Int32Array::from(vec![Some(5), None, Some(7), Some(8), Some(9)]); let b = cast( &array, - &DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), + &DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))), ) .unwrap(); assert_eq!(5, b.len()); @@ -3688,7 +3688,7 @@ mod tests { let array = array.slice(2, 4); let b = cast( &array, - &DataType::List(Arc::new(Field::new("item", DataType::Float64, true))), + &DataType::List(Arc::new(Field::new_list_field(DataType::Float64, true))), ) .unwrap(); assert_eq!(4, b.len()); @@ -4009,7 +4009,7 @@ mod tests { // Construct a list array from the above two // [[0,0,0], [-1, -2, -1], [2, 100000000]] - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let list_data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) @@ -4020,7 +4020,7 @@ mod tests { let cast_array = cast( &list_array, - &DataType::List(Arc::new(Field::new("item", DataType::UInt16, true))), + &DataType::List(Arc::new(Field::new_list_field(DataType::UInt16, true))), ) .unwrap(); @@ -4060,7 +4060,7 @@ mod tests { let value_offsets = Buffer::from_slice_ref([0, 3, 6, 9]); // Construct a list array from the above two - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let list_data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) @@ -4071,8 +4071,7 @@ mod tests { let actual = cast( &list_array, - &DataType::List(Arc::new(Field::new( - "item", + &DataType::List(Arc::new(Field::new_list_field( DataType::Timestamp(TimeUnit::Microsecond, None), true, ))), @@ -4082,11 +4081,10 @@ mod tests { let expected = cast( &cast( &list_array, - &DataType::List(Arc::new(Field::new("item", DataType::Int64, true))), + &DataType::List(Arc::new(Field::new_list_field(DataType::Int64, true))), ) .unwrap(), - &DataType::List(Arc::new(Field::new( - "item", + &DataType::List(Arc::new(Field::new_list_field( DataType::Timestamp(TimeUnit::Microsecond, None), true, ))), @@ -7119,12 +7117,12 @@ mod tests { cast_from_null_to_other(&data_type); // Cast null from and to list - let data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); cast_from_null_to_other(&data_type); - let data_type = DataType::LargeList(Arc::new(Field::new("item", DataType::Int32, true))); + let data_type = DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, true))); cast_from_null_to_other(&data_type); let data_type = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 4); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, true)), 4); cast_from_null_to_other(&data_type); // Cast null from and to dictionary @@ -7241,11 +7239,11 @@ mod tests { assert_eq!(actual.data_type(), to_array.data_type()); let invalid_target = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Binary, true)), 2); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Binary, true)), 2); assert!(!can_cast_types(from_array.data_type(), &invalid_target)); let invalid_size = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float16, true)), 5); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Float16, true)), 5); assert!(!can_cast_types(from_array.data_type(), &invalid_size)); } @@ -7398,7 +7396,7 @@ mod tests { [(Some([Some(5)]))], 1, )) as ArrayRef; - let to_field_inner = Arc::new(Field::new("item", DataType::Float32, false)); + let to_field_inner = Arc::new(Field::new_list_field(DataType::Float32, false)); let to_field = Arc::new(Field::new( "dummy", DataType::FixedSizeList(to_field_inner.clone(), 1), @@ -7488,7 +7486,7 @@ mod tests { // 4. Nulls that are correctly sized (same as target list size) // Non-null case - let field = Arc::new(Field::new("item", DataType::Int32, true)); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); let values = vec![ Some(vec![Some(1), Some(2), Some(3)]), Some(vec![Some(4), Some(5), Some(6)]), @@ -7564,7 +7562,7 @@ mod tests { let res = cast_with_options( array.as_ref(), - &DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 3), + &DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, true)), 3), &CastOptions { safe: false, ..Default::default() @@ -7578,7 +7576,7 @@ mod tests { // too short and truncate lists that are too long. let res = cast( array.as_ref(), - &DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 3), + &DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, true)), 3), ) .unwrap(); let expected = Arc::new(FixedSizeListArray::from_iter_primitive::( @@ -7600,7 +7598,7 @@ mod tests { ])) as ArrayRef; let res = cast_with_options( array.as_ref(), - &DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 3), + &DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, true)), 3), &CastOptions { safe: false, ..Default::default() @@ -7625,7 +7623,7 @@ mod tests { )) as ArrayRef; let actual = cast( array.as_ref(), - &DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 2), + &DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, true)), 2), ) .unwrap(); assert_eq!(expected.as_ref(), actual.as_ref()); @@ -7648,14 +7646,14 @@ mod tests { )) as ArrayRef; let actual = cast( array.as_ref(), - &DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int64, true)), 2), + &DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int64, true)), 2), ) .unwrap(); assert_eq!(expected.as_ref(), actual.as_ref()); let res = cast_with_options( array.as_ref(), - &DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int16, true)), 2), + &DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int16, true)), 2), &CastOptions { safe: false, ..Default::default() @@ -7667,7 +7665,7 @@ mod tests { #[test] fn test_cast_list_to_fsl_empty() { - let field = Arc::new(Field::new("item", DataType::Int32, true)); + let field = Arc::new(Field::new_list_field(DataType::Int32, true)); let array = new_empty_array(&DataType::List(field.clone())); let target_type = DataType::FixedSizeList(field.clone(), 3); @@ -7690,7 +7688,7 @@ mod tests { let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]); // Construct a list array from the above two - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let list_data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) @@ -7714,7 +7712,7 @@ mod tests { // Construct a list array from the above two let list_data_type = - DataType::LargeList(Arc::new(Field::new("item", DataType::Int32, true))); + DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, true))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) @@ -7733,7 +7731,7 @@ mod tests { .unwrap(); let list_data_type = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 4); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, true)), 4); let list_data = ArrayData::builder(list_data_type) .len(2) .add_child_data(value_data) @@ -7751,7 +7749,7 @@ mod tests { .unwrap(); let list_data_type = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int64, true)), 4); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int64, true)), 4); let list_data = ArrayData::builder(list_data_type) .len(2) .add_child_data(value_data) @@ -8013,7 +8011,7 @@ mod tests { let array1 = make_list_array().slice(1, 2); let array2 = Arc::new(make_list_array()) as ArrayRef; - let dt = DataType::LargeList(Arc::new(Field::new("item", DataType::Int32, true))); + let dt = DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, true))); let out1 = cast(&array1, &dt).unwrap(); let out2 = cast(&array2, &dt).unwrap(); @@ -8026,7 +8024,7 @@ mod tests { let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]); let value_data = str_array.into_data(); - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))); + let list_data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Utf8, true))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) diff --git a/arrow-cast/src/pretty.rs b/arrow-cast/src/pretty.rs index 4a3cbda283a5..ad3b952c327d 100644 --- a/arrow-cast/src/pretty.rs +++ b/arrow-cast/src/pretty.rs @@ -296,7 +296,7 @@ mod tests { fn test_pretty_format_fixed_size_list() { // define a schema. let field_type = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 3); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, true)), 3); let schema = Arc::new(Schema::new(vec![Field::new("d1", field_type, true)])); let keys_builder = Int32Array::builder(3); diff --git a/arrow-flight/src/encode.rs b/arrow-flight/src/encode.rs index e65295e8750f..66ceab308f27 100644 --- a/arrow-flight/src/encode.rs +++ b/arrow-flight/src/encode.rs @@ -934,7 +934,7 @@ mod tests { let mut decoder = FlightDataDecoder::new(encoder); let expected_schema = Schema::new(vec![Field::new_list( "dict_list", - Field::new("item", DataType::Utf8, true), + Field::new_list_field(DataType::Utf8, true), true, )]); @@ -1038,7 +1038,7 @@ mod tests { "struct", vec![Field::new_list( "dict_list", - Field::new("item", DataType::Utf8, true), + Field::new_list_field(DataType::Utf8, true), true, )], true, @@ -1218,12 +1218,16 @@ mod tests { let hydrated_struct_fields = vec![Field::new_list( "dict_list", - Field::new("item", DataType::Utf8, true), + Field::new_list_field(DataType::Utf8, true), true, )]; let hydrated_union_fields = vec![ - Field::new_list("dict_list", Field::new("item", DataType::Utf8, true), true), + Field::new_list( + "dict_list", + Field::new_list_field(DataType::Utf8, true), + true, + ), Field::new_struct("struct", hydrated_struct_fields.clone(), true), Field::new("string", DataType::Utf8, true), ]; diff --git a/arrow-flight/src/sql/metadata/sql_info.rs b/arrow-flight/src/sql/metadata/sql_info.rs index 2ea30df7fc2f..58b228530942 100644 --- a/arrow-flight/src/sql/metadata/sql_info.rs +++ b/arrow-flight/src/sql/metadata/sql_info.rs @@ -172,7 +172,7 @@ static UNION_TYPE: Lazy = Lazy::new(|| { // treat list as nullable b/c that is what the builders make Field::new( "string_list", - DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Utf8, true))), true, ), Field::new( @@ -184,7 +184,7 @@ static UNION_TYPE: Lazy = Lazy::new(|| { Field::new("keys", DataType::Int32, false), Field::new( "values", - DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))), true, ), ])), diff --git a/arrow-flight/src/sql/metadata/xdbc_info.rs b/arrow-flight/src/sql/metadata/xdbc_info.rs index 485bedaebfb0..a3a18ca10888 100644 --- a/arrow-flight/src/sql/metadata/xdbc_info.rs +++ b/arrow-flight/src/sql/metadata/xdbc_info.rs @@ -330,7 +330,7 @@ static GET_XDBC_INFO_SCHEMA: Lazy = Lazy::new(|| { Field::new("literal_suffix", DataType::Utf8, true), Field::new( "create_params", - DataType::List(Arc::new(Field::new("item", DataType::Utf8, false))), + DataType::List(Arc::new(Field::new_list_field(DataType::Utf8, false))), true, ), Field::new("nullable", DataType::Int32, false), diff --git a/arrow-integration-test/src/lib.rs b/arrow-integration-test/src/lib.rs index ea5b545f2e81..a25f07ded1d9 100644 --- a/arrow-integration-test/src/lib.rs +++ b/arrow-integration-test/src/lib.rs @@ -1192,7 +1192,7 @@ mod tests { Field::new("utf8s", DataType::Utf8, true), Field::new( "lists", - DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))), true, ), Field::new( @@ -1249,7 +1249,7 @@ mod tests { let value_data = Int32Array::from(vec![None, Some(2), None, None]); let value_offsets = Buffer::from_slice_ref([0, 3, 4, 4]); - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let list_data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) diff --git a/arrow-integration-test/src/schema.rs b/arrow-integration-test/src/schema.rs index 541a1ec746ac..fb91aba00df3 100644 --- a/arrow-integration-test/src/schema.rs +++ b/arrow-integration-test/src/schema.rs @@ -150,7 +150,7 @@ mod tests { Field::new("c21", DataType::Interval(IntervalUnit::MonthDayNano), false), Field::new( "c22", - DataType::List(Arc::new(Field::new("item", DataType::Boolean, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Boolean, true))), false, ), Field::new( diff --git a/arrow-ipc/src/convert.rs b/arrow-ipc/src/convert.rs index 18f5193bf038..0aa07a6a47d2 100644 --- a/arrow-ipc/src/convert.rs +++ b/arrow-ipc/src/convert.rs @@ -1026,10 +1026,14 @@ mod tests { Field::new("utf8_view", DataType::Utf8View, false), Field::new("binary", DataType::Binary, false), Field::new("binary_view", DataType::BinaryView, false), - Field::new_list("list[u8]", Field::new("item", DataType::UInt8, false), true), + Field::new_list( + "list[u8]", + Field::new_list_field(DataType::UInt8, false), + true, + ), Field::new_fixed_size_list( "fixed_size_list[u8]", - Field::new("item", DataType::UInt8, false), + Field::new_list_field(DataType::UInt8, false), 2, true, ), diff --git a/arrow-ipc/src/reader.rs b/arrow-ipc/src/reader.rs index 0aea191a617a..481fefbd5fbc 100644 --- a/arrow-ipc/src/reader.rs +++ b/arrow-ipc/src/reader.rs @@ -1407,10 +1407,10 @@ mod tests { fn create_test_projection_schema() -> Schema { // define field types - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let list_data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); let fixed_size_list_data_type = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 3); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, false)), 3); let union_fields = UnionFields::new( vec![0, 1], @@ -1424,7 +1424,7 @@ mod tests { let struct_fields = Fields::from(vec![ Field::new("id", DataType::Int32, false), - Field::new_list("list", Field::new("item", DataType::Int8, true), false), + Field::new_list("list", Field::new_list_field(DataType::Int8, true), false), ]); let struct_data_type = DataType::Struct(struct_fields); diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs index e6fc9d81df67..23cefede7b37 100644 --- a/arrow-ipc/src/writer.rs +++ b/arrow-ipc/src/writer.rs @@ -2556,7 +2556,7 @@ mod tests { #[test] fn encode_lists() { - let val_inner = Field::new("item", DataType::UInt32, true); + let val_inner = Field::new_list_field(DataType::UInt32, true); let val_list_field = Field::new("val", DataType::List(Arc::new(val_inner)), false); let schema = Arc::new(Schema::new(vec![val_list_field])); @@ -2568,7 +2568,7 @@ mod tests { #[test] fn encode_empty_list() { - let val_inner = Field::new("item", DataType::UInt32, true); + let val_inner = Field::new_list_field(DataType::UInt32, true); let val_list_field = Field::new("val", DataType::List(Arc::new(val_inner)), false); let schema = Arc::new(Schema::new(vec![val_list_field])); @@ -2583,7 +2583,7 @@ mod tests { #[test] fn encode_large_lists() { - let val_inner = Field::new("item", DataType::UInt32, true); + let val_inner = Field::new_list_field(DataType::UInt32, true); let val_list_field = Field::new("val", DataType::LargeList(Arc::new(val_inner)), false); let schema = Arc::new(Schema::new(vec![val_list_field])); @@ -2597,8 +2597,8 @@ mod tests { #[test] fn encode_nested_lists() { - let inner_int = Arc::new(Field::new("item", DataType::UInt32, true)); - let inner_list_field = Arc::new(Field::new("item", DataType::List(inner_int), true)); + let inner_int = Arc::new(Field::new_list_field(DataType::UInt32, true)); + let inner_list_field = Arc::new(Field::new_list_field(DataType::List(inner_int), true)); let list_field = Field::new("val", DataType::List(inner_list_field), true); let schema = Arc::new(Schema::new(vec![list_field])); diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs index c9013b10fb5f..de2871fff68a 100644 --- a/arrow-json/src/reader/mod.rs +++ b/arrow-json/src/reader/mod.rs @@ -1761,12 +1761,12 @@ mod tests { assert_eq!(&DataType::Int64, a.1.data_type()); let b = schema.column_with_name("b").unwrap(); assert_eq!( - &DataType::List(Arc::new(Field::new("item", DataType::Float64, true))), + &DataType::List(Arc::new(Field::new_list_field(DataType::Float64, true))), b.1.data_type() ); let c = schema.column_with_name("c").unwrap(); assert_eq!( - &DataType::List(Arc::new(Field::new("item", DataType::Boolean, true))), + &DataType::List(Arc::new(Field::new_list_field(DataType::Boolean, true))), c.1.data_type() ); let d = schema.column_with_name("d").unwrap(); @@ -1805,7 +1805,7 @@ mod tests { let schema = Arc::new(Schema::new(vec![Field::new( "items", - DataType::List(FieldRef::new(Field::new("item", DataType::Null, true))), + DataType::List(FieldRef::new(Field::new_list_field(DataType::Null, true))), true, )])); @@ -1829,9 +1829,8 @@ mod tests { let schema = Arc::new(Schema::new(vec![Field::new( "items", - DataType::List(FieldRef::new(Field::new( - "item", - DataType::List(FieldRef::new(Field::new("item", DataType::Null, true))), + DataType::List(FieldRef::new(Field::new_list_field( + DataType::List(FieldRef::new(Field::new_list_field(DataType::Null, true))), true, ))), true, diff --git a/arrow-json/src/reader/schema.rs b/arrow-json/src/reader/schema.rs index ace7b0ea5cb6..07eb40106de0 100644 --- a/arrow-json/src/reader/schema.rs +++ b/arrow-json/src/reader/schema.rs @@ -77,7 +77,7 @@ impl InferredType { /// Shorthand for building list data type of `ty` fn list_type_of(ty: DataType) -> DataType { - DataType::List(Arc::new(Field::new("item", ty, true))) + DataType::List(Arc::new(Field::new_list_field(ty, true))) } /// Coerce data type during inference diff --git a/arrow-json/src/writer/mod.rs b/arrow-json/src/writer/mod.rs index a37aa5ff8c2c..ee6d83a0a1f0 100644 --- a/arrow-json/src/writer/mod.rs +++ b/arrow-json/src/writer/mod.rs @@ -1771,7 +1771,7 @@ mod tests { #[test] fn test_writer_fixed_size_list() { let size = 3; - let field = FieldRef::new(Field::new("item", DataType::Int32, true)); + let field = FieldRef::new(Field::new_list_field(DataType::Int32, true)); let schema = SchemaRef::new(Schema::new(vec![Field::new( "list", DataType::FixedSizeList(field, size), diff --git a/arrow-ord/src/comparison.rs b/arrow-ord/src/comparison.rs index d60bc3b8de88..bb82f54d4918 100644 --- a/arrow-ord/src/comparison.rs +++ b/arrow-ord/src/comparison.rs @@ -821,7 +821,7 @@ mod tests { .into_data(); let value_offsets = Buffer::from_slice_ref([0i64, 3, 6, 6, 9]); let list_data_type = - DataType::LargeList(Arc::new(Field::new("item", DataType::Int32, true))); + DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, true))); let list_data = ArrayData::builder(list_data_type) .len(4) .add_buffer(value_offsets) diff --git a/arrow-ord/src/ord.rs b/arrow-ord/src/ord.rs index db15d36d6eb7..547422b40d53 100644 --- a/arrow-ord/src/ord.rs +++ b/arrow-ord/src/ord.rs @@ -849,7 +849,7 @@ pub mod tests { fn test_struct() { let fields = Fields::from(vec![ Field::new("a", DataType::Int32, true), - Field::new_list("b", Field::new("item", DataType::Int32, true), true), + Field::new_list("b", Field::new_list_field(DataType::Int32, true), true), ]); let a = Int32Array::from(vec![Some(1), Some(2), None, None]); diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs index 5780bdbfefb9..d0fad12210db 100644 --- a/arrow-row/src/lib.rs +++ b/arrow-row/src/lib.rs @@ -2317,7 +2317,7 @@ mod tests { let values_len = offsets.last().unwrap().to_usize().unwrap(); let values = values(values_len); let nulls = NullBuffer::from_iter((0..len).map(|_| rng.gen_bool(valid_percent))); - let field = Arc::new(Field::new("item", values.data_type().clone(), true)); + let field = Arc::new(Field::new_list_field(values.data_type().clone(), true)); ListArray::new(field, offsets, values, Some(nulls)) } diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs index ff5832dfa68c..a6333c804805 100644 --- a/arrow-schema/src/datatype.rs +++ b/arrow-schema/src/datatype.rs @@ -40,7 +40,7 @@ use crate::{ArrowError, Field, FieldRef, Fields, UnionFields}; /// # use arrow_schema::{DataType, Field}; /// # use std::sync::Arc; /// // create a new list of 32-bit signed integers directly -/// let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); +/// let list_data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); /// // Create the same list type with constructor /// let list_data_type2 = DataType::new_list(DataType::Int32, true); /// assert_eq!(list_data_type, list_data_type2); @@ -837,21 +837,21 @@ mod tests { #[test] fn test_list_datatype_equality() { // tests that list type equality is checked while ignoring list names - let list_a = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let list_a = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); let list_b = DataType::List(Arc::new(Field::new("array", DataType::Int32, true))); - let list_c = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); - let list_d = DataType::List(Arc::new(Field::new("item", DataType::UInt32, true))); + let list_c = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); + let list_d = DataType::List(Arc::new(Field::new_list_field(DataType::UInt32, true))); assert!(list_a.equals_datatype(&list_b)); assert!(!list_a.equals_datatype(&list_c)); assert!(!list_b.equals_datatype(&list_c)); assert!(!list_a.equals_datatype(&list_d)); let list_e = - DataType::FixedSizeList(Arc::new(Field::new("item", list_a.clone(), false)), 3); + DataType::FixedSizeList(Arc::new(Field::new_list_field(list_a.clone(), false)), 3); let list_f = DataType::FixedSizeList(Arc::new(Field::new("array", list_b.clone(), false)), 3); let list_g = DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::FixedSizeBinary(3), true)), + Arc::new(Field::new_list_field(DataType::FixedSizeBinary(3), true)), 3, ); assert!(list_e.equals_datatype(&list_f)); diff --git a/arrow-schema/src/datatype_parse.rs b/arrow-schema/src/datatype_parse.rs index 4378950329f3..bf557d8941dc 100644 --- a/arrow-schema/src/datatype_parse.rs +++ b/arrow-schema/src/datatype_parse.rs @@ -90,8 +90,8 @@ impl<'a> Parser<'a> { self.expect_token(Token::LParen)?; let data_type = self.parse_next_type()?; self.expect_token(Token::RParen)?; - Ok(DataType::List(Arc::new(Field::new( - "item", data_type, true, + Ok(DataType::List(Arc::new(Field::new_list_field( + data_type, true, )))) } @@ -100,8 +100,8 @@ impl<'a> Parser<'a> { self.expect_token(Token::LParen)?; let data_type = self.parse_next_type()?; self.expect_token(Token::RParen)?; - Ok(DataType::LargeList(Arc::new(Field::new( - "item", data_type, true, + Ok(DataType::LargeList(Arc::new(Field::new_list_field( + data_type, true, )))) } @@ -113,7 +113,7 @@ impl<'a> Parser<'a> { let data_type = self.parse_next_type()?; self.expect_token(Token::RParen)?; Ok(DataType::FixedSizeList( - Arc::new(Field::new("item", data_type, true)), + Arc::new(Field::new_list_field(data_type, true)), length, )) } diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs index b532ea8616b6..131e23d68b9c 100644 --- a/arrow-schema/src/field.rs +++ b/arrow-schema/src/field.rs @@ -117,6 +117,9 @@ impl Hash for Field { } impl Field { + /// Default list member field name + pub const LIST_FIELD_DEFAULT_NAME: &'static str = "item"; + /// Creates a new field with the given name, type, and nullability pub fn new(name: impl Into, data_type: DataType, nullable: bool) -> Self { Field { @@ -144,7 +147,7 @@ impl Field { /// ); /// ``` pub fn new_list_field(data_type: DataType, nullable: bool) -> Self { - Self::new("item", data_type, nullable) + Self::new(Self::LIST_FIELD_DEFAULT_NAME, data_type, nullable) } /// Creates a new field that has additional dictionary information diff --git a/arrow-schema/src/fields.rs b/arrow-schema/src/fields.rs index 33328268e6d6..b4c4d4e59ecb 100644 --- a/arrow-schema/src/fields.rs +++ b/arrow-schema/src/fields.rs @@ -496,7 +496,12 @@ mod tests { Field::new("floats", DataType::Struct(floats.clone()), true), true, ), - Field::new_fixed_size_list("f", Field::new("item", DataType::Int32, false), 3, false), + Field::new_fixed_size_list( + "f", + Field::new_list_field(DataType::Int32, false), + 3, + false, + ), Field::new_map( "g", "entries", diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs index a59d87aa8844..c91732848653 100644 --- a/arrow-select/src/filter.rs +++ b/arrow-select/src/filter.rs @@ -1331,7 +1331,7 @@ mod tests { let value_offsets = Buffer::from_slice_ref([0i64, 3, 6, 8, 8]); let list_data_type = - DataType::LargeList(Arc::new(Field::new("item", DataType::Int32, false))); + DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = ArrayData::builder(list_data_type) .len(4) .add_buffer(value_offsets) @@ -1355,7 +1355,7 @@ mod tests { let value_offsets = Buffer::from_slice_ref([0i64, 3, 3]); let list_data_type = - DataType::LargeList(Arc::new(Field::new("item", DataType::Int32, false))); + DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, false))); let expected = ArrayData::builder(list_data_type) .len(2) .add_buffer(value_offsets) diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs index 07630a49fa11..71a7c77a8f92 100644 --- a/arrow-select/src/take.rs +++ b/arrow-select/src/take.rs @@ -1606,7 +1606,7 @@ mod tests { let value_offsets = Buffer::from_slice_ref(&value_offsets); // Construct a list array from the above two let list_data_type = - DataType::$list_data_type(Arc::new(Field::new("item", DataType::Int32, false))); + DataType::$list_data_type(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = ArrayData::builder(list_data_type.clone()) .len(4) .add_buffer(value_offsets) @@ -1672,7 +1672,7 @@ mod tests { let value_offsets = Buffer::from_slice_ref(&value_offsets); // Construct a list array from the above two let list_data_type = - DataType::$list_data_type(Arc::new(Field::new("item", DataType::Int32, true))); + DataType::$list_data_type(Arc::new(Field::new_list_field(DataType::Int32, true))); let list_data = ArrayData::builder(list_data_type.clone()) .len(4) .add_buffer(value_offsets) @@ -1739,7 +1739,7 @@ mod tests { let value_offsets = Buffer::from_slice_ref(&value_offsets); // Construct a list array from the above two let list_data_type = - DataType::$list_data_type(Arc::new(Field::new("item", DataType::Int32, true))); + DataType::$list_data_type(Arc::new(Field::new_list_field(DataType::Int32, true))); let list_data = ArrayData::builder(list_data_type.clone()) .len(4) .add_buffer(value_offsets) @@ -1904,7 +1904,8 @@ mod tests { // Construct offsets let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]); // Construct a list array from the above two - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let list_data_type = + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) @@ -2222,7 +2223,7 @@ mod tests { fn test_take_fixed_size_list_null_indices() { let indices = Int32Array::from_iter([Some(0), None]); let values = Arc::new(Int32Array::from(vec![0, 1, 2, 3])); - let arr_field = Arc::new(Field::new("item", values.data_type().clone(), true)); + let arr_field = Arc::new(Field::new_list_field(values.data_type().clone(), true)); let values = FixedSizeListArray::try_new(arr_field, 2, values, None).unwrap(); let r = take(&values, &indices, None).unwrap(); diff --git a/arrow-string/src/length.rs b/arrow-string/src/length.rs index 6a28d44ea7aa..49fc244e72cc 100644 --- a/arrow-string/src/length.rs +++ b/arrow-string/src/length.rs @@ -710,7 +710,7 @@ mod tests { .build() .unwrap(); let list_data_type = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 3); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, false)), 3); let nulls = NullBuffer::from(vec![true, false, true]); let list_data = ArrayData::builder(list_data_type) .len(3) diff --git a/arrow-string/src/regexp.rs b/arrow-string/src/regexp.rs index 5ad452a17b12..d14662be7280 100644 --- a/arrow-string/src/regexp.rs +++ b/arrow-string/src/regexp.rs @@ -447,8 +447,7 @@ pub fn regexp_match( if regex.is_none() { return Ok(new_null_array( - &DataType::List(Arc::new(Field::new( - "item", + &DataType::List(Arc::new(Field::new_list_field( array.data_type().clone(), true, ))), diff --git a/arrow/benches/concatenate_kernel.rs b/arrow/benches/concatenate_kernel.rs index 0c553f8b3f3c..034f5f2a305c 100644 --- a/arrow/benches/concatenate_kernel.rs +++ b/arrow/benches/concatenate_kernel.rs @@ -86,14 +86,14 @@ fn add_benchmark(c: &mut Criterion) { }); let v1 = FixedSizeListArray::try_new( - Arc::new(Field::new("item", DataType::Int32, true)), + Arc::new(Field::new_list_field(DataType::Int32, true)), 1024, Arc::new(create_primitive_array::(1024 * 1024, 0.0)), None, ) .unwrap(); let v2 = FixedSizeListArray::try_new( - Arc::new(Field::new("item", DataType::Int32, true)), + Arc::new(Field::new_list_field(DataType::Int32, true)), 1024, Arc::new(create_primitive_array::(1024 * 1024, 0.0)), None, diff --git a/arrow/benches/json_reader.rs b/arrow/benches/json_reader.rs index 8f3898c51f9d..c698a93fe869 100644 --- a/arrow/benches/json_reader.rs +++ b/arrow/benches/json_reader.rs @@ -102,22 +102,22 @@ fn small_bench_list(c: &mut Criterion) { let schema = Arc::new(Schema::new(vec![ Field::new( "c1", - DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Utf8, true))), true, ), Field::new( "c2", - DataType::List(Arc::new(Field::new("item", DataType::Float64, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Float64, true))), true, ), Field::new( "c3", - DataType::List(Arc::new(Field::new("item", DataType::UInt32, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::UInt32, true))), true, ), Field::new( "c4", - DataType::List(Arc::new(Field::new("item", DataType::Boolean, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Boolean, true))), true, ), ])); diff --git a/arrow/benches/lexsort.rs b/arrow/benches/lexsort.rs index cd952299df47..bb1c6081eaf9 100644 --- a/arrow/benches/lexsort.rs +++ b/arrow/benches/lexsort.rs @@ -83,7 +83,7 @@ impl Column { Column::RequiredI32List => { let field = Field::new( "_1", - DataType::List(Arc::new(Field::new("item", DataType::Int32, false))), + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))), true, ); create_random_array(&field, size, 0., 1.).unwrap() @@ -91,7 +91,7 @@ impl Column { Column::OptionalI32List => { let field = Field::new( "_1", - DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))), true, ); create_random_array(&field, size, 0.2, 1.).unwrap() @@ -99,7 +99,7 @@ impl Column { Column::Required4CharStringList => { let field = Field::new( "_1", - DataType::List(Arc::new(Field::new("item", DataType::Utf8, false))), + DataType::List(Arc::new(Field::new_list_field(DataType::Utf8, false))), true, ); create_random_array(&field, size, 0., 1.).unwrap() @@ -107,7 +107,7 @@ impl Column { Column::Optional4CharStringList => { let field = Field::new( "_1", - DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Utf8, true))), true, ); create_random_array(&field, size, 0.2, 1.).unwrap() diff --git a/arrow/examples/builders.rs b/arrow/examples/builders.rs index bd0575bb5684..8043ad82fca6 100644 --- a/arrow/examples/builders.rs +++ b/arrow/examples/builders.rs @@ -97,7 +97,7 @@ fn main() { let value_offsets = Buffer::from([0, 3, 6, 8].to_byte_slice()); // Construct a list array from the above two - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let list_data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) diff --git a/arrow/src/util/data_gen.rs b/arrow/src/util/data_gen.rs index 56bbdefd522d..5f63812e51c0 100644 --- a/arrow/src/util/data_gen.rs +++ b/arrow/src/util/data_gen.rs @@ -538,7 +538,7 @@ mod tests { Field::new("a", DataType::Int32, false), Field::new( "b", - DataType::List(Arc::new(Field::new("item", DataType::LargeUtf8, false))), + DataType::List(Arc::new(Field::new_list_field(DataType::LargeUtf8, false))), false, ), Field::new("a", DataType::Int32, false), @@ -569,10 +569,8 @@ mod tests { Field::new("b", DataType::Boolean, true), Field::new( "c", - DataType::LargeList(Arc::new(Field::new( - "item", - DataType::List(Arc::new(Field::new( - "item", + DataType::LargeList(Arc::new(Field::new_list_field( + DataType::List(Arc::new(Field::new_list_field( DataType::FixedSizeBinary(6), true, ))), diff --git a/arrow/tests/array_cast.rs b/arrow/tests/array_cast.rs index c5311bd8c409..ef5ca6041700 100644 --- a/arrow/tests/array_cast.rs +++ b/arrow/tests/array_cast.rs @@ -315,7 +315,7 @@ fn make_fixed_size_list_array() -> FixedSizeListArray { // Construct a fixed size list array from the above two let list_data_type = - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 2); + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, true)), 2); let list_data = ArrayData::builder(list_data_type) .len(5) .add_child_data(value_data) @@ -348,7 +348,7 @@ fn make_list_array() -> ListArray { let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]); // Construct a list array from the above two - let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let list_data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) @@ -371,7 +371,8 @@ fn make_large_list_array() -> LargeListArray { let value_offsets = Buffer::from_slice_ref([0i64, 3, 6, 8]); // Construct a list array from the above two - let list_data_type = DataType::LargeList(Arc::new(Field::new("item", DataType::Int32, true))); + let list_data_type = + DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, true))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) @@ -466,12 +467,12 @@ fn get_all_types() -> Vec { LargeBinary, Utf8, LargeUtf8, - List(Arc::new(Field::new("item", DataType::Int8, true))), - List(Arc::new(Field::new("item", DataType::Utf8, true))), - FixedSizeList(Arc::new(Field::new("item", DataType::Int8, true)), 10), - FixedSizeList(Arc::new(Field::new("item", DataType::Utf8, false)), 10), - LargeList(Arc::new(Field::new("item", DataType::Int8, true))), - LargeList(Arc::new(Field::new("item", DataType::Utf8, false))), + List(Arc::new(Field::new_list_field(DataType::Int8, true))), + List(Arc::new(Field::new_list_field(DataType::Utf8, true))), + FixedSizeList(Arc::new(Field::new_list_field(DataType::Int8, true)), 10), + FixedSizeList(Arc::new(Field::new_list_field(DataType::Utf8, false)), 10), + LargeList(Arc::new(Field::new_list_field(DataType::Int8, true))), + LargeList(Arc::new(Field::new_list_field(DataType::Utf8, false))), Struct(Fields::from(vec![ Field::new("f1", DataType::Int32, true), Field::new("f2", DataType::Utf8, true), diff --git a/arrow/tests/array_equal.rs b/arrow/tests/array_equal.rs index 7ed4dae1ed08..94fb85030bf3 100644 --- a/arrow/tests/array_equal.rs +++ b/arrow/tests/array_equal.rs @@ -409,8 +409,7 @@ fn test_empty_offsets_list_equal() { let values = Int32Array::from(empty); let empty_offsets: [u8; 0] = []; - let a: ListArray = ArrayDataBuilder::new(DataType::List(Arc::new(Field::new( - "item", + let a: ListArray = ArrayDataBuilder::new(DataType::List(Arc::new(Field::new_list_field( DataType::Int32, true, )))) @@ -422,8 +421,7 @@ fn test_empty_offsets_list_equal() { .unwrap() .into(); - let b: ListArray = ArrayDataBuilder::new(DataType::List(Arc::new(Field::new( - "item", + let b: ListArray = ArrayDataBuilder::new(DataType::List(Arc::new(Field::new_list_field( DataType::Int32, true, )))) @@ -437,8 +435,7 @@ fn test_empty_offsets_list_equal() { test_equal(&a, &b, true); - let c: ListArray = ArrayDataBuilder::new(DataType::List(Arc::new(Field::new( - "item", + let c: ListArray = ArrayDataBuilder::new(DataType::List(Arc::new(Field::new_list_field( DataType::Int32, true, )))) @@ -475,8 +472,7 @@ fn test_list_null() { // a list where the nullness of values is determined by the list's bitmap let c_values = Int32Array::from(vec![1, 2, -1, -2, 3, 4, -3, -4]); - let c: ListArray = ArrayDataBuilder::new(DataType::List(Arc::new(Field::new( - "item", + let c: ListArray = ArrayDataBuilder::new(DataType::List(Arc::new(Field::new_list_field( DataType::Int32, true, )))) @@ -498,8 +494,7 @@ fn test_list_null() { None, None, ]); - let d: ListArray = ArrayDataBuilder::new(DataType::List(Arc::new(Field::new( - "item", + let d: ListArray = ArrayDataBuilder::new(DataType::List(Arc::new(Field::new_list_field( DataType::Int32, true, )))) diff --git a/arrow/tests/array_transform.rs b/arrow/tests/array_transform.rs index 08f23c200d52..c6de9f4a3417 100644 --- a/arrow/tests/array_transform.rs +++ b/arrow/tests/array_transform.rs @@ -600,7 +600,7 @@ fn test_list_append() { ]); let list_value_offsets = Buffer::from_slice_ref([0i32, 3, 5, 11, 13, 13, 15, 15, 17]); let expected_list_data = ArrayData::try_new( - DataType::List(Arc::new(Field::new("item", DataType::Int64, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Int64, true))), 8, None, 0, @@ -677,7 +677,7 @@ fn test_list_nulls_append() { let list_value_offsets = Buffer::from_slice_ref([0, 3, 5, 5, 13, 15, 15, 15, 19, 19, 19, 19, 23]); let expected_list_data = ArrayData::try_new( - DataType::List(Arc::new(Field::new("item", DataType::Int64, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Int64, true))), 12, Some(Buffer::from(&[0b11011011, 0b1110])), 0, @@ -940,7 +940,7 @@ fn test_list_of_strings_append() { ]); let list_value_offsets = Buffer::from_slice_ref([0, 3, 5, 6, 9, 10, 13]); let expected_list_data = ArrayData::try_new( - DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Utf8, true))), 6, None, 0, @@ -1141,7 +1141,7 @@ fn test_fixed_size_list_append() { Some(12), ]); let expected_fixed_size_list_data = ArrayData::try_new( - DataType::FixedSizeList(Arc::new(Field::new("item", DataType::UInt16, true)), 2), + DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::UInt16, true)), 2), 12, Some(Buffer::from(&[0b11011101, 0b101])), 0, diff --git a/parquet/benches/arrow_reader.rs b/parquet/benches/arrow_reader.rs index c424d000694a..e5165fee212c 100644 --- a/parquet/benches/arrow_reader.rs +++ b/parquet/benches/arrow_reader.rs @@ -680,7 +680,7 @@ fn create_string_list_reader( column_desc: ColumnDescPtr, ) -> Box { let items = create_byte_array_reader(page_iterator, column_desc); - let field = Field::new("item", DataType::Utf8, true); + let field = Field::new_list_field(DataType::Utf8, true); let data_type = DataType::List(Arc::new(field)); Box::new(ListArrayReader::::new(items, data_type, 2, 1, true)) } diff --git a/parquet/benches/arrow_writer.rs b/parquet/benches/arrow_writer.rs index cf39ee66f31a..bfa333db722c 100644 --- a/parquet/benches/arrow_writer.rs +++ b/parquet/benches/arrow_writer.rs @@ -189,17 +189,17 @@ fn create_list_primitive_bench_batch( let fields = vec![ Field::new( "_1", - DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))), true, ), Field::new( "_2", - DataType::List(Arc::new(Field::new("item", DataType::Boolean, true))), + DataType::List(Arc::new(Field::new_list_field(DataType::Boolean, true))), true, ), Field::new( "_3", - DataType::LargeList(Arc::new(Field::new("item", DataType::Utf8, true))), + DataType::LargeList(Arc::new(Field::new_list_field(DataType::Utf8, true))), true, ), ]; @@ -220,17 +220,17 @@ fn create_list_primitive_bench_batch_non_null( let fields = vec![ Field::new( "_1", - DataType::List(Arc::new(Field::new("item", DataType::Int32, false))), + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))), false, ), Field::new( "_2", - DataType::List(Arc::new(Field::new("item", DataType::Boolean, false))), + DataType::List(Arc::new(Field::new_list_field(DataType::Boolean, false))), false, ), Field::new( "_3", - DataType::LargeList(Arc::new(Field::new("item", DataType::Utf8, false))), + DataType::LargeList(Arc::new(Field::new_list_field(DataType::Utf8, false))), false, ), ]; @@ -274,10 +274,8 @@ fn _create_nested_bench_batch( ), Field::new( "_2", - DataType::LargeList(Arc::new(Field::new( - "item", - DataType::List(Arc::new(Field::new( - "item", + DataType::LargeList(Arc::new(Field::new_list_field( + DataType::List(Arc::new(Field::new_list_field( DataType::Struct(Fields::from(vec![ Field::new( "_1", diff --git a/parquet/src/arrow/array_reader/fixed_len_byte_array.rs b/parquet/src/arrow/array_reader/fixed_len_byte_array.rs index 4be07ed68f1d..6b437be943d4 100644 --- a/parquet/src/arrow/array_reader/fixed_len_byte_array.rs +++ b/parquet/src/arrow/array_reader/fixed_len_byte_array.rs @@ -508,8 +508,7 @@ mod tests { ); // [[], [1], [2, 3], null, [4], null, [6, 7, 8]] - let data = ArrayDataBuilder::new(ArrowType::List(Arc::new(Field::new( - "item", + let data = ArrayDataBuilder::new(ArrowType::List(Arc::new(Field::new_list_field( decimals.data_type().clone(), false, )))) diff --git a/parquet/src/arrow/array_reader/fixed_size_list_array.rs b/parquet/src/arrow/array_reader/fixed_size_list_array.rs index 75099d018fc9..732a3cf59870 100644 --- a/parquet/src/arrow/array_reader/fixed_size_list_array.rs +++ b/parquet/src/arrow/array_reader/fixed_size_list_array.rs @@ -277,7 +277,7 @@ mod tests { let mut list_array_reader = FixedSizeListArrayReader::new( Box::new(item_array_reader), 3, - ArrowType::FixedSizeList(Arc::new(Field::new("item", ArrowType::Int32, true)), 3), + ArrowType::FixedSizeList(Arc::new(Field::new_list_field(ArrowType::Int32, true)), 3), 2, 1, true, @@ -323,7 +323,7 @@ mod tests { let mut list_array_reader = FixedSizeListArrayReader::new( Box::new(item_array_reader), 2, - ArrowType::FixedSizeList(Arc::new(Field::new("item", ArrowType::Int32, true)), 2), + ArrowType::FixedSizeList(Arc::new(Field::new_list_field(ArrowType::Int32, true)), 2), 1, 1, false, @@ -347,9 +347,9 @@ mod tests { // [[null, null]], // ] let l2_type = - ArrowType::FixedSizeList(Arc::new(Field::new("item", ArrowType::Int32, true)), 2); + ArrowType::FixedSizeList(Arc::new(Field::new_list_field(ArrowType::Int32, true)), 2); let l1_type = - ArrowType::FixedSizeList(Arc::new(Field::new("item", l2_type.clone(), false)), 1); + ArrowType::FixedSizeList(Arc::new(Field::new_list_field(l2_type.clone(), false)), 1); let array = PrimitiveArray::::from(vec![ None, @@ -436,7 +436,7 @@ mod tests { let mut list_array_reader = FixedSizeListArrayReader::new( Box::new(item_array_reader), 0, - ArrowType::FixedSizeList(Arc::new(Field::new("item", ArrowType::Int32, true)), 0), + ArrowType::FixedSizeList(Arc::new(Field::new_list_field(ArrowType::Int32, true)), 0), 2, 1, true, @@ -481,9 +481,9 @@ mod tests { None, ])); - let inner_type = ArrowType::List(Arc::new(Field::new("item", ArrowType::Int32, true))); + let inner_type = ArrowType::List(Arc::new(Field::new_list_field(ArrowType::Int32, true))); let list_type = - ArrowType::FixedSizeList(Arc::new(Field::new("item", inner_type.clone(), true)), 2); + ArrowType::FixedSizeList(Arc::new(Field::new_list_field(inner_type.clone(), true)), 2); let item_array_reader = InMemoryArrayReader::new( ArrowType::Int32, @@ -534,7 +534,10 @@ mod tests { let schema = Arc::new(Schema::new(vec![ Field::new( "list", - ArrowType::FixedSizeList(Arc::new(Field::new("item", ArrowType::Int32, true)), 4), + ArrowType::FixedSizeList( + Arc::new(Field::new_list_field(ArrowType::Int32, true)), + 4 + ), true, ), Field::new("primitive", ArrowType::Int32, true), @@ -599,7 +602,7 @@ mod tests { let schema = Arc::new(Schema::new(vec![Field::new( "list", - ArrowType::FixedSizeList(Arc::new(Field::new("item", ArrowType::Int32, true)), 4), + ArrowType::FixedSizeList(Arc::new(Field::new_list_field(ArrowType::Int32, true)), 4), true, )])); diff --git a/parquet/src/arrow/array_reader/list_array.rs b/parquet/src/arrow/array_reader/list_array.rs index ebff3286bed5..6e583ed00c19 100644 --- a/parquet/src/arrow/array_reader/list_array.rs +++ b/parquet/src/arrow/array_reader/list_array.rs @@ -265,7 +265,7 @@ mod tests { data_type: ArrowType, item_nullable: bool, ) -> ArrowType { - let field = Arc::new(Field::new("item", data_type, item_nullable)); + let field = Arc::new(Field::new_list_field(data_type, item_nullable)); GenericListArray::::DATA_TYPE_CONSTRUCTOR(field) } diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index e25b42fb37ad..1488395b4eec 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -1542,8 +1542,7 @@ mod tests { let decimals = Decimal128Array::from_iter_values([1, 2, 3, 4, 5, 6, 7, 8]); // [[], [1], [2, 3], null, [4], null, [6, 7, 8]] - let data = ArrayDataBuilder::new(ArrowDataType::List(Arc::new(Field::new( - "item", + let data = ArrayDataBuilder::new(ArrowDataType::List(Arc::new(Field::new_list_field( decimals.data_type().clone(), false, )))) @@ -2874,7 +2873,7 @@ mod tests { let arrow_field = Field::new( "emptylist", - ArrowDataType::List(Arc::new(Field::new("item", ArrowDataType::Null, true))), + ArrowDataType::List(Arc::new(Field::new_list_field(ArrowDataType::Null, true))), true, ); @@ -3346,7 +3345,7 @@ mod tests { fn test_row_group_batch(row_group_size: usize, batch_size: usize) { let schema = Arc::new(Schema::new(vec![Field::new( "list", - ArrowDataType::List(Arc::new(Field::new("item", ArrowDataType::Int32, true))), + ArrowDataType::List(Arc::new(Field::new_list_field(ArrowDataType::Int32, true))), true, )])); @@ -3903,7 +3902,7 @@ mod tests { fn test_list_selection() { let schema = Arc::new(Schema::new(vec![Field::new_list( "list", - Field::new("item", ArrowDataType::Utf8, true), + Field::new_list_field(ArrowDataType::Utf8, true), false, )])); let mut buf = Vec::with_capacity(1024); @@ -3959,7 +3958,11 @@ mod tests { let mut rng = thread_rng(); let schema = Arc::new(Schema::new(vec![Field::new_list( "list", - Field::new_list("item", Field::new("item", ArrowDataType::Int32, true), true), + Field::new_list( + Field::LIST_FIELD_DEFAULT_NAME, + Field::new_list_field(ArrowDataType::Int32, true), + true, + ), true, )])); let mut buf = Vec::with_capacity(1024); diff --git a/parquet/src/arrow/arrow_writer/levels.rs b/parquet/src/arrow/arrow_writer/levels.rs index 3e828bbddd17..e4662b8f316c 100644 --- a/parquet/src/arrow/arrow_writer/levels.rs +++ b/parquet/src/arrow/arrow_writer/levels.rs @@ -632,7 +632,7 @@ mod tests { // based on the example at https://blog.twitter.com/engineering/en_us/a/2013/dremel-made-simple-with-parquet.html // [[a, b, c], [d, e, f, g]], [[h], [i,j]] - let leaf_type = Field::new("item", DataType::Int32, false); + let leaf_type = Field::new_list_field(DataType::Int32, false); let inner_type = DataType::List(Arc::new(leaf_type)); let inner_field = Field::new("l2", inner_type.clone(), false); let outer_type = DataType::List(Arc::new(inner_field)); @@ -676,7 +676,7 @@ mod tests { fn test_calculate_one_level_1() { // This test calculates the levels for a non-null primitive array let array = Arc::new(Int32Array::from_iter(0..10)) as ArrayRef; - let field = Field::new("item", DataType::Int32, false); + let field = Field::new_list_field(DataType::Int32, false); let levels = calculate_array_levels(&array, &field).unwrap(); assert_eq!(levels.len(), 1); @@ -702,7 +702,7 @@ mod tests { Some(0), None, ])) as ArrayRef; - let field = Field::new("item", DataType::Int32, true); + let field = Field::new_list_field(DataType::Int32, true); let levels = calculate_array_levels(&array, &field).unwrap(); assert_eq!(levels.len(), 1); @@ -720,7 +720,7 @@ mod tests { #[test] fn test_calculate_array_levels_1() { - let leaf_field = Field::new("item", DataType::Int32, false); + let leaf_field = Field::new_list_field(DataType::Int32, false); let list_type = DataType::List(Arc::new(leaf_field)); // if all array values are defined (e.g. batch>) @@ -1046,7 +1046,7 @@ mod tests { let a_values = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); let a_value_offsets = arrow::buffer::Buffer::from_iter([0_i32, 1, 3, 3, 6, 10]); - let a_list_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let a_list_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); let a_list_data = ArrayData::builder(a_list_type.clone()) .len(5) .add_buffer(a_value_offsets) @@ -1059,7 +1059,7 @@ mod tests { let a = ListArray::from(a_list_data); - let item_field = Field::new("item", a_list_type, true); + let item_field = Field::new_list_field(a_list_type, true); let mut builder = levels(&item_field, a); builder.write(2..4); let levels = builder.finish(); @@ -1334,7 +1334,7 @@ mod tests { // define schema let int_field = Field::new("a", DataType::Int32, true); let fields = Fields::from([Arc::new(int_field)]); - let item_field = Field::new("item", DataType::Struct(fields.clone()), true); + let item_field = Field::new_list_field(DataType::Struct(fields.clone()), true); let list_field = Field::new("list", DataType::List(Arc::new(item_field)), true); let int_builder = Int32Builder::with_capacity(10); @@ -1568,7 +1568,7 @@ mod tests { let a = builder.finish(); let values = a.values().clone(); - let item_field = Field::new("item", a.data_type().clone(), true); + let item_field = Field::new_list_field(a.data_type().clone(), true); let mut builder = levels(&item_field, a); builder.write(1..4); let levels = builder.finish(); @@ -1594,7 +1594,7 @@ mod tests { let field_a = Field::new("a", DataType::Int32, true); let field_b = Field::new("b", DataType::Int64, false); let fields = Fields::from([Arc::new(field_a), Arc::new(field_b)]); - let item_field = Field::new("item", DataType::Struct(fields.clone()), true); + let item_field = Field::new_list_field(DataType::Struct(fields.clone()), true); let list_field = Field::new( "list", DataType::FixedSizeList(Arc::new(item_field), 2), @@ -1758,7 +1758,7 @@ mod tests { let array = builder.finish(); let values = array.values().clone(); - let item_field = Field::new("item", array.data_type().clone(), true); + let item_field = Field::new_list_field(array.data_type().clone(), true); let mut builder = levels(&item_field, array); builder.write(0..3); let levels = builder.finish(); @@ -1797,7 +1797,7 @@ mod tests { let a = builder.finish(); let values = a.values().as_list::().values().clone(); - let item_field = Field::new("item", a.data_type().clone(), true); + let item_field = Field::new_list_field(a.data_type().clone(), true); let mut builder = levels(&item_field, a); builder.write(0..4); let levels = builder.finish(); @@ -1827,7 +1827,7 @@ mod tests { // [NULL, NULL, 3, 0] let dict = DictionaryArray::new(keys, Arc::new(values)); - let item_field = Field::new("item", dict.data_type().clone(), true); + let item_field = Field::new_list_field(dict.data_type().clone(), true); let mut builder = levels(&item_field, dict.clone()); builder.write(0..4); @@ -1846,7 +1846,7 @@ mod tests { #[test] fn mismatched_types() { let array = Arc::new(Int32Array::from_iter(0..10)) as ArrayRef; - let field = Field::new("item", DataType::Float64, false); + let field = Field::new_list_field(DataType::Float64, false); let err = LevelInfoBuilder::try_new(&field, Default::default(), &array) .unwrap_err() diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index 115b9a2f1966..c9f9114481d8 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -1194,7 +1194,7 @@ mod tests { // define schema let schema = Schema::new(vec![Field::new( "a", - DataType::List(Arc::new(Field::new("item", DataType::Int32, false))), + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))), true, )]); @@ -1206,8 +1206,7 @@ mod tests { let a_value_offsets = arrow::buffer::Buffer::from([0, 1, 3, 3, 6, 10].to_byte_slice()); // Construct a list array from the above two - let a_list_data = ArrayData::builder(DataType::List(Arc::new(Field::new( - "item", + let a_list_data = ArrayData::builder(DataType::List(Arc::new(Field::new_list_field( DataType::Int32, false, )))) @@ -1234,7 +1233,7 @@ mod tests { // define schema let schema = Schema::new(vec![Field::new( "a", - DataType::List(Arc::new(Field::new("item", DataType::Int32, false))), + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))), false, )]); @@ -1246,8 +1245,7 @@ mod tests { let a_value_offsets = arrow::buffer::Buffer::from([0, 1, 3, 3, 6, 10].to_byte_slice()); // Construct a list array from the above two - let a_list_data = ArrayData::builder(DataType::List(Arc::new(Field::new( - "item", + let a_list_data = ArrayData::builder(DataType::List(Arc::new(Field::new_list_field( DataType::Int32, false, )))) @@ -1365,12 +1363,12 @@ mod tests { let struct_field_f = Arc::new(Field::new("f", DataType::Float32, true)); let struct_field_g = Arc::new(Field::new_list( "g", - Field::new("item", DataType::Int16, true), + Field::new_list_field(DataType::Int16, true), false, )); let struct_field_h = Arc::new(Field::new_list( "h", - Field::new("item", DataType::Int16, false), + Field::new_list_field(DataType::Int16, false), true, )); let struct_field_e = Arc::new(Field::new_struct( @@ -2377,7 +2375,7 @@ mod tests { #[test] fn null_list_single_column() { - let null_field = Field::new("item", DataType::Null, true); + let null_field = Field::new_list_field(DataType::Null, true); let list_field = Field::new("emptylist", DataType::List(Arc::new(null_field)), true); let schema = Schema::new(vec![list_field]); @@ -2385,8 +2383,7 @@ mod tests { // Build [[], null, [null, null]] let a_values = NullArray::new(2); let a_value_offsets = arrow::buffer::Buffer::from([0, 0, 0, 2].to_byte_slice()); - let a_list_data = ArrayData::builder(DataType::List(Arc::new(Field::new( - "item", + let a_list_data = ArrayData::builder(DataType::List(Arc::new(Field::new_list_field( DataType::Null, true, )))) @@ -2415,8 +2412,7 @@ mod tests { fn list_single_column() { let a_values = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); let a_value_offsets = arrow::buffer::Buffer::from([0, 1, 3, 3, 6, 10].to_byte_slice()); - let a_list_data = ArrayData::builder(DataType::List(Arc::new(Field::new( - "item", + let a_list_data = ArrayData::builder(DataType::List(Arc::new(Field::new_list_field( DataType::Int32, false, )))) diff --git a/parquet/src/arrow/async_reader/mod.rs b/parquet/src/arrow/async_reader/mod.rs index e1b8b6a11342..aaa60fc592b8 100644 --- a/parquet/src/arrow/async_reader/mod.rs +++ b/parquet/src/arrow/async_reader/mod.rs @@ -1869,7 +1869,7 @@ mod tests { async fn test_nested_skip() { let schema = Arc::new(Schema::new(vec![ Field::new("col_1", DataType::UInt64, false), - Field::new_list("col_2", Field::new("item", DataType::Utf8, true), true), + Field::new_list("col_2", Field::new_list_field(DataType::Utf8, true), true), ])); // Default writer properties diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs index 3ed3bd24e0a8..bf1fb633227c 100644 --- a/parquet/src/arrow/schema/mod.rs +++ b/parquet/src/arrow/schema/mod.rs @@ -1665,7 +1665,7 @@ mod tests { Field::new("c20", DataType::Interval(IntervalUnit::YearMonth), false), Field::new_list( "c21", - Field::new("item", DataType::Boolean, true) + Field::new_list_field(DataType::Boolean, true) .with_metadata(meta(&[("Key", "Bar"), (PARQUET_FIELD_ID_META_KEY, "5")])), false, ) @@ -1673,7 +1673,7 @@ mod tests { Field::new( "c22", DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::Boolean, true)), + Arc::new(Field::new_list_field(DataType::Boolean, true)), 5, ), false, @@ -1682,8 +1682,7 @@ mod tests { "c23", Field::new_large_list( "inner", - Field::new( - "item", + Field::new_list_field( DataType::Struct( vec![ Field::new("a", DataType::Int16, true), @@ -1728,8 +1727,7 @@ mod tests { "c34", Field::new_list( "inner", - Field::new( - "item", + Field::new_list_field( DataType::Struct( vec![ Field::new("a", DataType::Int16, true), @@ -1762,7 +1760,7 @@ mod tests { .with_metadata(meta(&[(PARQUET_FIELD_ID_META_KEY, "8")])), Field::new_list( "my_value", - Field::new("item", DataType::Utf8, true) + Field::new_list_field(DataType::Utf8, true) .with_metadata(meta(&[(PARQUET_FIELD_ID_META_KEY, "10")])), true, ) @@ -1777,7 +1775,7 @@ mod tests { Field::new("my_key", DataType::Utf8, false), Field::new_list( "my_value", - Field::new("item", DataType::Utf8, true) + Field::new_list_field(DataType::Utf8, true) .with_metadata(meta(&[(PARQUET_FIELD_ID_META_KEY, "11")])), true, ),