diff --git a/arrow-pyarrow-integration-testing/tests/test_sql.py b/arrow-pyarrow-integration-testing/tests/test_sql.py index 92782b9ed473..e2e8d66c0f29 100644 --- a/arrow-pyarrow-integration-testing/tests/test_sql.py +++ b/arrow-pyarrow-integration-testing/tests/test_sql.py @@ -421,6 +421,14 @@ def iter_batches(): with pytest.raises(ValueError, match="test error"): rust.reader_return_errors(reader) + # Due to a long-standing oversight, PyArrow allows binary values in schema + # metadata that are not valid UTF-8. This is not allowed in Rust, but we + # make sure we error and not panic here. + schema = schema.with_metadata({"key": b"\xff"}) + reader = pa.RecordBatchReader.from_batches(schema, iter_batches()) + with pytest.raises(ValueError, match="invalid utf-8"): + rust.round_trip_record_batch_reader(reader) + def test_reject_other_classes(): # Arbitrary type that is not a PyArrow type not_pyarrow = ["hello"] diff --git a/arrow/src/ffi_stream.rs b/arrow/src/ffi_stream.rs index 7005cadc623c..865a8d0e0a29 100644 --- a/arrow/src/ffi_stream.rs +++ b/arrow/src/ffi_stream.rs @@ -281,7 +281,7 @@ fn get_stream_schema(stream_ptr: *mut FFI_ArrowArrayStream) -> Result let ret_code = unsafe { (*stream_ptr).get_schema.unwrap()(stream_ptr, &mut schema) }; if ret_code == 0 { - let schema = Schema::try_from(&schema).unwrap(); + let schema = Schema::try_from(&schema)?; Ok(Arc::new(schema)) } else { Err(ArrowError::CDataInterface(format!(