Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into cleanup-take
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold committed Aug 30, 2023
2 parents ab19a78 + 735f48d commit 5a0c448
Show file tree
Hide file tree
Showing 11 changed files with 594 additions and 732 deletions.
30 changes: 12 additions & 18 deletions arrow-cast/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,13 +189,11 @@ pub fn string_to_datetime<T: TimeZone>(
let parser = TimestampParser::new(bytes);
let date = parser.date().ok_or_else(|| err("error parsing date"))?;
if bytes.len() == 10 {
let offset = timezone.offset_from_local_date(&date);
let offset = offset
let datetime = date.and_time(NaiveTime::from_hms_opt(0, 0, 0).unwrap());
return timezone
.from_local_datetime(&datetime)
.single()
.ok_or_else(|| err("error computing timezone offset"))?;

let time = NaiveTime::from_hms_opt(0, 0, 0).unwrap();
return Ok(DateTime::from_local(date.and_time(time), offset));
.ok_or_else(|| err("error computing timezone offset"));
}

if !parser.test(10, b'T') && !parser.test(10, b't') && !parser.test(10, b' ') {
Expand All @@ -213,28 +211,24 @@ pub fn string_to_datetime<T: TimeZone>(
}

if bytes.len() <= tz_offset {
let offset = timezone.offset_from_local_datetime(&datetime);
let offset = offset
return timezone
.from_local_datetime(&datetime)
.single()
.ok_or_else(|| err("error computing timezone offset"))?;
return Ok(DateTime::from_local(datetime, offset));
.ok_or_else(|| err("error computing timezone offset"));
}

if bytes[tz_offset] == b'z' || bytes[tz_offset] == b'Z' {
let offset = timezone.offset_from_local_datetime(&datetime);
let offset = offset
.single()
.ok_or_else(|| err("error computing timezone offset"))?;
return Ok(DateTime::from_utc(datetime, offset));
return Ok(timezone.from_utc_datetime(&datetime));
}

// Parse remainder of string as timezone
let parsed_tz: Tz = s[tz_offset..].trim_start().parse()?;
let offset = parsed_tz.offset_from_local_datetime(&datetime);
let offset = offset
let parsed = parsed_tz
.from_local_datetime(&datetime)
.single()
.ok_or_else(|| err("error computing timezone offset"))?;
Ok(DateTime::<Tz>::from_local(datetime, offset).with_timezone(timezone))

Ok(parsed.with_timezone(timezone))
}

/// Accepts a string in RFC3339 / ISO8601 standard format and some
Expand Down
14 changes: 6 additions & 8 deletions arrow-flight/src/sql/metadata/db_schemas.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@
use std::sync::Arc;

use arrow_arith::boolean::and;
use arrow_array::{builder::StringBuilder, ArrayRef, RecordBatch, Scalar, StringArray};
use arrow_array::{builder::StringBuilder, ArrayRef, RecordBatch, StringArray};
use arrow_ord::cmp::eq;
use arrow_schema::{DataType, Field, Schema, SchemaRef};
use arrow_select::{filter::filter_record_batch, take::take};
use arrow_string::like::like_utf8_scalar;
use arrow_string::like::like;
use once_cell::sync::Lazy;

use super::lexsort_to_indices;
Expand Down Expand Up @@ -122,15 +122,13 @@ impl GetDbSchemasBuilder {

if let Some(db_schema_filter_pattern) = db_schema_filter_pattern {
// use like kernel to get wildcard matching
filters.push(like_utf8_scalar(
&db_schema_name,
&db_schema_filter_pattern,
)?)
let scalar = StringArray::new_scalar(db_schema_filter_pattern);
filters.push(like(&db_schema_name, &scalar)?)
}

if let Some(catalog_filter_name) = catalog_filter {
let scalar = StringArray::from_iter_values([catalog_filter_name]);
filters.push(eq(&catalog_name, &Scalar::new(&scalar))?);
let scalar = StringArray::new_scalar(catalog_filter_name);
filters.push(eq(&catalog_name, &scalar)?);
}

// `AND` any filters together
Expand Down
22 changes: 9 additions & 13 deletions arrow-flight/src/sql/metadata/tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ use std::sync::Arc;

use arrow_arith::boolean::{and, or};
use arrow_array::builder::{BinaryBuilder, StringBuilder};
use arrow_array::{ArrayRef, RecordBatch, Scalar, StringArray};
use arrow_array::{ArrayRef, RecordBatch, StringArray};
use arrow_ord::cmp::eq;
use arrow_schema::{DataType, Field, Schema, SchemaRef};
use arrow_select::{filter::filter_record_batch, take::take};
use arrow_string::like::like_utf8_scalar;
use arrow_string::like::like;
use once_cell::sync::Lazy;

use super::lexsort_to_indices;
Expand Down Expand Up @@ -184,16 +184,13 @@ impl GetTablesBuilder {
let mut filters = vec![];

if let Some(catalog_filter_name) = catalog_filter {
let scalar = StringArray::from_iter_values([catalog_filter_name]);
filters.push(eq(&catalog_name, &Scalar::new(&scalar))?);
let scalar = StringArray::new_scalar(catalog_filter_name);
filters.push(eq(&catalog_name, &scalar)?);
}

let tt_filter = table_types_filter
.into_iter()
.map(|tt| {
let scalar = StringArray::from_iter_values([tt]);
eq(&table_type, &Scalar::new(&scalar))
})
.map(|tt| eq(&table_type, &StringArray::new_scalar(tt)))
.collect::<std::result::Result<Vec<_>, _>>()?
.into_iter()
// We know the arrays are of the same length as they are produced from the same root array
Expand All @@ -204,15 +201,14 @@ impl GetTablesBuilder {

if let Some(db_schema_filter_pattern) = db_schema_filter_pattern {
// use like kernel to get wildcard matching
filters.push(like_utf8_scalar(
&db_schema_name,
&db_schema_filter_pattern,
)?)
let scalar = StringArray::new_scalar(db_schema_filter_pattern);
filters.push(like(&db_schema_name, &scalar)?)
}

if let Some(table_name_filter_pattern) = table_name_filter_pattern {
// use like kernel to get wildcard matching
filters.push(like_utf8_scalar(&table_name, &table_name_filter_pattern)?)
let scalar = StringArray::new_scalar(table_name_filter_pattern);
filters.push(like(&table_name, &scalar)?)
}

let batch = if let Some(table_schema) = table_schema {
Expand Down
8 changes: 8 additions & 0 deletions arrow-pyarrow-integration-testing/tests/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,14 @@ def iter_batches():
with pytest.raises(ValueError, match="test error"):
rust.reader_return_errors(reader)

# Due to a long-standing oversight, PyArrow allows binary values in schema
# metadata that are not valid UTF-8. This is not allowed in Rust, but we
# make sure we error and not panic here.
schema = schema.with_metadata({"key": b"\xff"})
reader = pa.RecordBatchReader.from_batches(schema, iter_batches())
with pytest.raises(ValueError, match="invalid utf-8"):
rust.round_trip_record_batch_reader(reader)

def test_reject_other_classes():
# Arbitrary type that is not a PyArrow type
not_pyarrow = ["hello"]
Expand Down
24 changes: 17 additions & 7 deletions arrow-row/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -981,6 +981,7 @@ impl Rows {
/// Sets the length of this [`Rows`] to 0
pub fn clear(&mut self) {
self.offsets.truncate(1);
self.buffer.clear();
}

/// Returns the number of [`Row`] in this [`Rows`]
Expand Down Expand Up @@ -2429,17 +2430,26 @@ mod tests {
RowConverter::new(vec![SortField::new(DataType::Int32)]).unwrap();
let mut rows = converter.empty_rows(3, 128);

let arrays = [
Int32Array::from(vec![None, Some(2), Some(4)]),
Int32Array::from(vec![Some(2), None, Some(4)]),
];
let first = Int32Array::from(vec![None, Some(2), Some(4)]);
let second = Int32Array::from(vec![Some(2), None, Some(4)]);
let arrays = vec![Arc::new(first) as ArrayRef, Arc::new(second) as ArrayRef];

for array in arrays {
for array in arrays.iter() {
rows.clear();
let array = Arc::new(array) as ArrayRef;
converter.append(&mut rows, &[array.clone()]).unwrap();
let back = converter.convert_rows(&rows).unwrap();
assert_eq!(&back[0], &array);
assert_eq!(&back[0], array);
}

let mut rows_expected = converter.empty_rows(3, 128);
converter.append(&mut rows_expected, &arrays[1..]).unwrap();

for (i, (actual, expected)) in rows.iter().zip(rows_expected.iter()).enumerate() {
assert_eq!(
actual, expected,
"For row {}: expected {:?}, actual: {:?}",
i, expected, actual
);
}
}

Expand Down
1 change: 1 addition & 0 deletions arrow-string/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,6 @@
pub mod concat_elements;
pub mod length;
pub mod like;
mod predicate;
pub mod regexp;
pub mod substring;
Loading

0 comments on commit 5a0c448

Please sign in to comment.