@@ -21,8 +21,8 @@ use std::sync::{Arc, OnceLock};
21
21
use crate :: strings:: { make_and_append_view, StringArrayType } ;
22
22
use crate :: utils:: { make_scalar_function, utf8_to_str_type} ;
23
23
use arrow:: array:: {
24
- Array , ArrayIter , ArrayRef , AsArray , GenericStringArray , Int64Array , OffsetSizeTrait ,
25
- StringViewArray ,
24
+ Array , ArrayIter , ArrayRef , AsArray , GenericStringBuilder , Int64Array ,
25
+ OffsetSizeTrait , StringViewArray ,
26
26
} ;
27
27
use arrow:: datatypes:: DataType ;
28
28
use arrow_buffer:: { NullBufferBuilder , ScalarBuffer } ;
@@ -448,10 +448,9 @@ where
448
448
match args. len ( ) {
449
449
1 => {
450
450
let iter = ArrayIter :: new ( string_array) ;
451
-
452
- let result = iter
453
- . zip ( start_array. iter ( ) )
454
- . map ( |( string, start) | match ( string, start) {
451
+ let mut result_builder = GenericStringBuilder :: < T > :: new ( ) ;
452
+ for ( string, start) in iter. zip ( start_array. iter ( ) ) {
453
+ match ( string, start) {
455
454
( Some ( string) , Some ( start) ) => {
456
455
let ( start, end) = get_true_start_end (
457
456
string,
@@ -460,47 +459,51 @@ where
460
459
enable_ascii_fast_path,
461
460
) ; // start, end is byte-based
462
461
let substr = & string[ start..end] ;
463
- Some ( substr . to_string ( ) )
462
+ result_builder . append_value ( substr ) ;
464
463
}
465
- _ => None ,
466
- } )
467
- . collect :: < GenericStringArray < T > > ( ) ;
468
- Ok ( Arc :: new ( result) as ArrayRef )
464
+ _ => {
465
+ result_builder. append_null ( ) ;
466
+ }
467
+ }
468
+ }
469
+ Ok ( Arc :: new ( result_builder. finish ( ) ) as ArrayRef )
469
470
}
470
471
2 => {
471
472
let iter = ArrayIter :: new ( string_array) ;
472
473
let count_array = count_array_opt. unwrap ( ) ;
474
+ let mut result_builder = GenericStringBuilder :: < T > :: new ( ) ;
473
475
474
- let result = iter
475
- . zip ( start_array. iter ( ) )
476
- . zip ( count_array. iter ( ) )
477
- . map ( |( ( string, start) , count) | {
478
- match ( string, start, count) {
479
- ( Some ( string) , Some ( start) , Some ( count) ) => {
480
- if count < 0 {
481
- exec_err ! (
476
+ for ( ( string, start) , count) in
477
+ iter. zip ( start_array. iter ( ) ) . zip ( count_array. iter ( ) )
478
+ {
479
+ match ( string, start, count) {
480
+ ( Some ( string) , Some ( start) , Some ( count) ) => {
481
+ if count < 0 {
482
+ return exec_err ! (
482
483
"negative substring length not allowed: substr(<str>, {start}, {count})"
483
- )
484
- } else {
485
- if start == i64:: MIN {
486
- return exec_err ! ( "negative overflow when calculating skip value" ) ;
487
- }
488
- let ( start, end) = get_true_start_end (
489
- string,
490
- start,
491
- Some ( count as u64 ) ,
492
- enable_ascii_fast_path,
493
- ) ; // start, end is byte-based
494
- let substr = & string[ start..end] ;
495
- Ok ( Some ( substr. to_string ( ) ) )
484
+ ) ;
485
+ } else {
486
+ if start == i64:: MIN {
487
+ return exec_err ! (
488
+ "negative overflow when calculating skip value"
489
+ ) ;
496
490
}
491
+ let ( start, end) = get_true_start_end (
492
+ string,
493
+ start,
494
+ Some ( count as u64 ) ,
495
+ enable_ascii_fast_path,
496
+ ) ; // start, end is byte-based
497
+ let substr = & string[ start..end] ;
498
+ result_builder. append_value ( substr) ;
497
499
}
498
- _ => Ok ( None ) ,
499
500
}
500
- } )
501
- . collect :: < Result < GenericStringArray < T > > > ( ) ?;
502
-
503
- Ok ( Arc :: new ( result) as ArrayRef )
501
+ _ => {
502
+ result_builder. append_null ( ) ;
503
+ }
504
+ }
505
+ }
506
+ Ok ( Arc :: new ( result_builder. finish ( ) ) as ArrayRef )
504
507
}
505
508
other => {
506
509
exec_err ! ( "substr was called with {other} arguments. It requires 2 or 3." )
0 commit comments