@@ -745,14 +745,17 @@ pub fn cast_with_options(
745
745
| Map ( _, _)
746
746
| Dictionary ( _, _) ,
747
747
) => Ok ( new_null_array ( to_type, array. len ( ) ) ) ,
748
- ( RunEndEncoded ( index_type, _) , _) => match index_type. data_type ( ) {
749
- Int16 => run_end_encoded_cast :: < Int16Type > ( array, to_type, cast_options) ,
750
- Int32 => run_end_encoded_cast :: < Int32Type > ( array, to_type, cast_options) ,
751
- Int64 => run_end_encoded_cast :: < Int64Type > ( array, to_type, cast_options) ,
748
+ ( RunEndEncoded ( index_type, _) , _) => {
749
+ let mut new_cast_options = cast_options. clone ( ) ;
750
+ new_cast_options. safe = false ;
751
+ match index_type. data_type ( ) {
752
+ Int16 => run_end_encoded_cast :: < Int16Type > ( array, to_type, & new_cast_options) ,
753
+ Int32 => run_end_encoded_cast :: < Int32Type > ( array, to_type, & new_cast_options) ,
754
+ Int64 => run_end_encoded_cast :: < Int64Type > ( array, to_type, & new_cast_options) ,
752
755
_ => Err ( ArrowError :: CastError ( format ! (
753
756
"Casting from run end encoded type {from_type:?} to {to_type:?} not supported" ,
754
757
) ) ) ,
755
- } ,
758
+ } } ,
756
759
( _, RunEndEncoded ( index_type, value_type) ) => match index_type. data_type ( ) {
757
760
Int16 => {
758
761
cast_to_run_end_encoded :: < Int16Type > ( array, value_type. data_type ( ) , cast_options)
@@ -10726,16 +10729,14 @@ mod tests {
10726
10729
let values = Int32Array :: from ( vec ! [ 1 , 2 , 3 ] ) ;
10727
10730
let run_array = RunArray :: < Int32Type > :: try_new ( & run_ends, & values) . unwrap ( ) ;
10728
10731
let array_ref = Arc :: new ( run_array) as ArrayRef ;
10729
- println ! ( "1" ) ;
10730
10732
// Cast to Int64
10731
10733
let cast_result = cast ( & array_ref, & DataType :: Int64 ) . unwrap ( ) ;
10732
- println ! ( "2" ) ;
10733
10734
// Verify the result is a plain Int64Array with the runs expanded
10734
- let result_run_array = cast_result
10735
- . as_any ( )
10736
- . downcast_ref :: < Int64Array > ( )
10737
- . unwrap ( ) ;
10738
- assert_eq ! ( result_run_array . values ( ) , & [ 1i64 , 1i64 , 2i64 , 2i64 , 2i64 , 3i64 ] ) ;
10735
+ let result_run_array = cast_result. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
10736
+ assert_eq ! (
10737
+ result_run_array . values ( ) ,
10738
+ & [ 1i64 , 1i64 , 2i64 , 2i64 , 2i64 , 3i64 ]
10739
+ ) ;
10739
10740
}
10740
10741
10741
10742
/// Test casting FROM RunEndEncoded to string
@@ -10751,10 +10752,7 @@ mod tests {
10751
10752
let cast_result = cast ( & array_ref, & DataType :: Utf8 ) . unwrap ( ) ;
10752
10753
10753
10754
// Verify the result is a plain StringArray (no longer run-end encoded)
10754
- let result_array = cast_result
10755
- . as_any ( )
10756
- . downcast_ref :: < StringArray > ( )
10757
- . unwrap ( ) ;
10755
+ let result_array = cast_result. as_any ( ) . downcast_ref :: < StringArray > ( ) . unwrap ( ) ;
10758
10756
// Check that values are correct
10759
10757
assert_eq ! ( result_array. value( 0 ) , "10" ) ;
10760
10758
assert_eq ! ( result_array. value( 1 ) , "10" ) ;
@@ -10890,10 +10888,7 @@ mod tests {
10890
10888
let cast_result = cast ( & array_ref, & DataType :: Utf8 ) . unwrap ( ) ;
10891
10889
10892
10890
// Verify the result preserves nulls
10893
- let result_run_array = cast_result
10894
- . as_any ( )
10895
- . downcast_ref :: < StringArray > ( )
10896
- . unwrap ( ) ;
10891
+ let result_run_array = cast_result. as_any ( ) . downcast_ref :: < StringArray > ( ) . unwrap ( ) ;
10897
10892
assert_eq ! ( result_run_array. value( 0 ) , "1" ) ;
10898
10893
assert ! ( result_run_array. is_null( 2 ) ) ;
10899
10894
assert_eq ! ( result_run_array. value( 4 ) , "2" ) ;
@@ -10939,5 +10934,132 @@ mod tests {
10939
10934
// Expect this to fail
10940
10935
assert ! ( cast_result. is_err( ) ) ;
10941
10936
}
10937
/// Casting a run-end encoded array from `Int64` run-ends to `Int16` run-ends
/// must return an error when a run-end does not fit in `Int16`.
#[test]
fn test_cast_run_end_encoded_int64_to_int16_should_fail() {
    use arrow_array::{Int64Array, RunArray, StringArray};
    use arrow_schema::{DataType, Field};
    use std::sync::Arc;

    // Source array: every run-end exceeds `i16::MAX` (32767).
    let source: ArrayRef = Arc::new(
        RunArray::<Int64Type>::try_new(
            &Int64Array::from(vec![100_000, 400_000, 700_000]),
            &StringArray::from(vec!["a", "b", "c"]),
        )
        .unwrap(),
    );

    // Target: RunEndEncoded<Int16, Utf8>.
    let run_ends_field = Field::new("run_ends", DataType::Int16, false);
    let values_field = Field::new("values", DataType::Utf8, true);
    let narrowed_type = DataType::RunEndEncoded(Arc::new(run_ends_field), Arc::new(values_field));

    // `safe: false` requests an error on overflow instead of a null.
    let options = CastOptions {
        safe: false,
        format_options: FormatOptions::default(),
    };

    // The cast must be rejected; pull the error text out for inspection.
    let message = match cast_with_options(&source, &narrowed_type, &options) {
        Ok(_) => panic!("This should not happen"),
        Err(err) => err.to_string(),
    };
    assert!(message.contains("Cast error: Can't cast value 100000 to type Int16"));
}
10973
/// Widening the run-ends of a run-end encoded array from `Int16` to `Int64`
/// must succeed and leave both run-ends and values unchanged.
#[test]
fn test_cast_run_end_encoded_int16_to_int64_should_succeed() {
    use arrow_array::{Int16Array, RunArray, StringArray};
    use arrow_schema::{DataType, Field};
    use std::sync::Arc;

    // Source array: three runs ending at logical offsets 2, 5 and 8.
    let source: ArrayRef = Arc::new(
        RunArray::<Int16Type>::try_new(
            &Int16Array::from(vec![2, 5, 8]),
            &StringArray::from(vec!["a", "b", "c"]),
        )
        .unwrap(),
    );

    // Target: RunEndEncoded<Int64, Utf8>.
    let run_ends_field = Field::new("run_ends", DataType::Int64, false);
    let values_field = Field::new("values", DataType::Utf8, true);
    let widened_type = DataType::RunEndEncoded(Arc::new(run_ends_field), Arc::new(values_field));

    let options = CastOptions {
        safe: false,
        format_options: FormatOptions::default(),
    };

    // Any error here is a test failure; report it with the original message.
    let widened = match cast_with_options(&source, &widened_type, &options) {
        Ok(array) => array,
        Err(e) => panic!("Cast should have succeeded but failed: {}", e),
    };

    // The result must be a run array keyed by Int64 run-ends.
    let run_array = widened
        .as_any()
        .downcast_ref::<RunArray<Int64Type>>()
        .unwrap();

    // Run-ends keep their numeric values, now stored as i64.
    assert_eq!(run_array.run_ends().values(), &[2i64, 5i64, 8i64]);

    // The physical values are untouched by the run-end cast.
    let strings = run_array.values().as_string::<i32>();
    for (idx, expected) in ["a", "b", "c"].iter().enumerate() {
        assert_eq!(strings.value(idx), *expected);
    }
}
11021
+
11022
/// Casting a run-end encoded array from `Int32` run-ends to `Int16` run-ends
/// must return an error when some run-ends do not fit in `Int16`.
#[test]
fn test_cast_run_end_encoded_int32_to_int16_should_fail() {
    use arrow_array::{Int32Array, RunArray, StringArray};
    use arrow_schema::{DataType, Field};
    use std::sync::Arc;

    // Construct a valid REE array with Int32 run-ends. The first run-end (1000)
    // fits in Int16, but 50000 and 80000 exceed `i16::MAX` (32767).
    let run_ends = Int32Array::from(vec![1000, 50000, 80000]);
    let values = StringArray::from(vec!["x", "y", "z"]);

    let ree_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
    let array_ref = Arc::new(ree_array) as ArrayRef;

    // Attempt to cast to RunEndEncoded<Int16, Utf8> (downcast should fail)
    let target_type = DataType::RunEndEncoded(
        Arc::new(Field::new("run_ends", DataType::Int16, false)),
        Arc::new(Field::new("values", DataType::Utf8, true)),
    );
    let cast_options = CastOptions {
        safe: false,
        format_options: FormatOptions::default(),
    };

    // This should fail due to run-end overflow
    let result: Result<Arc<dyn Array + 'static>, ArrowError> =
        cast_with_options(&array_ref, &target_type, &cast_options);

    match result {
        Ok(_) => {
            panic!("Cast should have failed due to overflow but succeeded");
        }
        Err(e) => {
            // Verify the error is about a value that could not be cast
            assert!(e.to_string().contains("Can't cast value"));
        }
    }
}
10942
11064
}
10943
11065
}
0 commit comments