Skip to content

Commit

Permalink
cleanup "array_has"
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelcolvin committed Sep 13, 2024
1 parent 389f7f7 commit d4ab9d7
Showing 1 changed file with 30 additions and 39 deletions.
69 changes: 30 additions & 39 deletions datafusion/functions-nested/src/array_has.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,44 +95,33 @@ impl ScalarUDFImpl for ArrayHas {
}

fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
// Always return null if the second argument is null
// i.e. array_has(array, null) -> null
if let ColumnarValue::Scalar(s) = &args[1] {
if s.is_null() {
return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None)));
match &args[1] {
ColumnarValue::Array(array_needle) => {
// the needle is already an array, convert the haystack to an array of the same length
let haystack = args[0].to_owned().into_array(array_needle.len())?;
let array = array_has_inner_for_array(&haystack, array_needle)?;
Ok(ColumnarValue::Array(array))
}
}

// first, identify if any of the arguments is an Array. If yes, store its `len`,
// as any scalar will need to be converted to an array of len `len`.
let len = args
.iter()
.fold(Option::<usize>::None, |acc, arg| match arg {
ColumnarValue::Scalar(_) => acc,
ColumnarValue::Array(a) => Some(a.len()),
});

let is_scalar = len.is_none();

let result = match args[1] {
ColumnarValue::Array(_) => {
let args = ColumnarValue::values_to_arrays(args)?;
array_has_inner_for_array(&args[0], &args[1])
}
ColumnarValue::Scalar(_) => {
ColumnarValue::Scalar(scalar_needle) => {
// Always return null if the second argument is null
// i.e. array_has(array, null) -> null
if scalar_needle.is_null() {
return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None)));
}

// since the needle is a scalar, convert it to an array of size 1
let haystack = args[0].to_owned().into_array(1)?;
let needle = args[1].to_owned().into_array(1)?;
let needle = scalar_needle.to_array_of_size(1)?;
let needle = Scalar::new(needle);
array_has_inner_for_scalar(&haystack, &needle)
let array = array_has_inner_for_scalar(&haystack, &needle)?;
if let ColumnarValue::Scalar(_) = &args[0] {
// If both inputs are scalar, keep the output as a scalar
let scalar_value = ScalarValue::try_from_array(&array, 0)?;
Ok(ColumnarValue::Scalar(scalar_value))
} else {
Ok(ColumnarValue::Array(array))
}
}
};

if is_scalar {
// If all inputs are scalar, keep the output as a scalar
let result = result.and_then(|arr| ScalarValue::try_from_array(&arr, 0));
result.map(ColumnarValue::Scalar)
} else {
result.map(ColumnarValue::Array)
}
}

Expand Down Expand Up @@ -203,24 +192,26 @@ fn array_has_dispatch_for_scalar<O: OffsetSizeTrait>(
return Ok(Arc::new(BooleanArray::from(vec![Some(false)])));
}
let eq_array = compare_with_eq(values, needle, is_nested)?;
let mut final_contained = vec![None; haystack.len()];
for (i, offset) in offsets.windows(2).enumerate() {
let mut final_contained = BooleanArray::builder(haystack.len());
for offset in offsets.windows(2) {
let start = offset[0].to_usize().unwrap();
let end = offset[1].to_usize().unwrap();
let length = end - start;
// For non-nested list, length is 0 for null
if length == 0 {
final_contained.append_null();
continue;
}
let sliced_array = eq_array.slice(start, length);
// For nested list, check number of nulls
if sliced_array.null_count() == length {
continue;
final_contained.append_null();
} else {
final_contained.append_value(sliced_array.true_count() > 0);
}
final_contained[i] = Some(sliced_array.true_count() > 0);
}

Ok(Arc::new(BooleanArray::from(final_contained)))
Ok(Arc::new(final_contained.finish()))
}

fn array_has_all_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
Expand Down

0 comments on commit d4ab9d7

Please sign in to comment.