Skip to content

Commit

Permalink
better extract logic and interval testing
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelcolvin committed Sep 12, 2024
1 parent 6bba073 commit 268d478
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 80 deletions.
44 changes: 28 additions & 16 deletions datafusion/functions/src/datetime/date_part.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@
// under the License.

use std::any::Any;
use std::str::FromStr;
use std::sync::Arc;

use arrow::array::{Array, ArrayRef, Float64Array};
use arrow::compute::kernels::cast_utils::IntervalUnit;
use arrow::compute::{binary, cast, date_part, DatePart};
use arrow::datatypes::DataType::{
Date32, Date64, Float64, Time32, Time64, Timestamp, Utf8, Utf8View,
Expand Down Expand Up @@ -161,22 +163,32 @@ impl ScalarUDFImpl for DatePartFunc {
return exec_err!("Date part '{part}' not supported");
}

let arr = match part_trim.to_lowercase().as_str() {
"year" => date_part_f64(array.as_ref(), DatePart::Year)?,
"quarter" => date_part_f64(array.as_ref(), DatePart::Quarter)?,
"month" => date_part_f64(array.as_ref(), DatePart::Month)?,
"week" => date_part_f64(array.as_ref(), DatePart::Week)?,
"day" => date_part_f64(array.as_ref(), DatePart::Day)?,
"doy" => date_part_f64(array.as_ref(), DatePart::DayOfYear)?,
"dow" => date_part_f64(array.as_ref(), DatePart::DayOfWeekSunday0)?,
"hour" => date_part_f64(array.as_ref(), DatePart::Hour)?,
"minute" => date_part_f64(array.as_ref(), DatePart::Minute)?,
"second" => seconds(array.as_ref(), Second)?,
"millisecond" => seconds(array.as_ref(), Millisecond)?,
"microsecond" => seconds(array.as_ref(), Microsecond)?,
"nanosecond" => seconds(array.as_ref(), Nanosecond)?,
"epoch" => epoch(array.as_ref())?,
_ => return exec_err!("Date part '{part}' not supported"),
// using IntervalUnit here means we hand off all the work of supporting plurals (like "seconds")
// and synonyms (like "ms", "msec", "msecond", "millisecond") to Arrow
let arr = if let Ok(interval_unit) = IntervalUnit::from_str(part_trim) {
match interval_unit {
IntervalUnit::Year => date_part_f64(array.as_ref(), DatePart::Year)?,
IntervalUnit::Month => date_part_f64(array.as_ref(), DatePart::Month)?,
IntervalUnit::Week => date_part_f64(array.as_ref(), DatePart::Week)?,
IntervalUnit::Day => date_part_f64(array.as_ref(), DatePart::Day)?,
IntervalUnit::Hour => date_part_f64(array.as_ref(), DatePart::Hour)?,
IntervalUnit::Minute => date_part_f64(array.as_ref(), DatePart::Minute)?,
IntervalUnit::Second => seconds(array.as_ref(), Second)?,
IntervalUnit::Millisecond => seconds(array.as_ref(), Millisecond)?,
IntervalUnit::Microsecond => seconds(array.as_ref(), Microsecond)?,
IntervalUnit::Nanosecond => seconds(array.as_ref(), Nanosecond)?,
// century and decade are not supported by `DatePart`, although they are supported in postgres
_ => return exec_err!("Date part '{part}' not supported"),
}
} else {
// special cases that can be extracted (in postgres) but are not interval units
match part_trim.to_lowercase().as_str() {
"qtr" | "quarter" => date_part_f64(array.as_ref(), DatePart::Quarter)?,
"doy" => date_part_f64(array.as_ref(), DatePart::DayOfYear)?,
"dow" => date_part_f64(array.as_ref(), DatePart::DayOfWeekSunday0)?,
"epoch" => epoch(array.as_ref())?,
_ => return exec_err!("Date part '{part}' not supported"),
}
};

Ok(if is_scalar {
Expand Down
15 changes: 15 additions & 0 deletions datafusion/sqllogictest/test_files/expr.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1355,6 +1355,16 @@ SELECT date_part('second', arrow_cast('23:32:50.123456789'::time, 'Time64(Nanose
----
50.123456789

query R
select extract(second from '2024-08-09T12:13:14')
----
14

query R
select extract(seconds from '2024-08-09T12:13:14')
----
14

query R
SELECT extract(second from arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)'))
----
Expand All @@ -1381,6 +1391,11 @@ SELECT extract(microsecond from arrow_cast('23:32:50.123456789'::time, 'Time64(N
----
50123456.789000005

query R
SELECT extract(us from arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)'))
----
50123456.789000005

query R
SELECT date_part('nanosecond', arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)'))
----
Expand Down
73 changes: 9 additions & 64 deletions datafusion/sqllogictest/test_files/interval.slt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
# specific language governing permissions and limitations
# under the License.


# Use `interval` SQL literal syntax
# the types should be the same: https://github.com/apache/datafusion/issues/5801
query TT
Expand Down Expand Up @@ -206,87 +205,39 @@ select interval '5 YEAR 5 MONTH 5 DAY 5 HOUR 5 MINUTE 5 SECOND 5 MILLISECOND 5 M
----
65 mons 5 days 5 hours 5 mins 5.005005005 secs

# Interval with string literal addition
query ?
select interval '1 month' + '1 month'
----
2 mons

# Interval with string literal addition and leading field
# Interval mega nested literal addition
query ?
select interval '1' + '1' month
----
2 mons

# Interval with nested string literal addition
query ?
select interval '1 month' + '1 month' + '1 month'
----
3 mons

# Interval with nested string literal addition and leading field
query ?
select interval '1' + '1' + '1' month
----
3 mons

# Interval mega nested string literal addition
query ?
select interval '1 year' + '1 month' + '1 day' + '1 hour' + '1 minute' + '1 second' + '1 millisecond' + '1 microsecond' + '1 nanosecond'
select interval '1 year' + interval '1 month' + interval '1 day' + interval '1 hour' + interval '1 minute' + interval '1 second' + interval '1 millisecond' + interval '1 microsecond' + interval '1 nanosecond'
----
13 mons 1 days 1 hours 1 mins 1.001001001 secs

# Interval with string literal subtraction
query ?
select interval '1 month' - '1 day';
select interval '1 month' - interval '1 day';
----
1 mons -1 days

# Interval with string literal subtraction and leading field
query ?
select interval '5' - '1' - '2' year;
----
24 mons

# Interval with nested string literal subtraction
query ?
select interval '1 month' - '1 day' - '1 hour';
select interval '1 month' - interval '1 day' - interval '1 hour';
----
1 mons -1 days -1 hours

# Interval with nested string literal subtraction and leading field
query ?
select interval '10' - '1' - '1' month;
----
8 mons

# Interval mega nested string literal subtraction
query ?
select interval '1 year' - '1 month' - '1 day' - '1 hour' - '1 minute' - '1 second' - '1 millisecond' - '1 microsecond' - '1 nanosecond'
select interval '1 year' - interval '1 month' - interval '1 day' - interval '1 hour' - interval '1 minute' - interval '1 second' - interval '1 millisecond' - interval '1 microsecond' - interval '1 nanosecond'
----
11 mons -1 days -1 hours -1 mins -1.001001001 secs

# Interval with string literal negation and leading field
query ?
select -interval '5' - '1' - '2' year;
----
-96 mons

# Interval with nested string literal negation
# Interval with nested literal negation
query ?
select -interval '1 month' + '1 day' + '1 hour';
select -interval '1 month' + interval '1 day' + interval '1 hour';
----
-1 mons 1 days 1 hours

# Interval with nested string literal negation and leading field
# Interval mega nested literal negation
query ?
select -interval '10' - '1' - '1' month;
----
-12 mons

# Interval mega nested string literal negation
query ?
select -interval '1 year' - '1 month' - '1 day' - '1 hour' - '1 minute' - '1 second' - '1 millisecond' - '1 microsecond' - '1 nanosecond'
select -interval '1 year' - interval '1 month' - interval '1 day' - interval '1 hour' - interval '1 minute' - interval '1 second' - interval '1 millisecond' - interval '1 microsecond' - interval '1 nanosecond'
----
-13 mons -1 days -1 hours -1 mins -1.001001001 secs

Expand All @@ -296,12 +247,6 @@ select interval 1 month + interval 1 day + '2012-01-01'::date;
----
2012-02-02

# Interval string literal parenthesized + date
query D
select ( interval '1 month' + '1 day' ) + '2012-01-01'::date;
----
2012-02-02

# Interval nested string literal + date
query D
select interval 1 year + interval 1 month + interval 1 day + '2012-01-01'::date
Expand Down
71 changes: 71 additions & 0 deletions datafusion/sqllogictest/test_files/interval_mysql.slt
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Use `interval` SQL literal syntax with MySQL dialect

# this should fail with the generic dialect
query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \+ Utf8 to valid types
select interval '1' + '1' month

statement ok
set datafusion.sql_parser.dialect = 'Mysql';

# Interval with string literal addition and leading field
query ?
select interval '1' + '1' month
----
2 mons

# Interval with nested numeric literal addition
query ?
select interval 1 + 1 + 1 month
----
3 mons

# Interval with nested string literal addition and leading field
query ?
select interval '1' + '1' + '1' month
----
3 mons

# Interval with string literal subtraction and leading field
query ?
select interval '5' - '1' - '2' year;
----
24 mons

# Interval with nested string literal subtraction and leading field
query ?
select interval '10' - '1' - '1' month;
----
8 mons

# Interval with string literal negation and leading field
query ?
select -interval '5' - '1' - '2' year;
----
-96 mons

# Interval with nested string literal negation and leading field
query ?
select -interval '10' - '1' - '1' month;
----
-12 mons

# revert to standard dialect
statement ok
set datafusion.sql_parser.dialect = 'Generic';

0 comments on commit 268d478

Please sign in to comment.