From 11ab169349cc71cdb59145eecc838043631409f5 Mon Sep 17 00:00:00 2001 From: Alan Cai Date: Thu, 20 Apr 2023 15:36:01 -0700 Subject: [PATCH] Adds lowering of DATE, TIME, and TIMESTAMP literals to logical plan --- CHANGELOG.md | 1 + partiql-ast/src/ast.rs | 7 +- partiql-conformance-tests/partiql-tests | 2 +- partiql-eval/src/eval/expr/mod.rs | 80 +++++++++------- partiql-logical-planner/src/lower.rs | 17 +++- partiql-parser/src/parse/partiql.lalrpop | 69 ++++++++++++-- partiql-value/Cargo.toml | 2 +- partiql-value/src/datetime.rs | 113 ++++++++++++++++------- 8 files changed, 209 insertions(+), 82 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c7c7996..f721a1ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - *BREAKING:* partiql-eval: modifies visibility of types implementing `EvalExpr` and `Evaluable` ### Added - Implements built-in function `EXTRACT` +- Adds lowering `DATE`/`TIME`/`TIMESTAMP` literals to logical plan ### Fixes - Fix parsing of `EXTRACT` datetime parts `YEAR`, `TIMEZONE_HOUR`, and `TIMEZONE_MINUTE` - Fix logical plan to eval plan conversion for `EvalOrderBySortSpec` with arguments `DESC` and `NULLS LAST` diff --git a/partiql-ast/src/ast.rs b/partiql-ast/src/ast.rs index f29084d7..53de2228 100644 --- a/partiql-ast/src/ast.rs +++ b/partiql-ast/src/ast.rs @@ -859,7 +859,7 @@ pub enum Type { NumericType, RealType, DoublePrecisionType, - TimestampType, + TimestampType(Option), CharacterType, CharacterVaryingType, MissingType, @@ -868,8 +868,9 @@ pub enum Type { BlobType, ClobType, DateType, - TimeType, - ZonedTimestampType, + TimeType(Option), + TimeTypeWithTimeZone(Option), + ZonedTimestampType(Option), StructType, TupleType, ListType, diff --git a/partiql-conformance-tests/partiql-tests b/partiql-conformance-tests/partiql-tests index fdf35b4d..5ee95565 160000 --- a/partiql-conformance-tests/partiql-tests +++ b/partiql-conformance-tests/partiql-tests @@ -1 +1 @@ -Subproject commit fdf35b4d09a134aa08ec15606cfa72d3ecfc33e1 +Subproject commit 5ee95565478f0f5671a397f94691b680165fc25a diff --git a/partiql-eval/src/eval/expr/mod.rs b/partiql-eval/src/eval/expr/mod.rs index 98834fa4..5fb2d889 100644 --- a/partiql-eval/src/eval/expr/mod.rs +++ b/partiql-eval/src/eval/expr/mod.rs @@ -10,6 +10,7 @@ use partiql_value::{ }; use regex::{Regex, RegexBuilder}; use rust_decimal::prelude::FromPrimitive; +use rust_decimal::RoundingStrategy; use std::borrow::{Borrow, Cow}; use std::fmt::Debug; @@ -952,10 +953,10 @@ impl EvalExpr for EvalFnExtractYear { Null => Null, Value::DateTime(dt) => match dt.as_ref() { DateTime::Date(d) => Value::from(d.year()), - DateTime::Timestamp(tstamp) => Value::from(tstamp.year()), - DateTime::TimestampWithTz(tstamp) => Value::from(tstamp.year()), - DateTime::Time(_) => Missing, - DateTime::TimeWithTz(_, _) => Missing, + DateTime::Timestamp(tstamp, _) => Value::from(tstamp.year()), + DateTime::TimestampWithTz(tstamp, _) => Value::from(tstamp.year()), + DateTime::Time(_, _) => Missing, + DateTime::TimeWithTz(_, _, _) => Missing, }, _ => Missing, }; @@ -977,10 +978,10 @@ impl EvalExpr for EvalFnExtractMonth { Null => Null, Value::DateTime(dt) => match dt.as_ref() { DateTime::Date(d) => Value::from(d.month() as u8), - DateTime::Timestamp(tstamp) => Value::from(tstamp.month() as u8), - DateTime::TimestampWithTz(tstamp) => Value::from(tstamp.month() as u8), - DateTime::Time(_) => Missing, - DateTime::TimeWithTz(_, _) => Missing, + DateTime::Timestamp(tstamp, _) => Value::from(tstamp.month() as u8), + DateTime::TimestampWithTz(tstamp, _) => Value::from(tstamp.month() as u8), + DateTime::Time(_, _) => Missing, + DateTime::TimeWithTz(_, _, _) => Missing, }, _ => Missing, }; @@ -1002,10 +1003,10 @@ impl EvalExpr for EvalFnExtractDay { Null => Null, Value::DateTime(dt) => match dt.as_ref() { DateTime::Date(d) => Value::from(d.day()), - DateTime::Timestamp(tstamp) => Value::from(tstamp.day()), - DateTime::TimestampWithTz(tstamp) => Value::from(tstamp.day()), - DateTime::Time(_) => Missing, - DateTime::TimeWithTz(_, _) => Missing, + DateTime::Timestamp(tstamp, _) => Value::from(tstamp.day()), + DateTime::TimestampWithTz(tstamp, _) => Value::from(tstamp.day()), + DateTime::Time(_, _) => Missing, + DateTime::TimeWithTz(_, _, _) => Missing, }, _ => Missing, }; @@ -1026,10 +1027,10 @@ impl EvalExpr for EvalFnExtractHour { let result = match value.borrow() { Null => Null, Value::DateTime(dt) => match dt.as_ref() { - DateTime::Time(t) => Value::from(t.hour()), - DateTime::TimeWithTz(t, _) => Value::from(t.hour()), - DateTime::Timestamp(tstamp) => Value::from(tstamp.hour()), - DateTime::TimestampWithTz(tstamp) => Value::from(tstamp.hour()), + DateTime::Time(t, _) => Value::from(t.hour()), + DateTime::TimeWithTz(t, _, _) => Value::from(t.hour()), + DateTime::Timestamp(tstamp, _) => Value::from(tstamp.hour()), + DateTime::TimestampWithTz(tstamp, _) => Value::from(tstamp.hour()), DateTime::Date(_) => Missing, }, _ => Missing, @@ -1051,10 +1052,10 @@ impl EvalExpr for EvalFnExtractMinute { let result = match value.borrow() { Null => Null, Value::DateTime(dt) => match dt.as_ref() { - DateTime::Time(t) => Value::from(t.minute()), - DateTime::TimeWithTz(t, _) => Value::from(t.minute()), - DateTime::Timestamp(tstamp) => Value::from(tstamp.minute()), - DateTime::TimestampWithTz(tstamp) => Value::from(tstamp.minute()), + DateTime::Time(t, _) => Value::from(t.minute()), + DateTime::TimeWithTz(t, _, _) => Value::from(t.minute()), + DateTime::Timestamp(tstamp, _) => Value::from(tstamp.minute()), + DateTime::TimestampWithTz(tstamp, _) => Value::from(tstamp.minute()), DateTime::Date(_) => Missing, }, _ => Missing, @@ -1069,10 +1070,17 @@ pub(crate) struct EvalFnExtractSecond { pub(crate) value: Box, } -fn total_seconds(second: u8, nanosecond: u32) -> Value { +fn total_seconds(second: u8, nanosecond: u32, precision: Option) -> Value { let result = rust_decimal::Decimal::from_f64(((second as f64 * 1e9) + nanosecond as f64) / 1e9) .expect("time as decimal"); - Value::from(result) + match precision { + None => Value::from(result), + Some(p) => { + // TODO: currently using `RoundingStrategy::MidpointAwayFromZero`, which follows what + // Kotlin does. Need to determine if this strategy is what we want or some configurability + Value::from(result.round_dp_with_strategy(p, RoundingStrategy::MidpointAwayFromZero)) + } + } } impl EvalExpr for EvalFnExtractSecond { @@ -1082,11 +1090,13 @@ impl EvalExpr for EvalFnExtractSecond { let result = match value.borrow() { Null => Null, Value::DateTime(dt) => match dt.as_ref() { - DateTime::Time(t) => total_seconds(t.second(), t.nanosecond()), - DateTime::TimeWithTz(t, _) => total_seconds(t.second(), t.nanosecond()), - DateTime::Timestamp(tstamp) => total_seconds(tstamp.second(), tstamp.nanosecond()), - DateTime::TimestampWithTz(tstamp) => { - total_seconds(tstamp.second(), tstamp.nanosecond()) + DateTime::Time(t, p) => total_seconds(t.second(), t.nanosecond(), *p), + DateTime::TimeWithTz(t, p, _) => total_seconds(t.second(), t.nanosecond(), *p), + DateTime::Timestamp(tstamp, p) => { + total_seconds(tstamp.second(), tstamp.nanosecond(), *p) + } + DateTime::TimestampWithTz(tstamp, p) => { + total_seconds(tstamp.second(), tstamp.nanosecond(), *p) } DateTime::Date(_) => Missing, }, @@ -1109,11 +1119,11 @@ impl EvalExpr for EvalFnExtractTimezoneHour { let result = match value.borrow() { Null => Null, Value::DateTime(dt) => match dt.as_ref() { - DateTime::TimeWithTz(_, tz) => Value::from(tz.whole_hours()), - DateTime::TimestampWithTz(tstamp) => Value::from(tstamp.offset().whole_hours()), + DateTime::TimeWithTz(_, _, tz) => Value::from(tz.whole_hours()), + DateTime::TimestampWithTz(tstamp, _) => Value::from(tstamp.offset().whole_hours()), DateTime::Date(_) => Missing, - DateTime::Time(_) => Missing, - DateTime::Timestamp(_) => Missing, + DateTime::Time(_, _) => Missing, + DateTime::Timestamp(_, _) => Missing, }, _ => Missing, }; @@ -1134,13 +1144,13 @@ impl EvalExpr for EvalFnExtractTimezoneMinute { let result = match value.borrow() { Null => Null, Value::DateTime(dt) => match dt.as_ref() { - DateTime::TimeWithTz(_, tz) => Value::from(tz.minutes_past_hour()), - DateTime::TimestampWithTz(tstamp) => { + DateTime::TimeWithTz(_, _, tz) => Value::from(tz.minutes_past_hour()), + DateTime::TimestampWithTz(tstamp, _) => { Value::from(tstamp.offset().minutes_past_hour()) } DateTime::Date(_) => Missing, - DateTime::Time(_) => Missing, - DateTime::Timestamp(_) => Missing, + DateTime::Time(_, _) => Missing, + DateTime::Timestamp(_, _) => Missing, }, _ => Missing, }; diff --git a/partiql-logical-planner/src/lower.rs b/partiql-logical-planner/src/lower.rs index 278a335f..72694d8b 100644 --- a/partiql-logical-planner/src/lower.rs +++ b/partiql-logical-planner/src/lower.rs @@ -10,7 +10,7 @@ use partiql_ast::ast::{ InsertValue, Item, Join, JoinKind, JoinSpec, Like, List, Lit, NodeId, NullOrderingSpec, OnConflict, OrderByExpr, OrderingSpec, Path, PathStep, ProjectExpr, Projection, ProjectionKind, Query, QuerySet, Remove, SearchedCase, Select, Set, SetExpr, SetQuantifier, Sexp, SimpleCase, - SortSpec, Struct, SymbolPrimitive, UniOp, UniOpKind, VarRef, + SortSpec, Struct, SymbolPrimitive, Type, UniOp, UniOpKind, VarRef, }; use partiql_ast::visit::{Visit, Visitor}; use partiql_logical as logical; @@ -20,7 +20,7 @@ use partiql_logical::{ PatternMatchExpr, SortSpecOrder, TupleExpr, ValueExpr, }; -use partiql_value::{BindingsName, Value}; +use partiql_value::{BindingsName, DateTime, Value}; use std::collections::{HashMap, HashSet}; @@ -843,7 +843,18 @@ impl<'ast> Visitor<'ast> for AstToLogical { Lit::BitStringLit(_) => todo!("BitStringLit"), Lit::HexStringLit(_) => todo!("HexStringLit"), Lit::CollectionLit(_) => todo!("CollectionLit"), - Lit::TypedLit(_, _) => todo!("TypedLit"), + Lit::TypedLit(s, t) => match t { + Type::DateType => Value::DateTime(Box::new(DateTime::from_yyyy_mm_dd(s))), + Type::TimeType(p) => Value::DateTime(Box::new(DateTime::from_hh_mm_ss(s, p))), + Type::TimeTypeWithTimeZone(p) => { + Value::DateTime(Box::new(DateTime::from_hh_mm_ss_time_zone(s, p))) + } + Type::TimestampType(p) => Value::DateTime(Box::new(DateTime::from_hh_mm_ss(s, p))), + Type::ZonedTimestampType(p) => { + Value::DateTime(Box::new(DateTime::from_hh_mm_ss_time_zone(s, p))) + } + _ => todo!("Other types"), + }, }; self.push_value(val); } diff --git a/partiql-parser/src/parse/partiql.lalrpop b/partiql-parser/src/parse/partiql.lalrpop index 19d6743f..abec7449 100644 --- a/partiql-parser/src/parse/partiql.lalrpop +++ b/partiql-parser/src/parse/partiql.lalrpop @@ -1184,14 +1184,7 @@ LiteralIon: ast::Lit = { } #[inline] -TypeKeywordStr: &'static str = { - "DATE" => "DATE", - "TIME" => "TIME", - "TIMESTAMP" => "TIMESTAMP", - "WITH" => "WITH", - "WITHOUT" => "WITHOUT", - "ZONE" => "ZONE", -} +TypeKeywordStr: &'static str = {} #[inline] TypeKeyword: ast::SymbolPrimitive = { @@ -1209,8 +1202,68 @@ TypeNamePart: ast::CustomTypePart = { "(" > ")" => ast::CustomTypePart::Parameterized( id, args ), } +#[inline] +TimePrecision: &'input str = { + "(" ")" => p +} + #[inline] TypeName: ast::Type = { + "DATE" => ast::Type::DateType, + "TIME" => { + match p { + None => ast::Type::TimeType(None), + Some(p) => { + let precision = p.parse::().unwrap(); + ast::Type::TimeType(Some(precision)) + } + } + }, + "TIME" "WITH" "TIME" "ZONE" => { + match p { + None => ast::Type::TimeTypeWithTimeZone(None), + Some(p) => { + let precision = p.parse::().unwrap(); + ast::Type::TimeTypeWithTimeZone(Some(precision)) + } + } + }, + "TIME" "WITHOUT" "TIME" "ZONE" => { + match p { + None => ast::Type::TimeType(None), + Some(p) => { + let precision = p.parse::().unwrap(); + ast::Type::TimeType(Some(precision)) + } + } + }, + "TIMESTAMP" => { + match p { + None => ast::Type::TimestampType(None), + Some(p) => { + let precision = p.parse::().unwrap(); + ast::Type::TimestampType(Some(precision)) + } + } + }, + "TIMESTAMP" "WITH" "TIME" "ZONE" => { + match p { + None => ast::Type::ZonedTimestampType(None), + Some(p) => { + let precision = p.parse::().unwrap(); + ast::Type::ZonedTimestampType(Some(precision)) + } + } + }, + "TIMESTAMP" "WITHOUT" "TIME" "ZONE" => { + match p { + None => ast::Type::TimestampType(None), + Some(p) => { + let precision = p.parse::().unwrap(); + ast::Type::TimestampType(Some(precision)) + } + } + }, => ast::Type::CustomType( ast::CustomType{ parts } ), } diff --git a/partiql-value/Cargo.toml b/partiql-value/Cargo.toml index 67aa5072..42d409eb 100644 --- a/partiql-value/Cargo.toml +++ b/partiql-value/Cargo.toml @@ -28,7 +28,7 @@ rust_decimal = { version = "1.25.0", default-features = false, features = ["std" rust_decimal_macros = "1.26" serde = { version = "1.*", features = ["derive"], optional = true } ion-rs = "0.16" -time = { version = "0.3", features = ["macros", "serde"] } +time = { version = "0.3", features = ["macros", "serde", "parsing"] } once_cell = "1" regex = "1.7" diff --git a/partiql-value/src/datetime.rs b/partiql-value/src/datetime.rs index 67fd2877..747461dc 100644 --- a/partiql-value/src/datetime.rs +++ b/partiql-value/src/datetime.rs @@ -4,21 +4,25 @@ use std::cmp::Ordering; use std::fmt::{Debug, Formatter}; use std::hash::Hash; use std::num::NonZeroU8; +use time::macros::format_description; use time::{Duration, UtcOffset}; #[derive(Hash, PartialEq, Eq, Clone)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum DateTime { Date(time::Date), - Time(time::Time), - TimeWithTz(time::Time, time::UtcOffset), - Timestamp(time::PrimitiveDateTime), - TimestampWithTz(time::OffsetDateTime), + Time(time::Time, Option), + TimeWithTz(time::Time, Option, time::UtcOffset), + Timestamp(time::PrimitiveDateTime, Option), + TimestampWithTz(time::OffsetDateTime, Option), } impl DateTime { pub fn from_hms(hour: u8, minute: u8, second: u8) -> Self { - DateTime::Time(time::Time::from_hms(hour, minute, second).expect("valid time value")) + DateTime::Time( + time::Time::from_hms(hour, minute, second).expect("valid time value"), + None, + ) } pub fn from_hms_nano(hour: u8, minute: u8, second: u8, nanosecond: u32) -> Self { @@ -63,11 +67,11 @@ impl DateTime { let date = time::Date::from_calendar_date(year, month, day).expect("valid ymd"); let time = time_from_hms_nano(hour, minute, second, nanosecond); match offset { - None => DateTime::Timestamp(date.with_time(time)), + None => DateTime::Timestamp(date.with_time(time), None), Some(o) => { let offset = UtcOffset::from_whole_seconds(o * 60).expect("offset in range"); let date = date.with_time(time).assume_offset(offset); - DateTime::TimestampWithTz(date) + DateTime::TimestampWithTz(date, None) } } } @@ -81,10 +85,49 @@ impl DateTime { ) -> Self { let time = time_from_hms_nano(hour, minute, second, nanosecond); match offset { - Some(offset) => DateTime::TimeWithTz(time, offset), - None => DateTime::Time(time), + Some(offset) => DateTime::TimeWithTz(time, None, offset), + None => DateTime::Time(time, None), } } + + pub fn from_yyyy_mm_dd(date: &str) -> Self { + let format = format_description!("[year]-[month]-[day]"); + let date = time::Date::parse(date, &format).expect("valid date string"); + DateTime::Date(date) + } + + pub fn from_hh_mm_ss(time: &str, precision: &Option) -> Self { + let format = format_description!("[hour]:[minute]:[second].[subsecond]"); + let time = time::Time::parse(time, &format).expect("valid time string"); + DateTime::Time(time, *precision) + } + + pub fn from_hh_mm_ss_time_zone(time: &str, precision: &Option) -> Self { + let time_format = format_description!( + "[hour]:[minute]:[second].[subsecond][offset_hour]:[offset_minute]" + ); + let time_part = time::Time::parse(time, &time_format).expect("valid time with time zone"); + let time_format = format_description!( + "[hour]:[minute]:[second].[subsecond][offset_hour]:[offset_minute]" + ); + let offset_part = time::UtcOffset::parse(time, &time_format).expect("valid time zone"); + DateTime::TimeWithTz(time_part, *precision, offset_part) + } + + pub fn from_yyyy_mm_dd_hh_mm_ss(timestamp: &str, precision: &Option) -> Self { + let format = + format_description!("[year]-[month]-[day] [hour]:[minute]:[second].[subsecond]"); + let time = + time::PrimitiveDateTime::parse(timestamp, &format).expect("valid timestamp string"); + DateTime::Timestamp(time, *precision) + } + + pub fn from_yyyy_mm_dd_hh_mm_ss_time_zone(timestamp: &str, precision: &Option) -> Self { + let format = format_description!("[year]-[month]-[day] [hour]:[minute]:[second].[subsecond][offset_hour]:[offset_minute]"); + let time = time::OffsetDateTime::parse(timestamp, &format) + .expect("valid timestamp string with time zone"); + DateTime::TimestampWithTz(time, *precision) + } } fn time_from_hms_nano(hour: u8, minute: u8, second: u8, nanosecond: u32) -> time::Time { @@ -97,18 +140,22 @@ impl Debug for DateTime { DateTime::Date(d) => { write!(f, "DATE '{d:?}'") } - DateTime::Time(t) => { - write!(f, "TIME '{t:?}'") - } - DateTime::TimeWithTz(t, tz) => { - write!(f, "TIME WITH TIME ZONE '{t:?} {tz:?}'") - } - DateTime::Timestamp(dt) => { - write!(f, "TIMESTAMP '{dt:?}'") - } - DateTime::TimestampWithTz(dt) => { - write!(f, "TIMESTAMP WITH TIME ZONE '{dt:?}'") - } + DateTime::Time(t, p) => match p { + None => write!(f, "TIME '{t:?}'"), + Some(p) => write!(f, "TIME ({p:?}) '{t:?}'"), + }, + DateTime::TimeWithTz(t, p, tz) => match p { + None => write!(f, "TIME WITH TIME ZONE '{t:?} {tz:?}'"), + Some(p) => write!(f, "TIME ({p:?}) WITH TIME ZONE '{t:?} {tz:?}'"), + }, + DateTime::Timestamp(dt, p) => match p { + None => write!(f, "TIMESTAMP '{dt:?}'"), + Some(p) => write!(f, "TIMESTAMP ({p:?}) '{dt:?}'"), + }, + DateTime::TimestampWithTz(dt, p) => match p { + None => write!(f, "TIMESTAMP WITH TIME ZONE '{dt:?}'"), + Some(p) => write!(f, "TIMESTAMP ({p:?}) WITH TIME ZONE '{dt:?}'"), + }, } } } @@ -127,25 +174,29 @@ impl Ord for DateTime { (DateTime::Date(_), _) => Ordering::Less, (_, DateTime::Date(_)) => Ordering::Greater, - (DateTime::Time(l), DateTime::Time(r)) => l.cmp(r), - (DateTime::Time(_), _) => Ordering::Less, - (_, DateTime::Time(_)) => Ordering::Greater, + (DateTime::Time(l, _lp), DateTime::Time(r, _rp)) => l.cmp(r), + // TODO: sorting using the time precisions + (DateTime::Time(_, _), _) => Ordering::Less, + (_, DateTime::Time(_, _)) => Ordering::Greater, - (DateTime::TimeWithTz(l, lo), DateTime::TimeWithTz(r, ro)) => { + (DateTime::TimeWithTz(l, _lp, lo), DateTime::TimeWithTz(r, _rp, ro)) => { + // TODO: sorting using the time precisions let lod = Duration::new(lo.whole_seconds() as i64, 0); let rod = Duration::new(ro.whole_seconds() as i64, 0); let l_adjusted = *l + lod; let r_adjusted = *r + rod; l_adjusted.cmp(&r_adjusted) } - (DateTime::TimeWithTz(_, _), _) => Ordering::Less, - (_, DateTime::TimeWithTz(_, _)) => Ordering::Greater, + (DateTime::TimeWithTz(_, _, _), _) => Ordering::Less, + (_, DateTime::TimeWithTz(_, _, _)) => Ordering::Greater, - (DateTime::Timestamp(l), DateTime::Timestamp(r)) => l.cmp(r), - (DateTime::Timestamp(_), _) => Ordering::Less, - (_, DateTime::Timestamp(_)) => Ordering::Greater, + // TODO: sorting using the timestamp precisions + (DateTime::Timestamp(l, _lp), DateTime::Timestamp(r, _rp)) => l.cmp(r), + (DateTime::Timestamp(_, _), _) => Ordering::Less, + (_, DateTime::Timestamp(_, _)) => Ordering::Greater, - (DateTime::TimestampWithTz(l), DateTime::TimestampWithTz(r)) => l.cmp(r), + // TODO: sorting using the timestamp precisions + (DateTime::TimestampWithTz(l, _lp), DateTime::TimestampWithTz(r, _rp)) => l.cmp(r), } } }