Skip to content

Commit

Permalink
Fix aggregates for 0.2.0
Browse files Browse the repository at this point in the history
ndc spec expects sum aggregates return a scalar represented as either return f64 or i64
Because ndc-postgres represents i64 as a string, we only mark sum aggregates returning a f64

any other sum aggregate will function as a custom aggregate and have no special meaning

additionally, we wrap SUM with `COALESCE(SUM(col), 0)` to ensure we return 0 when aggregating over no rows.

similarly, we only mark avg functions returning a f64, and treat any other avg as a custom aggregate
  • Loading branch information
BenoitRanque committed Jan 4, 2025
1 parent 8a64bc6 commit 2f470ec
Show file tree
Hide file tree
Showing 6 changed files with 207 additions and 115 deletions.
20 changes: 14 additions & 6 deletions crates/connectors/ndc-postgres/src/schema/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,24 @@ pub fn get_schema(
.map(|(function_name, function_definition)| {
(
function_name.clone(),
match function_name.as_str() {
"max" => models::AggregateFunctionDefinition::Max,
"min" => models::AggregateFunctionDefinition::Min,
"sum" => models::AggregateFunctionDefinition::Sum {
match (
function_name.as_str(),
function_definition.return_type.as_str(),
) {
// Mark SUM aggregations returning a f64 (float8) with the meaning tag.
// The spec wants SUM aggregations to return scalars represented as either f64 or i64
// i64 (int8) is represented as a string, so we omit it here
("sum", "float8") => models::AggregateFunctionDefinition::Sum {
result_type: function_definition.return_type.clone().into(),
},
"average" => models::AggregateFunctionDefinition::Average {
("max", _) => models::AggregateFunctionDefinition::Max,
("min", _) => models::AggregateFunctionDefinition::Min,
// Mark AVG aggregations returning a f64 (float8) with the meaning tag
// The spec wants all averages to return a scalar represented as a f64
("avg", "float8") => models::AggregateFunctionDefinition::Average {
result_type: function_definition.return_type.clone().into(),
},
_ => models::AggregateFunctionDefinition::Custom {
(_, _) => models::AggregateFunctionDefinition::Custom {
result_type: models::Type::Nullable {
// It turns out that all aggregates defined for postgres
// (_except_ `COUNT`) will return `NULL` for an empty row set.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,23 @@ pub fn translate(
},
),
);
sql::ast::Expression::FunctionCall {
let aggregate_function_call_expression = sql::ast::Expression::FunctionCall {
function: sql::ast::Function::Unknown(function.to_string()),
args: vec![column],
};
// postgres SUM aggregate returns null if no input rows are provided
// however, the ndc spec requires that SUM aggregates over no input rows return 0
// we achieve this with COALESCE, falling back to 0 if the aggregate expression returns null
if function.as_str() == "sum" {
sql::ast::Expression::FunctionCall {
function: sql::ast::Function::Coalesce,
args: vec![
aggregate_function_call_expression,
sql::ast::Expression::Value(sql::ast::Value::Int4(0)),
],
}
} else {
aggregate_function_call_expression
}
}
models::Aggregate::StarCount {} => {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
---
source: crates/query-engine/translation/tests/tests.rs
expression: result
snapshot_kind: text
---
SELECT
coalesce(json_agg(row_to_json("%3_universe")), '[]') AS "universe"
Expand All @@ -23,7 +24,7 @@ FROM
COUNT("%2_Invoice"."InvoiceId") AS "InvoiceId_count",
min("%2_Invoice"."Total") AS "Total__min",
max("%2_Invoice"."Total") AS "Total__max",
sum("%2_Invoice"."Total") AS "Total__sum",
coalesce(sum("%2_Invoice"."Total"), 0) AS "Total__sum",
stddev("%2_Invoice"."Total") AS "Total__stddev",
COUNT(*) AS "count_all"
FROM
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
---
source: crates/tests/databases-tests/src/citus/schema_tests.rs
assertion_line: 7
expression: result
snapshot_kind: text
---
Expand Down Expand Up @@ -538,8 +537,14 @@ snapshot_kind: text
}
},
"sum": {
"type": "sum",
"result_type": "int8"
"type": "custom",
"result_type": {
"type": "nullable",
"underlying_type": {
"type": "named",
"name": "int8"
}
}
},
"var_pop": {
"type": "custom",
Expand Down Expand Up @@ -622,14 +627,8 @@ snapshot_kind: text
},
"aggregate_functions": {
"avg": {
"type": "custom",
"result_type": {
"type": "nullable",
"underlying_type": {
"type": "named",
"name": "float8"
}
}
"type": "average",
"result_type": "float8"
},
"max": {
"type": "max"
Expand Down Expand Up @@ -668,8 +667,14 @@ snapshot_kind: text
}
},
"sum": {
"type": "sum",
"result_type": "float4"
"type": "custom",
"result_type": {
"type": "nullable",
"underlying_type": {
"type": "named",
"name": "float4"
}
}
},
"var_pop": {
"type": "custom",
Expand Down Expand Up @@ -752,14 +757,8 @@ snapshot_kind: text
},
"aggregate_functions": {
"avg": {
"type": "custom",
"result_type": {
"type": "nullable",
"underlying_type": {
"type": "named",
"name": "float8"
}
}
"type": "average",
"result_type": "float8"
},
"max": {
"type": "max"
Expand Down Expand Up @@ -958,8 +957,14 @@ snapshot_kind: text
}
},
"sum": {
"type": "sum",
"result_type": "int8"
"type": "custom",
"result_type": {
"type": "nullable",
"underlying_type": {
"type": "named",
"name": "int8"
}
}
},
"var_pop": {
"type": "custom",
Expand Down Expand Up @@ -1118,8 +1123,14 @@ snapshot_kind: text
}
},
"sum": {
"type": "sum",
"result_type": "int8"
"type": "custom",
"result_type": {
"type": "nullable",
"underlying_type": {
"type": "named",
"name": "int8"
}
}
},
"var_pop": {
"type": "custom",
Expand Down Expand Up @@ -1278,8 +1289,14 @@ snapshot_kind: text
}
},
"sum": {
"type": "sum",
"result_type": "numeric"
"type": "custom",
"result_type": {
"type": "nullable",
"underlying_type": {
"type": "named",
"name": "numeric"
}
}
},
"var_pop": {
"type": "custom",
Expand Down Expand Up @@ -1378,8 +1395,14 @@ snapshot_kind: text
"type": "min"
},
"sum": {
"type": "sum",
"result_type": "interval"
"type": "custom",
"result_type": {
"type": "nullable",
"underlying_type": {
"type": "named",
"name": "interval"
}
}
}
},
"comparison_operators": {
Expand Down Expand Up @@ -1478,8 +1501,14 @@ snapshot_kind: text
}
},
"sum": {
"type": "sum",
"result_type": "numeric"
"type": "custom",
"result_type": {
"type": "nullable",
"underlying_type": {
"type": "named",
"name": "numeric"
}
}
},
"var_pop": {
"type": "custom",
Expand Down Expand Up @@ -1704,8 +1733,14 @@ snapshot_kind: text
"type": "min"
},
"sum": {
"type": "sum",
"result_type": "interval"
"type": "custom",
"result_type": {
"type": "nullable",
"underlying_type": {
"type": "named",
"name": "interval"
}
}
}
},
"comparison_operators": {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
---
source: crates/tests/databases-tests/src/cockroach/schema_tests.rs
assertion_line: 7
expression: result
snapshot_kind: text
---
Expand Down Expand Up @@ -361,14 +360,8 @@ snapshot_kind: text
},
"aggregate_functions": {
"avg": {
"type": "custom",
"result_type": {
"type": "nullable",
"underlying_type": {
"type": "named",
"name": "float8"
}
}
"type": "average",
"result_type": "float8"
},
"sqrdiff": {
"type": "custom",
Expand Down Expand Up @@ -495,14 +488,8 @@ snapshot_kind: text
},
"aggregate_functions": {
"avg": {
"type": "custom",
"result_type": {
"type": "nullable",
"underlying_type": {
"type": "named",
"name": "float8"
}
}
"type": "average",
"result_type": "float8"
},
"sqrdiff": {
"type": "custom",
Expand Down Expand Up @@ -629,14 +616,8 @@ snapshot_kind: text
},
"aggregate_functions": {
"avg": {
"type": "custom",
"result_type": {
"type": "nullable",
"underlying_type": {
"type": "named",
"name": "float8"
}
}
"type": "average",
"result_type": "float8"
},
"bit_and": {
"type": "custom",
Expand Down Expand Up @@ -803,14 +784,8 @@ snapshot_kind: text
},
"aggregate_functions": {
"avg": {
"type": "custom",
"result_type": {
"type": "nullable",
"underlying_type": {
"type": "named",
"name": "float8"
}
}
"type": "average",
"result_type": "float8"
},
"bit_and": {
"type": "custom",
Expand Down Expand Up @@ -1047,8 +1022,14 @@ snapshot_kind: text
}
},
"sum": {
"type": "sum",
"result_type": "numeric"
"type": "custom",
"result_type": {
"type": "nullable",
"underlying_type": {
"type": "named",
"name": "numeric"
}
}
},
"sum_int": {
"type": "custom",
Expand Down Expand Up @@ -1161,8 +1142,14 @@ snapshot_kind: text
}
},
"sum": {
"type": "sum",
"result_type": "interval"
"type": "custom",
"result_type": {
"type": "nullable",
"underlying_type": {
"type": "named",
"name": "interval"
}
}
}
},
"comparison_operators": {
Expand Down Expand Up @@ -1265,8 +1252,14 @@ snapshot_kind: text
}
},
"sum": {
"type": "sum",
"result_type": "numeric"
"type": "custom",
"result_type": {
"type": "nullable",
"underlying_type": {
"type": "named",
"name": "numeric"
}
}
},
"var_pop": {
"type": "custom",
Expand Down Expand Up @@ -1517,8 +1510,14 @@ snapshot_kind: text
}
},
"sum": {
"type": "sum",
"result_type": "interval"
"type": "custom",
"result_type": {
"type": "nullable",
"underlying_type": {
"type": "named",
"name": "interval"
}
}
}
},
"comparison_operators": {
Expand Down
Loading

0 comments on commit 2f470ec

Please sign in to comment.