Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support parenthesized expression in filter #888

Merged
merged 4 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/ppl-lang/PPL-Example-Commands.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ _- **Limitation: new field added by eval command with a function cannot be dropp
- `source = table | where a < 1 | fields a,b,c`
- `source = table | where b != 'test' | fields a,b,c`
- `source = table | where c = 'test' | fields a,b,c | head 3`
- `source = table | where c = 'test' AND a = 1 | fields a,b,c`
- `source = table | where c != 'test' OR a > 1 | fields a,b,c`
- `source = table | where (b > 1 OR a > 1) AND c != 'test' | fields a,b,c`
- `source = table | where c = 'test' NOT a > 1 | fields a,b,c` - Note: "AND" is optional
- `source = table | where ispresent(b)`
- `source = table | where isnull(coalesce(a, b)) | fields a,b,c | head 3`
- `source = table | where isempty(a)`
Expand Down
13 changes: 5 additions & 8 deletions docs/ppl-lang/ppl-where-command.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@ PPL query:
### Additional Examples

#### **Filters With Logical Conditions**
```
- `source = table | where c = 'test' AND a = 1 | fields a,b,c`
- `source = table | where c != 'test' OR a > 1 | fields a,b,c | head 1`
- `source = table | where c = 'test' NOT a > 1 | fields a,b,c`
- `source = table | where a = 1 | fields a,b,c`
- `source = table | where a >= 1 | fields a,b,c`
- `source = table | where a < 1 | fields a,b,c`
- `source = table | where b != 'test' | fields a,b,c`
- `source = table | where c = 'test' | fields a,b,c | head 3`
- `source = table | where c = 'test' AND a = 1 | fields a,b,c`
- `source = table | where c != 'test' OR a > 1 | fields a,b,c`
- `source = table | where (b > 1 OR a > 1) AND c != 'test' | fields a,b,c`
- `source = table | where c = 'test' NOT a > 1 | fields a,b,c` - Note: "AND" is optional
- `source = table | where ispresent(b)`
- `source = table | where isnull(coalesce(a, b)) | fields a,b,c | head 3`
- `source = table | where isempty(a)`
Expand All @@ -45,7 +45,6 @@ PPL query:
- `source = table | where b not between '2024-09-10' and '2025-09-10'` - Note: This returns b < '2024-09-10' or b > '2025-09-10'
- `source = table | where cidrmatch(ip, '192.169.1.0/24')`
- `source = table | where cidrmatch(ipv6, '2003:db8::/32')`

- `source = table | eval status_category =
case(a >= 200 AND a < 300, 'Success',
a >= 300 AND a < 400, 'Redirection',
Expand All @@ -57,10 +56,8 @@ PPL query:
a >= 400 AND a < 500, 'Client Error',
a >= 500, 'Server Error'
else 'Incorrect HTTP status code'
) = 'Incorrect HTTP status code'

) = 'Incorrect HTTP status code'`
- `source = table
| eval factor = case(a > 15, a - 14, isnull(b), a - 7, a < 3, a + 1 else 1)
| where case(factor = 2, 'even', factor = 4, 'even', factor = 6, 'even', factor = 8, 'even' else 'odd') = 'even'
| stats count() by factor`
```
47 changes: 26 additions & 21 deletions integ-test/src/integration/resources/tpch/q19.ppl
Original file line number Diff line number Diff line change
Expand Up @@ -37,25 +37,30 @@ where
*/

source = lineitem
| join ON p_partkey = l_partkey
and p_brand = 'Brand#12'
and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
and l_quantity >= 1 and l_quantity <= 1 + 10
and p_size between 1 and 5
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON'
OR p_partkey = l_partkey
and p_brand = 'Brand#23'
and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
and l_quantity >= 10 and l_quantity <= 10 + 10
and p_size between 1 and 10
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON'
OR p_partkey = l_partkey
and p_brand = 'Brand#34'
and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
and l_quantity >= 20 and l_quantity <= 20 + 10
and p_size between 1 and 15
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON'
| join ON
(
p_partkey = l_partkey
and p_brand = 'Brand#12'
and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
and l_quantity >= 1 and l_quantity <= 1 + 10
and p_size between 1 and 5
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON'
) OR (
p_partkey = l_partkey
and p_brand = 'Brand#23'
and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
and l_quantity >= 10 and l_quantity <= 10 + 10
and p_size between 1 and 10
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON'
) OR (
p_partkey = l_partkey
and p_brand = 'Brand#34'
and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
and l_quantity >= 20 and l_quantity <= 20 + 10
and p_size between 1 and 15
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON'
)
part
2 changes: 1 addition & 1 deletion integ-test/src/integration/resources/tpch/q7.ppl
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ source = [
| join ON s_nationkey = n1.n_nationkey nation as n1
| join ON c_nationkey = n2.n_nationkey nation as n2
| where l_shipdate between date('1995-01-01') and date('1996-12-31')
and n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY' or n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE'
and ((n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY') or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE'))
| eval supp_nation = n1.n_name, cust_nation = n2.n_name, l_year = year(l_shipdate), volume = l_extendedprice * (1 - l_discount)
| fields supp_nation, cust_nation, l_year, volume
] as shipping
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -467,4 +467,96 @@ class FlintSparkPPLFiltersITSuite
val expectedPlan = Project(Seq(UnresolvedAttribute("state")), filter)
comparePlans(logicalPlan, expectedPlan, checkAnalysis = false)
}

// Checks that parentheses in a `where` clause change how AND/OR conditions are grouped,
// both in the returned rows and in the shape of the unresolved logical plan.
test("test parenthesis in filter") {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@LantaoJin can u plz add a nested parenthesis test case ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in addition - can u plz add a correspondent logical unit test under the PPL*** relevant tests ?

// Baseline without parentheses: the expected rows include John (age 25), so `age < 25`
// is not applied to the `country = 'Canada'` branch, i.e. AND is grouped before OR.
val frame = sql(s"""
| source = $testTable | where country = 'Canada' or age > 60 and age < 25 | fields name, age, country
| """.stripMargin)
assertSameRows(Seq(Row("John", 25, "Canada"), Row("Jane", 20, "Canada")), frame)

// Same conditions with the OR wrapped in parentheses: `age < 25` now applies to the
// whole group, so only Jane (age 20) is expected.
val frameWithParenthesis = sql(s"""
| source = $testTable | where (country = 'Canada' or age > 60) and age < 25 | fields name, age, country
| """.stripMargin)
assertSameRows(Seq(Row("Jane", 20, "Canada")), frameWithParenthesis)

// The parenthesized query is expected to produce And(Or(...), LessThan(...)) inside the
// Filter, with a Project of the three selected fields on top. The plans are compared
// with checkAnalysis = false.
val logicalPlan: LogicalPlan = frameWithParenthesis.queryExecution.logical
val table = UnresolvedRelation(Seq("spark_catalog", "default", "flint_ppl_test"))
val filter = Filter(
And(
Or(
EqualTo(UnresolvedAttribute("country"), Literal("Canada")),
GreaterThan(UnresolvedAttribute("age"), Literal(60))),
LessThan(UnresolvedAttribute("age"), Literal(25))),
table)
val expectedPlan = Project(
Seq(
UnresolvedAttribute("name"),
UnresolvedAttribute("age"),
UnresolvedAttribute("country")),
filter)
comparePlans(logicalPlan, expectedPlan, checkAnalysis = false)
}

// Runs a series of WHERE clauses with nested and combined parenthesized groups and checks
// the rows each query returns.
test("test complex and nested parenthesis in filter") {
  // Expected result rows that recur in the assertions below.
  val hello = Row("Hello", 30, "New York", "USA", 2023, 4)
  val jake = Row("Jake", 70, "California", "USA", 2023, 4)
  val john = Row("John", 25, "Ontario", "Canada", 2023, 4)
  val jane = Row("Jane", 20, "Quebec", "Canada", 2023, 4)

  // One outer group containing an AND whose right side is a parenthesized OR.
  val nestedOr = sql(s"""
       | source = $testTable | WHERE (age > 18 AND (state = 'California' OR state = 'New York'))
       | """.stripMargin)
  assertSameRows(Seq(hello, jake), nestedOr)

  // The same condition wrapped in several redundant layers of parentheses.
  val redundantParens = sql(s"""
       | source = $testTable | WHERE ((((age > 18) AND ((((state = 'California') OR state = 'New York'))))))
       | """.stripMargin)
  assertSameRows(Seq(hello, jake), redundantParens)

  // Two parenthesized groups joined by AND; the first contains a parenthesized BETWEEN.
  val betweenAndOr = sql(s"""
       | source = $testTable | WHERE (year = 2023 AND (month BETWEEN 1 AND 6)) AND (age >= 31 OR country = 'Canada')
       | """.stripMargin)
  assertSameRows(Seq(john, jake, jane), betweenAndOr)

  // An OR group AND-ed with an OR group that itself contains a nested AND group.
  val stateAndAgeOrCountry = sql(s"""
       | source = $testTable | WHERE ((state = 'Texas' OR state = 'California') AND (age < 30 OR (country = 'USA' AND year > 2020)))
       | """.stripMargin)
  assertSameRows(Seq(jake), stateAndAgeOrCountry)

  // Function calls (LIKE, LOWER, LENGTH) inside the parenthesized groups.
  val functionsInGroups = sql(s"""
       | source = $testTable | WHERE (LIKE(LOWER(name), 'a%') OR LIKE(LOWER(name), 'j%')) AND (LENGTH(state) > 6 OR (country = 'USA' AND age > 18))
       | """.stripMargin)
  assertSameRows(Seq(john, jake), functionsInGroups)

  // BETWEEN and IN combined with nested AND/OR groups.
  val betweenWithIn = sql(s"""
       | source = $testTable | WHERE (age BETWEEN 25 AND 40) AND ((state IN ('California', 'New York', 'Texas') AND year = 2023) OR (country != 'USA' AND (month = 1 OR month = 12)))
       | """.stripMargin)
  assertSameRows(Seq(hello), betweenWithIn)

  // NOT applied to a parenthesized group, followed by AND with another group.
  val negatedGroup = sql(s"""
       | source = $testTable | WHERE NOT (age < 18 OR (state = 'Alaska' AND year < 2020)) AND (country = 'USA' OR (country = 'Mexico' AND month BETWEEN 6 AND 8))
       | """.stripMargin)
  assertSameRows(Seq(jake, hello), negatedGroup)

  // A parenthesized NOT group AND-ed with deeply nested groups; no rows are expected.
  val negatedGroupWithModulo = sql(s"""
       | source = $testTable | WHERE (NOT (year < 2020 OR age < 18)) AND ((state = 'Texas' AND month % 2 = 0) OR (country = 'Mexico' AND (year = 2023 OR (year = 2022 AND month > 6))))
       | """.stripMargin)
  assertSameRows(Seq(), negatedGroupWithModulo)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,7 @@ expression

logicalExpression
: NOT logicalExpression # logicalNot
| LT_PRTHS logicalExpression RT_PRTHS # parentheticLogicalExpr
| comparisonExpression # comparsion
| left = logicalExpression (AND)? right = logicalExpression # logicalAnd
| left = logicalExpression OR right = logicalExpression # logicalOr
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,11 @@ public UnresolvedExpression visitBinaryArithmetic(OpenSearchPPLParser.BinaryArit
ctx.binaryOperator.getText(), Arrays.asList(visit(ctx.left), visit(ctx.right)));
}

/**
 * Handles a logical expression enclosed in parentheses by visiting the inner
 * logical expression and returning that result as-is.
 *
 * @param ctx parse-tree context of the parenthesized logical expression
 * @return the result of visiting the inner logical expression
 */
@Override
public UnresolvedExpression visitParentheticLogicalExpr(OpenSearchPPLParser.ParentheticLogicalExprContext ctx) {
return visit(ctx.logicalExpression()); // Discard parenthesis around
}

@Override
public UnresolvedExpression visitParentheticValueExpr(OpenSearchPPLParser.ParentheticValueExprContext ctx) {
return visit(ctx.valueExpression()); // Discard parenthesis around
Expand Down
Loading
Loading