Skip to content

Commit

Permalink
test: add regression test for unnesting dictionary encoded columns (#…
Browse files Browse the repository at this point in the history
…14395)

* chore: add regression test for unnest dict encoded cols

* chore: use dataframe api for testing

* chore: rm unused dep
  • Loading branch information
duongcongtoai authored Feb 2, 2025
1 parent ca9eef1 commit fa0874b
Showing 1 changed file with 67 additions and 0 deletions.
67 changes: 67 additions & 0 deletions datafusion/core/tests/dataframe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ use datafusion_functions_aggregate::count::count_udaf;
use datafusion_functions_aggregate::expr_fn::{
array_agg, avg, count, count_distinct, max, median, min, sum,
};
use datafusion_functions_nested::make_array::make_array_udf;
use datafusion_functions_window::expr_fn::{first_value, row_number};
use object_store::local::LocalFileSystem;
use sqlparser::ast::NullTreatment;
Expand Down Expand Up @@ -3358,6 +3359,72 @@ async fn unnest_columns() -> Result<()> {
Ok(())
}

/// Regression test for unnesting a list column that was built with
/// `make_array` from a dictionary-encoded string column.
///
/// Two scenarios are exercised against the same registered table:
/// 1. `make_array(column1)`: a single-element list per row.
/// 2. `make_array(column1, 'fixed_string')`: the dictionary-encoded column
///    combined with a plain Utf8 literal, giving two unnested rows per
///    input row.
#[tokio::test]
async fn unnest_dict_encoded_columns() -> Result<()> {
    // Dictionary with one key per value: keys 0, 1, 2 -> "x", "y", "z".
    let strings = vec!["x", "y", "z"];
    let keys = Int32Array::from_iter(0..strings.len() as i32);

    let utf8_values = StringArray::from(strings);
    let utf8_dict = DictionaryArray::new(keys, Arc::new(utf8_values));

    let batch =
        RecordBatch::try_from_iter(vec![("column1", Arc::new(utf8_dict) as ArrayRef)])?;

    let ctx = SessionContext::new();
    ctx.register_batch("test", batch)?;

    // make_array(dict_encoded_string)
    let make_array_udf_expr1 = make_array_udf().call(vec![col("column1")]);
    let df = ctx
        .table("test")
        .await?
        .select(vec![
            make_array_udf_expr1.alias("make_array_expr"),
            col("column1"),
        ])?
        .unnest_columns(&["make_array_expr"])?;

    // Propagate execution errors through the test's `Result` instead of
    // panicking, matching the `?` usage in the rest of this function.
    let results = df.collect().await?;
    // Cell contents are padded so every row is as wide as the border lines.
    let expected = [
        "+-----------------+---------+",
        "| make_array_expr | column1 |",
        "+-----------------+---------+",
        "| x               | x       |",
        "| y               | y       |",
        "| z               | z       |",
        "+-----------------+---------+",
    ];
    assert_batches_eq!(expected, &results);

    // make_array(dict_encoded_string,literal string)
    let make_array_udf_expr2 = make_array_udf().call(vec![
        col("column1"),
        lit(ScalarValue::new_utf8("fixed_string")),
    ]);
    let df = ctx
        .table("test")
        .await?
        .select(vec![
            make_array_udf_expr2.alias("make_array_expr"),
            col("column1"),
        ])?
        .unnest_columns(&["make_array_expr"])?;

    let results = df.collect().await?;
    // Each input row yields two output rows: the dictionary value followed
    // by the literal, with `column1` repeated alongside both.
    let expected = [
        "+-----------------+---------+",
        "| make_array_expr | column1 |",
        "+-----------------+---------+",
        "| x               | x       |",
        "| fixed_string    | x       |",
        "| y               | y       |",
        "| fixed_string    | y       |",
        "| z               | z       |",
        "| fixed_string    | z       |",
        "+-----------------+---------+",
    ];
    assert_batches_eq!(expected, &results);
    Ok(())
}

#[tokio::test]
async fn unnest_column_nulls() -> Result<()> {
let df = table_with_lists_and_nulls().await?;
Expand Down

0 comments on commit fa0874b

Please sign in to comment.