forked from duckdb/duckdb-iceberg
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request duckdb#59 from motherduckdb/pb/generate-small-data
add generated_data to repo to facilitate CI
- Loading branch information
Showing
4 changed files
with
33 additions
and
41 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
52 changes: 25 additions & 27 deletions
52
...ceberg_scan_generated_data_0_01.test_slow → ...ql/iceberg_scan_generated_data_0_001.test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,110 +1,108 @@ | ||
# name: test/sql/iceberg_scan_generated_data_0_01.test_slow | ||
# description: test iceberg extension with the sf0.01 generated test set | ||
# name: test/sql/iceberg_scan_generated_data_0_001.test | ||
# description: test iceberg extension with the sf0.001 generated test set | ||
# group: [iceberg] | ||
|
||
require parquet | ||
|
||
require iceberg | ||
|
||
require-env DUCKDB_ICEBERG_HAVE_TEST_DATA | ||
|
||
### Iceberg spec v1 | ||
|
||
# Check count matches | ||
query I | ||
SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_01/pyspark_iceberg_table'); | ||
SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table'); | ||
---- | ||
<FILE>:data/iceberg/generated_spec1_0_01/expected_results/last/count.csv | ||
<FILE>:data/iceberg/generated_spec1_0_001/expected_results/last/count.csv | ||
|
||
# Check schema is identical, sorting by uuid to guarantee unique order | ||
query I nosort q1-schema | ||
DESCRIBE SELECT * FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_01/pyspark_iceberg_table') ORDER BY uuid; | ||
DESCRIBE SELECT * FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table') ORDER BY uuid; | ||
---- | ||
|
||
query I nosort q1-schema | ||
DESCRIBE SELECT * FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_01/pyspark_iceberg_table/metadata/v9.metadata.json') ORDER BY uuid; | ||
DESCRIBE SELECT * FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v9.metadata.json') ORDER BY uuid; | ||
---- | ||
|
||
query I nosort q1-schema | ||
DESCRIBE SELECT * FROM PARQUET_SCAN('data/iceberg/generated_spec1_0_01/expected_results/q08/data/*.parquet') ORDER BY uuid; | ||
DESCRIBE SELECT * FROM PARQUET_SCAN('data/iceberg/generated_spec1_0_001/expected_results/last/data/*.parquet') ORDER BY uuid; | ||
---- | ||
|
||
# Check data is identical, sorting by uuid to guarantee unique order | ||
query I nosort q1-data | ||
SELECT * FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_01/pyspark_iceberg_table') ORDER BY uuid; | ||
SELECT * FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table') ORDER BY uuid; | ||
---- | ||
|
||
query I nosort q1-data | ||
SELECT * FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_01/pyspark_iceberg_table/metadata/v9.metadata.json') ORDER BY uuid; | ||
SELECT * FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v9.metadata.json') ORDER BY uuid; | ||
---- | ||
|
||
query I nosort q1-data | ||
SELECT * FROM PARQUET_SCAN('data/iceberg/generated_spec1_0_01/expected_results/q08/data/*.parquet') ORDER BY uuid; | ||
SELECT * FROM PARQUET_SCAN('data/iceberg/generated_spec1_0_001/expected_results/last/data/*.parquet') ORDER BY uuid; | ||
---- | ||
|
||
# Confirm the type matches that of the iceberg schema | ||
query IIIIII | ||
DESCRIBE SELECT schema_evol_added_col_1 FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_01/pyspark_iceberg_table') ORDER BY uuid; | ||
DESCRIBE SELECT schema_evol_added_col_1 FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table') ORDER BY uuid; | ||
---- | ||
schema_evol_added_col_1 BIGINT YES NULL NULL NULL | ||
|
||
### Iceberg spec v2 | ||
|
||
# Check count matches | ||
query I | ||
SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_01/pyspark_iceberg_table'); | ||
SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_001/pyspark_iceberg_table'); | ||
---- | ||
<FILE>:data/iceberg/generated_spec2_0_01/expected_results/last/count.csv | ||
<FILE>:data/iceberg/generated_spec2_0_001/expected_results/last/count.csv | ||
|
||
# We should also be able to scan the metadata file directly | ||
query I | ||
SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_01/pyspark_iceberg_table/metadata/v9.metadata.json'); | ||
SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v9.metadata.json'); | ||
---- | ||
<FILE>:data/iceberg/generated_spec2_0_01/expected_results/last/count.csv | ||
<FILE>:data/iceberg/generated_spec2_0_001/expected_results/last/count.csv | ||
|
||
# Check schema is identical, sorting by uuid to guarantee unique order | ||
query I nosort q2-schema | ||
DESCRIBE SELECT * FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_01/pyspark_iceberg_table') ORDER BY uuid; | ||
DESCRIBE SELECT * FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_001/pyspark_iceberg_table') ORDER BY uuid; | ||
---- | ||
|
||
query I nosort q2-schema | ||
DESCRIBE SELECT * FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_01/pyspark_iceberg_table/metadata/v9.metadata.json') ORDER BY uuid; | ||
DESCRIBE SELECT * FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v9.metadata.json') ORDER BY uuid; | ||
---- | ||
|
||
query I nosort q2-schema | ||
DESCRIBE SELECT * FROM PARQUET_SCAN('data/iceberg/generated_spec2_0_01/expected_results/q08/data/*.parquet') ORDER BY uuid; | ||
DESCRIBE SELECT * FROM PARQUET_SCAN('data/iceberg/generated_spec2_0_001/expected_results/last/data/*.parquet') ORDER BY uuid; | ||
---- | ||
|
||
# Check data is identical, sorting by uuid to guarantee unique order | ||
query I nosort q2-data | ||
SELECT * FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_01/pyspark_iceberg_table') ORDER BY uuid; | ||
SELECT * FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_001/pyspark_iceberg_table') ORDER BY uuid; | ||
---- | ||
|
||
# Check data is identical, sorting by uuid to guarantee unique order | ||
query I nosort q2-data | ||
SELECT * FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_01/pyspark_iceberg_table/metadata/v9.metadata.json') ORDER BY uuid; | ||
SELECT * FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v9.metadata.json') ORDER BY uuid; | ||
---- | ||
|
||
query I nosort q2-data | ||
SELECT * FROM PARQUET_SCAN('data/iceberg/generated_spec2_0_01/expected_results/q08/data/*.parquet') ORDER BY uuid; | ||
SELECT * FROM PARQUET_SCAN('data/iceberg/generated_spec2_0_001/expected_results/last/data/*.parquet') ORDER BY uuid; | ||
---- | ||
|
||
### Test schema evolution | ||
|
||
# Latest metadata version has correct type | ||
query IIIIII | ||
DESCRIBE SELECT schema_evol_added_col_1 FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_01/pyspark_iceberg_table/metadata/v9.metadata.json') ORDER BY uuid; | ||
DESCRIBE SELECT schema_evol_added_col_1 FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v9.metadata.json') ORDER BY uuid; | ||
---- | ||
schema_evol_added_col_1 BIGINT YES NULL NULL NULL | ||
|
||
# The previous metadata version (v8) still has the old INTEGER type | ||
query IIIIII | ||
DESCRIBE SELECT schema_evol_added_col_1 FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_01/pyspark_iceberg_table/metadata/v8.metadata.json') ORDER BY uuid; | ||
DESCRIBE SELECT schema_evol_added_col_1 FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v8.metadata.json') ORDER BY uuid; | ||
---- | ||
schema_evol_added_col_1 INTEGER YES NULL NULL NULL | ||
|
||
# Even older (v6): the column did not exist yet, so binding fails | ||
statement error | ||
DESCRIBE SELECT schema_evol_added_col_1 FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_01/pyspark_iceberg_table/metadata/v6.metadata.json') ORDER BY uuid; | ||
DESCRIBE SELECT schema_evol_added_col_1 FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v6.metadata.json') ORDER BY uuid; | ||
---- | ||
Binder Error | ||
Binder Error |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters