From 5f9843f7c94befe4bacd39ea6e67dce92ebb75cd Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Tue, 19 Nov 2024 13:59:04 +0100 Subject: [PATCH] fix tests, timestamp precision --- src/excel/xlsx/read_xlsx.cpp | 25 ++++++++++++++++--------- test/sql/excel/xlsx/limits.test | 2 +- test/sql/excel/xlsx/read_sparse.test | 12 ++++++------ 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/excel/xlsx/read_xlsx.cpp b/src/excel/xlsx/read_xlsx.cpp index 041ef42..3a56669 100644 --- a/src/excel/xlsx/read_xlsx.cpp +++ b/src/excel/xlsx/read_xlsx.cpp @@ -372,23 +372,30 @@ static unique_ptr InitGlobal(ClientContext &context, T int64_t ExcelToEpochUS(const double serial) { // Convert to microseconds since epoch - static constexpr auto SECONDS_PER_DAY = 86400; - static constexpr auto MICROSECONDS_PER_SECOND = 1000000; - static constexpr auto DAYS_BETWEEN_1900_AND_1970 = 25569; + static constexpr auto SECONDS_PER_DAY = 86400UL; + static constexpr auto MICROSECONDS_PER_SECOND = 1000000UL; + static constexpr auto DAYS_BETWEEN_1900_AND_1970 = 25569UL; // Excel serial is days since 1900-01-01 - const auto days = serial - DAYS_BETWEEN_1900_AND_1970; - const auto seconds = days * SECONDS_PER_DAY; - const auto micros = seconds * MICROSECONDS_PER_SECOND; + const auto serial_days = serial; + auto serial_secs = serial_days * SECONDS_PER_DAY; + + if (std::fabs(serial_secs - std::round(serial_secs)) < 1e-3) { + serial_secs = std::round(serial_secs); + } + + const auto epoch_secs = serial_secs - (DAYS_BETWEEN_1900_AND_1970 * SECONDS_PER_DAY); + const auto epoch_micros = epoch_secs * MICROSECONDS_PER_SECOND; // Clamp to the range. 
Theres not much we can do if the value is out of range - if (micros <= static_cast(NumericLimits::Minimum())) { + if (epoch_micros <= static_cast(NumericLimits::Minimum())) { return NumericLimits::Minimum(); } - if (micros >= static_cast(NumericLimits::Maximum())) { + if (epoch_micros >= static_cast(NumericLimits::Maximum())) { return NumericLimits::Maximum(); } - return static_cast(micros); + + return static_cast(epoch_micros); } static void TryCast(XLSXGlobalState &state, bool ignore_errors, const idx_t col_idx, ClientContext &context, diff --git a/test/sql/excel/xlsx/limits.test b/test/sql/excel/xlsx/limits.test index ef67ee3..b11a6e4 100644 --- a/test/sql/excel/xlsx/limits.test +++ b/test/sql/excel/xlsx/limits.test @@ -16,4 +16,4 @@ TO '__TEST_DIR__/test_limit.xlsx' (FORMAT 'XLSX', sheet_name 'test'); ---- Invalid Input Error: XLSX: Sheet row limit of '1048576' rows exceeded! * XLSX files and compatible applications generally have a limit of '1048576' rows - * You can export larger sheets on your own risk by setting the 'sheet_row_limit' parameter to a higher value + * You can export larger sheets at your own risk by setting the 'sheet_row_limit' parameter to a higher value diff --git a/test/sql/excel/xlsx/read_sparse.test b/test/sql/excel/xlsx/read_sparse.test index a966824..4390169 100644 --- a/test/sql/excel/xlsx/read_sparse.test +++ b/test/sql/excel/xlsx/read_sparse.test @@ -5,16 +5,16 @@ query I SELECT * FROM read_xlsx('test/data/xlsx/sparse.xlsx'); ---- -query I -DESCRIBE SELECT column_name, column_type FROM read_xlsx('test/data/xlsx/sparse.xlsx'); +query II +SELECT column_name, column_type FROM (DESCRIBE FROM read_xlsx('test/data/xlsx/sparse.xlsx')); ---- -duckdb DOUBLE +duck DOUBLE # But if we disable the header, we can read the cell -query I -DESCRIBE SELECT column_name, column_type FROM read_xlsx('test/data/xlsx/sparse.xlsx', header = false); +query II +SELECT column_name, column_type FROM (DESCRIBE FROM read_xlsx('test/data/xlsx/sparse.xlsx', header 
= false)); ---- -R465 +R465 VARCHAR # Voila! query I