From e0fee1edb34abcd6849f5ae57800580b3a8c8674 Mon Sep 17 00:00:00 2001 From: Yuriy Chernyshov Date: Thu, 16 Jan 2025 15:55:45 +0800 Subject: [PATCH] ORC-1834: [C++] Fix undefined behavior ### What changes were proposed in this pull request? Unaligned reads are UB in C++, and memcpy-ing zero bytes is UB as well. ### How was this patch tested? An internal UBSan report was used to detect and fix this bug. Closes #2112 from georgthegreat/patch-3. Authored-by: Yuriy Chernyshov Signed-off-by: Gang Wu (cherry picked from commit ab084b507a70d4da16ef1dc7cfd0fec186083761) Signed-off-by: Dongjoon Hyun --- c++/src/ColumnReader.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc index e70f916ffd..af434c37ca 100644 --- a/c++/src/ColumnReader.cc +++ b/c++/src/ColumnReader.cc @@ -395,7 +395,7 @@ namespace orc { int64_t bits = 0; if (bufferEnd_ - bufferPointer_ >= 8) { if (isLittleEndian) { - bits = *(reinterpret_cast(bufferPointer_)); + memcpy(&bits, bufferPointer_, sizeof(bits)); } else { bits = static_cast(static_cast(bufferPointer_[0])); bits |= static_cast(static_cast(bufferPointer_[1])) << 8; @@ -509,8 +509,10 @@ namespace orc { bufferNum = std::min(numValues, static_cast(bufferEnd_ - bufferPointer_) / bytesPerValue_); uint64_t bufferBytes = bufferNum * bytesPerValue_; - memcpy(outArray, bufferPointer_, bufferBytes); - bufferPointer_ += bufferBytes; + if (bufferBytes > 0) { + memcpy(outArray, bufferPointer_, bufferBytes); + bufferPointer_ += bufferBytes; + } } for (size_t i = bufferNum; i < numValues; ++i) { outArray[i] = readDouble();