From ffc6b16f4b61e0796b4773f7ef26c027b7363e97 Mon Sep 17 00:00:00 2001 From: yebukong Date: Fri, 24 Nov 2023 19:03:51 +0800 Subject: [PATCH 1/8] Fix readBytes potential overflow in RecordReaderUtils#create --- .../apache/orc/impl/RecordReaderUtils.java | 21 ++++++++---- .../orc/impl/TestRecordReaderUtils.java | 32 +++++++++++++++++++ 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java b/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java index 8c37246b75..a50234fcc1 100644 --- a/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java +++ b/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java @@ -770,18 +770,18 @@ void readRanges(FSDataInputStream file, boolean allocateDirect, double extraByte } static ChunkReader create(BufferChunk from, BufferChunk to) { - long f = Integer.MAX_VALUE; - long e = Integer.MIN_VALUE; + long f = Long.MAX_VALUE; + long e = Long.MIN_VALUE; - long cf = Integer.MAX_VALUE; - long ef = Integer.MIN_VALUE; - int reqBytes = 0; + long cf = Long.MAX_VALUE; + long ef = Long.MIN_VALUE; + long reqBytes = 0L; BufferChunk current = from; while (current != to.next) { f = Math.min(f, current.getOffset()); e = Math.max(e, current.getEnd()); - if (ef == Integer.MIN_VALUE || current.getOffset() <= ef) { + if (ef == Long.MIN_VALUE || current.getOffset() <= ef) { cf = Math.min(cf, current.getOffset()); ef = Math.max(ef, current.getEnd()); } else { @@ -792,7 +792,14 @@ static ChunkReader create(BufferChunk from, BufferChunk to) { current = (BufferChunk) current.next; } reqBytes += ef - cf; - return new ChunkReader(from, to, (int) (e - f), reqBytes); + if (reqBytes >= Integer.MAX_VALUE) { + throw new IllegalArgumentException("invalid reqBytes value " + reqBytes + ",out of bounds " + Integer.MAX_VALUE); + } + long readBytes = e - f; + if (readBytes >= Integer.MAX_VALUE) { + throw new IllegalArgumentException("invalid readBytes value " + readBytes + ",out of bounds " + Integer.MAX_VALUE); + } + return new ChunkReader(from, to, (int) readBytes, (int) reqBytes); } static ChunkReader create(BufferChunk from, int minSeekSize) { diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java index d5f3550d3d..8a01623d6a 100644 --- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java +++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java @@ -25,6 +25,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.Arrays; +import java.util.List; import java.util.Objects; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -177,6 +178,37 @@ public void execute() throws Throwable { assertTrue(dis.isAllReleased()); } + @Test + public void testBufferChunkOffsetExceedsMaxInt() { + List mockData = Arrays.asList( + new long[]{15032282586L, 15032298848L} + , new long[]{15032298848L, 15032299844L} + , new long[]{15032299844L, 15032377804L} + , new long[]{15058260587L, 15058261632L} + , new long[]{15058261632L, 15058288409L} + , new long[]{15058288409L, 15058288862L} + , new long[]{15058339730L, 15058340775L} + , new long[]{15058340775L, 15058342439L} + , new long[]{15058449794L, 15058449982L} + , new long[]{15058449982L, 15058451700L} + , new long[]{15058451700L, 15058451749L} + , new long[]{15058484358L, 15058484422L} + , new long[]{15058484422L, 15058484862L} + , new long[]{15058484862L, 15058484878L} + ); + TestOrcLargeStripe.RangeBuilder rangeBuilder = new TestOrcLargeStripe.RangeBuilder(); + mockData.forEach(e -> rangeBuilder.range(e[0], (int) (e[1] - e[0]))); + BufferChunkList rangeList = rangeBuilder.build(); + + RecordReaderUtils.ChunkReader chunkReader = + RecordReaderUtils.ChunkReader.create(rangeList.get(), + 134217728); + long readBytes = mockData.get(mockData.size() - 1)[1] - mockData.get(0)[0]; + long reqBytes = mockData.stream().mapToLong(e -> (int) (e[1] - e[0])).sum(); + assertEquals(chunkReader.getReadBytes(), readBytes); + assertEquals(chunkReader.getReqBytes(), reqBytes); + } + private static byte[] byteBufferToArray(ByteBuffer buf) { byte[] resultArray = new byte[buf.remaining()]; ByteBuffer buffer = buf.slice(); From 88aecc6b4460253a0402b706d45451abf35b1494 Mon Sep 17 00:00:00 2001 From: yebukong Date: Sat, 25 Nov 2023 02:17:52 +0800 Subject: [PATCH 2/8] rename variables --- .../apache/orc/impl/RecordReaderUtils.java | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java b/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java index a50234fcc1..2c1955128b 100644 --- a/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java +++ b/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java @@ -770,32 +770,32 @@ void readRanges(FSDataInputStream file, boolean allocateDirect, double extraByte } static ChunkReader create(BufferChunk from, BufferChunk to) { - long f = Long.MAX_VALUE; - long e = Long.MIN_VALUE; + long start = Long.MAX_VALUE; + long end = Long.MIN_VALUE; - long cf = Long.MAX_VALUE; - long ef = Long.MIN_VALUE; + long currentStart = Long.MAX_VALUE; + long currentEnd = Long.MIN_VALUE; long reqBytes = 0L; BufferChunk current = from; while (current != to.next) { - f = Math.min(f, current.getOffset()); - e = Math.max(e, current.getEnd()); - if (ef == Long.MIN_VALUE || current.getOffset() <= ef) { - cf = Math.min(cf, current.getOffset()); - ef = Math.max(ef, current.getEnd()); + start = Math.min(start, current.getOffset()); + end = Math.max(end, current.getEnd()); + if (currentEnd == Long.MIN_VALUE || current.getOffset() <= currentEnd) { + currentStart = Math.min(currentStart, current.getOffset()); + currentEnd = Math.max(currentEnd, current.getEnd()); } else { - reqBytes += ef - cf; - cf = current.getOffset(); - ef = current.getEnd(); + reqBytes += currentEnd - currentStart; + currentStart = current.getOffset(); + currentEnd = current.getEnd(); } current = (BufferChunk) current.next; } - reqBytes += ef - cf; + reqBytes += currentEnd - currentStart; if (reqBytes >= Integer.MAX_VALUE) { throw new IllegalArgumentException("invalid reqBytes value " + reqBytes + ",out of bounds " + Integer.MAX_VALUE); } - long readBytes = e - f; + long readBytes = end - start; if (readBytes >= Integer.MAX_VALUE) { throw new IllegalArgumentException("invalid readBytes value " + readBytes + ",out of bounds " + Integer.MAX_VALUE); } From a9f325f996febc7b368d8256dcb61fa8c4f7f622 Mon Sep 17 00:00:00 2001 From: yebukong Date: Mon, 27 Nov 2023 14:14:26 +0800 Subject: [PATCH 3/8] Revert "rename variables" This reverts commit 88aecc6b4460253a0402b706d45451abf35b1494. --- .../apache/orc/impl/RecordReaderUtils.java | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java b/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java index 2c1955128b..a50234fcc1 100644 --- a/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java +++ b/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java @@ -770,32 +770,32 @@ void readRanges(FSDataInputStream file, boolean allocateDirect, double extraByte } static ChunkReader create(BufferChunk from, BufferChunk to) { - long start = Long.MAX_VALUE; - long end = Long.MIN_VALUE; + long f = Long.MAX_VALUE; + long e = Long.MIN_VALUE; - long currentStart = Long.MAX_VALUE; - long currentEnd = Long.MIN_VALUE; + long cf = Long.MAX_VALUE; + long ef = Long.MIN_VALUE; long reqBytes = 0L; BufferChunk current = from; while (current != to.next) { - start = Math.min(start, current.getOffset()); - end = Math.max(end, current.getEnd()); - if (currentEnd == Long.MIN_VALUE || current.getOffset() <= currentEnd) { - currentStart = Math.min(currentStart, current.getOffset()); - currentEnd = Math.max(currentEnd, current.getEnd()); + f = Math.min(f, current.getOffset()); + e = Math.max(e, current.getEnd()); + if (ef == Long.MIN_VALUE || current.getOffset() <= ef) { + cf = Math.min(cf, current.getOffset()); + ef = Math.max(ef, current.getEnd()); } else { - reqBytes += currentEnd - currentStart; - currentStart = current.getOffset(); - currentEnd = current.getEnd(); + reqBytes += ef - cf; + cf = current.getOffset(); + ef = current.getEnd(); } current = (BufferChunk) current.next; } - reqBytes += currentEnd - currentStart; + reqBytes += ef - cf; if (reqBytes >= Integer.MAX_VALUE) { throw new IllegalArgumentException("invalid reqBytes value " + reqBytes + ",out of bounds " + Integer.MAX_VALUE); } - long readBytes = end - start; + long readBytes = e - f; if (readBytes >= Integer.MAX_VALUE) { throw new IllegalArgumentException("invalid readBytes value " + readBytes + ",out of bounds " + Integer.MAX_VALUE); } From c6cc17783cc14f8309dd4314b55b0ac33551915a Mon Sep 17 00:00:00 2001 From: yebukong Date: Mon, 27 Nov 2023 14:32:48 +0800 Subject: [PATCH 4/8] adjusting code style --- .../orc/impl/TestRecordReaderUtils.java | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java index 8a01623d6a..308f291b43 100644 --- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java +++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java @@ -181,28 +181,27 @@ public void execute() throws Throwable { @Test public void testBufferChunkOffsetExceedsMaxInt() { List mockData = Arrays.asList( - new long[]{15032282586L, 15032298848L} - , new long[]{15032298848L, 15032299844L} - , new long[]{15032299844L, 15032377804L} - , new long[]{15058260587L, 15058261632L} - , new long[]{15058261632L, 15058288409L} - , new long[]{15058288409L, 15058288862L} - , new long[]{15058339730L, 15058340775L} - , new long[]{15058340775L, 15058342439L} - , new long[]{15058449794L, 15058449982L} - , new long[]{15058449982L, 15058451700L} - , new long[]{15058451700L, 15058451749L} - , new long[]{15058484358L, 15058484422L} - , new long[]{15058484422L, 15058484862L} - , new long[]{15058484862L, 15058484878L} + new long[]{15032282586L, 15032298848L}, + new long[]{15032298848L, 15032299844L}, + new long[]{15032299844L, 15032377804L}, + new long[]{15058260587L, 15058261632L}, + new long[]{15058261632L, 15058288409L}, + new long[]{15058288409L, 15058288862L}, + new long[]{15058339730L, 15058340775L}, + new long[]{15058340775L, 15058342439L}, + new long[]{15058449794L, 15058449982L}, + new long[]{15058449982L, 15058451700L}, + new long[]{15058451700L, 15058451749L}, + new long[]{15058484358L, 15058484422L}, + new long[]{15058484422L, 15058484862L}, + new long[]{15058484862L, 15058484878L} ); TestOrcLargeStripe.RangeBuilder rangeBuilder = new TestOrcLargeStripe.RangeBuilder(); mockData.forEach(e -> rangeBuilder.range(e[0], (int) (e[1] - e[0]))); BufferChunkList rangeList = rangeBuilder.build(); RecordReaderUtils.ChunkReader chunkReader = - RecordReaderUtils.ChunkReader.create(rangeList.get(), - 134217728); + RecordReaderUtils.ChunkReader.create(rangeList.get(), 134217728); long readBytes = mockData.get(mockData.size() - 1)[1] - mockData.get(0)[0]; long reqBytes = mockData.stream().mapToLong(e -> (int) (e[1] - e[0])).sum(); assertEquals(chunkReader.getReadBytes(), readBytes); From 1869d7254ddb04e59f4ea9be237a14a08d9b252c Mon Sep 17 00:00:00 2001 From: yebukong Date: Mon, 27 Nov 2023 14:36:40 +0800 Subject: [PATCH 5/8] adjust validation for reqBytes and readBytes to MAX_ARRAY_SIZE --- java/core/src/java/org/apache/orc/impl/IOUtils.java | 5 +++++ .../src/java/org/apache/orc/impl/RecordReaderUtils.java | 8 ++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/java/core/src/java/org/apache/orc/impl/IOUtils.java b/java/core/src/java/org/apache/orc/impl/IOUtils.java index d580294154..d28ea6c663 100644 --- a/java/core/src/java/org/apache/orc/impl/IOUtils.java +++ b/java/core/src/java/org/apache/orc/impl/IOUtils.java @@ -30,6 +30,11 @@ public final class IOUtils { public static final int DEFAULT_BUFFER_SIZE = 8192; + /** + * The maximum size of array to allocate, value being the same as {@link java.util.Hashtable} + */ + public static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8; + /** * Returns a new byte array of size {@link #DEFAULT_BUFFER_SIZE}. * diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java b/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java index a50234fcc1..f99773b2f4 100644 --- a/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java +++ b/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java @@ -792,12 +792,12 @@ static ChunkReader create(BufferChunk from, BufferChunk to) { current = (BufferChunk) current.next; } reqBytes += ef - cf; - if (reqBytes >= Integer.MAX_VALUE) { - throw new IllegalArgumentException("invalid reqBytes value " + reqBytes + ",out of bounds " + Integer.MAX_VALUE); + if (reqBytes >= IOUtils.MAX_ARRAY_SIZE) { + throw new IllegalArgumentException("invalid reqBytes value " + reqBytes + ",out of bounds " + IOUtils.MAX_ARRAY_SIZE); } long readBytes = e - f; - if (readBytes >= Integer.MAX_VALUE) { - throw new IllegalArgumentException("invalid readBytes value " + readBytes + ",out of bounds " + Integer.MAX_VALUE); + if (readBytes >= IOUtils.MAX_ARRAY_SIZE) { + throw new IllegalArgumentException("invalid readBytes value " + readBytes + ",out of bounds " + IOUtils.MAX_ARRAY_SIZE); } return new ChunkReader(from, to, (int) readBytes, (int) reqBytes); } From 4f9dfc26b56fd771a53dece90bdeda6ae6752017 Mon Sep 17 00:00:00 2001 From: yebukong Date: Mon, 27 Nov 2023 21:09:26 +0800 Subject: [PATCH 6/8] adjust validation conditions --- java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java b/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java index f99773b2f4..1b76f0d8b7 100644 --- a/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java +++ b/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java @@ -792,11 +792,11 @@ static ChunkReader create(BufferChunk from, BufferChunk to) { current = (BufferChunk) current.next; } reqBytes += ef - cf; - if (reqBytes >= IOUtils.MAX_ARRAY_SIZE) { + if (reqBytes > IOUtils.MAX_ARRAY_SIZE) { throw new IllegalArgumentException("invalid reqBytes value " + reqBytes + ",out of bounds " + IOUtils.MAX_ARRAY_SIZE); } long readBytes = e - f; - if (readBytes >= IOUtils.MAX_ARRAY_SIZE) { + if (readBytes > IOUtils.MAX_ARRAY_SIZE) { throw new IllegalArgumentException("invalid readBytes value " + readBytes + ",out of bounds " + IOUtils.MAX_ARRAY_SIZE); } return new ChunkReader(from, to, (int) readBytes, (int) reqBytes); From 26b35016c7c2c3ea03ff502151f34807acb58dd1 Mon Sep 17 00:00:00 2001 From: yebukong Date: Mon, 27 Nov 2023 21:11:13 +0800 Subject: [PATCH 7/8] readBytes and reqBytes validation UT --- .../orc/impl/TestRecordReaderUtils.java | 60 ++++++++++++++----- 1 file changed, 44 insertions(+), 16 deletions(-) diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java index 308f291b43..a46602dd88 100644 --- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java +++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java @@ -181,33 +181,61 @@ public void execute() throws Throwable { @Test public void testBufferChunkOffsetExceedsMaxInt() { List mockData = Arrays.asList( - new long[]{15032282586L, 15032298848L}, - new long[]{15032298848L, 15032299844L}, - new long[]{15032299844L, 15032377804L}, - new long[]{15058260587L, 15058261632L}, - new long[]{15058261632L, 15058288409L}, - new long[]{15058288409L, 15058288862L}, - new long[]{15058339730L, 15058340775L}, - new long[]{15058340775L, 15058342439L}, - new long[]{15058449794L, 15058449982L}, - new long[]{15058449982L, 15058451700L}, - new long[]{15058451700L, 15058451749L}, - new long[]{15058484358L, 15058484422L}, - new long[]{15058484422L, 15058484862L}, - new long[]{15058484862L, 15058484878L} + new long[]{15032282586L, 15032298848L}, + new long[]{15032298848L, 15032299844L}, + new long[]{15032299844L, 15032377804L}, + new long[]{15058260587L, 15058261632L}, + new long[]{15058261632L, 15058288409L}, + new long[]{15058288409L, 15058288862L}, + new long[]{15058339730L, 15058340775L}, + new long[]{15058340775L, 15058342439L}, + new long[]{15058449794L, 15058449982L}, + new long[]{15058449982L, 15058451700L}, + new long[]{15058451700L, 15058451749L}, + new long[]{15058484358L, 15058484422L}, + new long[]{15058484422L, 15058484862L}, + new long[]{15058484862L, 15058484878L} ); TestOrcLargeStripe.RangeBuilder rangeBuilder = new TestOrcLargeStripe.RangeBuilder(); mockData.forEach(e -> rangeBuilder.range(e[0], (int) (e[1] - e[0]))); BufferChunkList rangeList = rangeBuilder.build(); - RecordReaderUtils.ChunkReader chunkReader = - RecordReaderUtils.ChunkReader.create(rangeList.get(), 134217728); + RecordReaderUtils.ChunkReader chunkReader = RecordReaderUtils.ChunkReader.create(rangeList.get(), 134217728); long readBytes = mockData.get(mockData.size() - 1)[1] - mockData.get(0)[0]; long reqBytes = mockData.stream().mapToLong(e -> (int) (e[1] - e[0])).sum(); assertEquals(chunkReader.getReadBytes(), readBytes); assertEquals(chunkReader.getReqBytes(), reqBytes); } + @Test + public void testBufferChunkCreateReqBytesReadBytesValidation() { + BufferChunkList rangeList = new TestOrcLargeStripe.RangeBuilder() + .range(0, IOUtils.MAX_ARRAY_SIZE) + .range(1L + IOUtils.MAX_ARRAY_SIZE, IOUtils.MAX_ARRAY_SIZE + 1) + .range(2L * IOUtils.MAX_ARRAY_SIZE, IOUtils.MAX_ARRAY_SIZE - 4) + .range(3L * IOUtils.MAX_ARRAY_SIZE, 2) + .build(); + + // reqBytes,readBytes boundary value + RecordReaderUtils.ChunkReader chunkReader = RecordReaderUtils.ChunkReader.create(rangeList.get(0), 0); + assertEquals(chunkReader.getReadBytes(), IOUtils.MAX_ARRAY_SIZE); + assertEquals(chunkReader.getReqBytes(), IOUtils.MAX_ARRAY_SIZE); + + // reqBytes > IOUtils.MAX_ARRAY_SIZE validation + assertThrowsExactly(IllegalArgumentException.class, + () -> RecordReaderUtils.ChunkReader.create(rangeList.get(1), 0), + () -> String.format("invalid reqBytes value %d,out of bounds %d", + rangeList.get(1).getLength(), IOUtils.MAX_ARRAY_SIZE) + ); + + // readBytes > IOUtils.MAX_ARRAY_SIZE validation + assertThrowsExactly(IllegalArgumentException.class, + () -> RecordReaderUtils.ChunkReader.create(rangeList.get(2), 100), + () -> String.format("invalid readBytes value %d,out of bounds %d", + rangeList.get(3).getEnd() - rangeList.get(2).getOffset(), IOUtils.MAX_ARRAY_SIZE) + ); + } + private static byte[] byteBufferToArray(ByteBuffer buf) { byte[] resultArray = new byte[buf.remaining()]; ByteBuffer buffer = buf.slice(); From 0e6c74abc1c12c5f27eb38d6000cdf0d974d4ccb Mon Sep 17 00:00:00 2001 From: yebukong Date: Tue, 28 Nov 2023 00:16:02 +0800 Subject: [PATCH 8/8] rename the UT methods --- .../src/test/org/apache/orc/impl/TestRecordReaderUtils.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java index a46602dd88..54a961550d 100644 --- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java +++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java @@ -179,7 +179,7 @@ public void execute() throws Throwable { } @Test - public void testBufferChunkOffsetExceedsMaxInt() { + public void testChunkReaderCreateOffsetExceedsMaxInt() { List mockData = Arrays.asList( new long[]{15032282586L, 15032298848L}, new long[]{15032298848L, 15032299844L}, @@ -208,7 +208,7 @@ public void testBufferChunkOffsetExceedsMaxInt() { } @Test - public void testBufferChunkCreateReqBytesReadBytesValidation() { + public void testChunkReaderCreateReqBytesAndReadBytesValidation() { BufferChunkList rangeList = new TestOrcLargeStripe.RangeBuilder() .range(0, IOUtils.MAX_ARRAY_SIZE) .range(1L + IOUtils.MAX_ARRAY_SIZE, IOUtils.MAX_ARRAY_SIZE + 1)