Skip to content

Commit

Permalink
GH-44626: [Java] fix SplitAndTransfer throws for empty MapVector (#44627
Browse files Browse the repository at this point in the history
)

### Rationale for this change

Calling `splitAndTransfer` on an empty MapVector throws `java.lang.IndexOutOfBoundsException`. See #44626 for details.

### What changes are included in this PR?

Applied to MapVector the same fix that was made for other vector types in #41066.

### Are these changes tested?

Added a unit test that mimics the scenario we observed, in which the MapVector's offset buffer capacity is 0.
* GitHub Issue: #44626

Authored-by: Maksim Yegorov <[email protected]>
Signed-off-by: David Li <[email protected]>
  • Loading branch information
myegorov authored Nov 8, 2024
1 parent b193c4f commit 5fe87a3
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -211,23 +211,25 @@ public void splitAndTransfer(int startIndex, int length) {
startIndex,
length,
valueCount);
final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH);
final int sliceLength =
offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint;
to.clear();
to.offsetBuffer = to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH);
/* splitAndTransfer offset buffer */
for (int i = 0; i < length + 1; i++) {
final int relativeOffset =
offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint;
to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset);
if (length > 0) {
final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH);
final int sliceLength =
offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint;
to.offsetBuffer = to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH);
/* splitAndTransfer offset buffer */
for (int i = 0; i < length + 1; i++) {
final int relativeOffset =
offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint;
to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset);
}
/* splitAndTransfer validity buffer */
splitAndTransferValidityBuffer(startIndex, length, to);
/* splitAndTransfer data buffer */
dataTransferPair.splitAndTransfer(startPoint, sliceLength);
to.lastSet = length - 1;
to.setValueCount(length);
}
/* splitAndTransfer validity buffer */
splitAndTransferValidityBuffer(startIndex, length, to);
/* splitAndTransfer data buffer */
dataTransferPair.splitAndTransfer(startPoint, sliceLength);
to.lastSet = length - 1;
to.setValueCount(length);
}

/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,17 @@
*/
package org.apache.arrow.vector;

import static java.util.Arrays.asList;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
Expand All @@ -36,6 +39,7 @@
import org.apache.arrow.vector.complex.UnionVector;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.util.TransferPair;
import org.junit.jupiter.api.AfterEach;
Expand Down Expand Up @@ -223,6 +227,40 @@ public void testWithZeroVector() {
// no allocations to clear for ZeroVector
}

/**
 * Regression test for GH-44626: splitAndTransfer on a ListVector whose child MapVector is empty
 * and whose offset buffer has zero capacity must complete and report the requested value count.
 *
 * <p>Both vectors are managed with try-with-resources so their buffers are released even when
 * splitAndTransfer or the assertion fails; otherwise the real failure would be followed by a
 * leak report for the buffers that were never cleared.
 */
@Test
public void testListVectorWithEmptyMapVector() {
  // List<element: Map(false)<entries: Struct<key: Utf8 not null, value: Utf8> not null>>
  final int valueCount = 1;

  // Struct children of the map entries: a non-nullable key and a nullable value.
  List<Field> children = new ArrayList<>();
  children.add(new Field("key", FieldType.notNullable(new ArrowType.Utf8()), null));
  children.add(new Field("value", FieldType.nullable(new ArrowType.Utf8()), null));
  Field structField =
      new Field("entries", FieldType.notNullable(ArrowType.Struct.INSTANCE), children);

  Field mapField =
      new Field("element", FieldType.notNullable(new ArrowType.Map(false)), asList(structField));

  Field listField = new Field("list", FieldType.nullable(new ArrowType.List()), asList(mapField));

  try (ListVector fromListVector = (ListVector) listField.createVector(allocator)) {
    fromListVector.allocateNew();
    fromListVector.setValueCount(valueCount);

    // child vector is empty
    MapVector dataVector = (MapVector) fromListVector.getDataVector();
    dataVector.allocateNew();
    // unset capacity to mimic observed failure mode
    dataVector.getOffsetBuffer().capacity(0);

    TransferPair transferPair = fromListVector.getTransferPair(fromListVector.getAllocator());
    // Take ownership of the target before transferring so it is closed on every exit path.
    try (ListVector toListVector = (ListVector) transferPair.getTo()) {
      transferPair.splitAndTransfer(0, valueCount);

      assertEquals(valueCount, toListVector.getValueCount());
    }
  }
}

@Test /* VarCharVector */
public void test() throws Exception {
try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) {
Expand Down

0 comments on commit 5fe87a3

Please sign in to comment.