Skip to content

Commit

Permalink
Merge pull request #108 from duckdb/jray/handle-nested-list-vector-updates
Browse files Browse the repository at this point in the history

handle nested list vector updates
  • Loading branch information
jraymakers authored Jan 14, 2025
2 parents b780160 + 0b2c5a8 commit 497f6af
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 57 deletions.
131 changes: 74 additions & 57 deletions api/src/DuckDBVector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2709,27 +2709,33 @@ export class DuckDBEnum32Vector extends DuckDBVector<string> {
}

export class DuckDBListVector extends DuckDBVector<DuckDBListValue> {
private readonly parentList: DuckDBListVector | null;
private readonly listType: DuckDBListType;
private readonly entryData: BigUint64Array;
private readonly validity: DuckDBValidity;
private readonly vector: duckdb.Vector;
private childData: DuckDBVector;
private readonly itemOffset: number;
private readonly _itemCount: number;
private readonly itemCache: (DuckDBListValue | null | undefined)[];
/**
 * Stores the pieces that make up a list vector and starts with an empty item cache.
 *
 * @param parentList The list vector this one was sliced from, or `null` for a root vector.
 * @param listType The list type; its `valueType` is used when the child data is recreated.
 * @param entryData Two 64-bit values per item: offset, then length.
 * @param validity Validity information for the items of this vector.
 * @param vector The underlying DuckDB vector.
 * @param childData Vector holding the flattened child items.
 * @param itemOffset Index of this vector's first item within `parentList` (0 for a root vector).
 * @param itemCount Number of items in this vector.
 */
constructor(
  parentList: DuckDBListVector | null,
  listType: DuckDBListType,
  entryData: BigUint64Array,
  validity: DuckDBValidity,
  vector: duckdb.Vector,
  childData: DuckDBVector,
  itemOffset: number,
  itemCount: number
) {
  super();
  this.parentList = parentList;
  this.listType = listType;
  this.entryData = entryData;
  this.validity = validity;
  this.vector = vector;
  this.childData = childData;
  // Terminated with `;` (was `,`, which chained the next assignment via the comma operator).
  this.itemOffset = itemOffset;
  this._itemCount = itemCount;
  this.itemCache = [];
}
Expand Down Expand Up @@ -2759,11 +2765,13 @@ export class DuckDBListVector extends DuckDBVector<DuckDBListValue> {
);

return new DuckDBListVector(
null,
listType,
entryData,
validity,
vector,
childData,
0,
itemCount
);
}
Expand Down Expand Up @@ -2796,78 +2804,86 @@ export class DuckDBListVector extends DuckDBVector<DuckDBListValue> {
return item;
}
/**
 * Stages a new value for the item at `itemIndex`.
 *
 * The value is always recorded in this vector's item cache. A sliced vector (one with a
 * parent list) forwards the write to its parent at the corresponding absolute index; a
 * root vector records whether the item is valid (non-null).
 *
 * @param itemIndex Index of the item, relative to this vector.
 * @param value The list value to store, or `null` to mark the item invalid.
 */
public setItem(itemIndex: number, value: DuckDBListValue | null) {
  this.itemCache[itemIndex] = value;
  if (this.parentList) {
    // Translate the slice-relative index into the parent's index space.
    this.parentList.setItem(this.itemOffset + itemIndex, value);
  } else {
    this.validity.setItemValid(itemIndex, value != null);
  }
}
/**
 * Writes staged list values into the underlying DuckDB vector.
 *
 * A sliced vector (one with a parent list) flushes its parent and then resets its own
 * cached items. A root vector recomputes every entry's offset and length, resizes and
 * refills the child vector, and finally copies the entry data and validity to the vector.
 */
public flush() {
  if (this.parentList) {
    this.parentList.flush();
    // Reset this slice's cached items now that the parent has written its data.
    // NOTE(review): this slice's own entryData and childData are not refreshed here;
    // confirm whether callers are expected to re-slice after flushing.
    for (let i = 0; i < this.itemCount; i++) {
      this.itemCache[i] = undefined;
    }
  } else {
    // update entryData offset & lengths
    // calculate new child vector size (sum of all item lengths)
    let totalLength = 0;
    for (let itemIndex = 0; itemIndex < this._itemCount; itemIndex++) {
      const entryDataStartIndex = itemIndex * 2;
      this.entryData[entryDataStartIndex] = BigInt(totalLength);
      // ensure the cache is populated for all items
      const item = this.getItem(itemIndex);
      if (item) {
        this.entryData[entryDataStartIndex + 1] = BigInt(item.items.length);
        totalLength += item.items.length;
      } else {
        this.entryData[entryDataStartIndex + 1] = 0n;
      }
    }

    // set new child vector size
    duckdb.list_vector_set_size(this.vector, totalLength);

    // recreate childData after resize
    const child_vector = duckdb.list_vector_get_child(this.vector);
    const child_vector_size = duckdb.list_vector_get_size(this.vector);
    this.childData = DuckDBVector.create(
      child_vector,
      child_vector_size,
      this.listType.valueType
    );

    // set all childData items
    let childItemAbsoluteIndex = 0;
    for (let listIndex = 0; listIndex < this._itemCount; listIndex++) {
      const list = this.getItem(listIndex);
      if (list) {
        for (
          let childItemRelativeIndex = 0;
          childItemRelativeIndex < list.items.length;
          childItemRelativeIndex++
        ) {
          this.childData.setItem(
            childItemAbsoluteIndex++,
            list.items[childItemRelativeIndex]
          );
        }
      }
    }

    // copy childData to child vector
    this.childData.flush();

    // copy entryData to vector
    duckdb.copy_data_to_vector(
      this.vector,
      0,
      this.entryData.buffer as ArrayBuffer,
      this.entryData.byteOffset,
      this.entryData.byteLength
    );

    // flush validity
    this.validity.flush(this.vector);
  }
}
public override slice(offset: number, length: number): DuckDBListVector {
const entryDataStartIndex = offset * 2;
return new DuckDBListVector(
this,
this.listType,
this.entryData.slice(
entryDataStartIndex,
Expand All @@ -2876,6 +2892,7 @@ export class DuckDBListVector extends DuckDBVector<DuckDBListValue> {
this.validity.slice(offset, length),
this.vector,
this.childData,
offset,
length
);
}
Expand Down
57 changes: 57 additions & 0 deletions api/test/api.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1122,6 +1122,17 @@ describe('api', () => {
assert.equal(vector.getItem(1), 12345);
assert.equal(vector.getItem(2), null);
});
// Stages two lists and a null in a fresh LIST(INTEGER) vector, then reads them back.
test('write list vector', () => {
  const dataChunk = DuckDBDataChunk.create([LIST(INTEGER)], 3);
  const listVector = dataChunk.getColumnVector(0) as DuckDBListVector;
  assert.equal(listVector.itemCount, 3);

  // Write all three items before reading any of them back.
  const staged = [listValue([10, 11, 12]), listValue([20, 21, 22]), null];
  staged.forEach((value, index) => {
    listVector.setItem(index, value);
  });

  // Expected lists are built separately so the comparison is structural.
  const firstRead = listVector.getItem(0);
  assert.deepEqual(firstRead, listValue([10, 11, 12]));
  const secondRead = listVector.getItem(1);
  assert.deepEqual(secondRead, listValue([20, 21, 22]));
  const thirdRead = listVector.getItem(2);
  assert.equal(thirdRead, null);
});
test('create and append data chunk', async () => {
await withConnection(async (connection) => {
const values = [42, 12345, null];
Expand Down Expand Up @@ -1280,6 +1291,52 @@ describe('api', () => {
}
});
});
// Fills a LIST(LIST(INTEGER)) chunk, rewrites one inner list through a nested item
// vector, appends the chunk to a table, and checks that the table holds the modified rows.
test('create and append data chunk , modify nested list vector', async () => {
  await withConnection(async (connection) => {
    const firstRow = listValue([
      listValue([110, 111]),
      listValue([]),
      listValue([130]),
    ]);
    const secondRow = listValue([]);
    const thirdRow = listValue([
      listValue([310, 311, 312]),
      listValue([320, 321]),
      listValue([330, 331, 332, 333]),
    ]);
    const originalValues = [firstRow, secondRow, thirdRow];

    const chunk = DuckDBDataChunk.create(
      [LIST(LIST(INTEGER))],
      originalValues.length
    );
    chunk.setColumnValues(0, originalValues);

    // Replace the middle inner list of the third row via the nested vector.
    const outer = chunk.getColumnVector(0) as DuckDBListVector;
    const inner = outer.getItemVector(2) as DuckDBListVector;
    inner.setItem(1, listValue([350, 351, 352, 353, 354]));
    inner.flush();

    // Expected rows: identical to the originals except for the third row.
    const expectedThirdRow = listValue([
      listValue([310, 311, 312]),
      listValue([350, 351, 352, 353, 354]),
      listValue([330, 331, 332, 333]),
    ]);
    const modifiedValues = originalValues.map((row, rowIndex) =>
      rowIndex === 2 ? expectedThirdRow : row
    );

    await connection.run('create table target(col0 integer[][])');
    const appender = await connection.createAppender('main', 'target');
    appender.appendDataChunk(chunk);
    appender.flush();

    const result = await connection.run('from target');
    const resultChunk = await result.fetchChunk();
    assert.isDefined(resultChunk);
    if (resultChunk) {
      assert.equal(resultChunk.columnCount, 1);
      assert.equal(resultChunk.rowCount, modifiedValues.length);
      assertValues(resultChunk, 0, DuckDBListVector, modifiedValues);
    }
  });
});
test('create and append data chunk with arrays of integers', async () => {
await withConnection(async (connection) => {
const values = [
Expand Down

0 comments on commit 497f6af

Please sign in to comment.