From 29138aa947133128b4a2600817d56afa8ef62c65 Mon Sep 17 00:00:00 2001 From: Sweta Vooda Date: Thu, 11 Apr 2024 20:13:42 +0000 Subject: [PATCH 1/4] optimized zero vector flushing --- src/pinecone/pinecone.h | 2 +- src/pinecone/pinecone_build.c | 1 + src/pinecone/pinecone_insert.c | 6 +- src/pinecone/pinecone_scan.c | 2 +- src/pinecone/pinecone_utils.c | 26 +--- src/pinecone/pinecone_validate.c | 6 +- test/expected/pinecone_zero_vector_insert.out | 117 +++++++++++++++--- test/sql/pinecone_zero_vector_insert.sql | 85 ++++++++++--- 8 files changed, 176 insertions(+), 69 deletions(-) diff --git a/src/pinecone/pinecone.h b/src/pinecone/pinecone.h index c67d8019..484fd43f 100644 --- a/src/pinecone/pinecone.h +++ b/src/pinecone/pinecone.h @@ -203,7 +203,7 @@ IndexBulkDeleteResult *no_vacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteRe void pinecone_spec_validator(const char *spec); void pinecone_host_validator(const char *spec); void validate_api_key(void); -void validate_vector_nonzero(Vector* vector); +bool validate_vector_nonzero(Vector* vector); bool no_validate(Oid opclassoid); // utils diff --git a/src/pinecone/pinecone_build.c b/src/pinecone/pinecone_build.c index 1bc3f8dc..fbdeea01 100644 --- a/src/pinecone/pinecone_build.c +++ b/src/pinecone/pinecone_build.c @@ -153,6 +153,7 @@ void pinecone_build_callback(Relation index, ItemPointer tid, Datum *values, boo cJSON *json_vector; char* pinecone_id = pinecone_id_from_heap_tid(*tid); json_vector = tuple_get_pinecone_vector(itup_desc, values, isnull, pinecone_id); + if(json_vector==NULL) return; cJSON_AddItemToArray(buildstate->json_vectors, json_vector); if (cJSON_GetArraySize(buildstate->json_vectors) >= PINECONE_BATCH_SIZE) { pinecone_bulk_upsert(pinecone_api_key, buildstate->host, buildstate->json_vectors, pinecone_vectors_per_request); diff --git a/src/pinecone/pinecone_insert.c b/src/pinecone/pinecone_insert.c index 3f08817c..c5bd0ada 100644 --- a/src/pinecone/pinecone_insert.c +++ 
b/src/pinecone/pinecone_insert.c @@ -156,15 +156,12 @@ bool AppendBufferTupleInCtx(Relation index, Datum *values, bool *isnull, ItemPoi MemoryContext oldCtx; MemoryContext insertCtx; bool checkpoint_created; - Vector* vector; // use a memory context because index_form_tuple can allocate insertCtx = AllocSetContextCreate(CurrentMemoryContext, "Pinecone insert tuple temporary context", ALLOCSET_DEFAULT_SIZES); oldCtx = MemoryContextSwitchTo(insertCtx); - vector = DatumGetVector(values[0]); - validate_vector_nonzero(vector); checkpoint_created = AppendBufferTuple(index, values, isnull, heap_tid, heapRel); MemoryContextSwitchTo(oldCtx); @@ -286,7 +283,8 @@ void FlushToPinecone(Relation index) vector_id = pinecone_id_from_heap_tid(buffer_tup.tid); json_vector = tuple_get_pinecone_vector(index->rd_att, index_values, index_isnull, vector_id); - cJSON_AddItemToArray(json_vectors, json_vector); + if(json_vector!=NULL) + cJSON_AddItemToArray(json_vectors, json_vector); } } diff --git a/src/pinecone/pinecone_scan.c b/src/pinecone/pinecone_scan.c index 12eca13a..08b79174 100644 --- a/src/pinecone/pinecone_scan.c +++ b/src/pinecone/pinecone_scan.c @@ -315,7 +315,7 @@ void load_buffer_into_sort(Relation index, PineconeScanOpaque so, Datum query_da page = BufferGetPage(buf); // add all tuples on the page to the sortstate - for (OffsetNumber offno = FirstOffsetNumber; offno <= PageGetMaxOffsetNumber(page); offno = OffsetNumberNext(offno)) { + for (OffsetNumber offno = FirstOffsetNumber; offno <= PageGetMaxOffsetNumber(page) && n_sortedtuple > pinecone_max_buffer_scan; offno = OffsetNumberNext(offno)) { // get the tid and the vector from the heap tuple ItemId itemid; Item item; diff --git a/src/pinecone/pinecone_utils.c b/src/pinecone/pinecone_utils.c index 9a5dbfce..80ec9dee 100644 --- a/src/pinecone/pinecone_utils.c +++ b/src/pinecone/pinecone_utils.c @@ -12,8 +12,11 @@ cJSON* tuple_get_pinecone_vector(TupleDesc tup_desc, Datum *values, bool *isnull cJSON *metadata = 
cJSON_CreateObject(); Vector *vector; cJSON *json_values; + bool isNonZero; + vector = DatumGetVector(values[0]); - validate_vector_nonzero(vector); + isNonZero = validate_vector_nonzero(vector); + if(!isNonZero) return NULL; json_values = cJSON_CreateFloatArray(vector->x, vector->dim); // prepare metadata for (int i = 1; i < tup_desc->natts; i++) // skip the first column which is the vector @@ -52,27 +55,6 @@ cJSON* tuple_get_pinecone_vector(TupleDesc tup_desc, Datum *values, bool *isnull return json_vector; } -cJSON* index_tuple_get_pinecone_vector(Relation index, IndexTuple itup) { - int natts = index->rd_att->natts; - Datum *itup_values = (Datum *) palloc(sizeof(Datum) * natts); - bool *itup_isnull = (bool *) palloc(sizeof(bool) * natts); - TupleDesc itup_desc = index->rd_att; - char* vector_id; - index_deform_tuple(itup, itup_desc, itup_values, itup_isnull); - vector_id = pinecone_id_from_heap_tid(itup->t_tid); - return tuple_get_pinecone_vector(itup_desc, itup_values, itup_isnull, vector_id); -} - -cJSON* heap_tuple_get_pinecone_vector(Relation heap, HeapTuple htup) { - int natts = heap->rd_att->natts; - Datum *htup_values = (Datum *) palloc(sizeof(Datum) * natts); - bool *htup_isnull = (bool *) palloc(sizeof(bool) * natts); - TupleDesc htup_desc = heap->rd_att; - char* vector_id; - heap_deform_tuple(htup, htup_desc, htup_values, htup_isnull); - vector_id = pinecone_id_from_heap_tid(htup->t_self); - return tuple_get_pinecone_vector(htup_desc, htup_values, htup_isnull, vector_id); -} ItemPointerData pinecone_id_get_heap_tid(char *id) { diff --git a/src/pinecone/pinecone_validate.c b/src/pinecone/pinecone_validate.c index 7a4b73e5..50932c8a 100644 --- a/src/pinecone/pinecone_validate.c +++ b/src/pinecone/pinecone_validate.c @@ -12,12 +12,14 @@ void validate_api_key(void) { } } -void validate_vector_nonzero(Vector* vector) { +bool validate_vector_nonzero(Vector* vector) { if (vector_eq_zero_internal(vector)) { - ereport(ERROR, 
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), + ereport(WARNING, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("Invalid vector: zero vector"), errhint("Pinecone insists that dense vectors cannot be zero in all dimensions. I don't know why they do this to you even when your metric isn't cosine."))); + return false; } + return true; } diff --git a/test/expected/pinecone_zero_vector_insert.out b/test/expected/pinecone_zero_vector_insert.out index c8d73b80..ad662959 100644 --- a/test/expected/pinecone_zero_vector_insert.out +++ b/test/expected/pinecone_zero_vector_insert.out @@ -7,6 +7,7 @@ SET client_min_messages = 'notice'; -- flush each vector individually SET pinecone.vectors_per_request = 1; SET pinecone.requests_per_batch = 1; +SET pinecone.max_buffer_scan = 0; -- disable flat scan to force use of the index SET enable_seqscan = off; -- CREATE TABLE @@ -35,29 +36,105 @@ VALUES ('https://api.pinecone.io/indexes', 'POST', $${ }$$); -- mock describe index stats INSERT INTO pinecone_mock (url_prefix, method, response) -VALUES ('https://fakehost/describe_index_stats', 'GET', '{"namespaces":{},"dimension":3,"indexFullness":0,"totalVectorCount":0}'); -INSERT INTO t (id, val) VALUES (2, '[0,0,0]'); --- create index after insering 0 vector - Throws an error -CREATE INDEX i2 ON t USING pinecone (val) WITH (spec = '{"serverless":{"cloud":"aws","region":"us-west-2"}}'); -ERROR: Invalid vector: zero vector -HINT: Pinecone insists that dense vectors cannot be zero in all dimensions. I don't know why they do this to you even when your metric isn't cosine. 
--- Truncate the table to remove the values for creating an index successfully -TRUNCATE TABLE t; +VALUES ('https://fakehost/describe_index_stats', 'GET', '{"namespaces":{},"dimension":3,"indexFullness":0,"totalVectorCount":2}'); +-- mock upsert +INSERT INTO pinecone_mock (url_prefix, method, response) +VALUES ('https://fakehost/vectors/upsert', 'POST', '{"upsertedCount":1}'); +-- mock query +INSERT INTO pinecone_mock (url_prefix, method, response) +VALUES ('https://fakehost/query', 'POST', $${ + "results": [], + "matches": [{ + "id": "000000000001", + "score": 2, + "values": [] + }], + "namespace": "", + "usage": { + "readUnits": 5 + } +}$$); +-- mock fetch +INSERT INTO pinecone_mock (url_prefix, method, response) +VALUES ('https://fakehost/vectors/fetch', 'GET', $${ + "code": 3, + "message": "No IDs provided for fetch query", + "details": [] +}$$); -- create index CREATE INDEX i2 ON t USING pinecone (val) WITH (spec = '{"serverless":{"cloud":"aws","region":"us-west-2"}}'); -INSERT INTO pinecone_mock (url_prefix, method, response) -VALUES ('https://fakehost/vectors/upsert', -'{ "vectors": [{ - "id": "000000000001", - "values": [100, 1, 1], - "metadata": { - } - }] - }', - '{"upsertedCount":1}' -); +-- insert vectors: throws warning while flushing zero-vector INSERT INTO t (id, val) VALUES (1, '[100,1,1]'); INSERT INTO t (id, val) VALUES (2, '[0,0,0]'); -ERROR: Invalid vector: zero vector +INSERT INTO t (id, val) VALUES (3, '[10120,76,1]'); +WARNING: Invalid vector: zero vector +HINT: Pinecone insists that dense vectors cannot be zero in all dimensions. I don't know why they do this to you even when your metric isn't cosine. +WARNING: No vectors to flush to pinecone +-- returns only id = 1 as it is flushed to pinecone )zero vector not flushed to pinecone) +SELECT * FROM t ORDER BY val <-> '[3,3,3]'; +NOTICE: Buffer is too large +HINT: There are 0 tuples in the buffer that have not yet been flushed to pinecone and 2 tuples in pinecone that are not yet live. 
You may want to consider flushing the buffer. +NOTICE: Reached max local scan + id | val +----+----------- + 1 | [100,1,1] +(1 row) + +SELECT * FROM t; + id | val +----+-------------- + 1 | [100,1,1] + 2 | [0,0,0] + 3 | [10120,76,1] +(3 rows) + +DROP INDEX i2; +SELECT * FROM t ORDER BY val <-> '[3,3,3]'; + id | val +----+-------------- + 2 | [0,0,0] + 1 | [100,1,1] + 3 | [10120,76,1] +(3 rows) + +DELETE FROM pinecone_mock +WHERE url_prefix = 'https://fakehost/query' AND method = 'POST'; +INSERT INTO pinecone_mock (url_prefix, method, response) +VALUES ('https://fakehost/query', 'POST', $${ + "results": [], + "matches": [{ + "id": "000000000001", + "score": 2, + "values": [] + }, + { + "id": "000000000003", + "score": 2, + "values": [] + }], + "namespace": "", + "usage": { + "readUnits": 5 + } +}$$); +-- displays warning while flushing zero vector to pinecone +CREATE INDEX i2 ON t USING pinecone (val) WITH (spec = '{"serverless":{"cloud":"aws","region":"us-west-2"}}'); +WARNING: Invalid vector: zero vector HINT: Pinecone insists that dense vectors cannot be zero in all dimensions. I don't know why they do this to you even when your metric isn't cosine. 
+SELECT * FROM t ORDER BY val <-> '[3,3,3]'; +NOTICE: Reached max local scan + id | val +----+-------------- + 1 | [100,1,1] + 3 | [10120,76,1] +(2 rows) + +SELECT * FROM t; + id | val +----+-------------- + 1 | [100,1,1] + 2 | [0,0,0] + 3 | [10120,76,1] +(3 rows) + DROP TABLE t; diff --git a/test/sql/pinecone_zero_vector_insert.sql b/test/sql/pinecone_zero_vector_insert.sql index 4bbd61c1..b6dbd631 100644 --- a/test/sql/pinecone_zero_vector_insert.sql +++ b/test/sql/pinecone_zero_vector_insert.sql @@ -7,6 +7,8 @@ SET client_min_messages = 'notice'; -- flush each vector individually SET pinecone.vectors_per_request = 1; SET pinecone.requests_per_batch = 1; +SET pinecone.max_buffer_scan = 0; + -- disable flat scan to force use of the index SET enable_seqscan = off; -- CREATE TABLE @@ -36,33 +38,78 @@ VALUES ('https://api.pinecone.io/indexes', 'POST', $${ -- mock describe index stats INSERT INTO pinecone_mock (url_prefix, method, response) -VALUES ('https://fakehost/describe_index_stats', 'GET', '{"namespaces":{},"dimension":3,"indexFullness":0,"totalVectorCount":0}'); - +VALUES ('https://fakehost/describe_index_stats', 'GET', '{"namespaces":{},"dimension":3,"indexFullness":0,"totalVectorCount":2}'); -INSERT INTO t (id, val) VALUES (2, '[0,0,0]'); +-- mock upsert +INSERT INTO pinecone_mock (url_prefix, method, response) +VALUES ('https://fakehost/vectors/upsert', 'POST', '{"upsertedCount":1}'); --- create index after insering 0 vector - Throws an error -CREATE INDEX i2 ON t USING pinecone (val) WITH (spec = '{"serverless":{"cloud":"aws","region":"us-west-2"}}'); +-- mock query +INSERT INTO pinecone_mock (url_prefix, method, response) +VALUES ('https://fakehost/query', 'POST', $${ + "results": [], + "matches": [{ + "id": "000000000001", + "score": 2, + "values": [] + }], + "namespace": "", + "usage": { + "readUnits": 5 + } +}$$); --- Truncate the table to remove the values for creating an index successfully -TRUNCATE TABLE t; +-- mock fetch +INSERT INTO pinecone_mock 
(url_prefix, method, response) +VALUES ('https://fakehost/vectors/fetch', 'GET', $${ + "code": 3, + "message": "No IDs provided for fetch query", + "details": [] +}$$); -- create index CREATE INDEX i2 ON t USING pinecone (val) WITH (spec = '{"serverless":{"cloud":"aws","region":"us-west-2"}}'); -INSERT INTO pinecone_mock (url_prefix, method, response) -VALUES ('https://fakehost/vectors/upsert', -'{ "vectors": [{ - "id": "000000000001", - "values": [100, 1, 1], - "metadata": { - } - }] - }', - '{"upsertedCount":1}' -); - +-- insert vectors: throws warning while flushing zero-vector INSERT INTO t (id, val) VALUES (1, '[100,1,1]'); INSERT INTO t (id, val) VALUES (2, '[0,0,0]'); +INSERT INTO t (id, val) VALUES (3, '[10120,76,1]'); + +-- returns only id = 1 as it is flushed to pinecone )zero vector not flushed to pinecone) +SELECT * FROM t ORDER BY val <-> '[3,3,3]'; + +SELECT * FROM t; + +DROP INDEX i2; + +SELECT * FROM t ORDER BY val <-> '[3,3,3]'; + +DELETE FROM pinecone_mock +WHERE url_prefix = 'https://fakehost/query' AND method = 'POST'; + +INSERT INTO pinecone_mock (url_prefix, method, response) +VALUES ('https://fakehost/query', 'POST', $${ + "results": [], + "matches": [{ + "id": "000000000001", + "score": 2, + "values": [] + }, + { + "id": "000000000003", + "score": 2, + "values": [] + }], + "namespace": "", + "usage": { + "readUnits": 5 + } +}$$); + +-- displays warning while flushing zero vector to pinecone +CREATE INDEX i2 ON t USING pinecone (val) WITH (spec = '{"serverless":{"cloud":"aws","region":"us-west-2"}}'); + +SELECT * FROM t ORDER BY val <-> '[3,3,3]'; +SELECT * FROM t; DROP TABLE t; \ No newline at end of file From 02c404f7da9a9f77a0980f5f93a4537914cfddc3 Mon Sep 17 00:00:00 2001 From: Sweta Vooda Date: Fri, 12 Apr 2024 05:14:25 +0000 Subject: [PATCH 2/4] modify crud test --- test/expected/pinecone_crud.out | 3 +++ test/sql/pinecone_crud.sql | 3 +++ 2 files changed, 6 insertions(+) diff --git a/test/expected/pinecone_crud.out 
b/test/expected/pinecone_crud.out index e8423d29..fe648ec5 100644 --- a/test/expected/pinecone_crud.out +++ b/test/expected/pinecone_crud.out @@ -1,11 +1,14 @@ -- SETUP -- suppress output \o /dev/null +\o /dev/null +delete from pinecone_mock; -- logging level SET client_min_messages = 'notice'; -- flush each vector individually SET pinecone.vectors_per_request = 1; SET pinecone.requests_per_batch = 1; +SET pinecone.max_buffer_scan = 1000; -- disable flat scan to force use of the index SET enable_seqscan = off; -- Testing database is responsible for initializing the mock table with diff --git a/test/sql/pinecone_crud.sql b/test/sql/pinecone_crud.sql index b26fb6e0..18819016 100644 --- a/test/sql/pinecone_crud.sql +++ b/test/sql/pinecone_crud.sql @@ -2,10 +2,13 @@ -- suppress output \o /dev/null -- logging level +\o /dev/null +delete from pinecone_mock; SET client_min_messages = 'notice'; -- flush each vector individually SET pinecone.vectors_per_request = 1; SET pinecone.requests_per_batch = 1; +SET pinecone.max_buffer_scan = 1000; -- disable flat scan to force use of the index SET enable_seqscan = off; -- Testing database is responsible for initializing the mock table with From 5a1f37520b9b084eea7bc7bc9a8e7453a14ee692 Mon Sep 17 00:00:00 2001 From: Sweta Vooda Date: Mon, 22 Apr 2024 22:46:39 +0000 Subject: [PATCH 3/4] fixed seg fault on NULL vector --- src/pinecone/pinecone_scan.c | 2 +- src/pinecone/pinecone_utils.c | 8 ++++- test/expected/pinecone_zero_vector_insert.out | 31 ++++++++++++------- test/sql/pinecone_zero_vector_insert.sql | 9 +++--- 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/src/pinecone/pinecone_scan.c b/src/pinecone/pinecone_scan.c index 08b79174..98d69d64 100644 --- a/src/pinecone/pinecone_scan.c +++ b/src/pinecone/pinecone_scan.c @@ -315,7 +315,7 @@ void load_buffer_into_sort(Relation index, PineconeScanOpaque so, Datum query_da page = BufferGetPage(buf); // add all tuples on the page to the sortstate - for (OffsetNumber 
offno = FirstOffsetNumber; offno <= PageGetMaxOffsetNumber(page) && n_sortedtuple > pinecone_max_buffer_scan; offno = OffsetNumberNext(offno)) { + for (OffsetNumber offno = FirstOffsetNumber; offno <= PageGetMaxOffsetNumber(page) && n_sortedtuple > pinecone_max_buffer_scan; offno = OffsetNumberNext(offno)) { // get the tid and the vector from the heap tuple ItemId itemid; Item item; diff --git a/src/pinecone/pinecone_utils.c b/src/pinecone/pinecone_utils.c index 80ec9dee..a58c76b8 100644 --- a/src/pinecone/pinecone_utils.c +++ b/src/pinecone/pinecone_utils.c @@ -13,7 +13,13 @@ cJSON* tuple_get_pinecone_vector(TupleDesc tup_desc, Datum *values, bool *isnull Vector *vector; cJSON *json_values; bool isNonZero; - + // Check if the first Datum is zero, which indicates a NULL pointer/ NULL vector + if(values[0]==0) { + ereport(WARNING, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Invalid vector: NULL vector"), + errhint("Pinecone insists that vectors cannot be NULL."))); + return NULL; + } vector = DatumGetVector(values[0]); isNonZero = validate_vector_nonzero(vector); if(!isNonZero) return NULL; diff --git a/test/expected/pinecone_zero_vector_insert.out b/test/expected/pinecone_zero_vector_insert.out index ad662959..d1d6257d 100644 --- a/test/expected/pinecone_zero_vector_insert.out +++ b/test/expected/pinecone_zero_vector_insert.out @@ -66,14 +66,18 @@ CREATE INDEX i2 ON t USING pinecone (val) WITH (spec = '{"serverless":{"cloud":" -- insert vectors: throws warning while flushing zero-vector INSERT INTO t (id, val) VALUES (1, '[100,1,1]'); INSERT INTO t (id, val) VALUES (2, '[0,0,0]'); -INSERT INTO t (id, val) VALUES (3, '[10120,76,1]'); +INSERT INTO t (id, val) VALUES (3, NULL); WARNING: Invalid vector: zero vector HINT: Pinecone insists that dense vectors cannot be zero in all dimensions. I don't know why they do this to you even when your metric isn't cosine. 
WARNING: No vectors to flush to pinecone --- returns only id = 1 as it is flushed to pinecone )zero vector not flushed to pinecone) +INSERT INTO t (id, val) VALUES (4, '[10120,76,1]'); +WARNING: Invalid vector: NULL vector +HINT: Pinecone insists that vectors cannot be NULL. +WARNING: No vectors to flush to pinecone +-- returns only id = 1 as it is flushed to pinecone (zero vector not flushed to pinecone) SELECT * FROM t ORDER BY val <-> '[3,3,3]'; NOTICE: Buffer is too large -HINT: There are 0 tuples in the buffer that have not yet been flushed to pinecone and 2 tuples in pinecone that are not yet live. You may want to consider flushing the buffer. +HINT: There are 0 tuples in the buffer that have not yet been flushed to pinecone and 3 tuples in pinecone that are not yet live. You may want to consider flushing the buffer. NOTICE: Reached max local scan id | val ----+----------- @@ -85,8 +89,9 @@ SELECT * FROM t; ----+-------------- 1 | [100,1,1] 2 | [0,0,0] - 3 | [10120,76,1] -(3 rows) + 3 | + 4 | [10120,76,1] +(4 rows) DROP INDEX i2; SELECT * FROM t ORDER BY val <-> '[3,3,3]'; @@ -94,8 +99,9 @@ SELECT * FROM t ORDER BY val <-> '[3,3,3]'; ----+-------------- 2 | [0,0,0] 1 | [100,1,1] - 3 | [10120,76,1] -(3 rows) + 4 | [10120,76,1] + 3 | +(4 rows) DELETE FROM pinecone_mock WHERE url_prefix = 'https://fakehost/query' AND method = 'POST'; @@ -108,7 +114,7 @@ VALUES ('https://fakehost/query', 'POST', $${ "values": [] }, { - "id": "000000000003", + "id": "000000000004", "score": 2, "values": [] }], @@ -121,12 +127,14 @@ VALUES ('https://fakehost/query', 'POST', $${ CREATE INDEX i2 ON t USING pinecone (val) WITH (spec = '{"serverless":{"cloud":"aws","region":"us-west-2"}}'); WARNING: Invalid vector: zero vector HINT: Pinecone insists that dense vectors cannot be zero in all dimensions. I don't know why they do this to you even when your metric isn't cosine. +WARNING: Invalid vector: NULL vector +HINT: Pinecone insists that vectors cannot be NULL. 
SELECT * FROM t ORDER BY val <-> '[3,3,3]'; +NOTICE: Reached max local scan id | val ----+-------------- 1 | [100,1,1] - 3 | [10120,76,1] + 4 | [10120,76,1] (2 rows) SELECT * FROM t; @@ -134,7 +142,8 @@ SELECT * FROM t; ----+-------------- 1 | [100,1,1] 2 | [0,0,0] - 3 | [10120,76,1] -(3 rows) + 3 | + 4 | [10120,76,1] +(4 rows) DROP TABLE t; diff --git a/test/sql/pinecone_zero_vector_insert.sql b/test/sql/pinecone_zero_vector_insert.sql index b6dbd631..3698a925 100644 --- a/test/sql/pinecone_zero_vector_insert.sql +++ b/test/sql/pinecone_zero_vector_insert.sql @@ -1,7 +1,6 @@ -- SETUP -- suppress output \o /dev/null -delete from pinecone_mock; -- logging level SET client_min_messages = 'notice'; -- flush each vector individually @@ -73,9 +72,11 @@ CREATE INDEX i2 ON t USING pinecone (val) WITH (spec = '{"serverless":{"cloud":" -- insert vectors: throws warning while flushing zero-vector INSERT INTO t (id, val) VALUES (1, '[100,1,1]'); INSERT INTO t (id, val) VALUES (2, '[0,0,0]'); -INSERT INTO t (id, val) VALUES (3, '[10120,76,1]'); +INSERT INTO t (id, val) VALUES (3, NULL); +INSERT INTO t (id, val) VALUES (4, '[10120,76,1]'); --- returns only id = 1 as it is flushed to pinecone )zero vector not flushed to pinecone) + +-- returns only id = 1 as it is flushed to pinecone (zero vector not flushed to pinecone) SELECT * FROM t ORDER BY val <-> '[3,3,3]'; SELECT * FROM t; @@ -96,7 +97,7 @@ VALUES ('https://fakehost/query', 'POST', $${ "values": [] }, { - "id": "000000000003", + "id": "000000000004", "score": 2, "values": [] }], From 229b9c12b625fba637476ba74ae1ce1bb62fed3a Mon Sep 17 00:00:00 2001 From: Sweta Vooda Date: Mon, 22 Apr 2024 22:49:18 +0000 Subject: [PATCH 4/4] suppress warn --- .github/workflows/build.yml | 2 +- src/pinecone/pinecone_validate.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6a0395ac..b340d6e3 100755 --- a/.github/workflows/build.yml +++ 
b/.github/workflows/build.yml @@ -52,7 +52,7 @@ jobs: - run: psql test -c 'alter database test set enable_seqscan = off' # setup the database for testing - - run: make installcheck REGRESS="pinecone_crud pinecone_medium_create pinecone_zero_vector_insert pinecone_build_after_insert pinecone_invalid_config" REGRESS_OPTS="--dbname=test --inputdir=./test --use-existing" + - run: make installcheck REGRESS="pinecone_crud pinecone_medium_create pinecone_zero_vector_insert pinecone_build_after_insert" REGRESS_OPTS="--dbname=test --inputdir=./test --use-existing" - if: ${{ failure() }} run: cat regression.diffs # mac: diff --git a/src/pinecone/pinecone_validate.c b/src/pinecone/pinecone_validate.c index 2c5def93..69d69526 100644 --- a/src/pinecone/pinecone_validate.c +++ b/src/pinecone/pinecone_validate.c @@ -22,7 +22,8 @@ bool validate_vector_nonzero(Vector* vector) { return true; } - +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnonnull" void pinecone_spec_validator(const PineconeOptions *opts) { if (opts == NULL || cJSON_Parse(GET_STRING_RELOPTION(opts, spec)) == NULL || strcmp(GET_STRING_RELOPTION(opts, spec), "") == 0) @@ -34,6 +35,7 @@ void pinecone_spec_validator(const PineconeOptions *opts) Refer to https://docs.pinecone.io/reference/create_index"))); } } +#pragma GCC diagnostic pop void pinecone_host_validator(const char *host) {