From a7d4bf1cc50ed09e4aab5b04f37e5325cbd8745a Mon Sep 17 00:00:00 2001 From: Daehyun Kim Date: Fri, 3 Jan 2025 19:57:52 +0900 Subject: [PATCH 01/13] feat(object_domain, db_set): coerce str to vector --- src/compat/db_set.c | 60 ++++++++++++++++++++++++ src/compat/db_set_function.h | 1 + src/object/object_domain.c | 89 ++++++++++++++++++++++++++++++++++++ 3 files changed, 150 insertions(+) diff --git a/src/compat/db_set.c b/src/compat/db_set.c index db60bd5bfdf..fe2bff595e3 100644 --- a/src/compat/db_set.c +++ b/src/compat/db_set.c @@ -296,6 +296,66 @@ db_seq_create (MOP classop, const char *name, int size) return (set); } +/* + * db_vec_create() - This function creates an empty vector. The class and + * name arguments can be set to NULL. If values are supplied, a check will + * be made to make sure that the attribute was defined with the vector + * domain. + * return : a set (vector) descriptor + * classop(in): class or instance + * name(in): attribute name + * size(in): initial size + * + * note : The new set will not be attached to any object, so you must use the + * db_put( ) function to assign it as the value of an attribute. If the size + * is not known, it is permissible to pass zero. + */ +DB_SET * +db_vec_create (MOP classop, const char *name, int size) +{ + DB_SET *set; +#if !defined(SERVER_MODE) + int error = NO_ERROR; +#endif + + CHECK_CONNECT_NULL (); + + set = NULL; + if (classop == NULL || name == NULL) + { + set = set_create_vector (size); + } + else + { +#if !defined(SERVER_MODE) + SM_CLASS *class_; + SM_ATTRIBUTE *att; + + if (au_fetch_class (classop, &class_, AU_FETCH_READ, AU_SELECT) == NO_ERROR) + { + att = classobj_find_attribute (class_, name, 0); + if (att == NULL) + { + ERROR_SET1 (error, ER_OBJ_INVALID_ATTRIBUTE, name); + } + else + { + if (att->type->id == DB_TYPE_VECTOR) + { + set = set_create_vector (size); + } + else + { + ERROR_SET1 (error, ER_OBJ_DOMAIN_CONFLICT, name); + } + } + } +#endif + } + + return (set); +} + /* * db_set_free() - This function frees a set handle. If the set is owned by an * object, the contents of the set are not freed, only the set handle is diff --git a/src/compat/db_set_function.h b/src/compat/db_set_function.h index 47e214190ae..dc6e76a0762 100644 --- a/src/compat/db_set_function.h +++ b/src/compat/db_set_function.h @@ -36,6 +36,7 @@ extern "C" extern DB_COLLECTION *db_set_create_basic (DB_OBJECT * classobj, const char *name); extern DB_COLLECTION *db_set_create_multi (DB_OBJECT * classobj, const char *name); extern DB_COLLECTION *db_seq_create (DB_OBJECT * classobj, const char *name, int size); + extern DB_COLLECTION *db_vec_create (DB_OBJECT * classobj, const char *name, int size); extern int db_set_free (DB_COLLECTION * set); extern int db_set_filter (DB_COLLECTION * set); extern int db_set_add (DB_COLLECTION * set, DB_VALUE * value); diff --git a/src/object/object_domain.c b/src/object/object_domain.c index d5fb6a45b2e..7e08a47ecf4 100644 --- a/src/object/object_domain.c +++ b/src/object/object_domain.c @@ -611,6 +611,7 @@ static DB_BIGINT tp_ubi_to_bi_with_args (UINT64 ubi, bool is_negative, bool trun DB_DATA_STATUS * data_stat); static UINT64 tp_ubi_times_ten (UINT64 ubi, bool * truncated); +static int tp_str_to_vector (DB_VALUE const *src, DB_VALUE * result); /* * tp_init - Global initialization for this module. @@ -4994,6 +4995,69 @@ tp_atof (const DB_VALUE * src, double *num_value, DB_DATA_STATUS * data_stat) return status; } +/* + * tp_str_to_vector - Coerce a string to a vector. + * return: NO_ERROR or error code. + * src(in): string DB_VALUE + * result(out): vector DB_VALUE + */ +static int +tp_str_to_vector (const DB_VALUE * src, DB_VALUE * result) +{ + const char *p = db_get_string (src); + int count = 0; + char number_buffer[64]; + int buffer_idx; + float float_array[2000]; + DB_SET *vec; + DB_VALUE e_val; + int status = NO_ERROR; + + // Skip opening bracket + while (*p && (*p == '[' || isspace (*p))) + { + p++; + } + + while (*p) + { + // Get number into buffer + buffer_idx = 0; + while (*p && *p != ',' && *p != ']') + { + if (!isspace (*p)) + { + number_buffer[buffer_idx++] = *p; + } + p++; + } + number_buffer[buffer_idx] = '\0'; + + // Convert to float and store + float_array[count++] = atof (number_buffer); + + // Skip comma and spaces + while (*p && (*p == ',' || isspace (*p))) + { + p++; + } + + if (*p == ']') + break; + } + + // create empty vector + vec = db_vec_create (NULL, NULL, 0); + db_make_vector (result, vec); + + for (int i = 0; i < count; ++i) + { + db_make_float (&e_val, float_array[i]); + db_seq_put (db_get_set (result), i, &e_val); + } + return status; +} + /* * tp_atobi - Coerce a string to a bigint. * return: NO_ERROR or error code @@ -9125,6 +9189,31 @@ tp_value_cast_internal (const DB_VALUE * src, DB_VALUE * dest, const TP_DOMAIN * } break; + case DB_TYPE_VECTOR: + switch (original_type) + { + case DB_TYPE_CHAR: + case DB_TYPE_VARCHAR: + case DB_TYPE_NCHAR: + case DB_TYPE_VARNCHAR: + { + // step 1. convert string to float array + // float temp_float_array[] = {111.456f, 222.456f, 333.456f}; + // float *float_arr = temp_float_array; + // int float_arr_size = 3; + + + int float_arr_size = tp_str_to_vector (src, target); + + // step 2. iterate over float and put float elements + break; + + } + default: + status = DOMAIN_INCOMPATIBLE; + break; + } + break; case DB_TYPE_VOBJ: if (original_type == DB_TYPE_VOBJ) { From e71a0a2c077077ff746e3c598d0f90eeffe08a72 Mon Sep 17 00:00:00 2001 From: Daehyun Kim Date: Mon, 6 Jan 2025 15:22:07 +0900 Subject: [PATCH 02/13] feat(object_domain): function tp_str_to_vector --- src/object/object_domain.c | 141 ++++++++++++++++++++++++++++--------- 1 file changed, 108 insertions(+), 33 deletions(-) diff --git a/src/object/object_domain.c b/src/object/object_domain.c index 7e08a47ecf4..7d291c24f7c 100644 --- a/src/object/object_domain.c +++ b/src/object/object_domain.c @@ -5000,62 +5000,144 @@ tp_atof (const DB_VALUE * src, double *num_value, DB_DATA_STATUS * data_stat) * return: NO_ERROR or error code. * src(in): string DB_VALUE * result(out): vector DB_VALUE + * Note: + * Accepts strings that are not null terminated. Don't call this unless + * src is a string db_value. */ static int tp_str_to_vector (const DB_VALUE * src, DB_VALUE * result) { const char *p = db_get_string (src); + const char *end = p + db_get_string_size (src); int count = 0; char number_buffer[64]; int buffer_idx; float float_array[2000]; - DB_SET *vec; + DB_SET *vec = NULL; DB_VALUE e_val; - int status = NO_ERROR; + INTL_CODESET codeset = db_get_string_codeset (src); - // Skip opening bracket - while (*p && (*p == '[' || isspace (*p))) + if (p == NULL) { - p++; + return ER_FAILED; } - while (*p) + // Skip leading spaces and opening bracket + p = (char *) intl_skip_spaces (p, end, codeset); + if (p >= end || *p != '[') { + return ER_FAILED; + } + p++; + + while (p < end && count < 2000) + { + // Skip spaces before number + p = (char *) intl_skip_spaces (p, end, codeset); + if (p >= end) + { + return ER_FAILED; + } + + // Check for closing bracket + if (*p == ']') + { + break; + } + // Get number into buffer buffer_idx = 0; - while (*p && *p != ',' && *p != ']') - { - if (!isspace (*p)) - { - number_buffer[buffer_idx++] = *p; - } - p++; - } + while (p < end && *p != ',' && *p != ']' && buffer_idx < 63) + { + if (!isspace (*p)) + { + number_buffer[buffer_idx++] = *p; + } + p++; + } + + if (buffer_idx == 0 || buffer_idx >= 63) + { + return ER_FAILED; + } + number_buffer[buffer_idx] = '\0'; - // Convert to float and store - float_array[count++] = atof (number_buffer); + // Convert to float + char *end_ptr = NULL; + errno = 0; + float_array[count] = strtof (number_buffer, &end_ptr); + + if (errno == ERANGE) + { + return ER_FAILED; + } - // Skip comma and spaces - while (*p && (*p == ',' || isspace (*p))) - { - p++; - } + if (*end_ptr != '\0') + { + return ER_FAILED; + } + count++; + + // Skip spaces after number + p = (char *) intl_skip_spaces (p, end, codeset); + if (p >= end) + { + return ER_FAILED; + } + + // Must be comma or closing bracket if (*p == ']') - break; + { + break; + } + else if (*p != ',') + { + return ER_FAILED; + } + p++; + } + + // Check for closing bracket + if (p >= end || *p != ']') + { + return ER_FAILED; } + p++; - // create empty vector + // Skip trailing spaces + p = (char *) intl_skip_spaces (p, end, codeset); + if (p != end) + { + return ER_FAILED; + } + + if (count == 0) + { + return ER_FAILED; + } + + // Create vector and populate it vec = db_vec_create (NULL, NULL, 0); + if (vec == NULL) + { + assert (er_errid () != NO_ERROR); + return er_errid (); + } + db_make_vector (result, vec); for (int i = 0; i < count; ++i) { db_make_float (&e_val, float_array[i]); - db_seq_put (db_get_set (result), i, &e_val); + if (db_seq_put (db_get_set (result), i, &e_val) != NO_ERROR) + { + return ER_FAILED; + } } - return status; + + return NO_ERROR; } /* @@ -9197,15 +9279,8 @@ tp_value_cast_internal (const DB_VALUE * src, DB_VALUE * dest, const TP_DOMAIN * case DB_TYPE_NCHAR: case DB_TYPE_VARNCHAR: { - // step 1. convert string to float array - // float temp_float_array[] = {111.456f, 222.456f, 333.456f}; - // float *float_arr = temp_float_array; - // int float_arr_size = 3; - - - int float_arr_size = tp_str_to_vector (src, target); - // step 2. iterate over float and put float elements + err = tp_str_to_vector (src, target); break; } From 9924b865cd76180313ca9a6f292182d345559ed5 Mon Sep 17 00:00:00 2001 From: Daehyun Kim Date: Mon, 6 Jan 2025 17:02:00 +0900 Subject: [PATCH 03/13] style(object_domain): fix style --- src/object/object_domain.c | 71 +++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/src/object/object_domain.c b/src/object/object_domain.c index 7d291c24f7c..cf46b142da0 100644 --- a/src/object/object_domain.c +++ b/src/object/object_domain.c @@ -5035,31 +5035,32 @@ tp_str_to_vector (const DB_VALUE * src, DB_VALUE * result) // Skip spaces before number p = (char *) intl_skip_spaces (p, end, codeset); if (p >= end) - { - return ER_FAILED; - } + { + return ER_FAILED; + } // Check for closing bracket if (*p == ']') - { - break; - } + { + break; + } // Get number into buffer buffer_idx = 0; while (p < end && *p != ',' && *p != ']' && buffer_idx < 63) - { - if (!isspace (*p)) - { - number_buffer[buffer_idx++] = *p; - } - p++; - } + { + if (!isspace (*p)) + { + number_buffer[buffer_idx++] = *p; + } + p++; + } if (buffer_idx == 0 || buffer_idx >= 63) - { - return ER_FAILED; - } + { + + return ER_FAILED; + } number_buffer[buffer_idx] = '\0'; @@ -5067,35 +5068,35 @@ tp_str_to_vector (const DB_VALUE * src, DB_VALUE * result) char *end_ptr = NULL; errno = 0; float_array[count] = strtof (number_buffer, &end_ptr); - + if (errno == ERANGE) - { - return ER_FAILED; - } + { + return ER_FAILED; + } if (*end_ptr != '\0') - { - return ER_FAILED; - } + { + return ER_FAILED; + } count++; // Skip spaces after number p = (char *) intl_skip_spaces (p, end, codeset); if (p >= end) - { - return ER_FAILED; - } + { + return ER_FAILED; + } // Must be comma or closing bracket if (*p == ']') - { - break; - } + { + break; + } else if (*p != ',') - { - return ER_FAILED; - } + { + return ER_FAILED; + } p++; } @@ -5132,9 +5133,9 @@ tp_str_to_vector (const DB_VALUE * src, DB_VALUE * result) { db_make_float (&e_val, float_array[i]); if (db_seq_put (db_get_set (result), i, &e_val) != NO_ERROR) - { - return ER_FAILED; - } + { + return ER_FAILED; + } } return NO_ERROR; From ad73a80f8754c344a9d48ca9ca62b8ab7a0b0601 Mon Sep 17 00:00:00 2001 From: Daehyun Kim Date: Mon, 6 Jan 2025 17:33:11 +0900 Subject: [PATCH 04/13] refactor(object_domain): no magic number for sizes --- src/object/object_domain.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/object/object_domain.c b/src/object/object_domain.c index cf46b142da0..55ef7172dc2 100644 --- a/src/object/object_domain.c +++ b/src/object/object_domain.c @@ -5010,9 +5010,11 @@ tp_str_to_vector (const DB_VALUE * src, DB_VALUE * result) const char *p = db_get_string (src); const char *end = p + db_get_string_size (src); int count = 0; - char number_buffer[64]; + const int number_buffer_size = 64; + char number_buffer[number_buffer_size]; int buffer_idx; - float float_array[2000]; + const int max_vector_size = 2000; + float float_array[max_vector_size]; DB_SET *vec = NULL; DB_VALUE e_val; INTL_CODESET codeset = db_get_string_codeset (src); @@ -5030,7 +5032,7 @@ tp_str_to_vector (const DB_VALUE * src, DB_VALUE * result) } p++; - while (p < end && count < 2000) + while (p < end && count < max_vector_size) { // Skip spaces before number p = (char *) intl_skip_spaces (p, end, codeset); @@ -5047,7 +5049,7 @@ tp_str_to_vector (const DB_VALUE * src, DB_VALUE * result) // Get number into buffer buffer_idx = 0; - while (p < end && *p != ',' && *p != ']' && buffer_idx < 63) + while (p < end && *p != ',' && *p != ']' && buffer_idx < number_buffer_size) { if (!isspace (*p)) { @@ -5056,7 +5058,7 @@ tp_str_to_vector (const DB_VALUE * src, DB_VALUE * result) p++; } - if (buffer_idx == 0 || buffer_idx >= 63) + if (buffer_idx == 0 || buffer_idx >= number_buffer_size) { return ER_FAILED; From 2bbfce759cf8ddc302cca01b779680ca24a6fdb5 Mon Sep 17 00:00:00 2001 From: Daehyun Kim <18080546+vimkim@users.noreply.github.com> Date: Thu, 9 Jan 2025 14:01:36 +0900 Subject: [PATCH 05/13] fix(object_domain): 63 instead of 64 for null terminator --- src/object/object_domain.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/object/object_domain.c b/src/object/object_domain.c index 55ef7172dc2..7c395656ef4 100644 --- a/src/object/object_domain.c +++ b/src/object/object_domain.c @@ -5049,7 +5049,7 @@ tp_str_to_vector (const DB_VALUE * src, DB_VALUE * result) // Get number into buffer buffer_idx = 0; - while (p < end && *p != ',' && *p != ']' && buffer_idx < number_buffer_size) + while (p < end && *p != ',' && *p != ']' && buffer_idx < number_buffer_size - 1) { if (!isspace (*p)) { @@ -5058,7 +5058,7 @@ tp_str_to_vector (const DB_VALUE * src, DB_VALUE * result) p++; } - if (buffer_idx == 0 || buffer_idx >= number_buffer_size) + if (buffer_idx == 0 || buffer_idx >= number_buffer_size - 1) { return ER_FAILED; From 69b03f1b5325f06fe439429fb74a7abba6dfecb2 Mon Sep 17 00:00:00 2001 From: Daehyun Kim Date: Mon, 13 Jan 2025 19:29:20 +0900 Subject: [PATCH 06/13] replace intl_skip_spaces to plain c impl --- src/object/object_domain.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/src/object/object_domain.c b/src/object/object_domain.c index 7c395656ef4..a1d19b62197 100644 --- a/src/object/object_domain.c +++ b/src/object/object_domain.c @@ -5017,7 +5017,6 @@ tp_str_to_vector (const DB_VALUE * src, DB_VALUE * result) float float_array[max_vector_size]; DB_SET *vec = NULL; DB_VALUE e_val; - INTL_CODESET codeset = db_get_string_codeset (src); if (p == NULL) { @@ -5025,7 +5024,11 @@ tp_str_to_vector (const DB_VALUE * src, DB_VALUE * result) } // Skip leading spaces and opening bracket - p = (char *) intl_skip_spaces (p, end, codeset); + while (p < end && (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')) + { + p++; + } + if (p >= end || *p != '[') { return ER_FAILED; @@ -5035,7 +5038,11 @@ tp_str_to_vector (const DB_VALUE * src, DB_VALUE * result) while (p < end && count < max_vector_size) { // Skip spaces before number - p = (char *) intl_skip_spaces (p, end, codeset); + while (p < end && (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')) + { + p++; + } + if (p >= end) { return ER_FAILED; @@ -5060,7 +5067,6 @@ tp_str_to_vector (const DB_VALUE * src, DB_VALUE * result) if (buffer_idx == 0 || buffer_idx >= number_buffer_size - 1) { - return ER_FAILED; } @@ -5070,21 +5076,22 @@ tp_str_to_vector (const DB_VALUE * src, DB_VALUE * result) char *end_ptr = NULL; errno = 0; float_array[count] = strtof (number_buffer, &end_ptr); - if (errno == ERANGE) { return ER_FAILED; } - if (*end_ptr != '\0') { return ER_FAILED; } - count++; // Skip spaces after number - p = (char *) intl_skip_spaces (p, end, codeset); + while (p < end && (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')) + { + p++; + } + if (p >= end) { return ER_FAILED; @@ -5110,7 +5117,11 @@ tp_str_to_vector (const DB_VALUE * src, DB_VALUE * result) p++; // Skip trailing spaces - p = (char *) intl_skip_spaces (p, end, codeset); + while (p < end && (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')) + { + p++; + } + if (p != end) { return ER_FAILED; @@ -5130,7 +5141,6 @@ tp_str_to_vector (const DB_VALUE * src, DB_VALUE * result) } db_make_vector (result, vec); - for (int i = 0; i < count; ++i) { db_make_float (&e_val, float_array[i]); From d3c71b548c23415f17b050b5e5da0dfb01b4214b Mon Sep 17 00:00:00 2001 From: Daehyun Kim Date: Mon, 13 Jan 2025 19:33:21 +0900 Subject: [PATCH 07/13] rename tp_str_to_vector to tp_atovector --- src/object/object_domain.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/object/object_domain.c b/src/object/object_domain.c index a1d19b62197..f9198ac9475 100644 --- a/src/object/object_domain.c +++ b/src/object/object_domain.c @@ -581,6 +581,7 @@ static int tp_atodatetimetz (const DB_VALUE * src, DB_DATETIMETZ * temp); static int tp_atonumeric (const DB_VALUE * src, DB_VALUE * temp); static int tp_atof (const DB_VALUE * src, double *num_value, DB_DATA_STATUS * data_stat); static int tp_atobi (const DB_VALUE * src, DB_BIGINT * num_value, DB_DATA_STATUS * data_stat); +static int tp_atovector (DB_VALUE const *src, DB_VALUE * result); #if defined(ENABLE_UNUSED_FUNCTION) static char *tp_itoa (int value, char *string, int radix); #endif @@ -611,7 +612,6 @@ static DB_BIGINT tp_ubi_to_bi_with_args (UINT64 ubi, bool is_negative, bool trun DB_DATA_STATUS * data_stat); static UINT64 tp_ubi_times_ten (UINT64 ubi, bool * truncated); -static int tp_str_to_vector (DB_VALUE const *src, DB_VALUE * result); /* * tp_init - Global initialization for this module. @@ -5005,7 +5005,7 @@ tp_atof (const DB_VALUE * src, double *num_value, DB_DATA_STATUS * data_stat) * src is a string db_value. */ static int -tp_str_to_vector (const DB_VALUE * src, DB_VALUE * result) +tp_atovector (const DB_VALUE * src, DB_VALUE * result) { const char *p = db_get_string (src); const char *end = p + db_get_string_size (src); @@ -9293,7 +9293,7 @@ tp_value_cast_internal (const DB_VALUE * src, DB_VALUE * dest, const TP_DOMAIN * case DB_TYPE_VARNCHAR: { - err = tp_str_to_vector (src, target); + err = tp_atovector (src, target); break; } From 0850adb3a331b1cad4e92ab4561d30911f784085 Mon Sep 17 00:00:00 2001 From: Daehyun Kim Date: Mon, 20 Jan 2025 12:35:13 +0900 Subject: [PATCH 08/13] add db_vector.cpp, db_vector.hpp with db_string_to_vector function --- cs/CMakeLists.txt | 2 + cubrid/CMakeLists.txt | 2 + sa/CMakeLists.txt | 1 + src/compat/db_vector.cpp | 147 +++++++++++++++++++++++++++++++++++++ src/compat/db_vector.hpp | 30 ++++++++ src/object/object_domain.c | 116 +---------------------------- 6 files changed, 186 insertions(+), 112 deletions(-) create mode 100644 src/compat/db_vector.cpp create mode 100644 src/compat/db_vector.hpp diff --git a/cs/CMakeLists.txt b/cs/CMakeLists.txt index 65fef46f6ec..43ff584a0ff 100644 --- a/cs/CMakeLists.txt +++ b/cs/CMakeLists.txt @@ -59,6 +59,7 @@ set(COMPAT_SOURCES ${COMPAT_DIR}/db_temp.c ${COMPAT_DIR}/db_value_printer.cpp ${COMPAT_DIR}/db_vdb.c + ${COMPAT_DIR}/db_vector.cpp ${COMPAT_DIR}/db_virt.c ${COMPAT_DIR}/dbtype_function.c ) @@ -79,6 +80,7 @@ set (COMPAT_HEADERS ${COMPAT_DIR}/dbtype_function.h ${COMPAT_DIR}/dbtype_function.i ${COMPAT_DIR}/db_admin.h + ${COMPAT_DIR}/db_vector.hpp ) set(BASE_SOURCES diff --git a/cubrid/CMakeLists.txt b/cubrid/CMakeLists.txt index 08b185d1c89..0958d722a51 100644 --- a/cubrid/CMakeLists.txt +++ b/cubrid/CMakeLists.txt @@ -29,6 +29,7 @@ set(COMPAT_SOURCES ${COMPAT_DIR}/db_macro.c ${COMPAT_DIR}/db_set.c ${COMPAT_DIR}/db_value_printer.cpp + ${COMPAT_DIR}/db_vector.cpp ) set (COMPAT_HEADERS ${COMPAT_DIR}/dbtype_def.h @@ -40,6 +41,7 @@ set (COMPAT_HEADERS ${COMPAT_DIR}/db_set.h ${COMPAT_DIR}/db_set_function.h ${COMPAT_DIR}/dbtype_function.i + ${COMPAT_DIR}/db_vector.hpp ) set(THREAD_SOURCES diff --git a/sa/CMakeLists.txt b/sa/CMakeLists.txt index 130822f842b..8bcc573bc59 100644 --- a/sa/CMakeLists.txt +++ b/sa/CMakeLists.txt @@ -60,6 +60,7 @@ set(COMPAT_SOURCES ${COMPAT_DIR}/db_temp.c ${COMPAT_DIR}/db_value_printer.cpp ${COMPAT_DIR}/db_vdb.c + ${COMPAT_DIR}/db_vector.cpp ${COMPAT_DIR}/db_virt.c ${COMPAT_DIR}/dbtype_function.c ) diff --git a/src/compat/db_vector.cpp b/src/compat/db_vector.cpp new file mode 100644 index 00000000000..4b49bcd421c --- /dev/null +++ b/src/compat/db_vector.cpp @@ -0,0 +1,147 @@ +/* + * Copyright 2008 Search Solution Corporation + * Copyright 2016 CUBRID Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +/* + * db_vector.cpp + */ + +#include "error_code.h" +#include +#include +#include +// XXX: SHOULD BE THE LAST INCLUDE HEADER +#include "memory_wrapper.hpp" + +int +db_string_to_vector (const char *p, int str_len, float * vector, int *p_count) +{ + const char *end = p + str_len; + int count = 0; + const int number_buffer_size = 64; + char number_buffer[number_buffer_size]; + int buffer_idx; + const int max_vector_size = 2000; + + if (p == nullptr || vector == nullptr || p_count == nullptr) + { + return ER_FAILED; + } + + // Skip leading spaces and opening bracket + while (p < end && (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')) + { + p++; + } + if (p >= end || *p != '[') + { + return ER_FAILED; + } + p++; + + while (p < end && count < max_vector_size) + { + // Skip spaces before number + while (p < end && (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')) + { + p++; + } + if (p >= end) + { + return ER_FAILED; + } + // Check for closing bracket + if (*p == ']') + { + break; + } + + // Get number into buffer + buffer_idx = 0; + while (p < end && *p != ',' && *p != ']' && buffer_idx < number_buffer_size - 1) + { + if (!isspace (*p)) + { + number_buffer[buffer_idx++] = *p; + } + p++; + } + if (buffer_idx == 0 || buffer_idx >= number_buffer_size - 1) + { + return ER_FAILED; + } + number_buffer[buffer_idx] = '\0'; + + // Convert to float + char *end_ptr = nullptr; + errno = 0; + vector[count] = strtof (number_buffer, &end_ptr); + if (errno == ERANGE) + { + return ER_FAILED; + } + if (*end_ptr != '\0') + { + return ER_FAILED; + } + count++; + + // Skip spaces after number + while (p < end && (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')) + { + p++; + } + if (p >= end) + { + return ER_FAILED; + } + // Must be comma or closing bracket + if (*p == ']') + { + break; + } + else if (*p != ',') + { + return ER_FAILED; + } + p++; + } + + // Check for closing bracket + if (p >= end || *p != ']') + { + return ER_FAILED; + } + p++; + + // Skip trailing spaces + while (p < end && (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')) + { + p++; + } + if (p != end) + { + return ER_FAILED; + } + if (count == 0) + { + return ER_FAILED; + } + + *p_count = count; + return NO_ERROR; +} diff --git a/src/compat/db_vector.hpp b/src/compat/db_vector.hpp new file mode 100644 index 00000000000..d991d0c1560 --- /dev/null +++ b/src/compat/db_vector.hpp @@ -0,0 +1,30 @@ +/* + * Copyright 2008 Search Solution Corporation + * Copyright 2016 CUBRID Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +/* + * db_vector.hpp - Definitions for the vector utilities. + */ + +#ifndef _DB_VECTOR_HPP_ +#define _DB_VECTOR_HPP_ + +#ident "$Id$" + +extern int db_string_to_vector (const char *p, int str_len, float * vector, int * count); + +#endif /* _DB_VECTOR_HPP_ */ diff --git a/src/object/object_domain.c b/src/object/object_domain.c index f9198ac9475..336cf0992b8 100644 --- a/src/object/object_domain.c +++ b/src/object/object_domain.c @@ -51,6 +51,7 @@ #include "db_json.hpp" #include "string_buffer.hpp" #include "db_value_printer.hpp" +#include "db_vector.hpp" #if !defined (SERVER_MODE) #include "work_space.h" @@ -5018,119 +5019,10 @@ tp_atovector (const DB_VALUE * src, DB_VALUE * result) DB_SET *vec = NULL; DB_VALUE e_val; - if (p == NULL) - { - return ER_FAILED; - } - - // Skip leading spaces and opening bracket - while (p < end && (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')) - { - p++; - } - - if (p >= end || *p != '[') - { - return ER_FAILED; - } - p++; - - while (p < end && count < max_vector_size) - { - // Skip spaces before number - while (p < end && (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')) - { - p++; - } - - if (p >= end) - { - return ER_FAILED; - } - - // Check for closing bracket - if (*p == ']') - { - break; - } - - // Get number into buffer - buffer_idx = 0; - while (p < end && *p != ',' && *p != ']' && buffer_idx < number_buffer_size - 1) - { - if (!isspace (*p)) - { - number_buffer[buffer_idx++] = *p; - } - p++; - } - - if (buffer_idx == 0 || buffer_idx >= number_buffer_size - 1) - { - return ER_FAILED; - } - - number_buffer[buffer_idx] = '\0'; - - // Convert to float - char *end_ptr = NULL; - errno = 0; - float_array[count] = strtof (number_buffer, &end_ptr); - if (errno == ERANGE) - { - return ER_FAILED; - } - if (*end_ptr != '\0') - { - return ER_FAILED; - } - count++; - - // Skip spaces after number - while (p < end && (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')) - { - p++; - } - - if (p >= end) - { - return ER_FAILED; - } - - // Must be comma or closing bracket - if (*p == ']') - { - break; - } - else if (*p != ',') - { - return ER_FAILED; - } - p++; - } - - // Check for closing bracket - if (p >= end || *p != ']') - { + int error = db_string_to_vector(p, db_get_string_size(src), float_array, &count); + if (error != NO_ERROR) { return ER_FAILED; - } - p++; - - // Skip trailing spaces - while (p < end && (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')) - { - p++; - } - - if (p != end) - { - return ER_FAILED; - } - - if (count == 0) - { - return ER_FAILED; - } + } // Create vector and populate it vec = db_vec_create (NULL, NULL, 0); From 46fb305560618589cfca7d6d172ac10777c19bc0 Mon Sep 17 00:00:00 2001 From: Daehyun Kim Date: Mon, 20 Jan 2025 17:07:27 +0900 Subject: [PATCH 09/13] convert db_string_to_vector to cpp style --- src/compat/db_vector.cpp | 203 +++++++++++++++++++++------------------ 1 file changed, 107 insertions(+), 96 deletions(-) diff --git a/src/compat/db_vector.cpp b/src/compat/db_vector.cpp index 4b49bcd421c..7a358068506 100644 --- a/src/compat/db_vector.cpp +++ b/src/compat/db_vector.cpp @@ -21,127 +21,138 @@ */ #include "error_code.h" -#include -#include -#include +#include +#include +#include +#include +#include +#include // XXX: SHOULD BE THE LAST INCLUDE HEADER #include "memory_wrapper.hpp" -int -db_string_to_vector (const char *p, int str_len, float * vector, int *p_count) +std::optional> +db_string_to_vector (std::string_view input) { - const char *end = p + str_len; - int count = 0; - const int number_buffer_size = 64; - char number_buffer[number_buffer_size]; - int buffer_idx; - const int max_vector_size = 2000; - - if (p == nullptr || vector == nullptr || p_count == nullptr) + static constexpr size_t max_vector_size = 2000; + static constexpr size_t number_buffer_size = 64; + + // Skip leading whitespace + auto start = input.find_first_not_of (" \t\n\r"); + if (start == std::string_view::npos || input[start] != '[') { - return ER_FAILED; + return std::nullopt; } - // Skip leading spaces and opening bracket - while (p < end && (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')) + std::vector result; + result.reserve (64); // Reserve some initial capacity + + size_t pos = start + 1; + std::string number_buffer; + number_buffer.reserve (number_buffer_size); + + while (pos < input.size() && result.size() < max_vector_size) { - p++; + // Skip whitespace before number + pos = input.find_first_not_of (" \t\n\r", pos); + if (pos == std::string_view::npos) + { + return std::nullopt; + } + + // Check for closing bracket + if (input[pos] == ']') + { + break; + } + + // Extract number until comma or closing bracket + number_buffer.clear(); + size_t number_end = pos; + while (number_end < input.size() && + input[number_end] != ',' && + input[number_end] != ']' && + number_buffer.size() < number_buffer_size - 1) + { + if (!std::isspace (input[number_end])) + { + number_buffer.push_back (input[number_end]); + } + ++number_end; + } + + if (number_buffer.empty() || number_buffer.size() >= number_buffer_size - 1) + { + return std::nullopt; + } + + // Convert string to float + try + { + float value = std::stof (number_buffer); + result.push_back (value); + } + catch (const std::exception &) + { + return std::nullopt; + } + + pos = number_end; + + // Skip whitespace after number + pos = input.find_first_not_of (" \t\n\r", pos); + if (pos == std::string_view::npos) + { + return std::nullopt; + } + + // Must be comma or closing bracket + if (input[pos] == ']') + { + break; + } + if (input[pos] != ',') + { + return std::nullopt; + } + ++pos; } - if (p >= end || *p != '[') + + // Verify proper ending + if (pos >= input.size() || input[pos] != ']') { - return ER_FAILED; + return std::nullopt; } - p++; - while (p < end && count < max_vector_size) + // Check for trailing content + auto end = input.find_first_not_of (" \t\n\r", pos + 1); + if (end != std::string_view::npos || result.empty()) { - // Skip spaces before number - while (p < end && (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')) - { - p++; - } - if (p >= end) - { - return ER_FAILED; - } - // Check for closing bracket - if (*p == ']') - { - break; - } - - // Get number into buffer - buffer_idx = 0; - while (p < end && *p != ',' && *p != ']' && buffer_idx < number_buffer_size - 1) - { - if (!isspace (*p)) - { - number_buffer[buffer_idx++] = *p; - } - p++; - } - if (buffer_idx == 0 || buffer_idx >= number_buffer_size - 1) - { - return ER_FAILED; - } - number_buffer[buffer_idx] = '\0'; - - // Convert to float - char *end_ptr = nullptr; - errno = 0; - vector[count] = strtof (number_buffer, &end_ptr); - if (errno == ERANGE) - { - return ER_FAILED; - } - if (*end_ptr != '\0') - { - return ER_FAILED; - } - count++; - - // Skip spaces after number - while (p < end && (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')) - { - p++; - } - if (p >= end) - { - return ER_FAILED; - } - // Must be comma or closing bracket - if (*p == ']') - { - break; - } - else if (*p != ',') - { - return ER_FAILED; - } - p++; + return std::nullopt; } - // Check for closing bracket - if (p >= end || *p != ']') + return result; +} + +// Optional wrapper function to maintain backward compatibility +int db_string_to_vector (const char* p, int str_len, float* vector, int* p_count) +{ + if (!p || !vector || !p_count || str_len <= 0) { return ER_FAILED; } - p++; - // Skip trailing spaces - while (p < end && (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')) - { - p++; - } - if (p != end) + auto result = db_string_to_vector (std::string_view (p, static_cast (str_len))); + if (!result) { return ER_FAILED; } - if (count == 0) + + if (result->size() > static_cast (std::numeric_limits::max())) { return ER_FAILED; } - *p_count = count; + std::copy (result->begin(), result->end(), vector); + *p_count = static_cast (result->size()); return NO_ERROR; } From c5db8df0031892418b3c9583ad83f2521f7b00eb Mon Sep 17 00:00:00 2001 From: Daehyun Kim Date: Mon, 20 Jan 2025 17:32:05 +0900 Subject: [PATCH 10/13] improve readability --- src/compat/db_vector.cpp | 151 +++++++++++++++++++++++++++------------ 1 file changed, 104 insertions(+), 47 deletions(-) diff --git a/src/compat/db_vector.cpp b/src/compat/db_vector.cpp index 7a358068506..6d41e3aa654 100644 --- a/src/compat/db_vector.cpp +++ b/src/compat/db_vector.cpp @@ -17,7 +17,8 @@ */ /* - * db_vector.cpp + * @file db_vector.cpp + * @brief Implements string to vector conversion functionality */ #include "error_code.h" @@ -25,53 +26,107 @@ #include #include #include -#include #include // XXX: SHOULD BE THE LAST INCLUDE HEADER #include "memory_wrapper.hpp" +namespace +{ + /** + * Configuration constants for vector parsing + */ + constexpr size_t MAX_VECTOR_SIZE = 2000; // Maximum allowed elements + constexpr size_t NUMBER_BUFFER_SIZE = 64; // Maximum digits per number + constexpr size_t INITIAL_VECTOR_CAPACITY = 128; // Initial allocation size + constexpr std::string_view WHITESPACE = " \t\n\r"; + + /** + * @brief Checks if vector parsing should continue + * @param pos Current position in input string + * @param current_size Current vector size + * @param input Input string being parsed + * @return true if parsing should continue + */ + bool should_continue_parsing ( + size_t pos, + size_t current_size, + const std::string_view& input + ) + { + return pos < input.size() && current_size < MAX_VECTOR_SIZE; + } + + /** + * @brief Validates the proper ending of vector string + * @param pos Current position in input string + * @param input Input string being parsed + * @param result Vector being constructed + * @return true if ending is valid + */ + bool has_valid_ending ( + size_t pos, + const std::string_view& input, + const std::vector &result + ) + { + if (pos >= input.size() || input[pos] != ']') + { + return false; + } + + size_t end = input.find_first_not_of (WHITESPACE, pos + 1); + return end == std::string_view::npos && !result.empty(); + } +} // anonymous namespace + +/** + * @brief Converts a string representation of a vector to std::vector + * @param input String view containing vector in format "[n1, n2, ...]" + * @return Optional vector of floats, nullopt if parsing fails + */ std::optional> db_string_to_vector (std::string_view input) { - static constexpr size_t max_vector_size = 2000; - static constexpr size_t number_buffer_size = 64; - - // Skip leading whitespace - auto start = input.find_first_not_of (" \t\n\r"); + // Validate input starting with '[' + size_t start = input.find_first_not_of (WHITESPACE); if (start == std::string_view::npos || input[start] != '[') { return std::nullopt; } + // Initialize result vector and number buffer std::vector result; - result.reserve (64); // Reserve some initial capacity + result.reserve (INITIAL_VECTOR_CAPACITY); - size_t pos = start + 1; std::string number_buffer; - number_buffer.reserve (number_buffer_size); + number_buffer.reserve (NUMBER_BUFFER_SIZE); - while (pos < input.size() && result.size() < max_vector_size) + // Parse numbers until end of input or max size reached + size_t pos = start + 1; + while (should_continue_parsing (pos, result.size(), input)) { - // Skip whitespace before number - pos = input.find_first_not_of (" \t\n\r", pos); + // Skip leading whitespace before number + pos = input.find_first_not_of (WHITESPACE, pos); if (pos == std::string_view::npos) { return std::nullopt; } - // Check for closing bracket + // Check for end of vector if (input[pos] == ']') { break; } - // Extract number until comma or closing bracket + // Extract and parse number number_buffer.clear(); size_t number_end = pos; + + // Build number string, skipping whitespace while (number_end < input.size() && input[number_end] != ',' && input[number_end] != ']' && - number_buffer.size() < number_buffer_size - 1) + number_buffer.size() < NUMBER_BUFFER_SIZE - 1) { if (!std::isspace (input[number_end])) { @@ -80,32 +135,31 @@ db_string_to_vector (std::string_view input) ++number_end; } - if (number_buffer.empty() || number_buffer.size() >= number_buffer_size - 1) + // Validate number buffer + if (number_buffer.empty() || + number_buffer.size() >= NUMBER_BUFFER_SIZE - 1) { return std::nullopt; } - // Convert string to float + // Convert to float and add to result try { - float value = std::stof (number_buffer); - result.push_back (value); + result.push_back (std::stof (number_buffer)); } catch (const std::exception &) { return std::nullopt; } - pos = number_end; - - // Skip whitespace after number - pos = input.find_first_not_of (" \t\n\r", pos); + // Move position and check delimiter + pos = input.find_first_not_of (WHITESPACE, number_end); if (pos == std::string_view::npos) { return std::nullopt; } - // Must be comma or closing bracket + // Handle end of vector or comma separator if (input[pos] == ']') { break; @@ -117,42 +171,45 @@ db_string_to_vector (std::string_view input) ++pos; } - // Verify proper ending - if (pos >= input.size() || input[pos] != ']') - { - return std::nullopt; - } - - // Check for trailing content - auto end = input.find_first_not_of (" \t\n\r", pos + 1); - if (end != std::string_view::npos || result.empty()) - { - return std::nullopt; - } - - return result; + return has_valid_ending (pos, input, result) + ? std::make_optional (std::move (result)) + : std::nullopt; } -// Optional wrapper function to maintain backward compatibility -int db_string_to_vector (const char* p, int str_len, float* vector, int* p_count) +/** + * @brief Backward compatibility wrapper for C-style interface + * @param p Input string + * @param str_len Length of input string + * @param vector Output buffer for floats + * @param p_count Output parameter for number of floats + * @return NO_ERROR on success, ER_FAILED on failure + */ +int db_string_to_vector ( + const char *p, + int str_len, + float *vector, + int *p_count +) { + // Validate input parameters if (!p || !vector || !p_count || str_len <= 0) { return ER_FAILED; } - auto result = db_string_to_vector (std::string_view (p, static_cast (str_len))); - if (!result) - { - return ER_FAILED; - } + // Convert string to vector + std::optional> result = + db_string_to_vector (std::string_view (p, static_cast (str_len))); - if (result->size() > static_cast (std::numeric_limits::max())) + // Validate result size and copy data + if (!result || + result->size() > static_cast (std::numeric_limits::max())) { return ER_FAILED; } std::copy (result->begin(), result->end(), vector); *p_count = static_cast (result->size()); + return NO_ERROR; } From a06a768663194ff5c51d341ca3370c236e0352c9 Mon Sep 17 00:00:00 2001 From: Daehyun Kim Date: Mon, 20 Jan 2025 17:56:20 +0900 Subject: [PATCH 11/13] more readability --- src/compat/db_vector.cpp | 144 ++++++++++++++++++++++++++------------- 1 file changed, 95 insertions(+), 49 deletions(-) diff --git a/src/compat/db_vector.cpp b/src/compat/db_vector.cpp index 6d41e3aa654..e6880834d2b 100644 --- a/src/compat/db_vector.cpp +++ b/src/compat/db_vector.cpp @@ -77,24 +77,102 @@ namespace size_t end = input.find_first_not_of (WHITESPACE, pos + 1); return end == std::string_view::npos && !result.empty(); } -} // anonymous namespace -/** - * @brief Converts a string representation of a vector to std::vector - * @param input String view containing vector in format "[n1, n2, ...]" - * @return Optional vector of floats, nullopt if parsing fails - */ + /** + * @brief Extracts and validates a number from the input string + * @param input Input string being parsed + * @param pos Current position in input + * @param number_buffer Buffer to store the number string + * @return Position after the number, or npos if invalid + */ + size_t extract_number ( + const std::string_view& input, + size_t pos, + std::string& number_buffer + ) + { + number_buffer.clear(); + size_t number_end = pos; + + while (number_end < input.size() && + input[number_end] != ',' && + input[number_end] != ']' && + number_buffer.size() < NUMBER_BUFFER_SIZE - 1) + { + if (!std::isspace (input[number_end])) + { + number_buffer.push_back (input[number_end]); + } + ++number_end; + } + + return (number_buffer.empty() || + number_buffer.size() >= NUMBER_BUFFER_SIZE - 1) + ? std::string_view::npos + : number_end; + } + + /** + * @brief Converts string to float and adds to result vector + * @param number_str String containing the number + * @param result Vector to append the number to + * @return true if conversion successful + */ + bool parse_and_add_number ( + const std::string& number_str, + std::vector &result + ) + { + try + { + result.push_back (std::stof (number_str)); + return true; + } + catch (const std::exception &) + { + return false; + } + } + + /** + * @brief Checks for valid delimiter after number + * @param input Input string being parsed + * @param pos Position to check + * @return Next position to parse, or npos if invalid + */ + size_t validate_delimiter ( + const std::string_view& input, + size_t pos + ) + { + pos = input.find_first_not_of (WHITESPACE, pos); + if (pos == std::string_view::npos) + { + return std::string_view::npos; + } + + if (input[pos] == ']') + { + return pos; + } + if (input[pos] != ',') + { + return std::string_view::npos; + } + return pos + 1; + } +} + std::optional> db_string_to_vector (std::string_view input) { // Validate input starting with '[' - size_t start = input.find_first_not_of (WHITESPACE); - if (start == std::string_view::npos || input[start] != '[') + size_t pos = input.find_first_not_of (WHITESPACE); + if (pos == std::string_view::npos || input[pos] != '[') { return std::nullopt; } - // Initialize result vector and number buffer std::vector result; result.reserve (INITIAL_VECTOR_CAPACITY); @@ -102,73 +180,41 @@ db_string_to_vector (std::string_view input) number_buffer.reserve (NUMBER_BUFFER_SIZE); // Parse numbers until end of input or max size reached - size_t pos = start + 1; + pos = pos + 1; while (should_continue_parsing (pos, result.size(), input)) { - // Skip leading whitespace before number + // Skip leading whitespace and check for end pos = input.find_first_not_of (WHITESPACE, pos); if (pos == std::string_view::npos) { return std::nullopt; } - - // Check for end of vector if (input[pos] == ']') { break; } - // Extract and parse number - number_buffer.clear(); - size_t number_end = pos; - - // Build number string, skipping whitespace - while (number_end < input.size() && - input[number_end] != ',' && - input[number_end] != ']' && - number_buffer.size() < NUMBER_BUFFER_SIZE - 1) - { - if (!std::isspace (input[number_end])) - { - number_buffer.push_back (input[number_end]); - } - ++number_end; - } - - // Validate number buffer - if (number_buffer.empty() || - number_buffer.size() >= NUMBER_BUFFER_SIZE - 1) + size_t number_end = extract_number (input, pos, number_buffer); + if (number_end == std::string_view::npos) { return std::nullopt; } - // Convert to float and add to result - try - { - result.push_back (std::stof (number_buffer)); - } - catch (const std::exception &) + if (!parse_and_add_number (number_buffer, result)) { return std::nullopt; } - // Move position and check delimiter - pos = input.find_first_not_of (WHITESPACE, number_end); + // Validate and move past delimiter + pos = validate_delimiter (input, number_end); if (pos == std::string_view::npos) { return std::nullopt; } - - // Handle end of vector or comma separator - if (input[pos] == ']') + if (input[pos - 1] == ']') { break; } - if (input[pos] != ',') - { - return std::nullopt; - } - ++pos; } return has_valid_ending (pos, input, result) From e36cfc1dba9b2552fceb0d656f2f81cdf9cea6b7 Mon Sep 17 00:00:00 2001 From: Daehyun Kim Date: Tue, 21 Jan 2025 01:52:15 +0900 Subject: [PATCH 12/13] feat!(db_vector.cpp): use rapidjson parser --- src/compat/db_vector.cpp | 253 +++++---------------------------------- 1 file changed, 31 insertions(+), 222 deletions(-) diff --git a/src/compat/db_vector.cpp b/src/compat/db_vector.cpp index e6880834d2b..4054e0cb774 100644 --- a/src/compat/db_vector.cpp +++ b/src/compat/db_vector.cpp @@ -23,239 +23,48 @@ #include "error_code.h" #include -#include -#include -#include -#include +#include "rapidjson/document.h" // XXX: SHOULD BE THE LAST INCLUDE HEADER #include "memory_wrapper.hpp" -namespace -{ - /** - * Configuration constants for vector parsing - */ - constexpr size_t MAX_VECTOR_SIZE = 2000; // Maximum allowed elements - constexpr size_t NUMBER_BUFFER_SIZE = 64; // Maximum digits per number - constexpr size_t INITIAL_VECTOR_CAPACITY = 128; // Initial allocation size - constexpr std::string_view WHITESPACE = " \t\n\r"; - /** - * @brief Checks if vector parsing should continue - * @param pos Current position in input string - * @param current_size Current vector size - * @param input Input string being parsed - * @return true if parsing should continue - */ - bool should_continue_parsing ( - size_t pos, - size_t current_size, - const std::string_view& input - ) - { - return pos < input.size() && current_size < MAX_VECTOR_SIZE; - } - - /** - * @brief Validates the proper ending of vector string - * @param pos Current position in input string - * @param input Input string being parsed - * @param result Vector being constructed - * @return true if ending is valid - */ - bool has_valid_ending ( - size_t pos, - const std::string_view& input, - const std::vector &result - ) - { - if (pos >= input.size() || input[pos] != ']') - { - return false; - } - - size_t end = input.find_first_not_of (WHITESPACE, pos + 1); - return end == std::string_view::npos && !result.empty(); - } - - /** - * @brief Extracts and validates a number from the input string - * @param input Input string being parsed - * @param pos Current position in input - * @param number_buffer Buffer to store the number string - * @return Position after the number, or npos if invalid - */ - size_t extract_number ( - const std::string_view& input, - size_t pos, - std::string& number_buffer - ) - { - number_buffer.clear(); - size_t number_end = pos; - - while (number_end < input.size() && - input[number_end] != ',' && - input[number_end] != ']' && - number_buffer.size() < NUMBER_BUFFER_SIZE - 1) - { - if (!std::isspace (input[number_end])) - { - number_buffer.push_back (input[number_end]); - } - ++number_end; - } - - return (number_buffer.empty() || - number_buffer.size() >= NUMBER_BUFFER_SIZE - 1) - ? std::string_view::npos - : number_end; - } - - /** - * @brief Converts string to float and adds to result vector - * @param number_str String containing the number - * @param result Vector to append the number to - * @return true if conversion successful - */ - bool parse_and_add_number ( - const std::string& number_str, - std::vector &result - ) - { - try - { - result.push_back (std::stof (number_str)); - return true; - } - catch (const std::exception &) - { - return false; - } - } - - /** - * @brief Checks for valid delimiter after number - * @param input Input string being parsed - * @param pos Position to check - * @return Next position to parse, or npos if invalid - */ - size_t validate_delimiter ( - const std::string_view& input, - size_t pos - ) - { - pos = input.find_first_not_of (WHITESPACE, pos); - if (pos == std::string_view::npos) - { - return std::string_view::npos; - } - - if (input[pos] == ']') - { - return pos; - } - if (input[pos] != ',') - { - return std::string_view::npos; - } - return pos + 1; - } -} - -std::optional> -db_string_to_vector (std::string_view input) -{ - // Validate input starting with '[' - size_t pos = input.find_first_not_of (WHITESPACE); - if (pos == std::string_view::npos || input[pos] != '[') - { - return std::nullopt; +int db_string_to_vector( + const char* p, + int str_len, + float* vector, + int* p_count +) { + // Validate input parameters + if (!p || !vector || !p_count || str_len <= 0) { + return ER_FAILED; } - std::vector result; - result.reserve (INITIAL_VECTOR_CAPACITY); - - std::string number_buffer; - number_buffer.reserve (NUMBER_BUFFER_SIZE); - - // Parse numbers until end of input or max size reached - pos = pos + 1; - while (should_continue_parsing (pos, result.size(), input)) - { - // Skip leading whitespace and check for end - pos = input.find_first_not_of (WHITESPACE, pos); - if (pos == std::string_view::npos) - { - return std::nullopt; - } - if (input[pos] == ']') - { - break; - } - - size_t number_end = extract_number (input, pos, number_buffer); - if (number_end == std::string_view::npos) - { - return std::nullopt; - } - - if (!parse_and_add_number (number_buffer, result)) - { - return std::nullopt; - } - - // Validate and move past delimiter - pos = validate_delimiter (input, number_end); - if (pos == std::string_view::npos) - { - return std::nullopt; - } - if (input[pos - 1] == ']') - { - break; - } + // Parse without modifying the length (fixes const assignment error) + rapidjson::Document doc; + rapidjson::ParseResult result = doc.Parse(p, static_cast(str_len)); + if (!result) { + return ER_FAILED; } - return has_valid_ending (pos, input, result) - ? std::make_optional (std::move (result)) - : std::nullopt; -} - -/** - * @brief Backward compatibility wrapper for C-style interface - * @param p Input string - * @param str_len Length of input string - * @param vector Output buffer for floats - * @param p_count Output parameter for number of floats - * @return NO_ERROR on success, ER_FAILED on failure - */ -int db_string_to_vector ( - const char *p, - int str_len, - float *vector, - int *p_count -) -{ - // Validate input parameters - if (!p || !vector || !p_count || str_len <= 0) - { - return ER_FAILED; + // Check if root is an array + if (!doc.IsArray()) { + return ER_FAILED; } - // Convert string to vector - std::optional> result = - db_string_to_vector (std::string_view (p, static_cast (str_len))); - - // Validate result size and copy data - if (!result || - result->size() > static_cast (std::numeric_limits::max())) - { - return ER_FAILED; + // Check array size + size_t size = doc.Size(); + if (size > static_cast(std::numeric_limits::max())) { + return ER_FAILED; } - std::copy (result->begin(), result->end(), vector); - *p_count = static_cast (result->size()); + // Convert each element to float + for (size_t i = 0; i < size; i++) { + if (!doc[i].IsNumber()) { + return ER_FAILED; + } + vector[i] = static_cast(doc[i].GetDouble()); + } - return NO_ERROR; + *p_count = static_cast(size); + return NO_ERROR; } From 9f8b03c68ab2a32870540245f36b8ff090139c45 Mon Sep 17 00:00:00 2001 From: Daehyun Kim Date: Tue, 21 Jan 2025 12:36:54 +0900 Subject: [PATCH 13/13] style: format and isnan check --- src/compat/db_vector.cpp | 75 ++++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 29 deletions(-) diff --git a/src/compat/db_vector.cpp b/src/compat/db_vector.cpp index 4054e0cb774..9dd59bd72b6 100644 --- a/src/compat/db_vector.cpp +++ b/src/compat/db_vector.cpp @@ -22,49 +22,66 @@ */ #include "error_code.h" +#include #include #include "rapidjson/document.h" // XXX: SHOULD BE THE LAST INCLUDE HEADER #include "memory_wrapper.hpp" -int db_string_to_vector( - const char* p, - int str_len, - float* vector, - int* p_count -) { - // Validate input parameters - if (!p || !vector || !p_count || str_len <= 0) { - return ER_FAILED; +int db_string_to_vector ( + const char *p, + int str_len, + float *vector, + int *p_count +) +{ + // Validate input parameters + if (!p || !vector || !p_count || str_len <= 0) + { + return ER_FAILED; } - // Parse without modifying the length (fixes const assignment error) - rapidjson::Document doc; - rapidjson::ParseResult result = doc.Parse(p, static_cast(str_len)); - if (!result) { - return ER_FAILED; + // Parse without modifying the length (fixes const assignment error) + rapidjson::Document doc; + rapidjson::ParseResult result = doc.Parse (p, static_cast (str_len)); + if (!result) + { + return ER_FAILED; } - // Check if root is an array - if (!doc.IsArray()) { - return ER_FAILED; + // Check if root is an array + if (!doc.IsArray()) + { + return ER_FAILED; } - // Check array size - size_t size = doc.Size(); - if (size > static_cast(std::numeric_limits::max())) { - return ER_FAILED; + // Check array size + size_t size = doc.Size(); + if (size > static_cast (std::numeric_limits::max())) + { + return ER_FAILED; } - // Convert each element to float - for (size_t i = 0; i < size; i++) { - if (!doc[i].IsNumber()) { - return ER_FAILED; - } - vector[i] = static_cast(doc[i].GetDouble()); + // Convert each element to float + for (size_t i = 0; i < size; i++) + { + if (!doc[i].IsNumber()) + { + return ER_FAILED; + } + + float num = doc[i].GetFloat(); + + if (std::isinf (num) || std::isnan (num)) + { + return ER_FAILED; + } + + vector[i] = num; + } - *p_count = static_cast(size); - return NO_ERROR; + *p_count = static_cast (size); + return NO_ERROR; }