-
Notifications
You must be signed in to change notification settings - Fork 126
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[CUBVEC-24] Convert CHAR types to DB Vector in tp_value_cast #5771
base: cubvec/cubvec
Are you sure you want to change the base?
Changes from all commits
a7d4bf1
e71a0a2
9924b86
ad73a80
2bbfce7
69b03f1
d3c71b5
0850adb
46fb305
c5db8df
a06a768
e36cfc1
9f8b03c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -296,6 +296,66 @@ db_seq_create (MOP classop, const char *name, int size) | |
return (set); | ||
} | ||
|
||
/* | ||
* db_vec_create() - This function creates an empty vector. The class and | ||
* name arguments can be set to NULL. If values are supplied, a check will | ||
* be made to make sure that the attribute was defined with the vector | ||
* domain. | ||
* return : a set (vector) descriptor | ||
* classop(in): class or instance | ||
* name(in): attribute name | ||
* size(in): initial size | ||
* | ||
* note : The new set will not be attached to any object, so you must use the | ||
* db_put( ) function to assign it as the value of an attribute. If the size | ||
* is not known, it is permissible to pass zero. | ||
*/ | ||
DB_SET * | ||
db_vec_create (MOP classop, const char *name, int size) | ||
{ | ||
DB_SET *set; | ||
Comment on lines
+309
to
+316
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 이 부분은 보조 함수이니, 리뷰에 공을 많이 들이지 않으셔도 될 것 같습니다. 기존의 db_seq_create() 함수를 완전히 모방하였으며, 기존에는 set_create_vector() 함수만으로 진행하려 계획이었으나 |
||
#if !defined(SERVER_MODE) | ||
int error = NO_ERROR; | ||
#endif | ||
|
||
CHECK_CONNECT_NULL (); | ||
|
||
set = NULL; | ||
if (classop == NULL || name == NULL) | ||
{ | ||
set = set_create_vector (size); | ||
} | ||
else | ||
{ | ||
#if !defined(SERVER_MODE) | ||
SM_CLASS *class_; | ||
SM_ATTRIBUTE *att; | ||
|
||
if (au_fetch_class (classop, &class_, AU_FETCH_READ, AU_SELECT) == NO_ERROR) | ||
{ | ||
att = classobj_find_attribute (class_, name, 0); | ||
if (att == NULL) | ||
{ | ||
ERROR_SET1 (error, ER_OBJ_INVALID_ATTRIBUTE, name); | ||
} | ||
else | ||
{ | ||
if (att->type->id == DB_TYPE_VECTOR) | ||
{ | ||
set = set_create_vector (size); | ||
} | ||
else | ||
{ | ||
ERROR_SET1 (error, ER_OBJ_DOMAIN_CONFLICT, name); | ||
} | ||
} | ||
} | ||
#endif | ||
} | ||
|
||
return (set); | ||
} | ||
|
||
/* | ||
* db_set_free() - This function frees a set handle. If the set is owned by an | ||
* object, the contents of the set are not freed, only the set handle is | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
/* | ||
* Copyright 2008 Search Solution Corporation | ||
* Copyright 2016 CUBRID Corporation | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
* | ||
*/ | ||
|
||
/* | ||
* @file db_vector.cpp | ||
* @brief Implements string to vector conversion functionality | ||
*/ | ||
|
||
#include "error_code.h" | ||
#include <cmath> | ||
#include <limits> | ||
#include "rapidjson/document.h" | ||
// XXX: SHOULD BE THE LAST INCLUDE HEADER | ||
#include "memory_wrapper.hpp" | ||
|
||
|
||
int db_string_to_vector ( | ||
const char *p, | ||
int str_len, | ||
float *vector, | ||
int *p_count | ||
) | ||
{ | ||
// Validate input parameters | ||
if (!p || !vector || !p_count || str_len <= 0) | ||
{ | ||
return ER_FAILED; | ||
} | ||
|
||
// Parse without modifying the length (fixes const assignment error) | ||
rapidjson::Document doc; | ||
rapidjson::ParseResult result = doc.Parse (p, static_cast<size_t> (str_len)); | ||
if (!result) | ||
{ | ||
return ER_FAILED; | ||
} | ||
|
||
// Check if root is an array | ||
if (!doc.IsArray()) | ||
{ | ||
return ER_FAILED; | ||
} | ||
|
||
// Check array size | ||
size_t size = doc.Size(); | ||
if (size > static_cast<size_t> (std::numeric_limits<int>::max())) | ||
{ | ||
return ER_FAILED; | ||
} | ||
|
||
// Convert each element to float | ||
for (size_t i = 0; i < size; i++) | ||
{ | ||
if (!doc[i].IsNumber()) | ||
{ | ||
return ER_FAILED; | ||
} | ||
|
||
float num = doc[i].GetFloat(); | ||
|
||
Comment on lines
+74
to
+75
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 내부구현은 GetDouble 후 static_cast 입니다. 엄밀한 검사를 위하면 lossless get float을 사용하라는 주석이 rapidjson에 있습니다. |
||
if (std::isinf (num) || std::isnan (num)) | ||
{ | ||
return ER_FAILED; | ||
} | ||
|
||
vector[i] = num; | ||
|
||
} | ||
|
||
*p_count = static_cast<int> (size); | ||
return NO_ERROR; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
/* | ||
* Copyright 2008 Search Solution Corporation | ||
* Copyright 2016 CUBRID Corporation | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
* | ||
*/ | ||
|
||
/* | ||
* db_vector.hpp - Definitions for the vector utilities. | ||
*/ | ||
|
||
#ifndef _DB_VECTOR_HPP_ | ||
#define _DB_VECTOR_HPP_ | ||
|
||
#ident "$Id$" | ||
|
||
extern int db_string_to_vector (const char *p, int str_len, float * vector, int * count); | ||
|
||
#endif /* _DB_VECTOR_HPP_ */ |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,6 +51,7 @@ | |
#include "db_json.hpp" | ||
#include "string_buffer.hpp" | ||
#include "db_value_printer.hpp" | ||
#include "db_vector.hpp" | ||
|
||
#if !defined (SERVER_MODE) | ||
#include "work_space.h" | ||
|
@@ -581,6 +582,7 @@ static int tp_atodatetimetz (const DB_VALUE * src, DB_DATETIMETZ * temp); | |
static int tp_atonumeric (const DB_VALUE * src, DB_VALUE * temp); | ||
static int tp_atof (const DB_VALUE * src, double *num_value, DB_DATA_STATUS * data_stat); | ||
static int tp_atobi (const DB_VALUE * src, DB_BIGINT * num_value, DB_DATA_STATUS * data_stat); | ||
static int tp_atovector (DB_VALUE const *src, DB_VALUE * result); | ||
#if defined(ENABLE_UNUSED_FUNCTION) | ||
static char *tp_itoa (int value, char *string, int radix); | ||
#endif | ||
|
@@ -4994,6 +4996,55 @@ tp_atof (const DB_VALUE * src, double *num_value, DB_DATA_STATUS * data_stat) | |
return status; | ||
} | ||
|
||
/* | ||
* tp_str_to_vector - Coerce a string to a vector. | ||
Comment on lines
+4999
to
+5000
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 이 함수를 특히 면밀히 리뷰를 해주시면 감사하겠습니다. 감사합니다. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 이 함수의 구현은 db_date.c에 구현되어 있는 db_string_to_XXX 와 비슷한 기능 목적으로 보여서, 동일한 루틴을 가져가도 좋을 것 같습니다. 동일한 파일의 다음 두가지에 대해서 검토해볼 수 있을 것 같습니다
option 1 또는 option 2 진행 후에는 이 함수가 잘 정리될 것 같습니다.
문자열은 float array를 가지므로, 간단히 위 구현으로 충분하려면 intl_skip_spaces ()에서 INTL_CODESET_KSC5601_EUC (= EUC-KR) 에서 확인하고 있는 0xa1a1에 대해서 다음의 결정이 필요해보입니다.
이슈 진행 과정에서 예외 사항에 대한 최소한의 확인으로 진행하기로 했기 때문에 제 의견은 0xa1a1에 대해서는 유효하지 않은 문자라고 생각하고 char_isspace() 만으로만 공백을 처리하는 것이 좋을 것 같습니다. 의견 부탁드립니다 :-) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 정말 중요한 조언 감사드립니다. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
* return: NO_ERROR or error code. | ||
* src(in): string DB_VALUE | ||
* result(out): vector DB_VALUE | ||
* Note: | ||
* Accepts strings that are not null terminated. Don't call this unless | ||
* src is a string db_value. | ||
*/ | ||
static int | ||
tp_atovector (const DB_VALUE * src, DB_VALUE * result) | ||
{ | ||
const char *p = db_get_string (src); | ||
const char *end = p + db_get_string_size (src); | ||
int count = 0; | ||
const int number_buffer_size = 64; | ||
char number_buffer[number_buffer_size]; | ||
int buffer_idx; | ||
Comment on lines
+5014
to
+5016
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. object_domain.c 코드 전반에서 64라는 값을 버퍼의 최대 값으로 설정하는 함수들을 벤치마킹했습니다. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 64자로 지정하면 숫자 문자열을 63자까지만 처리하다는 의미죠 ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 네, 맞습니다. 검토 결과, 뒤에 '\0'을 넣을 공간을 확보하기 위해 |
||
const int max_vector_size = 2000; | ||
float float_array[max_vector_size]; | ||
Comment on lines
+5017
to
+5018
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. float 배열의 최대 크기를 2000 으로 설정했습니다. |
||
DB_SET *vec = NULL; | ||
DB_VALUE e_val; | ||
|
||
int error = db_string_to_vector(p, db_get_string_size(src), float_array, &count); | ||
if (error != NO_ERROR) { | ||
return ER_FAILED; | ||
} | ||
Comment on lines
+5022
to
+5025
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 기존의 긴 구현을 db_vector.cpp로 분리시켰습니다. |
||
|
||
// Create vector and populate it | ||
vec = db_vec_create (NULL, NULL, 0); | ||
if (vec == NULL) | ||
{ | ||
assert (er_errid () != NO_ERROR); | ||
return er_errid (); | ||
} | ||
|
||
db_make_vector (result, vec); | ||
for (int i = 0; i < count; ++i) | ||
{ | ||
db_make_float (&e_val, float_array[i]); | ||
if (db_seq_put (db_get_set (result), i, &e_val) != NO_ERROR) | ||
{ | ||
return ER_FAILED; | ||
} | ||
} | ||
|
||
return NO_ERROR; | ||
} | ||
|
||
/* | ||
* tp_atobi - Coerce a string to a bigint. | ||
* return: NO_ERROR or error code | ||
|
@@ -9125,6 +9176,24 @@ tp_value_cast_internal (const DB_VALUE * src, DB_VALUE * dest, const TP_DOMAIN * | |
} | ||
break; | ||
|
||
case DB_TYPE_VECTOR: | ||
switch (original_type) | ||
{ | ||
case DB_TYPE_CHAR: | ||
case DB_TYPE_VARCHAR: | ||
case DB_TYPE_NCHAR: | ||
case DB_TYPE_VARNCHAR: | ||
{ | ||
Comment on lines
+9182
to
+9186
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 이 부분에 대한 확신이 없습니다. 죄송합니다. 면밀히 리뷰 부탁드립니다. 현재 테스트 케이스는 DB_TYPE_CHAR 만으로도 통과하고 있습니다. 다른 테이블의 varchar 값을 vector로 변환하는 과정에서 나머지 3개의 case들이 필요할 것으로 추정하고 있습니다. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. DB_TYPE_NCHAR와 DB_TYPE_VARNCHAR는 제거하는 것은 어떨까요 ? |
||
|
||
err = tp_atovector (src, target); | ||
break; | ||
|
||
} | ||
default: | ||
status = DOMAIN_INCOMPATIBLE; | ||
break; | ||
} | ||
break; | ||
case DB_TYPE_VOBJ: | ||
if (original_type == DB_TYPE_VOBJ) | ||
{ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
sa 모드에는 COMPAT_HEADERS 파일을 추가하는 부분이 없습니다.
따라서 db_vector.hpp를 추가하지 않았고, 그럼에도 정상 동작합니다.
사실 코드를 살펴보니 cs/CMakeLists.txt 파일에서도 COMPAT_HEADERS가 전혀 사용되지 않고 있기 때문에, COMPAT_HEADERS를 전부 삭제해도 괜찮은 것으로 분석했습니다.
다만 해당 이슈에서는 고려하지 않겠습니다.