Skip to content

Commit

Permalink
Add rdma support (#1903)
Browse files Browse the repository at this point in the history
Fixes #1895

Signed-off-by: vegetableysm <[email protected]>
Signed-off-by: Ye Cao <[email protected]>
  • Loading branch information
vegetableysm authored Jul 19, 2024
1 parent 1e02fce commit 82d7378
Show file tree
Hide file tree
Showing 37 changed files with 3,967 additions and 232 deletions.
7 changes: 7 additions & 0 deletions .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,13 @@ jobs:
protobuf-compiler-grpc \
python3-pip \
uuid-dev \
automake \
libtool \
librdmacm-dev \
libibverbs-dev \
libnuma-dev \
libnl-3-dev \
libnl-genl-3-dev \
wget
# install apache-arrow
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,6 @@
[submodule "modules/graph/thirdparty/powturbo"]
path = modules/graph/thirdparty/powturbo
url = https://github.com/powturbo/TurboPFor-Integer-Compression.git
# libfabric sources used for RDMA support; the top-level CMakeLists.txt
# configures and builds this checkout into a static libfabric.a on Linux.
[submodule "thirdparty/libfabric"]
path = thirdparty/libfabric
url = https://github.com/ofiwg/libfabric.git
66 changes: 66 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,57 @@ add_custom_target(vineyard_codegen_java)
include("cmake/GenerateVineyard.cmake")
include("cmake/GenerateVineyardJava.cmake")

# RDMA support (Linux only).
#
# Locates the system libraries libfabric depends on, then builds the bundled
# libfabric submodule (thirdparty/libfabric) with its own autotools build and
# copies the resulting static archive into CMAKE_ARCHIVE_OUTPUT_DIRECTORY.
#
# Variables defined for later use in this file:
#   RDMA_LIBS                 - system libraries to link next to libfabric
#   LIBFABRIC                 - full path of the copied libfabric.a
#   LIBFABRIC_VERSION_SCRIPT  - linker version script passed when linking
# Targets defined:
#   fabric_configure          - runs autogen.sh + configure once
#   fabric                    - runs make and copies libfabric.a
#
# Configuration fails with FATAL_ERROR if any system dependency is missing.
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
  find_library(RDMACM_LIB rdmacm)
  find_library(IBVERBS_LIB ibverbs)
  find_library(NL3_LIB nl-3)
  find_library(NL_GENL3_LIB nl-genl-3)
  find_library(UUID_LIB uuid)
  find_library(NUMA_LIB numa)
  find_library(RT_LIB rt)

  if(IBVERBS_LIB AND RDMACM_LIB AND NL3_LIB AND NL_GENL3_LIB AND UUID_LIB AND NUMA_LIB AND RT_LIB)
    set(RDMA_LIBS ${RDMACM_LIB} ${IBVERBS_LIB} ${NL3_LIB} ${NL_GENL3_LIB} ${UUID_LIB} ${NUMA_LIB} ${RT_LIB})

    # Use PROJECT_SOURCE_DIR throughout so the paths stay valid when this
    # project is consumed through add_subdirectory() by a parent build.
    set(FABRIC_SOURCE_DIR "${PROJECT_SOURCE_DIR}/thirdparty/libfabric")
    set(FABRIC_MAKEFILE "${FABRIC_SOURCE_DIR}/Makefile")
    set(LIBFABRIC "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}/libfabric.a")
    set(LIBFABRIC_VERSION_SCRIPT "${PROJECT_SOURCE_DIR}/src/common/rdma/libfabric.map")

    # Resolve the parallelism at configure time. A literal "$(nproc)" in a
    # COMMAND is generator dependent: Makefile generators may hand it to make
    # as an (empty) make variable, which turns "-j$(nproc)" into an unbounded
    # "-j".
    include(ProcessorCount)
    ProcessorCount(FABRIC_BUILD_JOBS)
    if(FABRIC_BUILD_JOBS EQUAL 0)
      # ProcessorCount reports 0 when the count cannot be determined.
      set(FABRIC_BUILD_JOBS 1)
    endif()

    # NOTE: VERBATIM is intentionally not used for the commands below; they
    # rely on the shell interpreting "> /dev/null" as a redirection.
    add_custom_command(
      OUTPUT ${FABRIC_MAKEFILE}
      COMMAND ./autogen.sh > /dev/null
      COMMAND ./configure --disable-efa > /dev/null
      WORKING_DIRECTORY ${FABRIC_SOURCE_DIR}
    )
    add_custom_target(
      fabric_configure
      DEPENDS ${FABRIC_MAKEFILE}
    )
    add_custom_target(
      fabric ALL
      COMMAND make CFLAGS+="-fPIC" -j${FABRIC_BUILD_JOBS} > /dev/null
      COMMAND ${CMAKE_COMMAND} -E copy_if_different
              ${FABRIC_SOURCE_DIR}/src/.libs/libfabric.a ${LIBFABRIC}
      # Declare the archive as a product of this target so that generators
      # which require a rule for every linked file (Ninja) can resolve it.
      BYPRODUCTS ${LIBFABRIC}
      DEPENDS fabric_configure
      WORKING_DIRECTORY ${FABRIC_SOURCE_DIR}
    )

    # FILES_MATCHING is required for PATTERN to act as a filter; without it
    # every file below include/ is installed, not only the headers.
    # The destination is relative, so it is resolved against the install
    # prefix at install time.
    install(DIRECTORY "${FABRIC_SOURCE_DIR}/include"
            DESTINATION include/vineyard/contrib/libfabric
            FILES_MATCHING
            PATTERN "*.h"
    )
    include_directories(BEFORE SYSTEM
      $<BUILD_INTERFACE:${FABRIC_SOURCE_DIR}/include>
      $<INSTALL_INTERFACE:include/vineyard/contrib>
    )
    list(APPEND VINEYARD_INSTALL_LIBS ${LIBFABRIC})

    set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES
                 ${LIBFABRIC})
  else()
    # Print the lookup results; entries ending in -NOTFOUND are the missing ones.
    message("${RDMACM_LIB} ${IBVERBS_LIB} ${NL3_LIB} ${NL_GENL3_LIB} ${UUID_LIB} ${NUMA_LIB} ${RT_LIB}")
    message(FATAL_ERROR "Missing rdma dependencies, please install them and retry: rdmacm, ibverbs, libnl-3, libnl-genl-3, libuuid, libnuma, librt")
  endif()
endif()

# boost is only required by some components
if(BUILD_VINEYARD_SERVER OR BUILD_VINEYARD_IO OR BUILD_VINEYARD_GRAPH)
find_boost()
Expand Down Expand Up @@ -696,6 +747,7 @@ if(BUILD_VINEYARD_SERVER)
"src/common/memory/*.cc"
"src/common/util/*.cc"
"src/common/memory/gpu/*.cc"
"src/common/rdma/*.cc"
)
if(BUILD_VINEYARD_SERVER_REDIS)
list(APPEND SERVER_SRC_FILES "thirdparty/redis-plus-plus-shim/recipes/redlock.cpp")
Expand Down Expand Up @@ -749,6 +801,12 @@ if(BUILD_VINEYARD_SERVER)
target_link_libraries(vineyardd PRIVATE ${LIBUNWIND_LIBRARIES})
endif()

# Linux only: link vineyardd against the bundled libfabric archive and the
# system RDMA libraries, and make sure libfabric is built first.
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
  target_link_libraries(vineyardd PRIVATE ${LIBFABRIC} ${RDMA_LIBS})
  # Append to LINK_FLAGS instead of replacing it, so that link flags set on
  # this target elsewhere are not silently discarded. The leading space keeps
  # the appended option separated from any existing value.
  set_property(TARGET vineyardd APPEND_STRING PROPERTY
               LINK_FLAGS " -Wl,--version-script=${LIBFABRIC_VERSION_SCRIPT}")
  # ${LIBFABRIC} is a plain file path, so the build-order dependency on the
  # target that produces it has to be stated explicitly.
  add_dependencies(vineyardd fabric)
endif()

install_vineyard_target(vineyardd)
if(NOT BUILD_SHARED_LIBS)
if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND NOT CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
Expand Down Expand Up @@ -797,6 +855,7 @@ if(BUILD_VINEYARD_CLIENT)
"src/common/memory/*.cc"
"src/common/util/*.cc"
"src/common/memory/gpu/*.cc"
"src/common/rdma/*.cc"
)
# the vineyard_client can only be a shared library, since the ObjectFactory
# is a singleton.
Expand All @@ -821,6 +880,12 @@ if(BUILD_VINEYARD_CLIENT)
target_link_libraries(vineyard_client PRIVATE ${LIBUNWIND_LIBRARIES})
endif()

# Linux only: link vineyard_client against the bundled libfabric archive and
# the system RDMA libraries, and make sure libfabric is built first.
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
  target_link_libraries(vineyard_client PRIVATE ${LIBFABRIC} ${RDMA_LIBS})
  # Append to LINK_FLAGS instead of replacing it, so that link flags set on
  # this target elsewhere are not silently discarded. The leading space keeps
  # the appended option separated from any existing value.
  set_property(TARGET vineyard_client APPEND_STRING PROPERTY
               LINK_FLAGS " -Wl,--version-script=${LIBFABRIC_VERSION_SCRIPT}")
  # ${LIBFABRIC} is a plain file path, so the build-order dependency on the
  # target that produces it has to be stated explicitly.
  add_dependencies(vineyard_client fabric)
endif()

if(BUILD_VINEYARD_CLIENT_VERBOSE)
target_compile_options(vineyard_client PRIVATE -DWITH_VERBOSE)
endif()
Expand Down Expand Up @@ -881,6 +946,7 @@ macro(setup_pybind11_module target relpath)
target_add_link_options(${target} PRIVATE OPTIONS -Os)
endif()

file(RELATIVE_PATH RELATIVE_BUILD_PATH "${PROJECT_SOURCE_DIR}/python/vineyard" "${CMAKE_BINARY_DIR}/shared-lib")
if(UNIX AND NOT APPLE)
set_target_properties(${target} PROPERTIES
BUILD_WITH_INSTALL_RPATH TRUE
Expand Down
6 changes: 3 additions & 3 deletions modules/llm-cache/storage/file_storage.cc
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ Status FileStorage::Update(
upper_bound += 1;
if (createFileSet.find(this->rootPath + pathList[i]) !=
createFileSet.end()) {
TouchFile(this->rootPath + pathList[i]);
VINEYARD_DISCARD(TouchFile(this->rootPath + pathList[i]));
}
} else {
break;
Expand Down Expand Up @@ -383,7 +383,7 @@ Status FileStorage::Update(
createFileSet.find(this->rootPath + pathList[i]) !=
createFileSet.end()) {
// Only this part is created.
TouchFile(this->rootPath + pathList[i]);
VINEYARD_DISCARD(TouchFile(this->rootPath + pathList[i]));
}
} else {
break;
Expand Down Expand Up @@ -787,7 +787,7 @@ Status FileStorage::GlobalGCFunc() {
VLOG(100) << "Global GC: " << path << " is dead!";
VLOG(100) << "Access time: " << GetTimestamp(accessTime);
VLOG(100) << "Now: " << GetTimestamp(nanoseconds_since_epoch);
Delete(path);
VINEYARD_DISCARD(Delete(path));
} else {
VLOG(100) << "Global GC: " << path << " is alive!";
VLOG(100) << "Access time: " << GetTimestamp(accessTime);
Expand Down
5 changes: 3 additions & 2 deletions modules/llm-cache/storage/local_file_storage.cc
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ Status LocalFileStorage::TouchFile(const std::string& path) {
auto now_nano = std::chrono::duration_cast<std::chrono::nanoseconds>(
now.time_since_epoch())
.count();
struct timespec times[2] = {0};
struct timespec times[2] = {};
times[0].tv_sec = now_nano / SECOND_TO_NANOSECOND;
times[0].tv_nsec = now_nano % SECOND_TO_NANOSECOND;

Expand All @@ -219,7 +219,8 @@ Status LocalFileStorage::TouchFile(const std::string& path) {
if (stat(path.c_str(), &file_stat) == -1) {
return Status::IOError("Failed to get file stat: " + formatIOError(path));
}
times[1] = file_stat.st_mtim;
times[1].tv_sec = file_stat.st_mtime;
times[1].tv_nsec = file_stat.st_mtimespec.tv_nsec;
#else
times[1].tv_sec = UTIME_OMIT;
times[1].tv_nsec = UTIME_OMIT;
Expand Down
51 changes: 29 additions & 22 deletions python/client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,19 +67,21 @@ class ClientManager {

std::shared_ptr<ClientType> Connect(const std::string& username,
const std::string& password) {
return Connect("", RootSessionID(), username, password);
return Connect("", RootSessionID(), username, password, "");
}

std::shared_ptr<ClientType> Connect(std::string const& endpoint,
const SessionID session_id,
const std::string& username,
const std::string& password) {
return Connect(endpoint, RootSessionID(), username, password);
return Connect(endpoint, RootSessionID(), username, password, "");
}

std::shared_ptr<ClientType> Connect(std::string const& endpoint,
const SessionID session_id,
const std::string& username,
const std::string& password) {
const std::string& password,
const std::string& rdma_endpoint) {
std::lock_guard<std::mutex> guard{mtx_};
std::string endpoint_key = endpoint + ":" + SessionIDToString(session_id);
auto iter = client_set_.find(endpoint_key);
Expand All @@ -89,8 +91,8 @@ class ClientManager {
}
}
std::shared_ptr<ClientType> client = std::make_shared<ClientType>();
auto connect_status =
this->ConnectImpl(client, endpoint, session_id, username, password);
auto connect_status = this->ConnectImpl(client, endpoint, session_id,
username, password, rdma_endpoint);
if (PyErr_CheckSignals() != 0) {
// The method `Connect` will keep retrying, we need to propagate
// the Ctrl-C when during the C++ code run retries.
Expand Down Expand Up @@ -126,7 +128,8 @@ class ClientManager {
Status ConnectImpl(ClientPtrType& client, std::string const& endpoint = "",
const SessionID session_id = RootSessionID(),
const std::string& username = "",
const std::string& password = "") {
const std::string& password = "",
const std::string& rdma_endpoint = "") {
return endpoint.empty() ? client->Connect(username, password)
: client->Connect(endpoint, username, password);
}
Expand All @@ -138,10 +141,11 @@ class ClientManager {
Status ConnectImpl(ClientPtrType& client, std::string const& endpoint = "",
const SessionID session_id = RootSessionID(),
const std::string& username = "",
const std::string& password = "") {
return endpoint.empty()
? client->Connect(username, password)
: client->Connect(endpoint, session_id, username, password);
const std::string& password = "",
const std::string& rdma_endpoint = "") {
return endpoint.empty() ? client->Connect(username, password)
: client->Connect(endpoint, session_id, username,
password, rdma_endpoint);
}

std::mutex mtx_;
Expand Down Expand Up @@ -963,6 +967,7 @@ void bind_client(py::module& mod) {
.def_property_readonly("remote_instance_id",
&RPCClient::remote_instance_id,
doc::RPCClient_remote_instance_id)
.def_property_readonly("rdma_endpoint", &RPCClient::rdma_endpoint)
.def("__enter__", [](RPCClient* self) { return self; })
.def("__exit__", [](RPCClient* self, py::object, py::object, py::object) {
// DO NOTHING
Expand All @@ -982,49 +987,51 @@ void bind_client(py::module& mod) {
"_connect",
[](std::string const& host, const uint32_t port,
const SessionID session_id, const std::string& username,
const std::string& password) {
const std::string& password, const std::string& rdma_endpoint) {
std::string rpc_endpoint = host + ":" + std::to_string(port);
return py::cast(ClientManager<RPCClient>::GetManager()->Connect(
rpc_endpoint, session_id, username, password));
rpc_endpoint, session_id, username, password, rdma_endpoint));
},
"host"_a, "port"_a, py::kw_only(),
py::arg("session") = RootSessionID(), py::arg("username") = "",
py::arg("password") = "")
py::arg("password") = "", py::arg("rdma_endpoint") = "")
.def(
"_connect",
[](std::string const& host, std::string const& port,
const SessionID session_id, const std::string& username,
const std::string& password) {
const std::string& password, const std::string& rdma_endpoint) {
std::string rpc_endpoint = host + ":" + port;
return ClientManager<RPCClient>::GetManager()->Connect(
rpc_endpoint, session_id, username, password);
rpc_endpoint, session_id, username, password, rdma_endpoint);
},
"host"_a, "port"_a, py::kw_only(),
py::arg("session") = RootSessionID(), py::arg("username") = "",
py::arg("password") = "")
py::arg("password") = "", py::arg("rdma_endpoint") = "")
.def(
"_connect",
[](std::pair<std::string, uint32_t> const& endpoint,
const SessionID session_id, const std::string& username,
const std::string& password) {
const std::string& password, const std::string& rdma_endpoint) {
std::string rpc_endpoint =
endpoint.first + ":" + std::to_string(endpoint.second);
return ClientManager<RPCClient>::GetManager()->Connect(
rpc_endpoint, session_id, username, password);
rpc_endpoint, session_id, username, password, rdma_endpoint);
},
"endpoint"_a, py::kw_only(), py::arg("session") = RootSessionID(),
py::arg("username") = "", py::arg("password") = "")
py::arg("username") = "", py::arg("password") = "",
py::arg("rdma_endpoint") = "")
.def(
"_connect",
[](std::pair<std::string, std::string> const& endpoint,
const SessionID session_id, const std::string& username,
const std::string& password) {
const std::string& password, const std::string& rdma_endpoint) {
std::string rpc_endpoint = endpoint.first + ":" + endpoint.second;
return ClientManager<RPCClient>::GetManager()->Connect(
rpc_endpoint, session_id, username, password);
rpc_endpoint, session_id, username, password, rdma_endpoint);
},
"endpoint"_a, py::kw_only(), py::arg("session") = RootSessionID(),
py::arg("username") = "", py::arg("password") = "");
py::arg("username") = "", py::arg("password") = "",
py::arg("rdma_endpoint") = "");
} // NOLINT(readability/fn_size)

} // namespace vineyard
Loading

0 comments on commit 82d7378

Please sign in to comment.