feat: start implementing attention #89

Draft: wants to merge 17 commits into `dev`
CMakeLists.txt (16 changes: 12 additions & 4 deletions)
@@ -12,12 +12,20 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS OFF)
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 
+# Download with:
+#
+# mkdir -p cmake
+# wget -O cmake/CPM.cmake https://github.com/cpm-cmake/CPM.cmake/releases/latest/download/get_cpm.cmake
+include(cmake/CPM.cmake)
+
 if(USE_CUDA)
+    CPMAddPackage(NAME CCCL SOURCE_DIR ${CMAKE_SOURCE_DIR}/3rd-party/cccl)
+
     add_compile_definitions(USE_CUDA)
     enable_language(CUDA)
     set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
     if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
-        set(CMAKE_CUDA_ARCHITECTURES 80)
+        set(CMAKE_CUDA_ARCHITECTURES native)
     endif()
     if(NOT DEFINED CMAKE_CUDA_STANDARD)
         set(CMAKE_CUDA_STANDARD 17)
@@ -45,7 +53,7 @@ endif()
 if (USE_BANG)
     add_compile_definitions(USE_BANG)
     include_directories(src/kernels/mlu/include)
 
     # Neuware Environment
     if ((NOT DEFINED NEUWARE_HOME) AND (NOT DEFINED ENV{NEUWARE_HOME}))
         message(FATAL_ERROR "NEUWARE_HOME is not defined from cmake or env")
@@ -55,14 +63,14 @@ if (USE_BANG)
         set(NEUWARE_HOME $ENV{NEUWARE_HOME} CACHE STRING "NEUWARE_HOME directory for Cambricon Neuware development")
     endif()
     message(STATUS "NEUWARE_HOME: ${NEUWARE_HOME}")
 
     # cnrt cndrv cnnl
     include_directories("${NEUWARE_HOME}/include")
     find_library(CAMBRICON_CNNL libcnnl.so "${NEUWARE_HOME}/lib64")
     find_library(CAMBRICON_CNRT libcnrt.so "${NEUWARE_HOME}/lib64")
     find_library(CAMBRICON_CNDRV libcndrv.so "${NEUWARE_HOME}/lib64")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lstdc++ -Wall")
 
     if ((NOT DEFINED TARGET_CPU_ARCH) AND (NOT DEFINED ENV{TARGET_CPU_ARCH}))
         execute_process(COMMAND uname -m OUTPUT_VARIABLE _uname_m OUTPUT_STRIP_TRAILING_WHITESPACE)
         set(TARGET_CPU_ARCH "${_uname_m}" CACHE STRING "Target CPU ARCH")
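Note on this file's two behavioral changes: `CPMAddPackage(... SOURCE_DIR ...)` consumes the CCCL checkout vendored at `3rd-party/cccl` instead of fetching it from the network, and replacing the hard-coded `set(CMAKE_CUDA_ARCHITECTURES 80)` with `native` makes nvcc target whatever GPU is present on the build machine at configure time. `native` is only understood by CMake 3.24 and newer; on older CMake, pass `-DCMAKE_CUDA_ARCHITECTURES=<arch>` explicitly (the `if(NOT DEFINED ...)` guard keeps that override working).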
cmake/CPM.cmake (24 changes: 24 additions & 0 deletions, new file)
@@ -0,0 +1,24 @@
# SPDX-License-Identifier: MIT
#
# SPDX-FileCopyrightText: Copyright (c) 2019-2023 Lars Melchior and contributors

set(CPM_DOWNLOAD_VERSION 0.38.7)
set(CPM_HASH_SUM "83e5eb71b2bbb8b1f2ad38f1950287a057624e385c238f6087f94cdfc44af9c5")

if(CPM_SOURCE_CACHE)
  set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
elseif(DEFINED ENV{CPM_SOURCE_CACHE})
  set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
else()
  set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
endif()

# Expand relative path. This is important if the provided path contains a tilde (~)
get_filename_component(CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE)

file(DOWNLOAD
     https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake
     ${CPM_DOWNLOAD_LOCATION} EXPECTED_HASH SHA256=${CPM_HASH_SUM}
)

include(${CPM_DOWNLOAD_LOCATION})
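This is the standard CPM bootstrap script: it pins release 0.38.7 by SHA-256, so a corrupted or tampered download fails the `EXPECTED_HASH` check at configure time, and when `CPM_SOURCE_CACHE` is set (as a CMake variable or in the environment) the script is stored once under that shared cache and reused across build trees instead of being re-downloaded per build.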
src/02hardware/include/hardware/devices/nvidia.h (6 changes: 6 additions & 0 deletions)
@@ -3,6 +3,12 @@
 
 #include "../device.h"
 
+#define CUDA_ASSERT(STATUS) \
+    if (auto status = (STATUS); status != cudaSuccess) { \
+        RUNTIME_ERROR(fmt::format("cuda failed on \"" #STATUS "\" with \"{}\" ({})", \
+                                  cudaGetErrorString(status), (int) status)); \
+    }
+
 namespace refactor::hardware {
 
     class Nvidia final : public Device {
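With the macro now in a public header, any file that includes `hardware/devices/nvidia.h` can wrap CUDA runtime calls with it. A minimal usage sketch (the `allocZeroed` helper is hypothetical, not part of this PR):

```cpp
#include "hardware/devices/nvidia.h"
#include <cuda_runtime.h>

// Hypothetical helper: each cudaError_t-returning call is wrapped in
// CUDA_ASSERT. On failure the macro throws via RUNTIME_ERROR, and since
// it stringizes its argument (#STATUS), the message quotes the exact
// failing call, e.g. `cuda failed on "cudaMalloc(&ptr, bytes)" ...`.
void *allocZeroed(size_t bytes) {
    void *ptr = nullptr;
    CUDA_ASSERT(cudaMalloc(&ptr, bytes));
    CUDA_ASSERT(cudaMemset(ptr, 0, bytes));
    return ptr;
}
```

One caveat of the `if`-without-`else` formulation: an `else` written directly after `CUDA_ASSERT(...)` in caller code would silently bind to the macro's `if`, so the macro is safest used as a standalone statement, as above.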
src/02hardware/src/devices/nvidia/device.cc (6 changes: 0 additions & 6 deletions)
@@ -4,12 +4,6 @@
 #ifdef USE_CUDA
 #include "memory.hh"
 #include <cuda_runtime.h>
-
-#define CUDA_ASSERT(STATUS) \
-    if (auto status = (STATUS); status != cudaSuccess) { \
-        RUNTIME_ERROR(fmt::format("cuda failed on \"" #STATUS "\" with \"{}\" ({})", \
-                                  cudaGetErrorString(status), (int) status)); \
-    }
 #endif
 
 namespace refactor::hardware {
src/02hardware/src/devices/nvidia/memory.cc (8 changes: 1 addition & 7 deletions)
@@ -1,15 +1,9 @@
 #ifdef USE_CUDA
 
 #include "memory.hh"
 #include "common.h"
+#include "hardware/devices/nvidia.h"
 #include <cuda_runtime.h>
 
-#define CUDA_ASSERT(STATUS) \
-    if (auto status = (STATUS); status != cudaSuccess) { \
-        RUNTIME_ERROR(fmt::format("cuda failed on \"" #STATUS "\" with \"{}\" ({})", \
-                                  cudaGetErrorString(status), (int) status)); \
-    }
-
 namespace refactor::hardware {
     using M = NvidiaMemory;
 
src/04kernel/CMakeLists.txt (3 changes: 2 additions & 1 deletion)
@@ -26,7 +26,8 @@ if(USE_CUDA)
     # nvrtc for cuda kernel compile
     # cublas for matmul
     # cudnn for conv and others
-    target_link_libraries(kernel PUBLIC cuda nvrtc cublas cublasLt cudnn kernel_cuda)
+    target_link_libraries(kernel PUBLIC cuda kernel_cuda)
+    target_link_libraries(kernel PRIVATE nvrtc cublas cublasLt cudnn)
     target_include_directories(kernel PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
     list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
     find_package(NCCL REQUIRED)
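The split link line changes what consumers inherit: `cuda` and `kernel_cuda` stay `PUBLIC`, so targets that link `kernel` still get them transitively, while `nvrtc`/`cublas`/`cublasLt`/`cudnn` become an implementation detail of `kernel` and no longer propagate into dependents' link interfaces.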
src/04kernel/cuda/CMakeLists.txt (2 changes: 1 addition & 1 deletion)
@@ -4,7 +4,7 @@ project(kernel_cuda)
 file(GLOB_RECURSE KERNEL_CUDA_SUB_SRC src/*.cu)
 
 add_library(kernel_cuda STATIC ${KERNEL_CUDA_SUB_SRC})
-target_link_libraries(kernel_cuda PUBLIC common)
+target_link_libraries(kernel_cuda PUBLIC common CCCL::CCCL)
 target_include_directories(kernel_cuda PUBLIC include)
 
 file(GLOB_RECURSE KERNEL_CUDA_TEST test/*.cu)
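`CCCL::CCCL` is the umbrella target exported by the CCCL package added via `CPMAddPackage` in the top-level CMakeLists; linking it `PUBLIC` puts the bundled Thrust/CUB/libcu++ headers on the include path of `kernel_cuda` and of everything that links against it.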
src/04kernel/include/kernel/attributes/attention_info.h (20 changes: 20 additions & 0 deletions, new file)
@@ -0,0 +1,20 @@
#ifndef KERNEL_ATTENTION_INFO_H
#define KERNEL_ATTENTION_INFO_H

#include "../tensor.h"

namespace refactor::kernel {

    struct AttentionInfo {
        DataType dataType;
        dim_t batch, nHead, nKVHead, seqLen, headDim, cacheLen;
        bool concatCache, resetCache;

        dim_t attLen(dim_t pastSeqLen) const noexcept;
        size_t attSize(dim_t pastSeqLen) const noexcept;
        size_t maxAttSize() const noexcept;
    };

}// namespace refactor::kernel

#endif// KERNEL_ATTENTION_INFO_H
src/04kernel/include/kernel/collectors/attention.h (3 changes: 1 addition & 2 deletions)
@@ -6,9 +6,8 @@
 namespace refactor::kernel {
 
     struct AttentionCollector final : public InfoCollector {
-        dim_t maxSeqLen;
 
-        AttentionCollector(decltype(_target), decltype(maxSeqLen)) noexcept;
+        AttentionCollector(decltype(_target)) noexcept;
 
         std::vector<KernelBox>
         filter(TensorRefs inputs, TensorRefs outputs) const final;
src/04kernel/src/attributes/attention_info.cc (17 changes: 17 additions & 0 deletions, new file)
@@ -0,0 +1,17 @@
#include "kernel/attributes/attention_info.h"

namespace refactor::kernel {

    dim_t AttentionInfo::attLen(dim_t pastSeqLen) const noexcept {
        return pastSeqLen + seqLen;
    }

    size_t AttentionInfo::attSize(dim_t pastSeqLen) const noexcept {
        return batch * nHead * seqLen * attLen(pastSeqLen) * dataType.size();
    }

    size_t AttentionInfo::maxAttSize() const noexcept {
        return batch * nHead * seqLen * (cacheLen ? cacheLen : seqLen) * dataType.size();
    }

}// namespace refactor::kernel
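A worked example to sanity-check the three formulas, with hypothetical sizes (not taken from the PR):

```cpp
// Assume: batch = 1, nHead = 8, seqLen = 4, cacheLen = 16,
//         dataType.size() == 4 (f32), pastSeqLen = 12.
//
// attLen(12)   = 12 + 4             = 16           // past + current tokens
// attSize(12)  = 1 * 8 * 4 * 16 * 4 = 2048 bytes   // one attention matrix
// maxAttSize() = 1 * 8 * 4 * 16 * 4 = 2048 bytes   // cacheLen caps attLen
//
// With no cache (cacheLen == 0) the worst case is the square seqLen x seqLen
// matrix: maxAttSize() = 1 * 8 * 4 * 4 * 4 = 512 bytes.
```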
src/04kernel/src/collectors/attention.cc (55 changes: 37 additions & 18 deletions)
@@ -1,38 +1,57 @@
 #include "kernel/collectors/attention.h"
+#include "kernel/attributes/attention_info.h"
 // #include "../kernels/attention/cpu_kernel.hh"
 #include "../kernels/attention/cuda_kernel.hh"
 
 namespace refactor::kernel {
 
     AttentionCollector::AttentionCollector(
-        decltype(_target) target,
-        decltype(maxSeqLen) maxSeqLen_) noexcept
-        : InfoCollector(target),
-          maxSeqLen(maxSeqLen_) {}
+        decltype(_target) target) noexcept
+        : InfoCollector(target) {}
 
     std::vector<KernelBox>
     AttentionCollector::filter(TensorRefs inputs, TensorRefs outputs) const {
         auto const &query = inputs[0].get();
         auto const &key = inputs[1].get();
-        auto pastSeqLen = inputs.size() == 3 ? 0 : *inputs[2].get().data->get<int64_t>();
-        auto cacheLen = outputs.size() == 1 ? 0 : outputs[1].get().shape[2];
 
-        std::vector<KernelBox> ans;
+        AttentionInfo info{
+            .dataType = query.dataType,
+            .batch = query.shape[0],
+            .nHead = query.shape[1],
+            .nKVHead = key.shape[1],
+            .seqLen = query.shape[2],
+            .headDim = query.shape[3],
+            .cacheLen = 0,
+            .concatCache = false,
+            .resetCache = false,
+        };
+        switch (outputs.size()) {
+            case 1:
+                // no kv cache
+                ASSERT(inputs.size() == 3, "");
+                break;
+            case 3:
+                switch (inputs.size()) {
+                    case 6:
+                        info.resetCache = true;
+                    case 4:
+                        info.concatCache = true;
+                    case 3:
+                        info.cacheLen = outputs[1].get().shape[2];
+                        break;
+                    default:
+                        UNREACHABLE();
+                }
+                break;
+            default:
+                UNREACHABLE();
+        }
 
+        std::vector<KernelBox> ans;
         switch (_target) {
             case decltype(_target)::Cpu:
                 break;
             case decltype(_target)::Nvidia: {
-                decltype(AttentionCuda::info) info{
-                    .dataType = query.dataType,
-                    .batch = query.shape[0],
-                    .nHead = query.shape[1],
-                    .nKVHead = key.shape[1],
-                    .pastSeqLen = static_cast<dim_t>(pastSeqLen),
-                    .seqLen = query.shape[2],
-                    .cacheLen = cacheLen,
-                    .headDim = query.shape[3],
-                    .resetCache = false,
-                };
                 if (auto ptr = AttentionCuda::build(info); ptr) {
                     ans.emplace_back(std::move(ptr));
                 }
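The nested `switch` above decodes operand arities into cache modes, relying on deliberate fall-through (`case 6` falls into `case 4`, which falls into `case 3`):

| `outputs.size()` | `inputs.size()` | `concatCache` | `resetCache` | `cacheLen` |
| --- | --- | --- | --- | --- |
| 1 | 3 | false | false | 0 |
| 3 | 3 | false | false | `outputs[1].shape[2]` |
| 3 | 4 | true | false | `outputs[1].shape[2]` |
| 3 | 6 | true | true | `outputs[1].shape[2]` |

Any other combination hits `UNREACHABLE()`. Note also that `info` is now built once, before the target dispatch, so a future CPU kernel can reuse it, and that `AttentionInfo` no longer stores `pastSeqLen`: the old collector read it from `inputs[2]` at collection time, whereas the new interface takes it as a runtime argument to `attLen`/`attSize`.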