-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add assign_pos\scatter operator and cpu kernel for moe.
- Loading branch information
1 parent
de18a1b
commit 5b33afd
Showing
17 changed files
with
565 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
#ifndef KERNEL_MOE_INFO_H | ||
#define KERNEL_MOE_INFO_H | ||
|
||
#include "../tensor.h" | ||
|
||
namespace refactor::kernel { | ||
|
||
struct AssignPosInfo { | ||
uint32_t top, expert_num; | ||
uint32_t elementSize; | ||
|
||
AssignPosInfo(uint32_t top, uint32_t expert_num, Tensor const &gate); | ||
}; | ||
|
||
struct ReorderInfo{ | ||
bool scatter; | ||
uint32_t top; | ||
uint32_t blockNum, blockSize; | ||
ReorderInfo(bool scatter, uint32_t top, TensorRefs inputs); | ||
}; | ||
|
||
}// namespace refactor::kernel | ||
|
||
#endif// KERNEL_MOE_INFO_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#ifndef KERNEL_MOE_H | ||
#define KERNEL_MOE_H | ||
|
||
#include "../collector.h" | ||
|
||
namespace refactor::kernel { | ||
|
||
struct AssignPosCollector final : public InfoCollector { | ||
uint32_t topk,numExperts; | ||
constexpr AssignPosCollector(decltype(_target) target, uint32_t topk, uint32_t numExperts) noexcept | ||
: InfoCollector(target) ,topk(topk), numExperts(numExperts){} | ||
|
||
std::vector<KernelBox> | ||
filter(TensorRefs inputs, TensorRefs outputs) const final; | ||
}; | ||
|
||
struct ReorderCollector final : public InfoCollector { | ||
bool scatter; | ||
uint32_t topk; | ||
constexpr ReorderCollector(decltype(_target) target, bool scatter, uint32_t topk) noexcept | ||
: InfoCollector(target) ,scatter(scatter), topk(topk){} | ||
|
||
std::vector<KernelBox> | ||
filter(TensorRefs inputs, TensorRefs outputs) const final; | ||
}; | ||
|
||
}// namespace refactor::kernel | ||
|
||
#endif// KERNEL_MOE_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#include "kernel/attributes/moe_info.h" | ||
#include <numeric> | ||
|
||
namespace refactor::kernel { | ||
|
||
AssignPosInfo::AssignPosInfo(uint32_t top, uint32_t expert_num, Tensor const &gate):\ | ||
top(top), expert_num(expert_num),elementSize(gate.elementsSize()){} | ||
|
||
ReorderInfo::ReorderInfo(bool scatter, uint32_t top, TensorRefs inputs):\ | ||
scatter(scatter), top(top),blockNum(inputs[1].get().elementsSize()), blockSize(inputs[0].get().strides()[0]){} | ||
|
||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
#include "kernel/collectors/moe.h" | ||
#include "../kernels/moe/cpu_kernel.hh" | ||
#include "kernel/attributes/moe_info.h" | ||
|
||
namespace refactor::kernel { | ||
|
||
std::vector<KernelBox> | ||
AssignPosCollector::filter(TensorRefs inputs, TensorRefs outputs) const { | ||
AssignPosInfo info(topk, numExperts, inputs[0]); | ||
std::vector<KernelBox> ans; | ||
switch (_target) { | ||
case decltype(_target)::Cpu: | ||
if (auto ptr = AssignPosCpu::build(info); ptr) { | ||
ans.emplace_back(std::move(ptr)); | ||
} | ||
break; | ||
//todo :暂时用cpu的实现 | ||
case decltype(_target)::Nvidia: | ||
if (auto ptr = AssignPosCpu::build(info); ptr) { | ||
ans.emplace_back(std::move(ptr)); | ||
} | ||
break; | ||
default: | ||
UNREACHABLEX(void, "Unknown target"); | ||
} | ||
return ans; | ||
} | ||
|
||
std::vector<KernelBox> | ||
ReorderCollector::filter(TensorRefs inputs, TensorRefs outputs) const { | ||
ReorderInfo info(scatter, topk, inputs); | ||
std::vector<KernelBox> ans; | ||
switch (_target) { | ||
case decltype(_target)::Cpu: | ||
if (auto ptr = ReorderCpu::build(info); ptr) { | ||
ans.emplace_back(std::move(ptr)); | ||
} | ||
break; | ||
//todo :暂时用cpu的实现 | ||
case decltype(_target)::Nvidia: | ||
if (auto ptr = ReorderCpu::build(info); ptr) { | ||
ans.emplace_back(std::move(ptr)); | ||
} | ||
break; | ||
default: | ||
UNREACHABLEX(void, "Unknown target"); | ||
} | ||
return ans; | ||
} | ||
|
||
}// namespace refactor::kernel |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
#include "cpu_kernel.hh" | ||
#include <execution> | ||
#include <list> | ||
|
||
namespace refactor::kernel { | ||
|
||
AssignPosCpu::AssignPosCpu(AssignPosInfo info) noexcept | ||
: Kernel(), info(std::move(info)) {} | ||
|
||
auto AssignPosCpu::build(AssignPosInfo info) noexcept -> KernelBox { | ||
return std::make_unique<AssignPosCpu>(std::move(info)); | ||
} | ||
auto AssignPosCpu::typeId() noexcept -> size_t { | ||
static uint8_t ID = 1; | ||
return reinterpret_cast<size_t>(&ID); | ||
} | ||
|
||
auto AssignPosCpu::kernelTypeId() const noexcept -> size_t { | ||
return typeId(); | ||
} | ||
auto AssignPosCpu::description() const noexcept -> std::string_view { | ||
return "Performing AssignPos operation on generic cpu"; | ||
} | ||
|
||
auto AssignPosCpu::lower(Resources &) const noexcept -> RoutineWorkspace { | ||
using namespace runtime; | ||
return [info = this->info](Resources &, void *workspace, void const *const *inputs, void *const *outputs) { | ||
auto gate = reinterpret_cast<uint8_t const *>(inputs[0]); | ||
|
||
auto expert_cnt = reinterpret_cast<uint8_t*>(outputs[0]);//T | ||
auto pos = reinterpret_cast<uint8_t*>(outputs[1]); | ||
std::memset(expert_cnt, 0, info.expert_num); | ||
for (size_t i = 0; i < info.elementSize; i ++){ | ||
ASSERT (gate[i] >= 0 && gate[i] < info.expert_num, "gate exceeds expert idx scope!"); | ||
expert_cnt[gate[i]] ++; | ||
} | ||
std::vector<uint8_t> expert_accumlate; | ||
expert_accumlate.assign(info.expert_num, 0); | ||
for (size_t i=0; i<expert_accumlate.size(); ++i){ | ||
expert_accumlate[i] = (i==0) ? expert_cnt[i] : (expert_accumlate[i-1] + expert_cnt[i]); | ||
} | ||
|
||
for (size_t i=0; i< info.elementSize; ++i){ | ||
pos[--expert_accumlate[gate[i]]] = i; | ||
} | ||
}; | ||
} | ||
|
||
|
||
ReorderCpu::ReorderCpu(ReorderInfo info) noexcept | ||
: Kernel(), info(std::move(info)) {} | ||
|
||
auto ReorderCpu::build(ReorderInfo info) noexcept -> KernelBox { | ||
return std::make_unique<ReorderCpu>(std::move(info)); | ||
} | ||
auto ReorderCpu::typeId() noexcept -> size_t { | ||
static uint8_t ID = 1; | ||
return reinterpret_cast<size_t>(&ID); | ||
} | ||
|
||
auto ReorderCpu::kernelTypeId() const noexcept -> size_t { | ||
return typeId(); | ||
} | ||
auto ReorderCpu::description() const noexcept -> std::string_view { | ||
return "Performing scatter operation on generic cpu"; | ||
} | ||
|
||
auto ReorderCpu::lower(Resources &) const noexcept -> RoutineWorkspace { | ||
using namespace runtime; | ||
return [info = this->info](Resources &, void *workspace, void const *const *inputs, void *const *outputs) { | ||
auto input = reinterpret_cast<float const *>(inputs[0]); | ||
auto pos = reinterpret_cast<uint32_t const *>(inputs[1]); | ||
auto dstVal = reinterpret_cast<float*>(outputs[0]);//T | ||
|
||
for(size_t i = 0; i<info.blockNum; i++){ | ||
if (info.scatter) | ||
std::copy_n(input + (pos[i]/info.top) * info.blockSize, info.blockSize, dstVal + i*info.blockSize); | ||
else | ||
std::copy_n(input + i*info.blockSize, info.blockSize, dstVal + pos[i] * info.blockSize); | ||
} | ||
}; | ||
} | ||
}// namespace refactor::kernel |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#ifndef KERNEL_MOE_CPU_KERNEL_HH | ||
#define KERNEL_MOE_CPU_KERNEL_HH | ||
|
||
#include "kernel/attributes/moe_info.h" | ||
#include "kernel/kernel.h" | ||
|
||
namespace refactor::kernel { | ||
|
||
struct AssignPosCpu final : public Kernel { | ||
AssignPosInfo info; | ||
explicit AssignPosCpu(AssignPosInfo info) noexcept; | ||
|
||
static KernelBox build(AssignPosInfo info) noexcept; | ||
static size_t typeId() noexcept; | ||
|
||
size_t kernelTypeId() const noexcept final; | ||
std::string_view description() const noexcept final; | ||
RoutineWorkspace lower(Resources &) const noexcept final; | ||
}; | ||
|
||
struct ReorderCpu final : public Kernel { | ||
ReorderInfo info; | ||
explicit ReorderCpu(ReorderInfo info) noexcept; | ||
|
||
static KernelBox build(ReorderInfo info) noexcept; | ||
static size_t typeId() noexcept; | ||
|
||
size_t kernelTypeId() const noexcept final; | ||
std::string_view description() const noexcept final; | ||
RoutineWorkspace lower(Resources &) const noexcept final; | ||
}; | ||
|
||
}// namespace refactor::kernel | ||
|
||
#endif// KERNEL_MOE_CPU_KERNEL_HH |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
#include "../../../src/kernels/moe/cpu_kernel.hh" | ||
#include <gtest/gtest.h> | ||
#include <numeric> | ||
|
||
using namespace refactor; | ||
using namespace kernel; | ||
|
||
// Runs the AssignPos CPU routine on 8x2 gate indices over 4 experts and
// compares both outputs with hand-computed expectations.
// NOTE(review): the tensors are declared U32 while the host buffers are
// uint8_t, matching how the kernel currently reads them — confirm.
TEST(kernel, AssignPosCpu) {
    // build routine
    auto gate = Tensor::share(DataType::U32, Shape{8, 2});
    auto expert_cnt = Tensor::share(DataType::U32, Shape{4});
    auto pos = Tensor::share(DataType::U32, Shape{16});

    auto kernel = AssignPosCpu::build(AssignPosInfo(2, 4, *gate));
    ASSERT_TRUE(kernel);
    auto res = runtime::Resources();
    auto routine = kernel->lower(res).routine;

    // put input data
    std::vector<uint8_t> gateData = {3, 2, 0, 1, 2, 1, 1, 3, 2, 0, 1, 3, 1, 0, 1, 2};
    std::vector<uint8_t> countOut(expert_cnt->elementsSize());
    std::vector<uint8_t> posOut(pos->elementsSize());

    // inference
    void const *inputs[]{gateData.data()};
    void *outputs[]{countOut.data(), posOut.data()};
    routine(res, nullptr, inputs, outputs);

    // check
    std::vector<uint32_t> expectExpertCnt = {3, 6, 4, 3};
    std::vector<uint32_t> expectPos = {13, 9, 2, 14, 12, 10, 6, 5, 3, 15, 8, 4, 1, 11, 7, 0};

    for (size_t idx = 0; idx < expectPos.size(); ++idx) {
        EXPECT_EQ(expectPos[idx], posOut[idx]);
    }
    for (size_t idx = 0; idx < expectExpertCnt.size(); ++idx) {
        EXPECT_EQ(expectExpertCnt[idx], countOut[idx]);
    }
}
|
||
// Runs the Reorder CPU routine in scatter mode and verifies that every output
// block i equals input row pos[i] / top.
TEST(kernel, ReorderScatterCpu) {
    // build routine
    const int seq = 8, hid = 4, top = 2;
    // The kernel reads the data as float, so declare the tensor as F32.
    auto input = Tensor::share(DataType::F32, Shape{seq, hid});
    auto pos = Tensor::share(DataType::U32, Shape{seq * top});
    std::vector<Arc<Tensor>> inputTensors{input, pos};
    TensorRefs inputs_;
    inputs_.reserve(inputTensors.size());
    std::transform(inputTensors.begin(), inputTensors.end(),
                   std::back_inserter(inputs_),
                   [](auto const &it) { return std::cref(*it); });

    auto kernel = ReorderCpu::build(ReorderInfo(true, top, inputs_));
    ASSERT_TRUE(kernel);
    auto res = runtime::Resources();
    auto routine = kernel->lower(res).routine;

    // put input data
    std::vector<float> ins0(input->elementsSize());
    std::iota(ins0.begin(), ins0.end(), 0);
    std::vector<uint32_t> ins1 = {13, 9, 2, 14, 12, 10, 6, 5, 3, 15, 8, 4, 1, 11, 7, 0};
    std::vector<float> out(input->elementsSize() * top);

    // inference
    void const *inputs[]{ins0.data(), ins1.data()};
    void *outputs[]{out.data()};
    routine(res, nullptr, inputs, outputs);

    // check: cover all seq * top output blocks, not just the first `seq`
    const size_t width = static_cast<size_t>(hid);
    for (size_t i = 0; i < ins1.size(); ++i) {
        const size_t row = ins1[i] / static_cast<uint32_t>(top);
        for (size_t j = 0; j < width; ++j) {
            EXPECT_EQ(ins0[row * width + j], out[i * width + j]);
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#ifndef COMPUTATION_MOE_H | ||
#define COMPUTATION_MOE_H | ||
|
||
#include "../operator.h" | ||
|
||
namespace refactor::computation { | ||
|
||
struct AssignPos final : public Operator { | ||
uint32_t topk,numExperts; | ||
|
||
constexpr explicit AssignPos(uint32_t topk, uint32_t numExperts) noexcept : Operator(), | ||
topk(topk), numExperts(numExperts){} | ||
|
||
static size_t typeId() noexcept; | ||
size_t opTypeId() const noexcept final; | ||
std::string_view name() const noexcept final; | ||
kernel::CollectorBox candidateKernels(Target) const final; | ||
std::string serialize() const noexcept final; | ||
}; | ||
|
||
struct Reorder final : public Operator { | ||
bool scatter; | ||
uint32_t topk; | ||
|
||
constexpr explicit Reorder(bool scatter, uint32_t topk) noexcept : Operator(), | ||
scatter(scatter), topk(topk){} | ||
|
||
static size_t typeId() noexcept; | ||
size_t opTypeId() const noexcept final; | ||
std::string_view name() const noexcept final; | ||
kernel::CollectorBox candidateKernels(Target) const final; | ||
std::string serialize() const noexcept final; | ||
}; | ||
|
||
}// namespace refactor::computation | ||
|
||
#endif// COMPUTATION_MOE_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
#include "computation/operators/moe.h" | ||
#include "kernel/collectors/moe.h" | ||
|
||
namespace refactor::computation { | ||
|
||
auto AssignPos::typeId() noexcept -> size_t { | ||
static uint8_t ID = 1; | ||
return reinterpret_cast<size_t>(&ID); | ||
} | ||
auto AssignPos::opTypeId() const noexcept -> size_t { return typeId(); } | ||
auto AssignPos::name() const noexcept -> std::string_view { return "moe::AssignPos"; } | ||
auto AssignPos::candidateKernels(Target target) const -> kernel::CollectorBox { | ||
using Collector_ = kernel::AssignPosCollector; | ||
return std::make_unique<Collector_>(target, topk, numExperts); | ||
} | ||
auto AssignPos::serialize() const noexcept -> std::string { | ||
return "moe::AssignPos()"; | ||
} | ||
|
||
auto Reorder::typeId() noexcept -> size_t { | ||
static uint8_t ID = 1; | ||
return reinterpret_cast<size_t>(&ID); | ||
} | ||
auto Reorder::opTypeId() const noexcept -> size_t { return typeId(); } | ||
auto Reorder::name() const noexcept -> std::string_view { return "moe::Reorder"; } | ||
auto Reorder::candidateKernels(Target target) const -> kernel::CollectorBox { | ||
using Collector_ = kernel::ReorderCollector; | ||
return std::make_unique<Collector_>(target, scatter, topk); | ||
} | ||
auto Reorder::serialize() const noexcept -> std::string { | ||
return "moe::Reorder()"; | ||
} | ||
|
||
}// namespace refactor::computation |
Oops, something went wrong.