Skip to content

Commit

Permalink
refactor: stream 应采用尽量基础的类型 (stream should use the most primitive types possible)
Browse files Browse the repository at this point in the history
Signed-off-by: YdrMaster <[email protected]>
  • Loading branch information
YdrMaster committed Oct 17, 2023
1 parent 233023e commit dc326ad
Show file tree
Hide file tree
Showing 8 changed files with 18 additions and 20 deletions.
3 changes: 1 addition & 2 deletions src/03runtime/include/runtime/stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@
#include <variant>

namespace refactor::runtime {
using Addresses = absl::InlinedVector<void *, 2>;
using Routine = std::function<void(runtime::Resources &, Addresses, Addresses)>;
using Routine = std::function<void(runtime::Resources &, void const **, void **)>;

struct Address {
std::variant<size_t, mem_manager::SharedForeignBlob> value;
Expand Down
11 changes: 7 additions & 4 deletions src/03runtime/src/stream.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,21 @@ namespace refactor::runtime {

void Stream::run() {
    auto stack = _stack->ptr();
    // Hoisted out of the loop so each iteration reuses the vectors' capacity
    // instead of reallocating per node.
    std::vector<void const *> inputs_;
    std::vector<void *> outputs_;
    for (auto [nodeIdx, inputs, outputs] : _internal.topology) {
        auto const &routine = _internal.nodes[nodeIdx];
        inputs_.clear();
        outputs_.clear();
        inputs_.reserve(inputs.size());
        outputs_.reserve(outputs.size());
        // BUG FIX: the commit's version passed `inputs_.begin()` as the output
        // iterator after only clear()+reserve(). reserve() does not change
        // size(), so std::transform wrote past the end of an empty vector —
        // undefined behavior (and .data() would still report size 0 elements).
        // std::back_inserter appends properly and keeps size() correct.
        std::transform(inputs.begin(), inputs.end(),
                       std::back_inserter(inputs_),
                       [stack, this](auto i) { return _internal.edges[i](stack); });
        std::transform(outputs.begin(), outputs.end(),
                       std::back_inserter(outputs_),
                       [stack, this](auto i) { return _internal.edges[i](stack); });
        // Routine signature (per this commit): (Resources &, void const **, void **).
        routine(_resources, inputs_.data(), outputs_.data());
    }
}

Expand Down
1 change: 0 additions & 1 deletion src/04kernel/include/kernel/kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#include <string_view>

namespace refactor::kernel {
using runtime::Addresses;
using runtime::Routine;

class Kernel {
Expand Down
15 changes: 6 additions & 9 deletions src/04kernel/src/kernels/batch_normalization/cpu_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,7 @@ namespace refactor::kernel {
dims = std::accumulate(shape.begin() + 2, shape.end(), 1u, std::multiplies<>()),
sn = c * dims,
sc = dims;
return [n, c, sn, sc, epsilon](
Resources &,
Addresses inputs,
Addresses outputs) {
return [n, c, sn, sc, epsilon](Resources &, void const **inputs, void **outputs) {
auto x = inputs[0],
scale = inputs[1],
bias = inputs[2],
Expand All @@ -65,10 +62,10 @@ namespace refactor::kernel {
dt mean, scale, bias;
};
std::vector<Channel> channels(c);
auto scale_ = reinterpret_cast<t1 *>(scale),
bias_ = reinterpret_cast<t1 *>(bias);
auto mean_ = reinterpret_cast<t2 *>(mean),
var_ = reinterpret_cast<t2 *>(var);
auto scale_ = reinterpret_cast<t1 const *>(scale),
bias_ = reinterpret_cast<t1 const *>(bias);
auto mean_ = reinterpret_cast<t2 const *>(mean),
var_ = reinterpret_cast<t2 const *>(var);
for (auto i : range0_(c)) {
channels[i] = {
static_cast<dt>(mean_[i]),
Expand All @@ -77,7 +74,7 @@ namespace refactor::kernel {
};
}
// Y = (X - input_mean) / sqrt(input_var + epsilon) * scale + B
auto x_ = reinterpret_cast<dt *>(x),
auto x_ = reinterpret_cast<dt const *>(x),
y_ = reinterpret_cast<dt *>(y);
for (auto in : range0_(n))
for (auto ic : range0_(c))
Expand Down
2 changes: 1 addition & 1 deletion src/04kernel/src/kernels/batch_normalization/cudnn_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ namespace refactor::kernel::cudnn {
// nvcc at c++11 doesn't support real move capture
return [d = std::move(d),
param64 = dtParam == DT::F64,
epsilon = this->epsilon](Resources &res, Addresses inputs, Addresses outputs) {
epsilon = this->epsilon](Resources &res, void const **inputs, void **outputs) {
// fetch cudnn handle from resources
auto handle = res.fetchOrStore<CudnnContext>()->handle;
// name inputs and outputs
Expand Down
2 changes: 1 addition & 1 deletion src/04kernel/src/kernels/conv/cudnn_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ namespace refactor::kernel::cudnn {
CUDNN_ASSERT(cudnnSetConvolution2dDescriptor(d->conv, pad[0], pad[1], stride[0], stride[1], dilation[0], dilation[1], CUDNN_CROSS_CORRELATION, cudnnDataType));

// nvcc at c++11 doesn't support real move capture
return [d_ = std::move(d)](Resources &res, Addresses inputs, Addresses outputs) {
return [d_ = std::move(d)](Resources &res, void const **inputs, void **outputs) {
// fetch cudnn handle from resources
auto handle = res.fetchOrStore<CudnnContext>()->handle;
auto const &workspace = *res.fetchOrStore<CudnnWorkspace>();
Expand Down
2 changes: 1 addition & 1 deletion src/04kernel/src/kernels/simple_binary/arthimetic11.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ namespace refactor::kernel {

#define CASE_DT(OP, T) \
case DT::T: \
return [n = this->size](runtime::Resources &, Addresses inputs, Addresses outputs) { \
return [n = this->size](runtime::Resources &, void const **inputs, void **outputs) { \
using T_ = primitive_t<DT::T>::type; \
auto a = static_cast<T_ const *>(inputs[0]); \
auto b = static_cast<T_ const *>(inputs[1]); \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ namespace refactor::kernel::cudnn {
CUDNN_ASSERT(cudnnSetTensorNdDescriptor(d->tensor, cudnnDataTypeConvert(dt), 1, &size, &stride));

// nvcc at c++11 doesn't support real move capture
return [d = std::move(d)](Resources &res, Addresses inputs, Addresses outputs) {
return [d = std::move(d)](Resources &res, void const **inputs, void **outputs) {
// fetch cudnn handle from resources
auto handle = res.fetchOrStore<CudnnContext>()->handle;
// name inputs and outputs
Expand Down

0 comments on commit dc326ad

Please sign in to comment.