Skip to content
This repository has been archived by the owner on Apr 28, 2023. It is now read-only.

Commit

Permalink
Add support for strided tensors
Browse files Browse the repository at this point in the history
This commit starts support for strided tensors. I made changes
to percolate a vector of TensorInfo down to emitCudaKernel so that
codegen can cast strided tensors correctly. This required updating
a unit test to expect the correct cast.
  • Loading branch information
Protonu Basu committed Jun 8, 2018
1 parent cc4b1eb commit 2f842fb
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 15 deletions.
Binary file added .test_tc_mapper_output.txt.swp
Binary file not shown.
5 changes: 4 additions & 1 deletion tc/core/cuda/cuda_tc_executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,16 @@ CudaCompilationResult CudaBackend::compileWithTcMapper(
auto parameters = mappedScop->scop().getParameterValues();
auto specializedName = specializeKernelName(tcName, parameters);

auto inputsInfo = makeTensorInfoVector(inputs);

// This updates the launch bounds with the actual result from compilation
// with tightening of launch_bounds. What you get is not necessarily what
// you asked for, the autotuner should adapt to that.
std::string source;
Grid grid;
Block block;
std::tie(source, grid, block) = mappedScop->codegen(specializedName);
std::tie(source, grid, block) =
mappedScop->codegen(specializedName, inputsInfo);
LOG_IF(INFO, FLAGS_dump_cuda) << "generatedCuda: " << source << "\n"
<< "grid: " << grid << " block: " << block;

Expand Down
26 changes: 19 additions & 7 deletions tc/core/polyhedral/cuda/codegen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -183,15 +183,23 @@ void emitTensorView(
stringstream& ss,
Halide::OutputImageParam p,
const map<string, Halide::Expr>& paramValues,
bool constInput = false) {
bool constInput = false,
const TensorInfo* tinfo = NULL) {
WS ws;
stringstream ssViewType;
for (int i = 1; i < p.dimensions(); ++i) { // Skip the outermost dimension
Halide::Expr extent = p.parameter().extent_constraint(i);
extent = Halide::Internal::substitute(paramValues, extent);
CHECK(extent.defined())
<< "Undefined extent on input/output tensor. Forward bounds inference should have set these\n";
ssViewType << "[" << extent << "]";
// TODO: Handle non-unit stride in the innermost dimension
if (tinfo && tinfo->strides.size() == p.dimensions() &&
tinfo->strides[p.dimensions() - 1] == 1 &&
tinfo->strides[i - 1] != (tinfo->shape[i] * tinfo->strides[i])) {
ssViewType << "[" << tinfo->strides[i - 1] << "]";
} else {
ssViewType << "[" << extent << "]";
}
}
ss << ws.tab();
ss << (constInput ? "const " : "") << p.type() << " (*" << p.name() << ")"
Expand All @@ -216,9 +224,12 @@ void emitTensorViews(
void emitTensorViews(
stringstream& ss,
const vector<Halide::ImageParam>& params,
const map<string, Halide::Expr>& paramValues) {
for (auto p : params) {
emitTensorView(ss, p, paramValues, true);
const map<string, Halide::Expr>& paramValues,
const std::vector<TensorInfo>& inputsInfo = std::vector<TensorInfo>{}) {
for (size_t i = 0; i < params.size(); ++i) {
inputsInfo.size()
? emitTensorView(ss, params[i], paramValues, true, &inputsInfo[i])
: emitTensorView(ss, params[i], paramValues, true);
}
}

Expand Down Expand Up @@ -738,7 +749,8 @@ std::unordered_set<isl::id, isl::IslIdIslHash> gatherReadOnlySet(

string emitCudaKernel(
const std::string& specializedName,
const MappedScop& mscop) {
const MappedScop& mscop,
const std::vector<TensorInfo>& inputsInfo) {
// Expecting a schedule with domain root and context first child.
CHECK(mscop.schedule()->elemAs<detail::ScheduleTreeElemDomain>());
CHECK(
Expand All @@ -755,7 +767,7 @@ string emitCudaKernel(
emitKernelSignature(ss, specializedName, scop);
emitThreadIdInit(ss, mscop);
emitTensorViews(ss, scop.halide.outputs, paramValues);
emitTensorViews(ss, scop.halide.inputs, paramValues);
emitTensorViews(ss, scop.halide.inputs, paramValues, inputsInfo);
emitTmpDecl(ss, scop);
emitPromotedArrayViewsHalide(ss, scop);
NodeInfoMapType nodeInfoMap;
Expand Down
3 changes: 2 additions & 1 deletion tc/core/polyhedral/cuda/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,8 @@ struct CodegenStatementContext : CodegenContext {

std::string emitCudaKernel(
const std::string& specializedName,
const MappedScop& scop);
const MappedScop& scop,
const std::vector<TensorInfo>& inputsInfo = std::vector<TensorInfo>{});

} // namespace polyhedral
} // namespace tc
7 changes: 4 additions & 3 deletions tc/core/polyhedral/cuda/mapped_scop.cc
Original file line number Diff line number Diff line change
Expand Up @@ -910,7 +910,8 @@ std::unique_ptr<MappedScop> makeSpecializedMappedScop(
// the context of the original scop as top-level
// context node in schedule tree.
std::tuple<std::string, tc::Grid, tc::Block> MappedScop::codegen(
const std::string& specializedName) const {
const std::string& specializedName,
const std::vector<TensorInfo>& inputsInfo) const {
validate(schedule());

auto mappedScopForCodegen = makeSpecializedMappedScop(*this);
Expand All @@ -927,8 +928,8 @@ std::tuple<std::string, tc::Grid, tc::Block> MappedScop::codegen(
code << code::cuda::cubBlockReduce;
}
code << "extern \"C\" {" << std::endl
<< emitCudaKernel(specializedName, *mappedScopForCodegen) << "}"
<< std::endl;
<< emitCudaKernel(specializedName, *mappedScopForCodegen, inputsInfo)
<< "}" << std::endl;

return std::make_tuple(
code.str(),
Expand Down
4 changes: 3 additions & 1 deletion tc/core/polyhedral/cuda/mapped_scop.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,9 @@ class MappedScop {
// Generate CUDA code at the current state of transformation provided a
// name for the generated function.
std::tuple<std::string, tc::Grid, tc::Block> codegen(
const std::string& specializedName) const;
const std::string& specializedName,
const std::vector<TensorInfo>& inputsInfo =
std::vector<TensorInfo>{}) const;

// Accessors..
// Const accessor to schedule of underlying Scop.
Expand Down
4 changes: 2 additions & 2 deletions test/cuda/test_tc_mapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -326,8 +326,8 @@ def tensoraddstrided(float(N, M) I0_view, float(N, M) I1_view) -> (O) {
auto res = Check(TC, name, options, inputs, checkFun);
// This test should be modified when strided tensors are handled
std::string expected =
"const float32 (*I0_view)[64] = "
"reinterpret_cast<const float32 (*)[64]>(pI0_view)";
"const float32 (*I0_view)[128] = "
"reinterpret_cast<const float32 (*)[128]>(pI0_view)";
ASSERT_NE(std::string::npos, res.second.find(expected))
<< "In resulting code:\n"
<< res.second << "\nfound unexpected: " << expected;
Expand Down

0 comments on commit 2f842fb

Please sign in to comment.