-
Notifications
You must be signed in to change notification settings - Fork 0
/
tvm_demo_cuda.cpp
137 lines (115 loc) · 4.49 KB
/
tvm_demo_cuda.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <exception>
#include <iostream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include <dlpack/dlpack.h>
#include <opencv2/opencv.hpp>
#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/module.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/registry.h>

using namespace std;
/// Ordering predicate for (score, index) pairs: true when `lhs` has the
/// strictly larger score, so sorting with it places the highest score first.
/// The index member (`second`) takes no part in the comparison.
bool pairCompare(const std::pair<float, int>& lhs, const std::pair<float, int>& rhs)
{
    const float left_score = lhs.first;
    const float right_score = rhs.first;
    return right_score < left_score;
}
/**
 * Returns the indices of the `topK` largest values in `v`, best first.
 *
 * @param v     Scores to rank.
 * @param topK  Number of indices requested. Values <= 0 yield an empty
 *              result; values above `v.size()` are clamped to `v.size()`,
 *              so the result may contain fewer than `topK` entries.
 * @return Indices into `v` ordered by descending score. Equal scores are
 *         ordered by ascending index, which makes the result deterministic.
 */
std::vector<int> argmax(const std::vector<float>& v, int topK)
{
    // Clamp the request: a partial_sort middle iterator past end() and reads
    // of pairs[i] beyond the vector are both undefined behaviour.
    const std::size_t count =
        topK <= 0 ? 0 : std::min(static_cast<std::size_t>(topK), v.size());

    std::vector<std::pair<float, int>> pairs;
    pairs.reserve(v.size());
    for (std::size_t i = 0; i < v.size(); ++i)
        pairs.emplace_back(v[i], static_cast<int>(i));

    // Highest score first; on a tie the lower index wins.
    const auto by_score_desc = [](const std::pair<float, int>& a,
                                  const std::pair<float, int>& b) {
        if (a.first != b.first)
            return a.first > b.first;
        return a.second < b.second;
    };
    const auto middle = pairs.begin() + static_cast<std::ptrdiff_t>(count);
    std::partial_sort(pairs.begin(), middle, pairs.end(), by_score_desc);

    std::vector<int> result;
    result.reserve(count);
    for (auto it = pairs.begin(); it != middle; ++it)
        result.push_back(it->second);
    return result;
}
int main(void) {
std::cout<<"Hello, TVM(CUDA)"<<std::endl;
// load the ResNet18 library
DLDevice dev_cuda{kDLCUDA, 0};
DLDevice dev_cpu{kDLCPU, 0};
tvm::runtime::Module mod_factory = tvm::runtime::Module::LoadFromFile("modcuda.so");
// create the ResNet18 module
tvm::runtime::Module resnet18_mod = mod_factory.GetFunction("default")(dev_cuda);
tvm::runtime::PackedFunc set_input = resnet18_mod.GetFunction("set_input");
tvm::runtime::PackedFunc get_output = resnet18_mod.GetFunction("get_output");
tvm::runtime::PackedFunc run = resnet18_mod.GetFunction("run");
// Use the C++ API
// Replace the input size and data type according to your ResNet18 model
tvm::runtime::NDArray input_cpu = tvm::runtime::NDArray::Empty({32, 3, 224, 224}, DLDataType{kDLFloat, 32, 1}, dev_cpu);
tvm::runtime::NDArray output_cpu = tvm::runtime::NDArray::Empty({32, 1000}, DLDataType{kDLFloat, 32, 1}, dev_cpu);
tvm::runtime::NDArray input_cuda = tvm::runtime::NDArray::Empty({32, 3, 224, 224}, DLDataType{kDLFloat, 32, 1}, dev_cuda);
tvm::runtime::NDArray output_cuda = tvm::runtime::NDArray::Empty({32, 1000}, DLDataType{kDLFloat, 32, 1}, dev_cuda);
// Set input data (replace with your input data preparation logic)
// read image
cv::Mat image = cv::imread("cat.jpg");
cv::resize(image, image, cv::Size(224, 224));
cv::cvtColor(image, image, cv::COLOR_BGR2RGB);
vector<float> mean{0.485, 0.456, 0.406}, std{0.229, 0.224, 0.225};
// copy
float* input_data = static_cast<float*>(input_cpu->data);
std::vector<cv::Mat> channels(3);
cv::split(image, channels);
for(int b = 0; b<32;b++){
for(int i=0;i<3;i++){
for(int j=0;j<224;j++){
for(int k=0;k<224;k++){
float point = static_cast<float>(channels[i].at<uchar>(j, k));
input_data[b*3*224*224 + i*224*224 + j*224 + k] = (point / 255.0 - mean[i]) / std[i];
}
}
}
}
input_cpu.CopyTo(input_cuda);
// Set the input
set_input("input", input_cuda);
// Run the ResNet18 model
run();
// Get the output
get_output(0, output_cuda);
output_cuda.CopyTo(output_cpu);
// 打印Tips
printf("******Tips:\n");
string tips = string("280 n02120505 狐狸, grey fox, gray fox, Urocyon cinereoargenteus\n")+
"281 n02123045 猫, tabby, tabby cat\n"+
"282 n02123159 猫, tiger cat\n"+
"283 n02123394 猫, Persian cat\n"+
"284 n02123597 猫, Siamese cat, Siamese\n"+
"285 n02124075 猫, Egyptian cat\n"+
"286 n02125311 猫, cougar, puma, catamount, mountain lion, painter, panther, Felis concolor\n"+
"287 n02127052 猫, lynx, catamount\n"+
"288 n02128385 豹, leopard, Panthera pardus\n"+
"289 n02128757 豹, snow leopard, ounce, Panthera uncia\n"+
"290 n02128925 豹, jaguar, panther, Panthera onca, Felis onca\n";
std::cout<<tips;
// Add your post-processing logic here
float* output_data = static_cast<float*>(output_cpu->data);
for(int b = 0; b<32; b++){
float sum = 0;
std::vector<float> probs(1000);
// 累加
for(int idx=0;idx<1000;idx++){
probs[idx] = exp(output_data[b * 1000 + idx]);
sum += probs[idx];
}
for(int idx=0;idx<1000;idx++){
probs[idx] /= sum;
}
// 找出最大的前5个
std::vector<int> topK = argmax(probs, 5);
if(b == 0 || b==1){
printf("******Top 5:\n");
for(int idx=0;idx<5;idx++){
printf("Batch %d: %d, %f\n", b, topK[idx], probs[topK[idx]]);
}
}
}
// 统计时间
auto start = cv::getTickCount();
for(int i=0;i<100;i++){
run();
}
auto end = cv::getTickCount();
double time = (end - start) / cv::getTickFrequency() / 100;
printf("infer 100 cost time: %f\n", time);
return 0;
}