forked from tenstorrent/tt-metal
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathloopback.cpp
126 lines (100 loc) · 3.95 KB
/
loopback.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0
#include "tt_metal/host_api.hpp"
#include "tt_metal/impl/device/device.hpp"
#include "common/bfloat16.hpp"
/*
* 1. Host writes data to buffer in DRAM
* 2. dram_copy kernel on logical core {0, 0} BRISC copies data from buffer
* in step 1. to buffer in L1 and back to another buffer in DRAM
* 3. Host reads from buffer written to in step 2.
*/
using namespace tt::tt_metal;
int main(int argc, char **argv) {
if (getenv("TT_METAL_SLOW_DISPATCH_MODE") != nullptr) {
TT_THROW("Test not supported w/ slow dispatch, exiting");
}
bool pass = true;
try {
/*
* Silicon accelerator setup
*/
constexpr int device_id = 0;
Device *device =
CreateDevice(device_id);
/*
* Setup program and command queue to execute along with its buffers and kernels to use
*/
CommandQueue& cq = device->command_queue();
Program program = CreateProgram();
constexpr CoreCoord core = {0, 0};
KernelHandle dram_copy_kernel_id = CreateKernel(
program,
"tt_metal/programming_examples/loopback/kernels/loopback_dram_copy.cpp",
core,
DataMovementConfig{.processor = DataMovementProcessor::RISCV_0, .noc = NOC::RISCV_0_default}
);
constexpr uint32_t single_tile_size = 2 * (32 * 32);
constexpr uint32_t num_tiles = 50;
constexpr uint32_t dram_buffer_size = single_tile_size * num_tiles;
tt::tt_metal::InterleavedBufferConfig dram_config{
.device= device,
.size = dram_buffer_size,
.page_size = dram_buffer_size,
.buffer_type = tt::tt_metal::BufferType::DRAM
};
tt::tt_metal::InterleavedBufferConfig l1_config{
.device= device,
.size = dram_buffer_size,
.page_size = dram_buffer_size,
.buffer_type = tt::tt_metal::BufferType::L1
};
auto l1_buffer = CreateBuffer(l1_config);
auto input_dram_buffer = CreateBuffer(dram_config);
const uint32_t input_dram_buffer_addr = input_dram_buffer->address();
auto output_dram_buffer = CreateBuffer(dram_config);
const uint32_t output_dram_buffer_addr = output_dram_buffer->address();
/*
* Create input data and runtime arguments, then execute
*/
std::vector<uint32_t> input_vec = create_random_vector_of_bfloat16(
dram_buffer_size, 100, std::chrono::system_clock::now().time_since_epoch().count());
EnqueueWriteBuffer(cq, input_dram_buffer, input_vec, false);
const std::vector<uint32_t> runtime_args = {
l1_buffer->address(),
input_dram_buffer->address(),
static_cast<uint32_t>(input_dram_buffer->noc_coordinates().x),
static_cast<uint32_t>(input_dram_buffer->noc_coordinates().y),
output_dram_buffer->address(),
static_cast<uint32_t>(output_dram_buffer->noc_coordinates().x),
static_cast<uint32_t>(output_dram_buffer->noc_coordinates().y),
l1_buffer->size()
};
SetRuntimeArgs(
program,
dram_copy_kernel_id,
core,
runtime_args
);
EnqueueProgram(cq, program, false);
Finish(cq);
/*
* Validation & Teardown
*/
std::vector<uint32_t> result_vec;
EnqueueReadBuffer(cq, output_dram_buffer, result_vec, true);
pass &= input_vec == result_vec;
pass &= CloseDevice(device);
} catch (const std::exception &e) {
tt::log_error(tt::LogTest, "Test failed with exception!");
tt::log_error(tt::LogTest, "{}", e.what());
throw;
}
if (pass) {
tt::log_info(tt::LogTest, "Test Passed");
} else {
TT_THROW("Test Failed");
}
return 0;
}