Skip to content

Commit

Permalink
Remove CommandQueue redirecting usages straight to HWCQ (#17219)
Browse files Browse the repository at this point in the history
### Ticket
#17208

### Problem description
Over time `CommandQueue` stopped playing any particular function.
Today it is simply proxying calls to HWCQ via creating temporary command
structures and immediately routing their execution with a big switch
statement, calling into hwcq methods.
This adds unreasonable complexity. 

After the sync with @tt-asaigal @dmakoviichuk-tt @cfjchu @omilyutin-tt ,
we agreed to remove this class.

### What's changed
Remove CommandQueue class.
Redirect all usage to HWCQ.
HWCommandQueue is renamed to CommandQueue

Note:
* command_queue.hpp was not removed. It contains definitions of commands
used by hardware_command_queue, I would prefer us to clean this up in
the next PR.

### Checklist
- [x] [Post commit
CI](https://github.com/tenstorrent/tt-metal/actions/runs/13023052350)
- [x] [Blackhole Post
commit](https://github.com/tenstorrent/tt-metal/actions/runs/13023279261)
- [x] [T3k
Frequent](https://github.com/tenstorrent/tt-metal/actions/runs/13021799389)
-
  • Loading branch information
ayerofieiev-tt authored Jan 29, 2025
1 parent 518fd34 commit 58f9654
Show file tree
Hide file tree
Showing 26 changed files with 303 additions and 1,053 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
EnqueueWriteSubBuffer
=====================

.. doxygenfunction:: tt::tt_metal::v0::EnqueueWriteSubBuffer(CommandQueue& cq, std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer> > buffer, HostDataType src, const BufferRegion& region, bool blocking)
.. doxygenfunction:: tt::tt_metal::v0::EnqueueWriteSubBuffer(CommandQueue& cq, std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer> > buffer, std::vector<DType>& src, const BufferRegion& region, bool blocking)
.. doxygenfunction:: tt::tt_metal::v0::EnqueueWriteSubBuffer(CommandQueue& cq, const std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer> >& buffer, HostDataType src, const BufferRegion& region, bool blocking)
.. doxygenfunction:: tt::tt_metal::v0::EnqueueWriteSubBuffer(CommandQueue& cq, const std::variant<std::reference_wrapper<Buffer>, std::shared_ptr<Buffer> >& buffer, std::vector<DType>& src, const BufferRegion& region, bool blocking)
1 change: 0 additions & 1 deletion tests/tt_metal/tt_metal/api/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ set(UNIT_TESTS_API_SRC
${CMAKE_CURRENT_SOURCE_DIR}/test_banked.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_bit_utils.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_buffer_region.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_CommandQueue.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_direct.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_dram_to_l1_multicast.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_dram.cpp
Expand Down
152 changes: 0 additions & 152 deletions tests/tt_metal/tt_metal/api/test_CommandQueue.cpp

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -462,52 +462,6 @@ bool test_EnqueueWriteBuffer_and_EnqueueReadBuffer_multi_queue(
namespace basic_tests {
namespace dram_tests {

TEST_F(CommandQueueBufferFixture, DISABLED_TestAsyncBufferRW) {
// Test Async Enqueue Read and Write + Get Addr + Buffer Allocation and Deallocation
auto& command_queue = this->device_->command_queue();
auto current_mode = CommandQueue::default_mode();
command_queue.set_mode(CommandQueue::CommandQueueMode::ASYNC);
Program program;
for (int j = 0; j < 10; j++) {
// Asynchronously initialize a buffer on device
uint32_t first_buf_value = j + 1;
uint32_t second_buf_value = j + 2;
uint32_t first_buf_size = 4096;
uint32_t second_buf_size = 2048;
// Asynchronously allocate buffer on device
std::shared_ptr<Buffer> buffer =
Buffer::create(this->device_, first_buf_size, first_buf_size, BufferType::DRAM);
std::shared_ptr<uint32_t> allocated_buffer_address = std::make_shared<uint32_t>();
EnqueueGetBufferAddr(this->device_->command_queue(), allocated_buffer_address.get(), buffer.get(), true);
// Ensure returned addr is correct
EXPECT_EQ((*allocated_buffer_address), buffer->address());

std::shared_ptr<std::vector<uint32_t>> vec =
std::make_shared<std::vector<uint32_t>>(first_buf_size / 4, first_buf_value);
std::vector<uint32_t> readback_vec = {};
// Write first vector to existing on device buffer.
EnqueueWriteBuffer(this->device_->command_queue(), buffer, vec, false);
// Reallocate the vector in the main thread after asynchronously pushing it (ensure that worker still has access
// to this data)
vec = std::make_shared<std::vector<uint32_t>>(second_buf_size / 4, second_buf_value);
// Simulate what tt-eager does: Share buffer ownership with program
AssignGlobalBufferToProgram(buffer, program);
// Reallocate buffer (this is safe, since the program also owns the existing buffer, which will not be
// deallocated)
buffer = Buffer::create(this->device_, second_buf_size, second_buf_size, BufferType::DRAM);
// Write second vector to second buffer
EnqueueWriteBuffer(this->device_->command_queue(), buffer, vec, false);
// Have main thread give up ownership immediately after writing
vec.reset();
// Read both buffer and ensure data is correct
EnqueueReadBuffer(this->device_->command_queue(), buffer, readback_vec, true);
for (int i = 0; i < readback_vec.size(); i++) {
EXPECT_EQ(readback_vec[i], second_buf_value);
}
}
command_queue.set_mode(current_mode);
}

TEST_F(CommandQueueSingleCardBufferFixture, WriteOneTileToDramBank0) {
TestBufferConfig config = {.num_pages = 1, .page_size = 2048, .buftype = BufferType::DRAM};
for (IDevice* device : devices_) {
Expand Down
14 changes: 0 additions & 14 deletions tests/tt_metal/tt_metal/integration/test_flatten.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -299,17 +299,3 @@ TEST_F(DispatchFixture, TensixFlatten) {
ASSERT_TRUE(test_flatten::flatten(this, this->devices_.at(id), num_tiles_r, num_tiles_c));
}
}

TEST_F(CommandQueueProgramFixture, DISABLED_TensixTestAsyncFlattenStress) {
auto& command_queue = this->device_->command_queue();
auto current_mode = CommandQueue::default_mode();
command_queue.set_mode(CommandQueue::CommandQueueMode::ASYNC);
uint32_t num_tiles_r = 2;
uint32_t num_tiles_c = 2;
if (!this->IsSlowDispatch()) {
num_tiles_r = 1;
num_tiles_c = 1;
}
ASSERT_TRUE(test_flatten::flatten_stress(this->device_, num_tiles_r, num_tiles_c));
command_queue.set_mode(current_mode);
}
Original file line number Diff line number Diff line change
Expand Up @@ -420,9 +420,6 @@ int main(int argc, char** argv) {
try {
int device_id = 0;
tt_metal::IDevice* device = tt_metal::CreateDevice(device_id);

CommandQueue& cq = device->command_queue();

tt_metal::Program program = tt_metal::CreateProgram();

CoreCoord spoof_prefetch_core = {0, 0};
Expand Down
Loading

0 comments on commit 58f9654

Please sign in to comment.