From 2f0dd607bd086dc95364eac1bda4e7178477ae51 Mon Sep 17 00:00:00 2001 From: Anil Mahmud Date: Thu, 7 Nov 2024 14:57:06 -0500 Subject: [PATCH] #14694: Stall unpacker till MMIO write to unpacker address completes --- llk_lib/llk_unpack_A.h | 9 ++++++--- llk_lib/llk_unpack_AB.h | 3 +++ llk_lib/llk_unpack_reduce.h | 9 ++++++--- llk_lib/llk_unpack_tilize.h | 9 ++++++--- llk_lib/llk_unpack_untilize.h | 9 ++++++--- 5 files changed, 27 insertions(+), 12 deletions(-) diff --git a/llk_lib/llk_unpack_A.h b/llk_lib/llk_unpack_A.h index a7dd132..481d19b 100644 --- a/llk_lib/llk_unpack_A.h +++ b/llk_lib/llk_unpack_A.h @@ -174,9 +174,6 @@ inline void _llk_unpack_A_(const std::uint32_t address, const bool transpose_of_ // Wait for free context wait_for_next_context(2); - // Trisc::SEMPOST for context acquire - semaphore_post(semaphore::UNPACK_SYNC); - // Get tile address if (0 == unp_cfg_context) { if constexpr ((BType == BroadcastType::NONE) && (!acc_to_dest)) { @@ -205,6 +202,12 @@ inline void _llk_unpack_A_(const std::uint32_t address, const bool transpose_of_ } } + // Trisc::SEMPOST for context acquire + semaphore_post(semaphore::UNPACK_SYNC); + + // Stall unpacker until pending CFG writes from Trisc have completed + TTI_STALLWAIT(p_stall::STALL_UNPACK, p_stall::TRISC_CFG); + // Run MOP ckernel::ckernel_template::run(instrn_buffer); diff --git a/llk_lib/llk_unpack_AB.h b/llk_lib/llk_unpack_AB.h index 4c8ab17..393e307 100644 --- a/llk_lib/llk_unpack_AB.h +++ b/llk_lib/llk_unpack_AB.h @@ -124,6 +124,9 @@ inline void _llk_unpack_AB_( // Trisc::SEMPOST for context acquire semaphore_post(semaphore::UNPACK_SYNC); + // Stall unpacker until pending CFG writes from Trisc have completed + TTI_STALLWAIT(p_stall::STALL_UNPACK, p_stall::TRISC_CFG); + // Run MOP ckernel::ckernel_template::run(instrn_buffer); diff --git a/llk_lib/llk_unpack_reduce.h b/llk_lib/llk_unpack_reduce.h index 8484527..8525583 100644 --- a/llk_lib/llk_unpack_reduce.h +++ b/llk_lib/llk_unpack_reduce.h @@ -79,9 +79,6 @@ inline void _llk_unpack_reduce_(const std::uint32_t address) { // Wait for free context wait_for_next_context(2); - // Trisc::SEMPOST for context acquire - semaphore_post(semaphore::UNPACK_SYNC); - // Get tile address if (0 == unp_cfg_context) { cfg[THCON_SEC0_REG3_Base_address_ADDR32] = address; @@ -89,6 +86,12 @@ inline void _llk_unpack_reduce_(const std::uint32_t address) { cfg[THCON_SEC0_REG3_Base_cntx1_address_ADDR32] = address; } + // Trisc::SEMPOST for context acquire + semaphore_post(semaphore::UNPACK_SYNC); + + // Stall unpacker until pending CFG writes from Trisc have completed + TTI_STALLWAIT(p_stall::STALL_UNPACK, p_stall::TRISC_CFG); + // Run MOP ckernel::ckernel_template::run(instrn_buffer); diff --git a/llk_lib/llk_unpack_tilize.h b/llk_lib/llk_unpack_tilize.h index b9462ef..3a9b94d 100644 --- a/llk_lib/llk_unpack_tilize.h +++ b/llk_lib/llk_unpack_tilize.h @@ -100,9 +100,6 @@ inline void _llk_unpack_tilize_(const std::uint32_t base_address, const std::uin // Wait for free context wait_for_next_context(2); - // Trisc::SEMPOST for context acquire - semaphore_post(semaphore::UNPACK_SYNC); - // Get tile address if (0 == unp_cfg_context) { cfg[THCON_SEC0_REG3_Base_address_ADDR32] = address; @@ -110,6 +107,12 @@ inline void _llk_unpack_tilize_(const std::uint32_t base_address, const std::uin cfg[THCON_SEC0_REG3_Base_cntx1_address_ADDR32] = address; } + // Trisc::SEMPOST for context acquire + semaphore_post(semaphore::UNPACK_SYNC); + + // Stall unpacker until pending CFG writes from Trisc have completed + TTI_STALLWAIT(p_stall::STALL_UNPACK, p_stall::TRISC_CFG); + // Run MOP ckernel::ckernel_template::run(instrn_buffer); diff --git a/llk_lib/llk_unpack_untilize.h b/llk_lib/llk_unpack_untilize.h index bd6e739..3ed1fe3 100644 --- a/llk_lib/llk_unpack_untilize.h +++ b/llk_lib/llk_unpack_untilize.h @@ -105,9 +105,6 @@ inline void _llk_unpack_untilize_pass_(const std::uint32_t base_address, const s // Wait for free context wait_for_next_context(2); - // Trisc::SEMPOST for context acquire - semaphore_post(semaphore::UNPACK_SYNC); - // Get tile address if (0 == unp_cfg_context) { cfg[THCON_SEC0_REG3_Base_address_ADDR32] = base_address; @@ -115,6 +112,12 @@ inline void _llk_unpack_untilize_pass_(const std::uint32_t base_address, const s cfg[THCON_SEC0_REG3_Base_cntx1_address_ADDR32] = base_address; } + // Trisc::SEMPOST for context acquire + semaphore_post(semaphore::UNPACK_SYNC); + + // Stall unpacker until pending CFG writes from Trisc have completed + TTI_STALLWAIT(p_stall::STALL_UNPACK, p_stall::TRISC_CFG); + std::uint32_t face_2xr_cnt = 0; for (std::uint32_t r = 0; r < FACE_HEIGHT; r++) { rem_blocks_in_row = block_tile_cols; // reset remaining blocks in row