Skip to content

Commit

Permalink
#5174: Add blackhole llk wrappers
Browse files Browse the repository at this point in the history
  • Loading branch information
rtawfik01 committed Apr 11, 2024
1 parent 74eec23 commit 9517c4f
Show file tree
Hide file tree
Showing 115 changed files with 8,363 additions and 0 deletions.
1,214 changes: 1,214 additions & 0 deletions tt_metal/hw/ckernels/blackhole/inc/ckernel_ops.h

Large diffs are not rendered by default.

55 changes: 55 additions & 0 deletions tt_metal/hw/ckernels/blackhole/metal/common/chlkc_list.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "debug/fw_debug.h"
#include "ckernel.h"
#include "ckernel_gpr_map.h"
#include "llk_param_structs.h"

using namespace ckernel;


#ifdef UCK_CHLKC_MATH
#include "chlkc_unpack_data_format.h"
#include "chlkc_math_fidelity.h"
#include "chlkc_math_approx_mode.h"
#include "chlkc_dst_accum_mode.h"
#include "chlkc_math.cpp"
#endif

#ifdef UCK_CHLKC_PACK
#include "chlkc_pack_data_format.h"
#include "chlkc_dst_accum_mode.h"
#include "chlkc_pack.cpp"
#endif

#ifdef UCK_CHLKC_UNPACK
#include "chlkc_unpack_data_format.h"
#include "chlkc_dst_accum_mode.h"
#include "chlkc_unpack.cpp"
#endif



// Kernel entry point. Depending on which of UCK_CHLKC_MATH / UCK_CHLKC_PACK /
// UCK_CHLKC_UNPACK is defined at compile time, runs the matching generated
// kernel main (math_main / pack_main / unpack_main). Always returns 0.
uint run_kernel() {

#ifdef UCK_CHLKC_MATH
// zeroacc() is invoked before the math kernel body runs.
// NOTE(review): presumably clears the accumulator/dest state - confirm in ckernel.h.
zeroacc();
chlkc_math::math_main();
#endif

#ifdef UCK_CHLKC_PACK
chlkc_pack::pack_main();
#endif

#ifdef UCK_CHLKC_UNPACK
// zerosrc() is invoked before the unpack kernel body runs.
// NOTE(review): presumably clears the source registers - confirm in ckernel.h.
zerosrc();
chlkc_unpack::unpack_main();
#endif

// No status is propagated from the kernel mains; the result is always 0.
return 0;

}
84 changes: 84 additions & 0 deletions tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_math_binary_api.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#pragma once
#include "llk_math_common_api.h"
#include "llk_math_eltwise_binary.h"

/*************************************************************************
* LLK ELTWISE BINARY
*************************************************************************/

// Operand-less init: there is no operand to query, so the face count is fixed
// at 4 and forwarded to _llk_math_eltwise_binary_init_ together with the
// caller's transpose and acc_to_dest values.
template <
    EltwiseBinaryType eltwise_binary_type,
    BroadcastType src_b_bcast_type,
    int NUM_FIDELITY_PHASES = 0,
    EltwiseBinaryReuseDestType binary_reuse_dest = EltwiseBinaryReuseDestType::NONE>
inline void llk_math_eltwise_binary_init(const std::uint32_t transpose = 0, const std::uint32_t acc_to_dest = 0) {
    constexpr std::uint32_t fixed_face_count = 4;

    _llk_math_eltwise_binary_init_<eltwise_binary_type, src_b_bcast_type, NUM_FIDELITY_PHASES, binary_reuse_dest>(
        fixed_face_count, transpose, acc_to_dest);
}

// Operand-aware init: the face count is looked up from operand_A and forwarded
// to _llk_math_eltwise_binary_init_ together with transpose and acc_to_dest.
// operand_B is accepted but not read here; the original author's note states
// that tile-dimension data is the same for both operands.
template <
    EltwiseBinaryType eltwise_binary_type,
    BroadcastType src_b_bcast_type,
    int NUM_FIDELITY_PHASES = 0,
    EltwiseBinaryReuseDestType binary_reuse_dest = EltwiseBinaryReuseDestType::NONE>
inline void llk_math_eltwise_binary_init_with_operands(
    const std::uint32_t operand_A,
    const std::uint32_t operand_B,
    const std::uint32_t transpose = 0,
    const std::uint32_t acc_to_dest = 0) {
    // Only operand_A is consulted for tile-dimension data.
    const std::uint32_t tile_dim_operand_id = get_operand_id(operand_A);
    const std::uint32_t face_count = get_operand_num_faces(tile_dim_operand_id);

    _llk_math_eltwise_binary_init_<eltwise_binary_type, src_b_bcast_type, NUM_FIDELITY_PHASES, binary_reuse_dest>(
        face_count, transpose, acc_to_dest);
}

// Operand-less eltwise binary op: forwards to _llk_math_eltwise_binary_ with
// DstSync::SyncHalf and a fixed face count of 4.
//   dst_index          - destination index handed to the underlying LLK call.
//   clear_fp32_dst_acc - forwarded unchanged (defaults to true).
template <
    EltwiseBinaryType eltwise_binary_type,
    BroadcastType src_b_bcast_type,
    int NUM_FIDELITY_PHASES = 0,
    EltwiseBinaryReuseDestType binary_reuse_dest = EltwiseBinaryReuseDestType::NONE,
    bool is_fp32_dest_acc_en = false>
inline void llk_math_eltwise_binary(uint dst_index, const bool clear_fp32_dst_acc = true) {
    constexpr std::uint32_t fixed_face_count = 4;

    _llk_math_eltwise_binary_<
        eltwise_binary_type,
        src_b_bcast_type,
        DstSync::SyncHalf,
        NUM_FIDELITY_PHASES,
        binary_reuse_dest,
        is_fp32_dest_acc_en>(fixed_face_count, dst_index, clear_fp32_dst_acc);
}

// Operand-aware eltwise binary op: forwards to _llk_math_eltwise_binary_ with
// DstSync::SyncHalf, taking the face count from operand_A.
// operand_B is accepted but not read here; per the original note, both
// operands must have the same number of faces.
//   dst_index          - destination index handed to the underlying LLK call.
//   clear_fp32_dst_acc - forwarded unchanged (defaults to true).
template <
    EltwiseBinaryType eltwise_binary_type,
    BroadcastType src_b_bcast_type,
    int NUM_FIDELITY_PHASES = 0,
    EltwiseBinaryReuseDestType binary_reuse_dest = EltwiseBinaryReuseDestType::NONE,
    bool is_fp32_dest_acc_en = false>
inline void llk_math_eltwise_binary(
    const std::uint32_t operand_A,
    const std::uint32_t operand_B,
    uint dst_index,
    const bool clear_fp32_dst_acc = true) {
    const std::uint32_t face_source_id = get_operand_id(operand_A);
    const std::uint32_t face_count = get_operand_num_faces(face_source_id);

    _llk_math_eltwise_binary_<
        eltwise_binary_type,
        src_b_bcast_type,
        DstSync::SyncHalf,
        NUM_FIDELITY_PHASES,
        binary_reuse_dest,
        is_fp32_dest_acc_en>(face_count, dst_index, clear_fp32_dst_acc);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#pragma once
#include "llk_math_common_api.h"
#include "llk_math_eltwise_binary_sfpu.h"

/*************************************************************************
* LLK ELTWISE BINARY SFPU
*************************************************************************/

// Runs the binary SFPU LLK (_llk_math_eltwise_binary_sfpu_) with
// DstSync::SyncHalf on the two destination indices.
//   operand       - NOTE(review): currently not read; tile geometry is always
//                   taken from operand index 0. Confirm whether that is intended.
//   dst_index_a/b - destination indices forwarded to the LLK call.
//   vector_mode   - forwarded unchanged (defaults to VectorMode::RC).
//   param0..5     - opaque op-specific parameters, forwarded unchanged.
template <SfpuType sfpu_op, bool APPROXIMATE>
inline void llk_math_eltwise_binary_sfpu(
    const uint operand,
    uint dst_index_a,
    uint dst_index_b,
    int vector_mode = (int)VectorMode::RC,
    uint param0 = 0,
    uint param1 = 0,
    uint param2 = 0,
    uint param3 = 0,
    uint param4 = 0,
    uint param5 = 0) {
    // Geometry lookup is pinned to operand index 0 (see note above).
    const std::uint32_t geometry_operand_id = get_operand_id(0);
    const std::uint32_t face_count = get_operand_num_faces(geometry_operand_id);
    const std::uint32_t face_rows = get_operand_face_r_dim(geometry_operand_id);

    _llk_math_eltwise_binary_sfpu_<sfpu_op, APPROXIMATE, DstSync::SyncHalf>(
        face_rows,
        face_count,
        dst_index_a,
        dst_index_b,
        vector_mode,
        param0,
        param1,
        param2,
        param3,
        param4,
        param5);
}

// Initializes the binary SFPU op `sfpu_op` by forwarding the six opaque
// parameters, unchanged and in order, to _llk_math_eltwise_binary_sfpu_init_.
template <SfpuType sfpu_op, bool APPROXIMATE>
inline void llk_math_eltwise_binary_sfpu_init(
    uint param0 = 0,
    uint param1 = 0,
    uint param2 = 0,
    uint param3 = 0,
    uint param4 = 0,
    uint param5 = 0) {
    _llk_math_eltwise_binary_sfpu_init_<sfpu_op, APPROXIMATE>(
        param0, param1, param2, param3, param4, param5);
}

// Runs the binary SFPU LLK with SfpuType::quant_int32 on the two destination
// indices.
//   dst_index_a/b - destination indices forwarded to llk_math_eltwise_binary_sfpu.
//   vector_mode   - forwarded unchanged (defaults to VectorMode::RC).
//
// llk_math_eltwise_binary_sfpu takes exactly two template arguments
// (sfpu_op, APPROXIMATE) and a leading `operand` argument. The previous call
// supplied a third template argument (DstSync::SyncHalf) and omitted `operand`,
// which shifted vector_mode into the dst_index_b slot. DstSync::SyncHalf is
// already applied inside llk_math_eltwise_binary_sfpu.
template <bool APPROXIMATE>
inline void llk_math_eltwise_binary_sfpu_quant_int32(
    uint dst_index_a, uint dst_index_b, int vector_mode = (int)VectorMode::RC) {
    // This wrapper has no operand of its own; 0 matches the operand index the
    // callee uses for its tile-geometry lookup.
    const uint operand = 0;
    llk_math_eltwise_binary_sfpu<SfpuType::quant_int32, APPROXIMATE>(operand, dst_index_a, dst_index_b, vector_mode);
}

// Initializes the quant_int32 binary SFPU op. zero_point is passed as the
// first parameter of llk_math_eltwise_binary_sfpu_init; the remaining
// parameters keep their defaults.
template <bool APPROXIMATE>
inline void llk_math_eltwise_binary_sfpu_quant_int32_init(const uint zero_point) {
    llk_math_eltwise_binary_sfpu_init<SfpuType::quant_int32, APPROXIMATE>(
        zero_point);
}

// Runs the binary SFPU LLK with SfpuType::requant_int32 on the two destination
// indices.
//   dst_index_a/b - destination indices forwarded to llk_math_eltwise_binary_sfpu.
//   vector_mode   - forwarded unchanged (defaults to VectorMode::RC).
//
// llk_math_eltwise_binary_sfpu takes exactly two template arguments
// (sfpu_op, APPROXIMATE) and a leading `operand` argument. The previous call
// supplied a third template argument (DstSync::SyncHalf) and omitted `operand`,
// which shifted vector_mode into the dst_index_b slot. DstSync::SyncHalf is
// already applied inside llk_math_eltwise_binary_sfpu.
template <bool APPROXIMATE>
inline void llk_math_eltwise_binary_sfpu_requant_int32(
    uint dst_index_a, uint dst_index_b, int vector_mode = (int)VectorMode::RC) {
    // This wrapper has no operand of its own; 0 matches the operand index the
    // callee uses for its tile-geometry lookup.
    const uint operand = 0;
    llk_math_eltwise_binary_sfpu<SfpuType::requant_int32, APPROXIMATE>(operand, dst_index_a, dst_index_b, vector_mode);
}

// Initializes the requant_int32 binary SFPU op. zero_point is passed as the
// first parameter of llk_math_eltwise_binary_sfpu_init; the remaining
// parameters keep their defaults.
template <bool APPROXIMATE>
inline void llk_math_eltwise_binary_sfpu_requant_int32_init(const uint zero_point) {
    llk_math_eltwise_binary_sfpu_init<SfpuType::requant_int32, APPROXIMATE>(
        zero_point);
}

// Runs the binary SFPU LLK with SfpuType::dequant_int32 on the two destination
// indices.
//   dst_index_a/b - destination indices forwarded to llk_math_eltwise_binary_sfpu.
//   vector_mode   - forwarded unchanged (defaults to VectorMode::RC).
//
// llk_math_eltwise_binary_sfpu takes exactly two template arguments
// (sfpu_op, APPROXIMATE) and a leading `operand` argument. The previous call
// supplied a third template argument (DstSync::SyncHalf) and omitted `operand`,
// which shifted vector_mode into the dst_index_b slot. DstSync::SyncHalf is
// already applied inside llk_math_eltwise_binary_sfpu.
template <bool APPROXIMATE>
inline void llk_math_eltwise_binary_sfpu_dequant_int32(
    uint dst_index_a, uint dst_index_b, int vector_mode = (int)VectorMode::RC) {
    // This wrapper has no operand of its own; 0 matches the operand index the
    // callee uses for its tile-geometry lookup.
    const uint operand = 0;
    llk_math_eltwise_binary_sfpu<SfpuType::dequant_int32, APPROXIMATE>(operand, dst_index_a, dst_index_b, vector_mode);
}

// Initializes the dequant_int32 binary SFPU op. zero_point is passed as the
// first parameter of llk_math_eltwise_binary_sfpu_init; the remaining
// parameters keep their defaults.
template <bool APPROXIMATE>
inline void llk_math_eltwise_binary_sfpu_dequant_int32_init(const uint zero_point) {
    llk_math_eltwise_binary_sfpu_init<SfpuType::dequant_int32, APPROXIMATE>(
        zero_point);
}
107 changes: 107 additions & 0 deletions tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_math_common_api.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#pragma once
#include "ckernel.h"
#include "ckernel_defs.h"
#include "ckernel_globals.h"
#include "ckernel_template.h"
#include "cmath_common.h"
#include "llk_defs.h"
#include "llk_io.h"
#include "llk_math_common.h"
#include "llk_operands.h"
#include "llk_param_structs.h"

// Need to revisit why we even need this.
// NOTE: the literal below has type double; it approximates the FP32 machine epsilon.
#define EPS 1.19209e-07 // ~std::numeric_limits<float>::epsilon() (FP32)

/*************************************************************************
* LLK MATH COMMON
*************************************************************************/

// Forwards to _llk_math_wait_for_dest_available_, instantiated with
// DstSync::SyncHalf. Takes no arguments and returns nothing.
inline void llk_math_wait_for_dest_available() {
    _llk_math_wait_for_dest_available_<
        DstSync::SyncHalf>();
}

// Forwards to _llk_math_dest_section_done_, instantiated with
// DstSync::SyncHalf and the caller's is_fp32_dest_acc_en flag.
template <bool is_fp32_dest_acc_en = false>
inline void llk_math_dest_section_done() {
    _llk_math_dest_section_done_<
        DstSync::SyncHalf,
        is_fp32_dest_acc_en>();
}

// Forwards to _llk_math_pack_sync_init_, instantiated with DstSync::SyncHalf
// and the caller's is_fp32_dest_acc_en flag.
template <bool is_fp32_dest_acc_en = false>
inline void llk_math_pack_sync_init() {
    _llk_math_pack_sync_init_<
        DstSync::SyncHalf,
        is_fp32_dest_acc_en>();
}

// Forwards tile_index and p_tile to _llk_math_get_tile_ with the same
// mail2math / mail2pack template flags.
// `operand` is part of the signature but is not passed on to the LLK call.
template <bool mail2math = true, bool mail2pack = true>
inline void llk_math_get_tile(
    std::uint32_t operand,
    std::uint32_t tile_index,
    std::uint32_t *p_tile) {
    _llk_math_get_tile_<mail2math, mail2pack>(tile_index, p_tile);
}

// Forwards to _llk_math_release_tile_ with the same mail2math / mail2pack
// template flags.
// `operand` is part of the signature but is not passed on to the LLK call.
template <bool mail2math = true, bool mail2pack = true>
inline void llk_math_release_tile(
    std::uint32_t operand) {
    _llk_math_release_tile_<mail2math, mail2pack>();
}

// Forwards a data pointer and its size in bytes to _llk_math_debug_dump_.
inline void llk_math_debug_dump(std::uint8_t *data, std::uint32_t byte_size) {
    _llk_math_debug_dump_(data, byte_size);
}

// Forwards the given offset to _llk_math_debug_dump_seek_.
inline void llk_math_debug_dump_seek(std::uint8_t offset) {
    _llk_math_debug_dump_seek_(offset);
}

inline void llk_math_reconfig_data_format_srca(const std::uint32_t srca_new_operand) {
std::uint32_t new_srca_operand_id = get_operand_id(srca_new_operand);
_llk_math_reconfig_data_format_srca_(unpack_dst_format[new_srca_operand_id]);
}

inline void llk_math_reconfig_data_format_srcb(const std::uint32_t srcb_new_operand) {
std::uint32_t new_srcb_operand_id = get_operand_id(srcb_new_operand);
_llk_math_reconfig_data_format_srcb_(unpack_dst_format[new_srcb_operand_id]);
}

inline void llk_math_reconfig_data_format(const std::uint32_t srca_new_operand, const std::uint32_t srcb_new_operand) {
std::uint32_t new_srca_operand_id = get_operand_id(srca_new_operand);
std::uint32_t new_srcb_operand_id = get_operand_id(srcb_new_operand);

_llk_math_reconfig_data_format_(unpack_dst_format[new_srca_operand_id], unpack_dst_format[new_srcb_operand_id]);
}

// Conditionally reconfigures the srcA / srcB data formats. For each source,
// the unpack_dst_format entries of the old and new operand are compared:
//   - both differ -> reconfigure both sources in one call;
//   - only srcA   -> reconfigure srcA only;
//   - only srcB   -> reconfigure srcB only;
//   - neither     -> nothing is done.
inline void llk_math_reconfig_data_format(
    const std::uint32_t srca_old_operand,
    const std::uint32_t srca_new_operand,
    const std::uint32_t srcb_old_operand,
    const std::uint32_t srcb_new_operand) {
    const std::uint32_t srca_prev_id = get_operand_id(srca_old_operand);
    const std::uint32_t srca_next_id = get_operand_id(srca_new_operand);
    const std::uint32_t srcb_prev_id = get_operand_id(srcb_old_operand);
    const std::uint32_t srcb_next_id = get_operand_id(srcb_new_operand);

    const bool srca_format_changed = unpack_dst_format[srca_prev_id] != unpack_dst_format[srca_next_id];
    const bool srcb_format_changed = unpack_dst_format[srcb_prev_id] != unpack_dst_format[srcb_next_id];

    if (srca_format_changed && srcb_format_changed) {
        llk_math_reconfig_data_format(srca_new_operand, srcb_new_operand);
        return;
    }
    if (srca_format_changed) {
        llk_math_reconfig_data_format_srca(srca_new_operand);
        return;
    }
    if (srcb_format_changed) {
        llk_math_reconfig_data_format_srcb(srcb_new_operand);
    }
}

// Reconfigures the srcA data format only when the unpack_dst_format entry of
// the new operand differs from that of the old operand; otherwise does nothing.
inline void llk_math_reconfig_data_format_srca(
    const std::uint32_t srca_old_operand, const std::uint32_t srca_new_operand) {
    const std::uint32_t prev_id = get_operand_id(srca_old_operand);
    const std::uint32_t next_id = get_operand_id(srca_new_operand);

    // Same format before and after: no reconfiguration needed.
    if (unpack_dst_format[prev_id] == unpack_dst_format[next_id]) {
        return;
    }
    llk_math_reconfig_data_format_srca(srca_new_operand);
}

// Reconfigures the srcB data format only when the unpack_dst_format entry of
// the new operand differs from that of the old operand; otherwise does nothing.
inline void llk_math_reconfig_data_format_srcb(
    const std::uint32_t srcb_old_operand, const std::uint32_t srcb_new_operand) {
    const std::uint32_t prev_id = get_operand_id(srcb_old_operand);
    const std::uint32_t next_id = get_operand_id(srcb_new_operand);

    // Same format before and after: no reconfiguration needed.
    if (unpack_dst_format[prev_id] == unpack_dst_format[next_id]) {
        return;
    }
    llk_math_reconfig_data_format_srcb(srcb_new_operand);
}
52 changes: 52 additions & 0 deletions tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_math_matmul_api.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#pragma once
#include "llk_math_common_api.h"
#include "llk_math_matmul.h"

/*************************************************************************
* LLK MATMUL
*************************************************************************/

// Initializes the matmul math LLK. Tile row/column dimensions are looked up
// for both operands, the partial-face flag is looked up for operandA only, and
// everything is forwarded to _llk_math_matmul_init_ (instantiated with
// DstTileFaceLayout::RowMajor) along with transpose and ct/rt/kt dims.
template <int NUM_FIDELITY_PHASES>
inline void llk_math_matmul_init(
    const std::uint32_t operandA,
    const std::uint32_t operandB,
    const std::uint32_t transpose = 0,
    const std::uint32_t ct_dim = 1,
    const std::uint32_t rt_dim = 1,
    const std::uint32_t kt_dim = 1) {
    const std::uint32_t a_id = get_operand_id(operandA);
    const std::uint32_t b_id = get_operand_id(operandB);

    // The partial-face flag comes from the first operand.
    const bool a_is_partial_face = get_operand_partial_face(a_id);

    const std::uint32_t a_rows = get_operand_tile_r_dim(a_id);
    const std::uint32_t a_cols = get_operand_tile_c_dim(a_id);
    const std::uint32_t b_rows = get_operand_tile_r_dim(b_id);
    const std::uint32_t b_cols = get_operand_tile_c_dim(b_id);

    _llk_math_matmul_init_<NUM_FIDELITY_PHASES, DstTileFaceLayout::RowMajor>(
        a_rows, a_cols, b_rows, b_cols, a_is_partial_face, transpose, ct_dim, rt_dim, kt_dim);
}

// Runs the matmul math LLK: forwards dst_index, transpose and the ct/rt/kt
// dims, unchanged, to _llk_math_matmul_ (instantiated with
// DstTileFaceLayout::RowMajor).
template <int NUM_FIDELITY_PHASES>
inline void llk_math_matmul(
    const uint dst_index,
    const bool transpose = false,
    const std::uint32_t ct_dim = 1,
    const std::uint32_t rt_dim = 1,
    const std::uint32_t kt_dim = 1) {
    _llk_math_matmul_<NUM_FIDELITY_PHASES, DstTileFaceLayout::RowMajor>(
        dst_index,
        transpose,
        ct_dim,
        rt_dim,
        kt_dim);
}
Loading

0 comments on commit 9517c4f

Please sign in to comment.