From d9a12782a58cc33a51cc046f22e2af43fd119d8c Mon Sep 17 00:00:00 2001 From: Nikola Vukobrat Date: Wed, 14 Aug 2024 14:58:54 +0000 Subject: [PATCH 1/3] Ignore python venv from Git trace --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 82131f71a..ca6929ebd 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ tt_debug build net2pipe_output/ third_party/llvm +venv/ /llk_out/ From 8809689fdefe46da101a09e4732b28806249372e Mon Sep 17 00:00:00 2001 From: Nikola Vukobrat Date: Wed, 14 Aug 2024 15:00:48 +0000 Subject: [PATCH 2/3] [Emit] Support lowering of Int8 MLIR data types --- pybuda/csrc/passes/lower_to_mlir.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pybuda/csrc/passes/lower_to_mlir.cpp b/pybuda/csrc/passes/lower_to_mlir.cpp index 8bf6a4266..09f36b0e8 100644 --- a/pybuda/csrc/passes/lower_to_mlir.cpp +++ b/pybuda/csrc/passes/lower_to_mlir.cpp @@ -344,7 +344,7 @@ class MLIRGenerator return builder_.create( get_tt_forge_operation_location(graph, node), shape_vec, - get_float_type(node)); + get_data_type(node)); } /// Emit the return operation for the function. @@ -364,8 +364,8 @@ class MLIRGenerator mlir::ValueRange(returnValues)); } - /// Get the MLIR float type type for a TTForge node. - mlir::FloatType get_float_type(graphlib::Node *node) + /// Get the MLIR data type for a TTForge node. 
+ mlir::Type get_data_type(graphlib::Node *node) { switch (node->output_df()) { @@ -375,7 +375,10 @@ class MLIRGenerator return builder_.getBF16Type(); case tt::DataFormat::Float16: return builder_.getF16Type(); + case tt::DataFormat::Int8: + return builder_.getI8Type(); default: + log_error("Unsupported data format during lowering from TTForge to TTIR: {}", node->output_df()); TT_ASSERT(false); } // TODO add all supported types in switch @@ -390,7 +393,7 @@ class MLIRGenerator { shape_vec.push_back((int64_t)dim); } - return mlir::RankedTensorType::get(shape_vec, get_float_type(node)); + return mlir::RankedTensorType::get(shape_vec, get_data_type(node)); } /// Get the location for a module. From 8316409a3d22fcc86f44d7937f2d02bdc72f8475 Mon Sep 17 00:00:00 2001 From: Nikola Vukobrat Date: Wed, 14 Aug 2024 15:01:18 +0000 Subject: [PATCH 3/3] [Model] Placeholder for Llama 3B model bringup --- .../test/mlir/llama/test_llama_inference.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 pybuda/test/mlir/llama/test_llama_inference.py diff --git a/pybuda/test/mlir/llama/test_llama_inference.py b/pybuda/test/mlir/llama/test_llama_inference.py new file mode 100644 index 000000000..5db01a63b --- /dev/null +++ b/pybuda/test/mlir/llama/test_llama_inference.py @@ -0,0 +1,37 @@ +# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC + +# SPDX-License-Identifier: Apache-2.0 + +from transformers import LlamaConfig, LlamaForCausalLM, LlamaTokenizer + +import pybuda + + +def test_llama_inference(): + # Compiler configurations + compiler_cfg = pybuda.config._get_global_compiler_config() + compiler_cfg.enable_tvm_cpu_fallback = False + + # Load Llama 3B model and tokenizer + model_path = "openlm-research/open_llama_3b" + config = LlamaConfig() + config.hidden_size = 3200 + config.intermediate_size = 8640 + config.num_hidden_layers = 26 + config.pad_token_id = 0 + config.return_dict = False + framework_model = LlamaForCausalLM.from_pretrained( + model_path,
device_map="auto", config=config + ) + framework_model.eval() + tokenizer = LlamaTokenizer.from_pretrained(model_path) + + prompt = "Q: What is the largest animal?\nA:" + input_ids = tokenizer(prompt, return_tensors="pt").input_ids + + # Sanity run + generation_output = framework_model.generate(input_ids=input_ids, max_new_tokens=32) + print(tokenizer.decode(generation_output[0])) + + # Compile the model + compiled_model = pybuda.compile(framework_model, input_ids) \ No newline at end of file