From d9a12782a58cc33a51cc046f22e2af43fd119d8c Mon Sep 17 00:00:00 2001 From: Nikola Vukobrat Date: Wed, 14 Aug 2024 14:58:54 +0000 Subject: [PATCH 1/3] Ignore python venv from Git trace --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 82131f71a..ca6929ebd 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ tt_debug build net2pipe_output/ third_party/llvm +venv/ /llk_out/ From 8809689fdefe46da101a09e4732b28806249372e Mon Sep 17 00:00:00 2001 From: Nikola Vukobrat Date: Wed, 14 Aug 2024 15:00:48 +0000 Subject: [PATCH 2/3] [Emit] Support lowering of Int8 MLIR data types --- pybuda/csrc/passes/lower_to_mlir.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pybuda/csrc/passes/lower_to_mlir.cpp b/pybuda/csrc/passes/lower_to_mlir.cpp index 8bf6a4266..09f36b0e8 100644 --- a/pybuda/csrc/passes/lower_to_mlir.cpp +++ b/pybuda/csrc/passes/lower_to_mlir.cpp @@ -344,7 +344,7 @@ class MLIRGenerator return builder_.create( get_tt_forge_operation_location(graph, node), shape_vec, - get_float_type(node)); + get_data_type(node)); } /// Emit the return operation for the function. @@ -364,8 +364,8 @@ class MLIRGenerator mlir::ValueRange(returnValues)); } - /// Get the MLIR float type type for a TTForge node. - mlir::FloatType get_float_type(graphlib::Node *node) + /// Get the MLIR data type for a TTForge node. 
+ mlir::Type get_data_type(graphlib::Node *node) { switch (node->output_df()) { @@ -375,7 +375,10 @@ class MLIRGenerator return builder_.getBF16Type(); case tt::DataFormat::Float16: return builder_.getF16Type(); + case tt::DataFormat::Int8: + return builder_.getI8Type(); default: + log_error("Unsupported data format during lowering from TTForge to TTIR: {}", node->output_df()); TT_ASSERT(false); } // TODO add all supported types in switch @@ -390,7 +393,7 @@ class MLIRGenerator { shape_vec.push_back((int64_t)dim); } - return mlir::RankedTensorType::get(shape_vec, get_float_type(node)); + return mlir::RankedTensorType::get(shape_vec, get_data_type(node)); } /// Get the location for a module. From 8316409a3d22fcc86f44d7937f2d02bdc72f8475 Mon Sep 17 00:00:00 2001 From: Nikola Vukobrat Date: Wed, 14 Aug 2024 15:01:18 +0000 Subject: [PATCH 3/3] [Model] Placeholder for Llama 3B model bringup --- .../test/mlir/llama/test_llama_inference.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 pybuda/test/mlir/llama/test_llama_inference.py diff --git a/pybuda/test/mlir/llama/test_llama_inference.py b/pybuda/test/mlir/llama/test_llama_inference.py new file mode 100644 index 000000000..5db01a63b --- /dev/null +++ b/pybuda/test/mlir/llama/test_llama_inference.py @@ -0,0 +1,37 @@ +# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC + +# SPDX-License-Identifier: Apache-2.0 + +from transformers import LlamaConfig, LlamaForCausalLM, LlamaTokenizer + +import pybuda + + +def test_llama_inference(): + # Compiler configurations + compiler_cfg = pybuda.config._get_global_compiler_config() + compiler_cfg.enable_tvm_cpu_fallback = False + + # Load Llama 3B model and tokenizer + model_path = "openlm-research/open_llama_3b" + config = LlamaConfig() + config.hidden_size = 3200 + config.intermediate_size = 8640 + config.num_hidden_layers = 26 + config.pad_token_id = 0 + config.return_dict = False + framework_model = LlamaForCausalLM.from_pretrained( + model_path,
device_map="auto", config=config + ) + framework_model.eval() + tokenizer = LlamaTokenizer.from_pretrained(model_path) + + prompt = "Q: What is the largest animal?\nA:" + input_ids = tokenizer(prompt, return_tensors="pt").input_ids + + # Sanity run + generation_output = framework_model.generate(input_ids=input_ids, max_new_tokens=32) + print(tokenizer.decode(generation_output[0])) + + # Compile the model + compiled_model = pybuda.compile(framework_model, input_ids) \ No newline at end of file