From 8316409a3d22fcc86f44d7937f2d02bdc72f8475 Mon Sep 17 00:00:00 2001
From: Nikola Vukobrat <nvukobrat@tenstorrent.com>
Date: Wed, 14 Aug 2024 15:01:18 +0000
Subject: [PATCH] [Model] Placeholder for Llama 3B model bringup

---
 .../test/mlir/llama/test_llama_inference.py   | 37 +++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 pybuda/test/mlir/llama/test_llama_inference.py

diff --git a/pybuda/test/mlir/llama/test_llama_inference.py b/pybuda/test/mlir/llama/test_llama_inference.py
new file mode 100644
index 000000000..5db01a63b
--- /dev/null
+++ b/pybuda/test/mlir/llama/test_llama_inference.py
@@ -0,0 +1,37 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC
+
+# SPDX-License-Identifier: Apache-2.0
+
+from transformers import LlamaConfig, LlamaForCausalLM, LlamaTokenizer
+
+import pybuda
+
+
+def test_llama_inference():
+    """Run OpenLLaMA 3B in the framework once, then pass it to pybuda.compile.
+
+    Nothing is asserted; the test passes if neither step raises.
+    """
+    # Disable TVM CPU fallback in the global compiler configuration
+    pybuda.config._get_global_compiler_config().enable_tvm_cpu_fallback = False
+
+    # Start from LlamaConfig defaults and override selected fields
+    llama_cfg = LlamaConfig()
+    overrides = dict(hidden_size=3200, intermediate_size=8640, num_hidden_layers=26)
+    overrides.update(pad_token_id=0, return_dict=False)
+    for field, value in overrides.items():
+        setattr(llama_cfg, field, value)
+
+    # Load model (switched to eval mode) and tokenizer from the same checkpoint
+    checkpoint = "openlm-research/open_llama_3b"
+    framework_model = LlamaForCausalLM.from_pretrained(
+        checkpoint, device_map="auto", config=llama_cfg
+    ).eval()
+    tokenizer = LlamaTokenizer.from_pretrained(checkpoint)
+    prompt = "Q: What is the largest animal?\nA:"
+    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
+
+    # Sanity run on the framework model, then compile it
+    generated = framework_model.generate(input_ids=input_ids, max_new_tokens=32)
+    print(tokenizer.decode(generated[0]))
+    compiled_model = pybuda.compile(framework_model, input_ids)
\ No newline at end of file