From 8316409a3d22fcc86f44d7937f2d02bdc72f8475 Mon Sep 17 00:00:00 2001
From: Nikola Vukobrat
Date: Wed, 14 Aug 2024 15:01:18 +0000
Subject: [PATCH] [Model] Placehoder for Llama 3B model bringup

---
Reviewer notes (free-form area after the three-dash line; not part of the
commit message or of the patch payload):

  * Adds a single pytest function, test_llama_inference, in a new file.
  * The test sets enable_tvm_cpu_fallback = False on the global pybuda
    compiler config, so the setting stays on that global object after the
    test returns.
  * It builds a default LlamaConfig, overrides hidden_size (3200),
    intermediate_size (8640), num_hidden_layers (26), pad_token_id (0) and
    return_dict (False), and passes it to LlamaForCausalLM.from_pretrained
    for "openlm-research/open_llama_3b" with device_map="auto".
  * A sanity generate() call (max_new_tokens=32) is run on the framework
    model and its decoded output is printed before compilation.
  * pybuda.compile(framework_model, input_ids) is the last statement; its
    result is assigned but never checked, so the test only fails if one of
    the calls raises.
  * The new file has no trailing newline (see the marker at the end of the
    hunk).

 .../test/mlir/llama/test_llama_inference.py   | 37 +++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 pybuda/test/mlir/llama/test_llama_inference.py

diff --git a/pybuda/test/mlir/llama/test_llama_inference.py b/pybuda/test/mlir/llama/test_llama_inference.py
new file mode 100644
index 000000000..5db01a63b
--- /dev/null
+++ b/pybuda/test/mlir/llama/test_llama_inference.py
@@ -0,0 +1,37 @@
+# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC
+
+# SPDX-License-Identifier: Apache-2.0
+
+from transformers import LlamaConfig, LlamaForCausalLM, LlamaTokenizer
+
+import pybuda
+
+
+def test_llama_inference():
+    # Compiler configurations
+    compiler_cfg = pybuda.config._get_global_compiler_config()
+    compiler_cfg.enable_tvm_cpu_fallback = False
+
+    # Load Llama 3B model and tokenizer
+    model_path = "openlm-research/open_llama_3b"
+    config = LlamaConfig()
+    config.hidden_size = 3200
+    config.intermediate_size = 8640
+    config.num_hidden_layers = 26
+    config.pad_token_id = 0
+    config.return_dict = False
+    framework_model = LlamaForCausalLM.from_pretrained(
+        model_path, device_map="auto", config=config
+    )
+    framework_model.eval()
+    tokenizer = LlamaTokenizer.from_pretrained(model_path)
+
+    prompt = "Q: What is the largest animal?\nA:"
+    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
+
+    # Sanity run
+    generation_output = framework_model.generate(input_ids=input_ids, max_new_tokens=32)
+    print(tokenizer.decode(generation_output[0]))
+
+    # Compile the model
+    compiled_model = pybuda.compile(framework_model, input_ids)
\ No newline at end of file