diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/Pipeline-Models/llama.py b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/Pipeline-Models/llama.py
index 469081c3..08bbb55e 100644
--- a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/Pipeline-Models/llama.py
+++ b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/Pipeline-Models/llama.py
@@ -60,6 +60,7 @@ if __name__ == "__main__":
                                                  optimize_model=True,
                                                  pipeline=True,
                                                  max_output_len=args.max_output_len,
+                                                 torch_dtype=torch.float16,
                                                  attn_implementation="eager")
 
     tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)