diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/README.md b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/README.md
index 4a4990c1..b7443017 100644
--- a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/README.md
+++ b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/README.md
@@ -11,6 +11,7 @@ In this directory, you will find a C++ example on how to run LLM models on Intel
 | Qwen2 | [Qwen/Qwen2-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2-1.5B-Instruct), [Qwen/Qwen2-7B-Instruct](https://huggingface.co/Qwen/Qwen2-7B-Instruct) |
 | Qwen2.5 | [Qwen/Qwen2.5-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct), [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) |
 | MiniCPM | [openbmb/MiniCPM-1B-sft-bf16](https://huggingface.co/openbmb/MiniCPM-1B-sft-bf16), [openbmb/MiniCPM-2B-sft-bf16](https://huggingface.co/openbmb/MiniCPM-2B-sft-bf16) |
+| DeepSeek-R1 | [DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B), [DeepSeek-R1-Distill-Qwen-7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) |
 
 Please refer to [Quickstart](../../../../../../../docs/mddocs/Quickstart/npu_quickstart.md#c-api) for details about verified platforms.
 
@@ -72,6 +73,12 @@ python convert.py --repo-id-or-model-path openbmb/MiniCPM-1B-sft-bf16 --save-di
 
 :: to convert MiniCPM-2B-sft-bf16
 python convert.py --repo-id-or-model-path openbmb/MiniCPM-2B-sft-bf16 --save-directory <converted_model_path>
+
+:: to convert DeepSeek-R1-Distill-Qwen-1.5B
+python convert.py --repo-id-or-model-path deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B --save-directory <converted_model_path>
+
+:: to convert DeepSeek-R1-Distill-Qwen-7B
+python convert.py --repo-id-or-model-path deepseek-ai/DeepSeek-R1-Distill-Qwen-7B --save-directory <converted_model_path>
 ```
 
 Arguments info:
diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/llama-cli-npu.cpp b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/llama-cli-npu.cpp
index f4254170..9ad0f94c 100644
--- a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/llama-cli-npu.cpp
+++ b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/CPP_Examples/llama-cli-npu.cpp
@@ -34,6 +34,7 @@ const std::string llama2_template = "<s>[INST] <<SYS>>\n\n<</SYS>>\n\n%s [/INST]
 const std::string llama3_template = "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n%s<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n";
 const std::string minicpm_template = "<用户>%s<AI>";
 const std::string qwen2_template = "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n%s<|im_end|>\n<|im_start|>assistant\n";
+const std::string qwen2_deepseek_template = "<|begin▁of▁sentence|>You are a helpful assistant.<|User|>%s<|Assistant|>";
 
 
 std::string add_chat_history(npu_model_params model_params,
@@ -66,6 +67,20 @@ std::string add_chat_history(npu_model_params model_params,
                 sprintf_s(prompt, res_template.c_str(), chat_history.c_str(), new_prompt.c_str());
             }
         }
+    } else if (model_params.model_type == std::string("qwen2") && model_params.max_position_embeddings == 131072) {
+        // For DeepSeek-R1
+        if (chat_history == ""){
+            sprintf_s(prompt, qwen2_deepseek_template.c_str(), new_prompt.c_str());
+        }else{
+            if (is_input){
+                std::string input_template = "%s%s<|Assistant|>";
+                sprintf_s(prompt, input_template.c_str(), chat_history.c_str(), new_prompt.c_str());
+            }
+            else{
+                std::string res_template = "%s%s<|User|>";
+                sprintf_s(prompt, res_template.c_str(), chat_history.c_str(), new_prompt.c_str());
+            }
+        }
     } else if (model_params.model_type == std::string("qwen2")) {
         if (chat_history == ""){
             sprintf_s(prompt, qwen2_template.c_str(), new_prompt.c_str());
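
For reviewers, here is a minimal standalone sketch of the conversation flow the new branch implements. The patch keys the DeepSeek-R1 path off `model_type == "qwen2"` plus `max_position_embeddings == 131072`, since the distilled checkpoints reuse the Qwen2 architecture but ship a 128K context window. The function name, buffer size, and use of `snprintf` (instead of the Windows-only `sprintf_s` in the example) are illustrative assumptions, not part of the patch:

```cpp
#include <cstdio>
#include <string>

// Mirrors qwen2_deepseek_template from the patch above.
static const std::string kDeepSeekTemplate =
    "<|begin▁of▁sentence|>You are a helpful assistant.<|User|>%s<|Assistant|>";

// Hypothetical helper: extends the running prompt the same way
// add_chat_history does in the DeepSeek-R1 branch.
std::string build_prompt(const std::string& history,
                         const std::string& text,
                         bool is_input) {
    char buf[8192];  // illustrative fixed-size buffer
    if (history.empty()) {
        // First user turn: instantiate the full template.
        std::snprintf(buf, sizeof(buf), kDeepSeekTemplate.c_str(), text.c_str());
    } else if (is_input) {
        // History ends with "<|User|>": append the user's text,
        // then open the assistant turn.
        std::snprintf(buf, sizeof(buf), "%s%s<|Assistant|>",
                      history.c_str(), text.c_str());
    } else {
        // History ends with "<|Assistant|>": append the model's reply,
        // then open the next user turn.
        std::snprintf(buf, sizeof(buf), "%s%s<|User|>",
                      history.c_str(), text.c_str());
    }
    return std::string(buf);
}

int main() {
    std::string h;
    h = build_prompt(h, "What is 1 + 1?", /*is_input=*/true);
    h = build_prompt(h, "1 + 1 equals 2.", /*is_input=*/false);
    h = build_prompt(h, "Now double it.", /*is_input=*/true);
    std::printf("%s\n", h.c_str());
    return 0;
}
```

Because the first-turn template already ends in `<|Assistant|>`, each later append only has to close the current turn and open the next one, so the `<|User|>`/`<|Assistant|>` markers alternate without re-emitting the system prompt or the begin-of-sentence token.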