diff --git a/python/llm/src/bigdl/llm/cli/llm-cli b/python/llm/src/bigdl/llm/cli/llm-cli
index 429b6e05..6fd86cae 100755
--- a/python/llm/src/bigdl/llm/cli/llm-cli
+++ b/python/llm/src/bigdl/llm/cli/llm-cli
@@ -53,6 +53,12 @@ function gptneox {
   eval "$command"
 }
 
+function starcoder {
+  command="$lib_dir/main-starcoder_$avx_flag -t $threads -n $n_predict ${filteredArguments[*]}"
+  echo "$command"
+  eval "$command"
+}
+
 # Remove model_family/x parameter
 filteredArguments=()
 while [[ $# -gt 0 ]]; do
@@ -91,6 +97,8 @@ elif [[ "$model_family" == "bloom" ]]; then
   bloom
 elif [[ "$model_family" == "gptneox" ]]; then
   gptneox
+elif [[ "$model_family" == "starcoder" ]]; then
+  starcoder
 else
   echo "Invalid model_family: $model_family"
   display_help
diff --git a/python/llm/src/bigdl/llm/cli/llm-cli.ps1 b/python/llm/src/bigdl/llm/cli/llm-cli.ps1
index 51a514ae..cef9ca76 100755
--- a/python/llm/src/bigdl/llm/cli/llm-cli.ps1
+++ b/python/llm/src/bigdl/llm/cli/llm-cli.ps1
@@ -40,6 +40,13 @@ function gptneox
     Invoke-Expression $command
 }
 
+function starcoder
+{
+    $command = "$lib_dir/main-starcoder.exe -t $threads -n $n_predict $filteredArguments"
+    Write-Host "$command"
+    Invoke-Expression $command
+}
+
 # Remove model_family/x parameter
 $filteredArguments = @()
 for ($i = 0; $i -lt $args.Length; $i++) {
@@ -79,6 +86,9 @@ switch ($model_family)
     "gptneox" {
         gptneox
     }
+    "starcoder" {
+        starcoder
+    }
     default {
         Write-Host "Invalid model_family: $model_family"
         Display-Help
diff --git a/python/llm/src/bigdl/llm/models.py b/python/llm/src/bigdl/llm/models.py
index 345a62df..e81444b5 100644
--- a/python/llm/src/bigdl/llm/models.py
+++ b/python/llm/src/bigdl/llm/models.py
@@ -22,3 +22,4 @@
 from bigdl.llm.ggml.model.llama import Llama
 from bigdl.llm.ggml.model.gptneox import Gptneox
 from bigdl.llm.ggml.model.bloom import Bloom
+from bigdl.llm.ggml.model.starcoder import Starcoder
diff --git a/python/llm/src/bigdl/llm/utils/convert_util.py b/python/llm/src/bigdl/llm/utils/convert_util.py
index d00eb76a..3faa9f8b 100644
--- a/python/llm/src/bigdl/llm/utils/convert_util.py
+++ b/python/llm/src/bigdl/llm/utils/convert_util.py
@@ -1427,7 +1427,7 @@ def _convert_starcoder_hf_to_ggml(model_path, outfile_dir, outtype):
     model = AutoModelForCausalLM.from_pretrained(model_path,
                                                  config=config,
                                                  torch_dtype=torch.float16 if outtype == "f16" else torch.float32,
-                                                 low_cpu_mem_usage=True,
+                                                 # low_cpu_mem_usage=True,
                                                  trust_remote_code=True,
                                                  offload_state_dict=True)