update quickstart md related to llama.cpp/ollama (#13265)
* update quickstart md related to llama.cpp/ollama * update troubleshooting * update quickstart/troubleshooting according to RuonanWang's comments
This commit is contained in:
		
							parent
							
								
									68c5103a0a
								
							
						
					
					
						commit
						951c23739d
					
				
					 7 changed files with 19 additions and 28 deletions
				
			
		| 
						 | 
					@ -50,7 +50,6 @@ To use GPU acceleration, several environment variables are required or recommend
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  ```bash
 | 
					  ```bash
 | 
				
			||||||
  source /opt/intel/oneapi/setvars.sh
 | 
					  source /opt/intel/oneapi/setvars.sh
 | 
				
			||||||
  export SYCL_CACHE_PERSISTENT=1
 | 
					 | 
				
			||||||
  # [optional] under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 | 
					  # [optional] under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 | 
				
			||||||
  export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
					  export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
				
			||||||
  # [optional] if you want to run on single GPU, use below command to limit GPU may improve performance
 | 
					  # [optional] if you want to run on single GPU, use below command to limit GPU may improve performance
 | 
				
			||||||
| 
						 | 
					@ -62,7 +61,6 @@ To use GPU acceleration, several environment variables are required or recommend
 | 
				
			||||||
  Please run the following command in Miniforge Prompt.
 | 
					  Please run the following command in Miniforge Prompt.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  ```cmd
 | 
					  ```cmd
 | 
				
			||||||
  set SYCL_CACHE_PERSISTENT=1
 | 
					 | 
				
			||||||
  rem under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 | 
					  rem under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 | 
				
			||||||
  set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
					  set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
				
			||||||
  ```
 | 
					  ```
 | 
				
			||||||
| 
						 | 
					@ -135,7 +133,6 @@ Launch the Ollama service:
 | 
				
			||||||
  export ZES_ENABLE_SYSMAN=1
 | 
					  export ZES_ENABLE_SYSMAN=1
 | 
				
			||||||
  export OLLAMA_NUM_GPU=999
 | 
					  export OLLAMA_NUM_GPU=999
 | 
				
			||||||
  source /opt/intel/oneapi/setvars.sh
 | 
					  source /opt/intel/oneapi/setvars.sh
 | 
				
			||||||
  export SYCL_CACHE_PERSISTENT=1
 | 
					 | 
				
			||||||
  # [optional] under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 | 
					  # [optional] under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 | 
				
			||||||
  export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
					  export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
				
			||||||
  # [optional] if you want to run on single GPU, use below command to limit GPU may improve performance
 | 
					  # [optional] if you want to run on single GPU, use below command to limit GPU may improve performance
 | 
				
			||||||
| 
						 | 
					@ -152,7 +149,6 @@ Launch the Ollama service:
 | 
				
			||||||
  set no_proxy=localhost,127.0.0.1
 | 
					  set no_proxy=localhost,127.0.0.1
 | 
				
			||||||
  set ZES_ENABLE_SYSMAN=1
 | 
					  set ZES_ENABLE_SYSMAN=1
 | 
				
			||||||
  set OLLAMA_NUM_GPU=999
 | 
					  set OLLAMA_NUM_GPU=999
 | 
				
			||||||
  set SYCL_CACHE_PERSISTENT=1
 | 
					 | 
				
			||||||
  rem under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 | 
					  rem under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 | 
				
			||||||
  set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
					  set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -123,7 +123,6 @@ To use GPU acceleration, several environment variables are required or recommend
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  ```bash
 | 
					  ```bash
 | 
				
			||||||
  source /opt/intel/oneapi/setvars.sh
 | 
					  source /opt/intel/oneapi/setvars.sh
 | 
				
			||||||
  export SYCL_CACHE_PERSISTENT=1
 | 
					 | 
				
			||||||
  # [optional] under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 | 
					  # [optional] under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 | 
				
			||||||
  export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
					  export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
				
			||||||
  # [optional] if you want to run on single GPU, use below command to limit GPU may improve performance
 | 
					  # [optional] if you want to run on single GPU, use below command to limit GPU may improve performance
 | 
				
			||||||
| 
						 | 
					@ -135,7 +134,6 @@ To use GPU acceleration, several environment variables are required or recommend
 | 
				
			||||||
  Please run the following command in Miniforge Prompt.
 | 
					  Please run the following command in Miniforge Prompt.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  ```cmd
 | 
					  ```cmd
 | 
				
			||||||
  set SYCL_CACHE_PERSISTENT=1
 | 
					 | 
				
			||||||
  rem under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 | 
					  rem under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 | 
				
			||||||
  set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
					  set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
				
			||||||
  ```
 | 
					  ```
 | 
				
			||||||
| 
						 | 
					@ -384,3 +382,7 @@ If you meet this error, please check your Linux kernel version first. You may en
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#### 16. `backend buffer base cannot be NULL` error
 | 
					#### 16. `backend buffer base cannot be NULL` error
 | 
				
			||||||
If you meet `ggml-backend.c:96: GGML_ASSERT(base != NULL && "backend buffer base cannot be NULL") failed`, simply adding `-c xx` parameter during inference, for example `-c 1024` would resolve this problem.
 | 
					If you meet `ggml-backend.c:96: GGML_ASSERT(base != NULL && "backend buffer base cannot be NULL") failed`, simply adding `-c xx` parameter during inference, for example `-c 1024` would resolve this problem.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#### 17. `The program was built for 1 devices` error
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If you meet an error that looks like `The program was built for 1 devices. Build program log for 'Intel(R) Arc(TM) A770 Graphics':`, this may be caused by the command `set/export SYCL_CACHE_PERSISTENT=1`. Please try `unset SYCL_CACHE_PERSISTENT` in the terminal. If the variable has been written into a configuration file such as `~/.bashrc`, you need to manually delete or comment out the corresponding line.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -124,7 +124,6 @@ cd llama-cpp
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  ```bash
 | 
					  ```bash
 | 
				
			||||||
  source /opt/intel/oneapi/setvars.sh
 | 
					  source /opt/intel/oneapi/setvars.sh
 | 
				
			||||||
  export SYCL_CACHE_PERSISTENT=1
 | 
					 | 
				
			||||||
  # [optional] under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 | 
					  # [optional] under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 | 
				
			||||||
  export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
					  export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
				
			||||||
  # [optional] if you want to run on single GPU, use below command to limit GPU may improve performance
 | 
					  # [optional] if you want to run on single GPU, use below command to limit GPU may improve performance
 | 
				
			||||||
| 
						 | 
					@ -136,7 +135,6 @@ cd llama-cpp
 | 
				
			||||||
  请在 Miniforge Prompt 中运行下列命令。
 | 
					  请在 Miniforge Prompt 中运行下列命令。
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  ```cmd
 | 
					  ```cmd
 | 
				
			||||||
  set SYCL_CACHE_PERSISTENT=1
 | 
					 | 
				
			||||||
  rem under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 | 
					  rem under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 | 
				
			||||||
  set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
					  set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
				
			||||||
  ```
 | 
					  ```
 | 
				
			||||||
| 
						 | 
					@ -385,3 +383,8 @@ llama_perf_context_print:       total time =      xx.xx ms /    62 tokens
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#### 16. `backend buffer base cannot be NULL` 错误
 | 
					#### 16. `backend buffer base cannot be NULL` 错误
 | 
				
			||||||
如果你遇到`ggml-backend.c:96: GGML_ASSERT(base != NULL && "backend buffer base cannot be NULL") failed`错误,在推理时传入参数`-c xx`,如`-c 1024`即可解决。
 | 
					如果你遇到`ggml-backend.c:96: GGML_ASSERT(base != NULL && "backend buffer base cannot be NULL") failed`错误,在推理时传入参数`-c xx`,如`-c 1024`即可解决。
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#### 17.  `The program was built for 1 devices` 错误
 | 
				
			||||||
 | 
					如果遇到错误`The program was built for 1 devices. Build program log for 'Intel(R) Arc(TM) A770 Graphics':`, 这是因为设置了`SYCL_CACHE_PERSISTENT=1`。请按照以下命令操作:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					在命令行中输入`unset SYCL_CACHE_PERSISTENT`;如果写入了配置文件,比如`~/.bashrc`等,需要手动删去或注释掉对应的行。
 | 
				
			||||||
| 
						 | 
					@ -49,10 +49,6 @@ Then, extract the zip file to a folder.
 | 
				
			||||||
### Step 2: Runtime Configuration
 | 
					### Step 2: Runtime Configuration
 | 
				
			||||||
 | 
					
 | 
				
			||||||
- Open "Command Prompt" (cmd), and enter the extracted folder through `cd /d PATH\TO\EXTRACTED\FOLDER`
 | 
					- Open "Command Prompt" (cmd), and enter the extracted folder through `cd /d PATH\TO\EXTRACTED\FOLDER`
 | 
				
			||||||
- To use GPU acceleration, several environment variables are required or recommended before running `llama.cpp`.
 | 
					 | 
				
			||||||
  ```cmd
 | 
					 | 
				
			||||||
  set SYCL_CACHE_PERSISTENT=1
 | 
					 | 
				
			||||||
  ```
 | 
					 | 
				
			||||||
- For multi-GPUs user, go to [Tips](#multi-gpus-usage) for how to select specific GPU.
 | 
					- For multi-GPUs user, go to [Tips](#multi-gpus-usage) for how to select specific GPU.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
### Step 3: Run GGUF models
 | 
					### Step 3: Run GGUF models
 | 
				
			||||||
| 
						 | 
					@ -133,10 +129,6 @@ Then, extract the tgz file to a folder.
 | 
				
			||||||
### Step 2: Runtime Configuration
 | 
					### Step 2: Runtime Configuration
 | 
				
			||||||
 | 
					
 | 
				
			||||||
- Open a "Terminal", and enter the extracted folder through `cd /PATH/TO/EXTRACTED/FOLDER`
 | 
					- Open a "Terminal", and enter the extracted folder through `cd /PATH/TO/EXTRACTED/FOLDER`
 | 
				
			||||||
- To use GPU acceleration, several environment variables are required or recommended before running `llama.cpp`.
 | 
					 | 
				
			||||||
  ```bash
 | 
					 | 
				
			||||||
  export SYCL_CACHE_PERSISTENT=1
 | 
					 | 
				
			||||||
  ```
 | 
					 | 
				
			||||||
- For multi-GPUs user, go to [Tips](#multi-gpus-usage) for how to select specific GPU.
 | 
					- For multi-GPUs user, go to [Tips](#multi-gpus-usage) for how to select specific GPU.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
### Step 3: Run GGUF models
 | 
					### Step 3: Run GGUF models
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -49,10 +49,6 @@
 | 
				
			||||||
### 步骤 2:运行时配置
 | 
					### 步骤 2:运行时配置
 | 
				
			||||||
 | 
					
 | 
				
			||||||
- 打开命令提示符(cmd),并通过在命令行输入指令 `cd /d PATH\TO\EXTRACTED\FOLDER` 进入解压缩后的文件夹。
 | 
					- 打开命令提示符(cmd),并通过在命令行输入指令 `cd /d PATH\TO\EXTRACTED\FOLDER` 进入解压缩后的文件夹。
 | 
				
			||||||
- 要使用 GPU 加速,在运行 `llama.cpp` 之前,建议设置如下环境变量。
 | 
					 | 
				
			||||||
  ```cmd
 | 
					 | 
				
			||||||
  set SYCL_CACHE_PERSISTENT=1
 | 
					 | 
				
			||||||
  ```
 | 
					 | 
				
			||||||
- 对于多 GPU 用户,请转至[提示](#多-gpu-配置)了解如何选择特定的 GPU。
 | 
					- 对于多 GPU 用户,请转至[提示](#多-gpu-配置)了解如何选择特定的 GPU。
 | 
				
			||||||
 | 
					
 | 
				
			||||||
### 步骤 3:运行 GGUF 模型
 | 
					### 步骤 3:运行 GGUF 模型
 | 
				
			||||||
| 
						 | 
					@ -135,10 +131,6 @@ llama_perf_context_print:       total time =   xxxxx.xx ms /  1385 tokens
 | 
				
			||||||
### 步骤 2:运行时配置
 | 
					### 步骤 2:运行时配置
 | 
				
			||||||
 | 
					
 | 
				
			||||||
- 开启一个终端,输入命令 `cd /PATH/TO/EXTRACTED/FOLDER` 进入解压缩后的文件夹。
 | 
					- 开启一个终端,输入命令 `cd /PATH/TO/EXTRACTED/FOLDER` 进入解压缩后的文件夹。
 | 
				
			||||||
- 要使用 GPU 加速,在运行 `llama.cpp` 之前,建议设置如下环境变量。
 | 
					 | 
				
			||||||
  ```bash
 | 
					 | 
				
			||||||
  export SYCL_CACHE_PERSISTENT=1
 | 
					 | 
				
			||||||
  ```
 | 
					 | 
				
			||||||
- 对于多 GPU 用户,请转至[提示](#多-gpu-配置)了解如何选择特定的 GPU。
 | 
					- 对于多 GPU 用户,请转至[提示](#多-gpu-配置)了解如何选择特定的 GPU。
 | 
				
			||||||
 | 
					
 | 
				
			||||||
### 步骤 3:运行 GGUF 模型
 | 
					### 步骤 3:运行 GGUF 模型
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -86,7 +86,6 @@ You may launch the Ollama service as below:
 | 
				
			||||||
  export ZES_ENABLE_SYSMAN=1
 | 
					  export ZES_ENABLE_SYSMAN=1
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  source /opt/intel/oneapi/setvars.sh
 | 
					  source /opt/intel/oneapi/setvars.sh
 | 
				
			||||||
  export SYCL_CACHE_PERSISTENT=1
 | 
					 | 
				
			||||||
  # [optional] under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 | 
					  # [optional] under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 | 
				
			||||||
  export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
					  export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
				
			||||||
  # [optional] if you want to run on single GPU, use below command to limit GPU may improve performance
 | 
					  # [optional] if you want to run on single GPU, use below command to limit GPU may improve performance
 | 
				
			||||||
| 
						 | 
					@ -103,7 +102,6 @@ You may launch the Ollama service as below:
 | 
				
			||||||
  set OLLAMA_NUM_GPU=999
 | 
					  set OLLAMA_NUM_GPU=999
 | 
				
			||||||
  set no_proxy=localhost,127.0.0.1
 | 
					  set no_proxy=localhost,127.0.0.1
 | 
				
			||||||
  set ZES_ENABLE_SYSMAN=1
 | 
					  set ZES_ENABLE_SYSMAN=1
 | 
				
			||||||
  set SYCL_CACHE_PERSISTENT=1
 | 
					 | 
				
			||||||
  rem under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 | 
					  rem under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 | 
				
			||||||
  set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
					  set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -246,3 +244,8 @@ When you start `ollama serve` and execute `ollama run <model_name>`, but `ollama
 | 
				
			||||||
 | 
					
 | 
				
			||||||
1. On Linux, you may run `systemctl stop ollama` to stop all ollama processes, and then rerun `ollama serve` in your current directory.
 | 
					1. On Linux, you may run `systemctl stop ollama` to stop all ollama processes, and then rerun `ollama serve` in your current directory.
 | 
				
			||||||
2. On Windows, you may `set OLLAMA_HOST=0.0.0.0` to ensure that the ollama commands run on the current `ollama serve`.
 | 
					2. On Windows, you may `set OLLAMA_HOST=0.0.0.0` to ensure that the ollama commands run on the current `ollama serve`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#### 11. Error `The program was built for 1 devices` when executing `ollama serve`
 | 
				
			||||||
 | 
					If you start `ollama serve` and execute `ollama run <model_name>` but encounter the error `The program was built for 1 devices. Build program log for 'Intel(R) Arc(TM) A770 Graphics':`, this may be caused by the command `set/export SYCL_CACHE_PERSISTENT=1`. Please run commands as below:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Run `unset SYCL_CACHE_PERSISTENT` in the terminal; if the variable has been written into a configuration file such as `~/.bashrc`, you need to manually delete or comment out the corresponding line.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -86,7 +86,6 @@ IPEX-LLM 现在已支持在 Linux 和 Windows 系统上运行 `Ollama`。
 | 
				
			||||||
  export ZES_ENABLE_SYSMAN=1
 | 
					  export ZES_ENABLE_SYSMAN=1
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  source /opt/intel/oneapi/setvars.sh
 | 
					  source /opt/intel/oneapi/setvars.sh
 | 
				
			||||||
  export SYCL_CACHE_PERSISTENT=1
 | 
					 | 
				
			||||||
  # [optional] under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 | 
					  # [optional] under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 | 
				
			||||||
  export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
					  export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
				
			||||||
  # [optional] if you want to run on single GPU, use below command to limit GPU may improve performance
 | 
					  # [optional] if you want to run on single GPU, use below command to limit GPU may improve performance
 | 
				
			||||||
| 
						 | 
					@ -103,7 +102,6 @@ IPEX-LLM 现在已支持在 Linux 和 Windows 系统上运行 `Ollama`。
 | 
				
			||||||
  set OLLAMA_NUM_GPU=999
 | 
					  set OLLAMA_NUM_GPU=999
 | 
				
			||||||
  set no_proxy=localhost,127.0.0.1
 | 
					  set no_proxy=localhost,127.0.0.1
 | 
				
			||||||
  set ZES_ENABLE_SYSMAN=1
 | 
					  set ZES_ENABLE_SYSMAN=1
 | 
				
			||||||
  set SYCL_CACHE_PERSISTENT=1
 | 
					 | 
				
			||||||
  rem under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 | 
					  rem under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 | 
				
			||||||
  set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
					  set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -241,3 +239,8 @@ Ollama 默认每 5 分钟从 GPU 内存卸载一次模型。针对 ollama 的最
 | 
				
			||||||
 | 
					
 | 
				
			||||||
在 Linux 上,你可以运行 `systemctl stop ollama` 来停止所有的 ollama 进程,然后在当前目录重新执行 `ollama serve`。
 | 
					在 Linux 上,你可以运行 `systemctl stop ollama` 来停止所有的 ollama 进程,然后在当前目录重新执行 `ollama serve`。
 | 
				
			||||||
在 Windows 上,你可以运行 `set OLLAMA_HOST=0.0.0.0` 以确保 ollama 命令通过当前的 `ollama serve` 上运行。
 | 
					在 Windows 上,你可以运行 `set OLLAMA_HOST=0.0.0.0` 以确保 ollama 命令通过当前的 `ollama serve` 上运行。
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#### 11. 执行 `ollama serve` 时报 `The program was built for 1 devices` 错误
 | 
				
			||||||
 | 
					当启动`ollama serve`或者`ollama run <model_name>`时,产生报错`The program was built for 1 devices. Build program log for 'Intel(R) Arc(TM) A770 Graphics':`, 这是因为设置了`SYCL_CACHE_PERSISTENT=1`。请按照以下命令操作:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					在命令行中输入`unset SYCL_CACHE_PERSISTENT`;如果写入了配置文件,比如`~/.bashrc`等,需要手动删去或注释掉对应的行。
 | 
				
			||||||
		Loading…
	
		Reference in a new issue