add pv for llm-serving k8s deployment (#9906)
This commit is contained in:
parent
8d7326ae03
commit
05ea0ecd70
3 changed files with 61 additions and 4 deletions
|
|
@ -35,6 +35,27 @@ The entrypoint of the image will try to set `OMP_NUM_THREADS` to the correct num
|
||||||
|
|
||||||
If you want to use the vllm AsyncLLMEngine for serving, you should set the args -w vllm_worker in worker part of deployment.yaml.
|
If you want to use the vllm AsyncLLMEngine for serving, you should set the args -w vllm_worker in worker part of deployment.yaml.
|
||||||
|
|
||||||
|
### PersistentVolume
|
||||||
|
We use the following YAML file to create the PersistentVolume:
|
||||||
|
```yaml
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolume
|
||||||
|
metadata:
|
||||||
|
name: models-pv
|
||||||
|
labels:
|
||||||
|
app: models
|
||||||
|
spec:
|
||||||
|
capacity:
|
||||||
|
storage: 10Gi # Modify according to model size
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteMany
|
||||||
|
storageClassName: models
|
||||||
|
nfs:
|
||||||
|
path: YOUR_NFS_PATH
|
||||||
|
server: YOUR_NFS_SERVER
|
||||||
|
|
||||||
|
```
|
||||||
|
Then upload the model to `YOUR_NFS_PATH` on the NFS server.
|
||||||
|
|
||||||
### Controller
|
### Controller
|
||||||
|
|
||||||
|
|
@ -154,8 +175,8 @@ spec:
|
||||||
restartPolicy: "Always"
|
restartPolicy: "Always"
|
||||||
volumes:
|
volumes:
|
||||||
- name: llm-models
|
- name: llm-models
|
||||||
hostPath:
|
persistentVolumeClaim:
|
||||||
path: /home/llm/models # change this in other envs
|
claimName: models-pvc
|
||||||
```
|
```
|
||||||
|
|
||||||
You may want to change the `MODEL_PATH` variable in the yaml. Also, please remember to change the volume path accordingly.
|
You may want to change the `MODEL_PATH` variable in the yaml. Also, please remember to change the volume path accordingly.
|
||||||
|
|
@ -200,6 +221,12 @@ print(completion.choices[0].message.content)
|
||||||
#### cURL
|
#### cURL
|
||||||
cURL is another good tool for observing the output of the api.
|
cURL is another good tool for observing the output of the api.
|
||||||
|
|
||||||
|
Before using cURL, clear the `http_proxy` and `https_proxy` environment variables so that requests are not routed through a proxy:
|
||||||
|
```bash
|
||||||
|
export http_proxy=
|
||||||
|
export https_proxy=
|
||||||
|
```
|
||||||
|
|
||||||
For the following examples, you may also change the service deployment address.
|
For the following examples, you may also change the service deployment address.
|
||||||
|
|
||||||
List Models:
|
List Models:
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,19 @@
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: models-pvc
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteMany
|
||||||
|
storageClassName: models
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 10Gi
|
||||||
|
# selector: # Optional
|
||||||
|
# matchLabels:
|
||||||
|
# app: models
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
kind: Pod
|
kind: Pod
|
||||||
metadata:
|
metadata:
|
||||||
name: bigdl-fschat-a1234bd-controller
|
name: bigdl-fschat-a1234bd-controller
|
||||||
|
|
@ -105,5 +120,5 @@ spec:
|
||||||
restartPolicy: "Always"
|
restartPolicy: "Always"
|
||||||
volumes:
|
volumes:
|
||||||
- name: llm-models
|
- name: llm-models
|
||||||
hostPath:
|
persistentVolumeClaim:
|
||||||
path: /home/llm/models # change this in other envs
|
claimName: models-pvc
|
||||||
15
docker/llm/serving/cpu/kubernetes/models-pv.yaml
Normal file
15
docker/llm/serving/cpu/kubernetes/models-pv.yaml
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolume
|
||||||
|
metadata:
|
||||||
|
name: models-pv
|
||||||
|
labels:
|
||||||
|
app: models
|
||||||
|
spec:
|
||||||
|
capacity:
|
||||||
|
storage: 10Gi # Modify according to model size
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteMany
|
||||||
|
storageClassName: models
|
||||||
|
nfs:
|
||||||
|
path: YOUR_NFS_PATH
|
||||||
|
server: YOUR_NFS_SERVER
|
||||||
Loading…
Reference in a new issue