126 lines
3 KiB
YAML
126 lines
3 KiB
YAML
---
# PersistentVolumeClaim for the model files. The worker Deployment in this
# file references it by claimName "models-pvc" and mounts it at /llm/models/.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: models-pvc
spec:
  accessModes:
    - ReadWriteMany
  # The "models" StorageClass must exist in the cluster.
  storageClassName: models
  resources:
    requests:
      storage: 10Gi
  # Optional: restrict binding to PersistentVolumes carrying this label.
  # selector:
  #   matchLabels:
  #     app: kubecon-models
---
# Controller Pod. It is selected by the controller Service in this file
# through the two labels below, so keep labels and Service selector in sync.
apiVersion: v1
kind: Pod
metadata:
  name: ipex-llm-fschat-a1234bd-controller
  labels:
    fastchat-appid: a1234bd
    fastchat-app-type: controller
spec:
  dnsPolicy: ClusterFirst
  containers:
    - name: fastchat-controller  # fixed
      image: intelanalytics/ipex-llm-serving-cpu:2.1.0-SNAPSHOT
      imagePullPolicy: IfNotPresent
      env:
        - name: CONTROLLER_HOST  # fixed
          value: "0.0.0.0"
        - name: CONTROLLER_PORT  # fixed
          value: "21005"
        - name: API_HOST  # fixed
          value: "0.0.0.0"
        - name: API_PORT  # fixed
          value: "8000"
        - name: GRADIO_PORT  # You can change this port
          value: "8002"
      ports:
        # NOTE(review): this port is named "con-port" here but "cont-port" in
        # the Service. The Service targets ports by number, so routing is
        # unaffected; the names are simply inconsistent.
        - containerPort: 21005
          name: con-port
        - containerPort: 8000
          name: api-port
      # requests == limits for both memory and cpu.
      resources:
        requests:
          memory: 16Gi
          cpu: 4
        limits:
          memory: 16Gi
          cpu: 4
      args: ["-m", "controller"]
  # The controller is not restarted if its container exits.
  restartPolicy: Never
---
# Service for the controller. Workers reach the controller through this
# Service name (see CONTROLLER_HOST in the worker Deployment).
apiVersion: v1
kind: Service
metadata:
  name: ipex-llm-a1234bd-fschat-controller-service
spec:
  # NodePort opens these ports on every node. Change this to ClusterIP or
  # LoadBalancer if your cluster provides another way to expose services.
  type: NodePort
  # Must match the labels on the controller Pod.
  selector:
    fastchat-appid: a1234bd
    fastchat-app-type: controller
  ports:
    - name: cont-port
      protocol: TCP
      port: 21005
      targetPort: 21005
    - name: api-port
      protocol: TCP
      port: 8000
      targetPort: 8000
---
# Worker Deployment. Each replica is pointed at the controller Service via
# CONTROLLER_HOST / CONTROLLER_PORT and mounts the models PVC at /llm/models/.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ipex-llm-fschat-a1234bd-worker-deployment
spec:
  # Change this to the number you want
  replicas: 1
  # selector.matchLabels must equal template.metadata.labels.
  selector:
    matchLabels:
      fastchat: worker
  template:
    metadata:
      labels:
        fastchat: worker
    spec:
      dnsPolicy: ClusterFirst
      containers:
        - name: fastchat-worker  # fixed
          image: intelanalytics/ipex-llm-serving-cpu:2.1.0-SNAPSHOT
          imagePullPolicy: IfNotPresent
          env:
            # Name of the controller Service defined in this file.
            - name: CONTROLLER_HOST  # fixed
              value: ipex-llm-a1234bd-fschat-controller-service
            - name: CONTROLLER_PORT  # fixed
              value: "21005"
            # Filled in with the pod's own IP via the downward API.
            - name: WORKER_HOST  # fixed
              valueFrom:
                fieldRef:
                  fieldPath: status.podIP
            - name: WORKER_PORT  # fixed
              value: "21841"
            # Path under the /llm/models/ volume mount below.
            - name: MODEL_PATH
              value: "/llm/models/vicuna-7b-v1.5-ipex-llm/"  # change this to your model
            # NOTE(review): equals the cpu request/limit below; presumably
            # the two should be changed together. Confirm.
            - name: OMP_NUM_THREADS
              value: "16"
          # requests == limits for both memory and cpu.
          resources:
            requests:
              memory: 32Gi
              cpu: 16
            limits:
              memory: 32Gi
              cpu: 16
          args: ["-m", "worker"]
          volumeMounts:
            - name: llm-models
              mountPath: /llm/models/
      restartPolicy: Always
      volumes:
        - name: llm-models
          persistentVolumeClaim:
            claimName: models-pvc