126 lines
		
	
	
	
		
			3 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
			
		
		
	
	
			126 lines
		
	
	
	
		
			3 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
---
# Claim for the volume that holds the model files. The worker Deployment in
# this file mounts it by claim name (models-pvc).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: models-pvc
spec:
  storageClassName: models
  # ReadWriteMany: the volume may be mounted read-write by many nodes.
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 10Gi
  # Optional: uncomment to restrict binding to volumes carrying this label.
  # selector:
  #   matchLabels:
  #     app: kubecon-models
---
# FastChat controller Pod. Starts the serving image with "-m controller" and
# declares the controller port (21005) and the API port (8000).
apiVersion: v1
kind: Pod
metadata:
  name: ipex-llm-fschat-a1234bd-controller
  labels:
    # Quoted on purpose: if the placeholder app id is replaced by one that
    # looks numeric (e.g. all digits), it must still parse as a string.
    fastchat-appid: "a1234bd"
    fastchat-app-type: controller
spec:
  dnsPolicy: "ClusterFirst"
  containers:
    - name: fastchat-controller  # fixed
      image: intelanalytics/ipex-llm-serving-cpu:2.1.0-SNAPSHOT
      imagePullPolicy: IfNotPresent
      env:
        - name: CONTROLLER_HOST  # fixed
          value: "0.0.0.0"
        - name: CONTROLLER_PORT  # fixed
          value: "21005"
        - name: API_HOST  # fixed
          value: "0.0.0.0"
        - name: API_PORT  # fixed
          value: "8000"
        # You can change this port.
        # NOTE(review): 8002 is not listed under `ports` below nor in the
        # controller Service; confirm whether it needs to be exposed.
        - name: GRADIO_PORT
          value: "8002"
      ports:
        - containerPort: 21005
          name: con-port
        - containerPort: 8000
          name: api-port
      # Requests and limits are deliberately identical.
      resources:
        requests:
          memory: 16Gi
          cpu: 4
        limits:
          memory: 16Gi
          cpu: 4
      args: ["-m", "controller"]
  # The controller is not restarted automatically if its container exits.
  restartPolicy: "Never"
---
# Service for the controller
apiVersion: v1
kind: Service
metadata:
  name: ipex-llm-a1234bd-fschat-controller-service
spec:
  # NodePort exposes the service on a port of every node. Switch to another
  # Service type (e.g. ClusterIP or LoadBalancer) if your cluster offers a
  # more suitable way to expose it.
  type: NodePort
  # Selects the controller Pod by the two labels it carries.
  selector:
    fastchat-app-type: controller
    fastchat-appid: a1234bd
  ports:
    - name: cont-port
      port: 21005
      targetPort: 21005
      protocol: TCP
    - name: api-port
      port: 8000
      targetPort: 8000
      protocol: TCP
---
# FastChat worker Deployment. Each replica starts the serving image with
# "-m worker", points at the controller Service by name, and reads the model
# from the shared models-pvc volume mounted at /llm/models/.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ipex-llm-fschat-a1234bd-worker-deployment
spec:
  # Change this to the number you want
  replicas: 1
  # selector.matchLabels must match template.metadata.labels below.
  selector:
    matchLabels:
      fastchat: worker
  template:
    metadata:
      labels:
        fastchat: worker
    spec:
      dnsPolicy: "ClusterFirst"
      containers:
        - name: fastchat-worker  # fixed
          image: intelanalytics/ipex-llm-serving-cpu:2.1.0-SNAPSHOT
          imagePullPolicy: IfNotPresent
          env:
            # Same value as metadata.name of the controller Service.
            - name: CONTROLLER_HOST  # fixed
              value: ipex-llm-a1234bd-fschat-controller-service
            - name: CONTROLLER_PORT  # fixed
              value: "21005"
            # Filled in at runtime with this pod's own IP address.
            - name: WORKER_HOST  # fixed
              valueFrom:
                fieldRef:
                  fieldPath: status.podIP
            - name: WORKER_PORT  # fixed
              value: "21841"
            # Change this to your model; the path lies under the
            # /llm/models/ volume mount declared below.
            - name: MODEL_PATH
              value: "/llm/models/vicuna-7b-v1.5-ipex-llm/"
            # Equals the 16 CPUs requested below; presumably the two should
            # be changed together -- confirm.
            - name: OMP_NUM_THREADS
              value: "16"
          # Requests and limits are deliberately identical.
          resources:
            requests:
              memory: 32Gi
              cpu: 16
            limits:
              memory: 32Gi
              cpu: 16
          args: ["-m", "worker"]
          volumeMounts:
            - name: llm-models
              mountPath: /llm/models/
      restartPolicy: "Always"
      volumes:
        # Backed by the models-pvc PersistentVolumeClaim.
        - name: llm-models
          persistentVolumeClaim:
            claimName: models-pvc