Add TDX attestation for LLM Finetuning in TDX CoCo --------- Co-authored-by: Heyang Sun <60865256+Uxito-Ada@users.noreply.github.com>
99 lines
No EOL
3.4 KiB
YAML
99 lines
No EOL
3.4 KiB
YAML
{{- if eq .Values.TEEMode "native" }}
# MPIJob that runs BigDL LoRA fine-tuning.
# This manifest is rendered only when .Values.TEEMode equals "native".
apiVersion: kubeflow.org/v2beta1
kind: MPIJob
metadata:
  name: bigdl-lora-finetuning-job
  namespace: bigdl-lora-finetuning
spec:
  slotsPerWorker: 1
  runPolicy:
    cleanPodPolicy: Running
  sshAuthMountPath: /home/mpiuser/.ssh
  mpiImplementation: Intel
  mpiReplicaSpecs:
    # Single launcher pod; it runs the same entrypoint script as the workers
    # and is distinguished by WORKER_ROLE=launcher.
    Launcher:
      replicas: 1
      template:
        spec:
          volumes:
            - name: nfs-storage
              persistentVolumeClaim:
                claimName: nfs-pvc
          containers:
            # Quoted so that an empty or scalar-looking value still renders
            # as a YAML string.
            - image: "{{ .Values.imageName }}"
              name: bigdl-ppml-finetuning-launcher
              securityContext:
                runAsUser: 1000
              command: ['sh', '-c', 'bash /ppml/bigdl-lora-finetuing-entrypoint.sh']
              env:
                - name: WORKER_ROLE
                  value: "launcher"
                # Same chart value that sets the Worker replica count below.
                - name: WORLD_SIZE
                  value: "{{ .Values.trainerNum }}"
                - name: MICRO_BATCH_SIZE
                  value: "{{ .Values.microBatchSize }}"
                - name: MASTER_PORT
                  value: "42679"
                # NOTE(review): presumably the DNS name of worker replica 0
                # of this job -- confirm against the operator's naming scheme.
                - name: MASTER_ADDR
                  value: "bigdl-lora-finetuning-job-worker-0.bigdl-lora-finetuning-job-worker"
                - name: DATA_SUB_PATH
                  value: "{{ .Values.dataSubPath }}"
                - name: OMP_NUM_THREADS
                  value: "{{ .Values.ompNumThreads }}"
                # Pod name taken from the pod's own metadata.
                - name: LOCAL_POD_NAME
                  valueFrom:
                    fieldRef:
                      fieldPath: metadata.name
              # All three mounts come from the same nfs-storage volume and are
              # separated by subPath. subPath values are quoted so numeric or
              # boolean-looking directory names stay strings.
              volumeMounts:
                - name: nfs-storage
                  subPath: "{{ .Values.modelSubPath }}"
                  mountPath: /ppml/model
                - name: nfs-storage
                  subPath: "{{ .Values.dataSubPath }}"
                  mountPath: "/ppml/data/{{ .Values.dataSubPath }}"
                - name: nfs-storage
                  subPath: "{{ .Values.outputSubPath }}"
                  mountPath: "/ppml/output"
    # Trainer pods, distinguished by WORKER_ROLE=trainer.
    Worker:
      # Left unquoted: replicas must render as an integer.
      replicas: {{ .Values.trainerNum }}
      template:
        spec:
          containers:
            # Quoted so that an empty or scalar-looking value still renders
            # as a YAML string.
            - image: "{{ .Values.imageName }}"
              name: bigdl-ppml-finetuning-worker
              securityContext:
                runAsUser: 1000
              command: ['sh', '-c', 'bash /ppml/bigdl-lora-finetuing-entrypoint.sh']
              env:
                - name: WORKER_ROLE
                  value: "trainer"
                - name: WORLD_SIZE
                  value: "{{ .Values.trainerNum }}"
                - name: MICRO_BATCH_SIZE
                  value: "{{ .Values.microBatchSize }}"
                - name: MASTER_PORT
                  value: "42679"
                # NOTE(review): presumably the DNS name of worker replica 0
                # of this job -- confirm against the operator's naming scheme.
                - name: MASTER_ADDR
                  value: "bigdl-lora-finetuning-job-worker-0.bigdl-lora-finetuning-job-worker"
                # Pod name taken from the pod's own metadata.
                - name: LOCAL_POD_NAME
                  valueFrom:
                    fieldRef:
                      fieldPath: metadata.name
              # Same three subPath mounts of nfs-storage as the launcher.
              volumeMounts:
                - name: nfs-storage
                  subPath: "{{ .Values.modelSubPath }}"
                  mountPath: /ppml/model
                - name: nfs-storage
                  subPath: "{{ .Values.dataSubPath }}"
                  mountPath: "/ppml/data/{{ .Values.dataSubPath }}"
                - name: nfs-storage
                  subPath: "{{ .Values.outputSubPath }}"
                  mountPath: "/ppml/output"
              resources:
                requests:
                  # Left unquoted as in the original; the value is passed
                  # through as whatever scalar the chart supplies.
                  cpu: {{ .Values.cpuPerPod }}
          volumes:
            - name: nfs-storage
              persistentVolumeClaim:
                claimName: nfs-pvc
{{- end }}