[PPML] Enable TLS in Attestation API Serving for LLM finetuning (#8945)

Add enableTLS flag to enable TLS in Attestation API Serving for LLM finetuning.
2023-09-18 09:32:25 +08:00 · 2023-09-18 09:32:25 +08:00 · 52878d3e5f
commit 52878d3e5f
parent 32716106e0
4 changed files with 56 additions and 4 deletions
--- a/docker/llm/finetune/lora/README.md
+++ b/docker/llm/finetune/lora/README.md
@ -59,6 +59,23 @@ From the log, you can see whether finetuning process has been invoked successful

 You can deploy this workload in TDX CoCo and enable Remote Attestation API Serving by setting `TEEMode` in `./kubernetes/values.yaml` to `tdx`. The main differences are that the pods need to be executed as root with the TDX device mounted, and that a Flask service is responsible for generating the launcher's quote and collecting the workers' quotes.

+### (Optional) Enable TLS
+To enable TLS in Remote Attestation API Serving, you should provide a TLS certificate and set `enableTLS` (to `true`), `base64ServerCrt` and `base64ServerKey` in `./kubernetes/values.yaml`.
+```bash
+# Generate a self-signed TLS certificate (DEBUG USE ONLY)
+export COUNTRY_NAME=your_country_name
+export CITY_NAME=your_city_name
+export ORGANIZATION_NAME=your_organization_name
+export COMMON_NAME=your_common_name
+export EMAIL_ADDRESS=your_email_address
+
+openssl req -x509 -newkey rsa:4096 -nodes -out server.crt -keyout server.key -days 365 -subj "/C=$COUNTRY_NAME/ST=$CITY_NAME/L=$CITY_NAME/O=$ORGANIZATION_NAME/OU=$ORGANIZATION_NAME/CN=$COMMON_NAME/emailAddress=$EMAIL_ADDRESS/"
+
+# Compute the Base64-encoded strings to set in values.yaml
+cat server.crt | base64 -w 0 # Set in base64ServerCrt
+cat server.key | base64 -w 0 # Set in base64ServerKey
+```
+
 To use RA Rest API, you need to get the IP of job-launcher:
 ``` bash
 kubectl get all -n bigdl-lora-finetuning 
--- a/docker/llm/finetune/lora/docker/bigdl_aa.py
+++ b/docker/llm/finetune/lora/docker/bigdl_aa.py
@ -7,7 +7,6 @@ import requests
 import subprocess

 app = Flask(__name__)
-use_secure_cert = False

@app.route('/gen_quote', methods=['POST'])
 def gen_quote():
@ -48,4 +47,12 @@ def get_cluster_quote_list():
 if __name__ == '__main__':
    print("BigDL-AA: Agent Started.")
    port = int(os.environ.get('ATTESTATION_API_SERVICE_PORT'))
-    app.run(host='0.0.0.0', port=port)
+    enable_tls = os.environ.get('ENABLE_TLS')
+    if enable_tls == 'true':
+        context = ssl.SSLContext(ssl.PROTOCOL_TLS)
+        context.load_cert_chain(certfile='/ppml/keys/server.crt', keyfile='/ppml/keys/server.key')
+        # https_key_store_token = os.environ.get('HTTPS_KEY_STORE_TOKEN')
+        # context.load_cert_chain(certfile='/ppml/keys/server.crt', keyfile='/ppml/keys/server.key', password=https_key_store_token)
+        app.run(host='0.0.0.0', port=port, ssl_context=context)
+    else:
+        app.run(host='0.0.0.0', port=port)
--- a/docker/llm/finetune/lora/kubernetes/templates/bigdl-lora-finetuning-tdx-job.yaml
+++ b/docker/llm/finetune/lora/kubernetes/templates/bigdl-lora-finetuning-tdx-job.yaml
@ -22,6 +22,11 @@ spec:
           - name: dev
             hostPath:
               path: /dev
+           {{- if eq .Values.enableTLS true }}
+           - name: ssl-keys
+             secret:
+               secretName: ssl-keys
+           {{- end }}
           runtimeClassName: kata-qemu-tdx
           containers:
           - image: {{ .Values.imageName }}
@ -57,6 +62,8 @@ spec:
               value: "/ppml/output/cache"
             - name: ATTESTATION_API_SERVICE_PORT
               value: "{{ .Values.attestionApiServicePort }}"
+             - name: ENABLE_TLS
+               value: "{{ .Values.enableTLS }}"
             volumeMounts:
             - name: nfs-storage
               subPath: {{ .Values.modelSubPath }}
@ -69,6 +76,10 @@ spec:
               mountPath: "/ppml/output"
             - name: dev
               mountPath: /dev
+             {{- if eq .Values.enableTLS true }}
+             - name: ssl-keys
+               mountPath: /ppml/keys
+             {{- end }}
    Worker:
      replicas: {{ .Values.trainerNum }}
      template:
@ -141,4 +152,17 @@ spec:
      port: {{ .Values.attestionApiServicePort }}
      targetPort: {{ .Values.attestionApiServicePort }}
  type: ClusterIP
-{{- end }}
+---
+{{- if eq .Values.enableTLS true }}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: ssl-keys
+  namespace: bigdl-lora-finetuning
+type: Opaque
+data:
+  server.crt: {{ .Values.base64ServerCrt }}
+  server.key: {{ .Values.base64ServerKey }}
+{{- end }}
+
+{{- end }}
--- a/docker/llm/finetune/lora/kubernetes/values.yaml
+++ b/docker/llm/finetune/lora/kubernetes/values.yaml
@ -9,4 +9,8 @@ modelSubPath: llama-7b-hf # a subpath of the model file (dir) under nfs director
 outputSubPath: output # a subpath of the empty directory under the nfs directory to save finetuned model, for example, if you make an empty dir named 'output' at the nfsPath, the value should be 'output'
 ompNumThreads: 14
 cpuPerPod: 42
-attestionApiServicePort: 9870
+attestionApiServicePort: 9870
+
+enableTLS: false # true or false
+base64ServerCrt: "your_base64_format_server_crt"
+base64ServerKey: "your_base64_format_server_key"