From 52878d3e5fc27f07d67e3fd3016e7aff2ae3a091 Mon Sep 17 00:00:00 2001 From: Xiangyu Tian <109123695+xiangyuT@users.noreply.github.com> Date: Mon, 18 Sep 2023 09:32:25 +0800 Subject: [PATCH] [PPML] Enable TLS in Attestation API Serving for LLM finetuning (#8945) Add enableTLS flag to enable TLS in Attestation API Serving for LLM finetuning. --- docker/llm/finetune/lora/README.md | 17 ++++++++++++ docker/llm/finetune/lora/docker/bigdl_aa.py | 11 ++++++-- .../bigdl-lora-finetuning-tdx-job.yaml | 26 ++++++++++++++++++- .../llm/finetune/lora/kubernetes/values.yaml | 6 ++++- 4 files changed, 56 insertions(+), 4 deletions(-) diff --git a/docker/llm/finetune/lora/README.md b/docker/llm/finetune/lora/README.md index 90528c16..81b61add 100644 --- a/docker/llm/finetune/lora/README.md +++ b/docker/llm/finetune/lora/README.md @@ -59,6 +59,23 @@ From the log, you can see whether finetuning process has been invoked successful You can deploy this workload in TDX CoCo and enable Remote Attestation API Serving with setting `TEEMode` in `./kubernetes/values.yaml` to `tdx`. The main diffences are it's need to execute the pods as root and mount TDX device, and a flask service is responsible for generating launcher's quote and collecting workers' quotes. +### (Optional) Enable TLS +To enable TLS in Remote Attestation API Serving, provide a TLS certificate and set `enableTLS` (to `true`), `base64ServerCrt` and `base64ServerKey` in `./kubernetes/values.yaml`. 
+```bash +# Generate a self-signed TLS certificate (DEBUG USE ONLY) +export COUNTRY_NAME=your_country_name +export CITY_NAME=your_city_name +export ORGANIZATION_NAME=your_organization_name +export COMMON_NAME=your_common_name +export EMAIL_ADDRESS=your_email_address + +openssl req -x509 -newkey rsa:4096 -nodes -out server.crt -keyout server.key -days 365 -subj "/C=$COUNTRY_NAME/ST=$CITY_NAME/L=$CITY_NAME/O=$ORGANIZATION_NAME/OU=$ORGANIZATION_NAME/CN=$COMMON_NAME/emailAddress=$EMAIL_ADDRESS/" + +# Calculate Base64 format string in values.yaml +cat server.crt | base64 -w 0 # Set in base64ServerCrt +cat server.key | base64 -w 0 # Set in base64ServerKey +``` + To use RA Rest API, you need to get the IP of job-launcher: ``` bash kubectl get all -n bigdl-lora-finetuning diff --git a/docker/llm/finetune/lora/docker/bigdl_aa.py b/docker/llm/finetune/lora/docker/bigdl_aa.py index 734ee16b..d848fd65 100644 --- a/docker/llm/finetune/lora/docker/bigdl_aa.py +++ b/docker/llm/finetune/lora/docker/bigdl_aa.py @@ -7,7 +7,6 @@ import requests import subprocess app = Flask(__name__) -use_secure_cert = False @app.route('/gen_quote', methods=['POST']) def gen_quote(): @@ -48,4 +47,12 @@ def get_cluster_quote_list(): if __name__ == '__main__': print("BigDL-AA: Agent Started.") port = int(os.environ.get('ATTESTATION_API_SERVICE_PORT')) - app.run(host='0.0.0.0', port=port) + enable_tls = os.environ.get('ENABLE_TLS') + if enable_tls == 'true': + context = ssl.SSLContext(ssl.PROTOCOL_TLS) + context.load_cert_chain(certfile='/ppml/keys/server.crt', keyfile='/ppml/keys/server.key') + # https_key_store_token = os.environ.get('HTTPS_KEY_STORE_TOKEN') + # context.load_cert_chain(certfile='/ppml/keys/server.crt', keyfile='/ppml/keys/server.key', password=https_key_store_token) + app.run(host='0.0.0.0', port=port, ssl_context=context) + else: + app.run(host='0.0.0.0', port=port) diff --git a/docker/llm/finetune/lora/kubernetes/templates/bigdl-lora-finetuning-tdx-job.yaml 
b/docker/llm/finetune/lora/kubernetes/templates/bigdl-lora-finetuning-tdx-job.yaml index e0c3de6a..cd4d260b 100644 --- a/docker/llm/finetune/lora/kubernetes/templates/bigdl-lora-finetuning-tdx-job.yaml +++ b/docker/llm/finetune/lora/kubernetes/templates/bigdl-lora-finetuning-tdx-job.yaml @@ -22,6 +22,11 @@ spec: - name: dev hostPath: path: /dev + {{- if eq .Values.enableTLS true }} + - name: ssl-keys + secret: + secretName: ssl-keys + {{- end }} runtimeClassName: kata-qemu-tdx containers: - image: {{ .Values.imageName }} @@ -57,6 +62,8 @@ spec: value: "/ppml/output/cache" - name: ATTESTATION_API_SERVICE_PORT value: "{{ .Values.attestionApiServicePort }}" + - name: ENABLE_TLS + value: "{{ .Values.enableTLS }}" volumeMounts: - name: nfs-storage subPath: {{ .Values.modelSubPath }} @@ -69,6 +76,10 @@ spec: mountPath: "/ppml/output" - name: dev mountPath: /dev + {{- if eq .Values.enableTLS true }} + - name: ssl-keys + mountPath: /ppml/keys + {{- end }} Worker: replicas: {{ .Values.trainerNum }} template: @@ -141,4 +152,17 @@ spec: port: {{ .Values.attestionApiServicePort }} targetPort: {{ .Values.attestionApiServicePort }} type: ClusterIP -{{- end }} \ No newline at end of file +--- +{{- if eq .Values.enableTLS true }} +apiVersion: v1 +kind: Secret +metadata: + name: ssl-keys + namespace: bigdl-lora-finetuning +type: Opaque +data: + server.crt: {{ .Values.base64ServerCrt }} + server.key: {{ .Values.base64ServerKey }} +{{- end }} + +{{- end }} diff --git a/docker/llm/finetune/lora/kubernetes/values.yaml b/docker/llm/finetune/lora/kubernetes/values.yaml index 40a547d0..70691935 100644 --- a/docker/llm/finetune/lora/kubernetes/values.yaml +++ b/docker/llm/finetune/lora/kubernetes/values.yaml @@ -9,4 +9,8 @@ modelSubPath: llama-7b-hf # a subpath of the model file (dir) under nfs director outputSubPath: output # a subpath of the empty directory under the nfs directory to save finetuned model, for example, if you make an empty dir named 'output' at the nfsPath, the value 
should be 'output' ompNumThreads: 14 cpuPerPod: 42 -attestionApiServicePort: 9870 \ No newline at end of file +attestionApiServicePort: 9870 + +enableTLS: false # true or false +base64ServerCrt: "your_base64_format_server_crt" +base64ServerKey: "your_base64_format_server_key" \ No newline at end of file