Fix import error of ds autotp (#11307)

2024-06-13 16:22:52 +08:00 · 2024-06-13 16:22:52 +08:00 · f97cce2642
commit f97cce2642
parent 3682c6a979
7 changed files with 7 additions and 5 deletions
--- a/python/llm/dev/benchmark/all-in-one/run-deepspeed-arc.sh
+++ b/python/llm/dev/benchmark/all-in-one/run-deepspeed-arc.sh
@@ -14,5 +14,5 @@ if grep -q "Core" /proc/cpuinfo; then
    export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=2
 fi
 export TORCH_LLM_ALLREDUCE=0 # Different from PVC
-
+export BIGDL_IMPORT_IPEX=0
 mpirun -np $NUM_GPUS --prepend-rank python run.py
--- a/python/llm/dev/benchmark/all-in-one/run-deepspeed-pvc.sh
+++ b/python/llm/dev/benchmark/all-in-one/run-deepspeed-pvc.sh
@@ -13,4 +13,5 @@ source $basekit_root/ccl/latest/env/vars.sh --force
 export OMP_NUM_THREADS=$((56/$NUM_GPUS))
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=2
 export TORCH_LLM_ALLREDUCE=1
+export BIGDL_IMPORT_IPEX=0
 mpirun -np $NUM_GPUS --prepend-rank python run.py
--- a/python/llm/example/GPU/Deepspeed-AutoTP-FastAPI/start-deepspeed-autotp-ipex-llm-serving.sh
+++ b/python/llm/example/GPU/Deepspeed-AutoTP-FastAPI/start-deepspeed-autotp-ipex-llm-serving.sh
@@ -31,6 +31,6 @@ export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=2
 export TORCH_LLM_ALLREDUCE=0
 export WORLD_SIZE=2
-
+export BIGDL_IMPORT_IPEX=0
 mpirun -np $NUM_GPUS --prepend-rank \
        python serving.py --repo-id-or-model-path YOUR_REPO_ID_OR_MODEL_PATH --low-bit 'fp8' --port 8000 --max-num-seqs 8 --max-num-batched-tokens 8192
--- a/python/llm/example/GPU/Deepspeed-AutoTP/run_llama2_70b_pvc_1550_1_card.sh
+++ b/python/llm/example/GPU/Deepspeed-AutoTP/run_llama2_70b_pvc_1550_1_card.sh
@@ -29,5 +29,6 @@ source $basekit_root/ccl/latest/env/vars.sh --force
 export OMP_NUM_THREADS=$((56/$NUM_GPUS))
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=2
 export TORCH_LLM_ALLREDUCE=1
+export BIGDL_IMPORT_IPEX=0
 mpirun -np $NUM_GPUS --prepend-rank \
    python deepspeed_autotp.py --repo-id-or-model-path 'meta-llama/Llama-2-70b-chat-hf' --low-bit 'sym_int4'
--- a/python/llm/example/GPU/Deepspeed-AutoTP/run_mistral_7b_instruct_flex_2_card.sh
+++ b/python/llm/example/GPU/Deepspeed-AutoTP/run_mistral_7b_instruct_flex_2_card.sh
@@ -28,6 +28,6 @@ NUM_GPUS=2 # number of used GPU
 export USE_XETLA=OFF
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=2
 export TORCH_LLM_ALLREDUCE=0 # Different from PVC
-
+export BIGDL_IMPORT_IPEX=0
 mpirun -np $NUM_GPUS --prepend-rank \
    python deepspeed_autotp.py --repo-id-or-model-path 'mistralai/Mistral-7B-Instruct-v0.1' --low-bit 'sym_int4'
--- a/python/llm/example/GPU/Deepspeed-AutoTP/run_qwen_14b_arc_2_card.sh
+++ b/python/llm/example/GPU/Deepspeed-AutoTP/run_qwen_14b_arc_2_card.sh
@@ -33,6 +33,6 @@ if grep -q "Core" /proc/cpuinfo; then
    export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=2
 fi
 export TORCH_LLM_ALLREDUCE=0 # Different from PVC
-
+export BIGDL_IMPORT_IPEX=0
 mpirun -np $NUM_GPUS --prepend-rank \
    python deepspeed_autotp.py --repo-id-or-model-path 'Qwen/Qwen1.5-14B-Chat' --low-bit 'sym_int4'
--- a/python/llm/example/GPU/Deepspeed-AutoTP/run_vicuna_33b_arc_2_card.sh
+++ b/python/llm/example/GPU/Deepspeed-AutoTP/run_vicuna_33b_arc_2_card.sh
@@ -30,6 +30,6 @@ if grep -q "Core" /proc/cpuinfo; then
    export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=2
 fi
 export TORCH_LLM_ALLREDUCE=0 # Different from PVC
-
+export BIGDL_IMPORT_IPEX=0
 mpirun -np $NUM_GPUS --prepend-rank \
    python deepspeed_autotp.py --repo-id-or-model-path 'lmsys/vicuna-33b-v1.3' --low-bit 'sym_int4'