add python style check (#10620)
* add python style check * fix style checks * update runner * add ipex-llm-finetune-qlora-cpu-k8s to manually_build workflow * update tag to 2.1.0-SNAPSHOT
This commit is contained in:
		
							parent
							
								
									58b57177e3
								
							
						
					
					
						commit
						a10f5a1b8d
					
				
					 13 changed files with 120 additions and 33 deletions
				
			
		
							
								
								
									
										51
									
								
								.github/workflows/manually_build.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										51
									
								
								.github/workflows/manually_build.yml
									
									
									
									
										vendored
									
									
								
							| 
						 | 
					@ -10,13 +10,14 @@ on:
 | 
				
			||||||
        type: choice
 | 
					        type: choice
 | 
				
			||||||
        options:
 | 
					        options:
 | 
				
			||||||
        - all
 | 
					        - all
 | 
				
			||||||
        - ipex-llm-finetune-lora-cpu
 | 
					 | 
				
			||||||
        - ipex-llm-finetune-qlora-cpu
 | 
					 | 
				
			||||||
        - ipex-llm-finetune-qlora-xpu
 | 
					 | 
				
			||||||
        - ipex-llm-xpu
 | 
					 | 
				
			||||||
        - ipex-llm-cpu
 | 
					        - ipex-llm-cpu
 | 
				
			||||||
        - ipex-llm-serving-xpu
 | 
					        - ipex-llm-xpu
 | 
				
			||||||
        - ipex-llm-serving-cpu
 | 
					        - ipex-llm-serving-cpu
 | 
				
			||||||
 | 
					        - ipex-llm-serving-xpu
 | 
				
			||||||
 | 
					        - ipex-llm-finetune-lora-cpu
 | 
				
			||||||
 | 
					        - ipex-llm-finetune-qlora-cpu-standalone
 | 
				
			||||||
 | 
					        - ipex-llm-finetune-qlora-cpu-k8s
 | 
				
			||||||
 | 
					        - ipex-llm-finetune-qlora-xpu
 | 
				
			||||||
      tag:
 | 
					      tag:
 | 
				
			||||||
        description: 'docker image tag (e.g. 2.1.0-SNAPSHOT)'
 | 
					        description: 'docker image tag (e.g. 2.1.0-SNAPSHOT)'
 | 
				
			||||||
        required: true
 | 
					        required: true
 | 
				
			||||||
| 
						 | 
					@ -72,8 +73,8 @@ jobs:
 | 
				
			||||||
        sudo docker push ${image}:latest
 | 
					        sudo docker push ${image}:latest
 | 
				
			||||||
        sudo docker rmi -f ${image}:${TAG} 10.239.45.10/arda/${image}:${TAG} ${image}:latest
 | 
					        sudo docker rmi -f ${image}:${TAG} 10.239.45.10/arda/${image}:${TAG} ${image}:latest
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  ipex-llm-finetune-qlora-cpu:
 | 
					  ipex-llm-finetune-qlora-cpu-standalone:
 | 
				
			||||||
    if: ${{ inputs.artifact == 'ipex-llm-finetune-qlora-cpu' || inputs.artifact == 'all' }}
 | 
					    if: ${{ inputs.artifact == 'ipex-llm-finetune-qlora-cpu-standalone' || inputs.artifact == 'all' }}
 | 
				
			||||||
    runs-on: [self-hosted, Shire]
 | 
					    runs-on: [self-hosted, Shire]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    steps:
 | 
					    steps:
 | 
				
			||||||
| 
						 | 
					@ -81,12 +82,12 @@ jobs:
 | 
				
			||||||
      - name: docker login
 | 
					      - name: docker login
 | 
				
			||||||
        run: |
 | 
					        run: |
 | 
				
			||||||
          docker login -u ${DOCKERHUB_USERNAME} -p ${DOCKERHUB_PASSWORD}
 | 
					          docker login -u ${DOCKERHUB_USERNAME} -p ${DOCKERHUB_PASSWORD}
 | 
				
			||||||
      - name: ipex-llm-finetune-qlora-cpu
 | 
					      - name: ipex-llm-finetune-qlora-cpu-standalone
 | 
				
			||||||
        run: |
 | 
					        run: |
 | 
				
			||||||
          echo "##############################################################"
 | 
					          echo "##############################################################"
 | 
				
			||||||
          echo "####### ipex-llm-finetune-qlora-cpu ########"
 | 
					          echo "####### ipex-llm-finetune-qlora-cpu-standalone ########"
 | 
				
			||||||
          echo "##############################################################"
 | 
					          echo "##############################################################"
 | 
				
			||||||
          export image=intelanalytics/ipex-llm-finetune-qlora-cpu
 | 
					          export image=intelanalytics/ipex-llm-finetune-qlora-cpu-standalone
 | 
				
			||||||
          cd docker/llm/finetune/qlora/cpu/docker
 | 
					          cd docker/llm/finetune/qlora/cpu/docker
 | 
				
			||||||
          sudo docker build \
 | 
					          sudo docker build \
 | 
				
			||||||
            --no-cache=true \
 | 
					            --no-cache=true \
 | 
				
			||||||
| 
						 | 
					@ -102,6 +103,36 @@ jobs:
 | 
				
			||||||
          sudo docker push ${image}:latest
 | 
					          sudo docker push ${image}:latest
 | 
				
			||||||
          sudo docker rmi -f ${image}:${TAG} 10.239.45.10/arda/${image}:${TAG} ${image}:latest
 | 
					          sudo docker rmi -f ${image}:${TAG} 10.239.45.10/arda/${image}:${TAG} ${image}:latest
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ipex-llm-finetune-qlora-cpu-k8s:
 | 
				
			||||||
 | 
					    if: ${{ inputs.artifact == 'ipex-llm-finetune-qlora-cpu-k8s' || inputs.artifact == 'all' }}
 | 
				
			||||||
 | 
					    runs-on: [self-hosted, Shire]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    steps:
 | 
				
			||||||
 | 
					      - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
 | 
				
			||||||
 | 
					      - name: docker login
 | 
				
			||||||
 | 
					        run: |
 | 
				
			||||||
 | 
					          docker login -u ${DOCKERHUB_USERNAME} -p ${DOCKERHUB_PASSWORD}
 | 
				
			||||||
 | 
					      - name: ipex-llm-finetune-qlora-cpu-k8s
 | 
				
			||||||
 | 
					        run: |
 | 
				
			||||||
 | 
					          echo "##############################################################"
 | 
				
			||||||
 | 
					          echo "####### ipex-llm-finetune-qlora-cpu-k8s ########"
 | 
				
			||||||
 | 
					          echo "##############################################################"
 | 
				
			||||||
 | 
					          export image=intelanalytics/ipex-llm-finetune-qlora-cpu-k8s
 | 
				
			||||||
 | 
					          cd docker/llm/finetune/qlora/cpu/docker
 | 
				
			||||||
 | 
					          sudo docker build \
 | 
				
			||||||
 | 
					            --no-cache=true \
 | 
				
			||||||
 | 
					            --build-arg http_proxy=${HTTP_PROXY} \
 | 
				
			||||||
 | 
					            --build-arg https_proxy=${HTTPS_PROXY} \
 | 
				
			||||||
 | 
					            --build-arg no_proxy=${NO_PROXY} \
 | 
				
			||||||
 | 
					            -t ${image}:${TAG} -f ./Dockerfile.k8s .
 | 
				
			||||||
 | 
					          sudo docker push ${image}:${TAG}
 | 
				
			||||||
 | 
					          sudo docker tag ${image}:${TAG} 10.239.45.10/arda/${image}:${TAG}
 | 
				
			||||||
 | 
					          sudo docker push 10.239.45.10/arda/${image}:${TAG}
 | 
				
			||||||
 | 
					          # tag 'latest'
 | 
				
			||||||
 | 
					          sudo docker tag ${image}:${TAG} ${image}:latest
 | 
				
			||||||
 | 
					          sudo docker push ${image}:latest
 | 
				
			||||||
 | 
					          sudo docker rmi -f ${image}:${TAG} 10.239.45.10/arda/${image}:${TAG} ${image}:latest
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  ipex-llm-finetune-qlora-xpu:
 | 
					  ipex-llm-finetune-qlora-xpu:
 | 
				
			||||||
    if: ${{ inputs.artifact == 'ipex-llm-finetune-qlora-xpu' || inputs.artifact == 'all' }}
 | 
					    if: ${{ inputs.artifact == 'ipex-llm-finetune-qlora-xpu' || inputs.artifact == 'all' }}
 | 
				
			||||||
    runs-on: [self-hosted, Shire]
 | 
					    runs-on: [self-hosted, Shire]
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										54
									
								
								.github/workflows/python-style-check.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								.github/workflows/python-style-check.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,54 @@
 | 
				
			||||||
 | 
					name: Python Style Check
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Cancel previous runs in the PR when you push new commits
 | 
				
			||||||
 | 
					concurrency:
 | 
				
			||||||
 | 
					  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}
 | 
				
			||||||
 | 
					  cancel-in-progress: true
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					permissions:
 | 
				
			||||||
 | 
					  contents: read
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Controls when the action will run.
 | 
				
			||||||
 | 
					on:
 | 
				
			||||||
 | 
					  push:
 | 
				
			||||||
 | 
					    branches: [ main ]
 | 
				
			||||||
 | 
					    paths:
 | 
				
			||||||
 | 
					      - 'python/**'
 | 
				
			||||||
 | 
					      - '.github/workflows/python-style-check.yml'
 | 
				
			||||||
 | 
					  pull_request:
 | 
				
			||||||
 | 
					    branches: [ main ]
 | 
				
			||||||
 | 
					    paths:
 | 
				
			||||||
 | 
					      - 'python/**'
 | 
				
			||||||
 | 
					      - '.github/workflows/python-style-check.yml'
 | 
				
			||||||
 | 
					  # schedule:
 | 
				
			||||||
 | 
					  #   - cron: '0 16 * * *'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# A workflow run is made up of one or more jobs that can run sequentially or in parallel
 | 
				
			||||||
 | 
					jobs:
 | 
				
			||||||
 | 
					  # This workflow contains a single job called "style-check"
 | 
				
			||||||
 | 
					  style-check:
 | 
				
			||||||
 | 
					    # The type of runner that the job will run on
 | 
				
			||||||
 | 
					    runs-on: ubuntu-latest
 | 
				
			||||||
 | 
					    strategy:
 | 
				
			||||||
 | 
					      matrix:
 | 
				
			||||||
 | 
					        python-version: [3.7]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Steps represent a sequence of tasks that will be executed as part of the job
 | 
				
			||||||
 | 
					    steps:
 | 
				
			||||||
 | 
					      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
 | 
				
			||||||
 | 
					      - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # actions/checkout@v2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      - name: Set up Python ${{ matrix.python-version }}
 | 
				
			||||||
 | 
					        uses: actions/setup-python@v2
 | 
				
			||||||
 | 
					        with:
 | 
				
			||||||
 | 
					          python-version: ${{ matrix.python-version }}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      - name: Install dependencies
 | 
				
			||||||
 | 
					        run: pip install pycodestyle
 | 
				
			||||||
 | 
					             pip install pydocstyle
 | 
				
			||||||
 | 
					             pip install mypy==0.982
 | 
				
			||||||
 | 
					             pip install wheel
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      - name: LLM style checking
 | 
				
			||||||
 | 
					        run: bash python/llm/dev/test/lint-python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -8,10 +8,10 @@ You can download directly from Dockerhub like:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```bash
 | 
					```bash
 | 
				
			||||||
# For standalone
 | 
					# For standalone
 | 
				
			||||||
docker pull intelanalytics/ipex-llm-finetune-qlora-cpu-standalone:2.5.0-SNAPSHOT
 | 
					docker pull intelanalytics/ipex-llm-finetune-qlora-cpu-standalone:2.1.0-SNAPSHOT
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# For k8s
 | 
					# For k8s
 | 
				
			||||||
docker pull intelanalytics/ipex-llm-finetune-qlora-cpu-k8s:2.5.0-SNAPSHOT
 | 
					docker pull intelanalytics/ipex-llm-finetune-qlora-cpu-k8s:2.1.0-SNAPSHOT
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Or build the image from source:
 | 
					Or build the image from source:
 | 
				
			||||||
| 
						 | 
					@ -24,7 +24,7 @@ export HTTPS_PROXY=your_https_proxy
 | 
				
			||||||
docker build \
 | 
					docker build \
 | 
				
			||||||
  --build-arg http_proxy=${HTTP_PROXY} \
 | 
					  --build-arg http_proxy=${HTTP_PROXY} \
 | 
				
			||||||
  --build-arg https_proxy=${HTTPS_PROXY} \
 | 
					  --build-arg https_proxy=${HTTPS_PROXY} \
 | 
				
			||||||
  -t intelanalytics/ipex-llm-finetune-qlora-cpu-standalone:2.5.0-SNAPSHOT \
 | 
					  -t intelanalytics/ipex-llm-finetune-qlora-cpu-standalone:2.1.0-SNAPSHOT \
 | 
				
			||||||
  -f ./Dockerfile .
 | 
					  -f ./Dockerfile .
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# For k8s
 | 
					# For k8s
 | 
				
			||||||
| 
						 | 
					@ -34,7 +34,7 @@ export HTTPS_PROXY=your_https_proxy
 | 
				
			||||||
docker build \
 | 
					docker build \
 | 
				
			||||||
  --build-arg http_proxy=${HTTP_PROXY} \
 | 
					  --build-arg http_proxy=${HTTP_PROXY} \
 | 
				
			||||||
  --build-arg https_proxy=${HTTPS_PROXY} \
 | 
					  --build-arg https_proxy=${HTTPS_PROXY} \
 | 
				
			||||||
  -t intelanalytics/ipex-llm-finetune-qlora-cpu-k8s:2.5.0-SNAPSHOT \
 | 
					  -t intelanalytics/ipex-llm-finetune-qlora-cpu-k8s:2.1.0-SNAPSHOT \
 | 
				
			||||||
  -f ./Dockerfile.k8s .
 | 
					  -f ./Dockerfile.k8s .
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -55,7 +55,7 @@ docker run -itd \
 | 
				
			||||||
   -e https_proxy=${HTTPS_PROXY} \
 | 
					   -e https_proxy=${HTTPS_PROXY} \
 | 
				
			||||||
   -v $BASE_MODE_PATH:/ipex_llm/model \
 | 
					   -v $BASE_MODE_PATH:/ipex_llm/model \
 | 
				
			||||||
   -v $DATA_PATH:/ipex_llm/data/alpaca-cleaned \
 | 
					   -v $DATA_PATH:/ipex_llm/data/alpaca-cleaned \
 | 
				
			||||||
   intelanalytics/ipex-llm-finetune-qlora-cpu-standalone:2.5.0-SNAPSHOT
 | 
					   intelanalytics/ipex-llm-finetune-qlora-cpu-standalone:2.1.0-SNAPSHOT
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Downloading the base model and data and mounting them into the docker container demonstrates a standard fine-tuning process. You can skip this step for a quick start; in that case, the fine-tuning code will automatically download the needed files:
 | 
					Downloading the base model and data and mounting them into the docker container demonstrates a standard fine-tuning process. You can skip this step for a quick start; in that case, the fine-tuning code will automatically download the needed files:
 | 
				
			||||||
| 
						 | 
					@ -69,7 +69,7 @@ docker run -itd \
 | 
				
			||||||
   --name=ipex-llm-fintune-qlora-cpu \
 | 
					   --name=ipex-llm-fintune-qlora-cpu \
 | 
				
			||||||
   -e http_proxy=${HTTP_PROXY} \
 | 
					   -e http_proxy=${HTTP_PROXY} \
 | 
				
			||||||
   -e https_proxy=${HTTPS_PROXY} \
 | 
					   -e https_proxy=${HTTPS_PROXY} \
 | 
				
			||||||
   intelanalytics/ipex-llm-finetune-qlora-cpu-standalone:2.5.0-SNAPSHOT
 | 
					   intelanalytics/ipex-llm-finetune-qlora-cpu-standalone:2.1.0-SNAPSHOT
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
However, we recommend that you handle them manually, because the automatic download can be blocked by Internet access restrictions, Hugging Face authentication, etc., depending on your environment, and the manual method allows you to fine-tune in a custom way (with a different base model and dataset).
 | 
					However, we recommend that you handle them manually, because the automatic download can be blocked by Internet access restrictions, Hugging Face authentication, etc., depending on your environment, and the manual method allows you to fine-tune in a custom way (with a different base model and dataset).
 | 
				
			||||||
| 
						 | 
					@ -130,7 +130,7 @@ docker run -itd \
 | 
				
			||||||
 -e WORKER_COUNT_DOCKER=your_worker_count \
 | 
					 -e WORKER_COUNT_DOCKER=your_worker_count \
 | 
				
			||||||
 -v your_downloaded_base_model_path:/ipex_llm/model \
 | 
					 -v your_downloaded_base_model_path:/ipex_llm/model \
 | 
				
			||||||
 -v your_downloaded_data_path:/ipex_llm/data/alpaca_data_cleaned_archive.json \
 | 
					 -v your_downloaded_data_path:/ipex_llm/data/alpaca_data_cleaned_archive.json \
 | 
				
			||||||
 intelanalytics/ipex-llm-finetune-qlora-cpu-standalone:2.5.0-SNAPSHOT
 | 
					 intelanalytics/ipex-llm-finetune-qlora-cpu-standalone:2.1.0-SNAPSHOT
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Note that `STANDALONE_DOCKER` is set to **TRUE** here.
 | 
					Note that `STANDALONE_DOCKER` is set to **TRUE** here.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,4 +1,4 @@
 | 
				
			||||||
imageName: intelanalytics/ipex-llm-finetune-qlora-cpu-k8s:2.5.0-SNAPSHOT
 | 
					imageName: intelanalytics/ipex-llm-finetune-qlora-cpu-k8s:2.1.0-SNAPSHOT
 | 
				
			||||||
trainerNum: 2
 | 
					trainerNum: 2
 | 
				
			||||||
microBatchSize: 8
 | 
					microBatchSize: 8
 | 
				
			||||||
enableGradientCheckpoint: false # true will save more memory but increase latency
 | 
					enableGradientCheckpoint: false # true will save more memory but increase latency
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -21,7 +21,7 @@ SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
 | 
				
			||||||
PYTHON_ROOT_DIR="$SCRIPT_DIR/.."
 | 
					PYTHON_ROOT_DIR="$SCRIPT_DIR/.."
 | 
				
			||||||
echo $PYTHON_ROOT_DIR
 | 
					echo $PYTHON_ROOT_DIR
 | 
				
			||||||
PATHS_TO_CHECK="$SCRIPT_DIR/../../src"
 | 
					PATHS_TO_CHECK="$SCRIPT_DIR/../../src"
 | 
				
			||||||
PATTERNS_TO_EXCLUDE="__init__.py,log4Error.py,$SCRIPT_DIR/../../src/bigdl/llm/langchain/*,$SCRIPT_DIR/../../src/bigdl/llm/transformers/gguf/models/model_implement/yuan2/*"
 | 
					PATTERNS_TO_EXCLUDE="__init__.py,log4Error.py,$SCRIPT_DIR/../../src/ipex_llm/langchain/*,$SCRIPT_DIR/../../src/ipex_llm/transformers/gguf/models/model_implement/yuan2/*"
 | 
				
			||||||
PEP8_REPORT_PATH="$PYTHON_ROOT_DIR/test/pep8-report.txt"
 | 
					PEP8_REPORT_PATH="$PYTHON_ROOT_DIR/test/pep8-report.txt"
 | 
				
			||||||
PYLINT_REPORT_PATH="$PYTHON_ROOT_DIR/test/pylint-report.txt"
 | 
					PYLINT_REPORT_PATH="$PYTHON_ROOT_DIR/test/pylint-report.txt"
 | 
				
			||||||
PYLINT_INSTALL_INFO="$PYTHON_ROOT_DIR/test/pylint-info.txt"
 | 
					PYLINT_INSTALL_INFO="$PYTHON_ROOT_DIR/test/pylint-info.txt"
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -87,6 +87,8 @@ IQ1_S = ggml_tensor_qtype["gguf_iq1_s"]
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
# Note this format cannot be used directly in IPEX-LLM's mm_int4, which expects
 | 
					# Note this format cannot be used directly in IPEX-LLM's mm_int4, which expects
 | 
				
			||||||
# row major but packing two consecutive columns.
 | 
					# row major but packing two consecutive columns.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def q4_0_xpu_transpose(ggml_weight, weight_shape):
 | 
					def q4_0_xpu_transpose(ggml_weight, weight_shape):
 | 
				
			||||||
    from ipex_llm.transformers.low_bit_linear import get_block_size
 | 
					    from ipex_llm.transformers.low_bit_linear import get_block_size
 | 
				
			||||||
    Q4_0 = get_block_size("sym_int4")
 | 
					    Q4_0 = get_block_size("sym_int4")
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -227,8 +227,8 @@ def chatglm2_quantized_attention_forward_8eb45c(
 | 
				
			||||||
            key_layer = key_layer.transpose(0, 1)
 | 
					            key_layer = key_layer.transpose(0, 1)
 | 
				
			||||||
            query_layer_cur = query_layer[..., :rot_dim]
 | 
					            query_layer_cur = query_layer[..., :rot_dim]
 | 
				
			||||||
            key_layer_cur = key_layer[..., :rot_dim]
 | 
					            key_layer_cur = key_layer[..., :rot_dim]
 | 
				
			||||||
            # ipex_llm's apply_rotary_embedding can change the origin storage, so query_layer will get
 | 
					            # ipex_llm's apply_rotary_embedding can change the origin storage,
 | 
				
			||||||
            # the result directly.
 | 
					            # so query_layer will get the result directly.
 | 
				
			||||||
            torch.ops.torch_ipex.apply_rotary_embedding(query_layer_cur, sin, cos, query_layer_cur)
 | 
					            torch.ops.torch_ipex.apply_rotary_embedding(query_layer_cur, sin, cos, query_layer_cur)
 | 
				
			||||||
            torch.ops.torch_ipex.apply_rotary_embedding(key_layer_cur, sin, cos, key_layer_cur)
 | 
					            torch.ops.torch_ipex.apply_rotary_embedding(key_layer_cur, sin, cos, key_layer_cur)
 | 
				
			||||||
            query_layer = query_layer.transpose(0, 1)
 | 
					            query_layer = query_layer.transpose(0, 1)
 | 
				
			||||||
| 
						 | 
					@ -367,8 +367,8 @@ def chatglm2_attention_forward_8eb45c(
 | 
				
			||||||
            key_layer = key_layer.transpose(0, 1)
 | 
					            key_layer = key_layer.transpose(0, 1)
 | 
				
			||||||
            query_layer_cur = query_layer[..., :rot_dim]
 | 
					            query_layer_cur = query_layer[..., :rot_dim]
 | 
				
			||||||
            key_layer_cur = key_layer[..., :rot_dim]
 | 
					            key_layer_cur = key_layer[..., :rot_dim]
 | 
				
			||||||
            # ipex_llm's apply_rotary_embedding can change the origin storage, so query_layer will get
 | 
					            # ipex_llm's apply_rotary_embedding can change the origin storage,
 | 
				
			||||||
            # the result directly.
 | 
					            # so query_layer will get the result directly.
 | 
				
			||||||
            torch.ops.torch_ipex.apply_rotary_embedding(query_layer_cur, sin, cos, query_layer_cur)
 | 
					            torch.ops.torch_ipex.apply_rotary_embedding(query_layer_cur, sin, cos, query_layer_cur)
 | 
				
			||||||
            torch.ops.torch_ipex.apply_rotary_embedding(key_layer_cur, sin, cos, key_layer_cur)
 | 
					            torch.ops.torch_ipex.apply_rotary_embedding(key_layer_cur, sin, cos, key_layer_cur)
 | 
				
			||||||
            query_layer = query_layer.transpose(0, 1)
 | 
					            query_layer = query_layer.transpose(0, 1)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -91,7 +91,7 @@ def use_quantize_kv_cache(linear: torch.nn.Module, x: torch.Tensor) -> bool:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def kv_cache_device_check(x: torch.Tensor) -> bool:
 | 
					def kv_cache_device_check(x: torch.Tensor) -> bool:
 | 
				
			||||||
    return get_xpu_device_type(x) == "mtl" or \
 | 
					    return get_xpu_device_type(x) == "mtl" or \
 | 
				
			||||||
        ((get_xpu_device_type(x) == "arc" or get_xpu_device_type(x) == "flex") and \
 | 
					        ((get_xpu_device_type(x) == "arc" or get_xpu_device_type(x) == "flex") and
 | 
				
			||||||
            1 < x.size(0) and x.size(0) <= 8)
 | 
					            1 < x.size(0) and x.size(0) <= 8)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue