add python style check (#10620)

* add python style check

* fix style checks

* update runner

* add ipex-llm-finetune-qlora-cpu-k8s to manually_build workflow

* update tag to 2.1.0-SNAPSHOT
Shaojun Liu 2024-04-02 16:17:56 +08:00 committed by GitHub
parent 58b57177e3
commit a10f5a1b8d
13 changed files with 120 additions and 33 deletions


@@ -10,13 +10,14 @@ on:
         type: choice
         options:
         - all
-        - ipex-llm-finetune-lora-cpu
-        - ipex-llm-finetune-qlora-cpu
-        - ipex-llm-finetune-qlora-xpu
-        - ipex-llm-xpu
         - ipex-llm-cpu
-        - ipex-llm-serving-xpu
+        - ipex-llm-xpu
         - ipex-llm-serving-cpu
+        - ipex-llm-serving-xpu
+        - ipex-llm-finetune-lora-cpu
+        - ipex-llm-finetune-qlora-cpu-standalone
+        - ipex-llm-finetune-qlora-cpu-k8s
+        - ipex-llm-finetune-qlora-xpu
       tag:
         description: 'docker image tag (e.g. 2.1.0-SNAPSHOT)'
         required: true
@@ -72,8 +73,8 @@ jobs:
          sudo docker push ${image}:latest
          sudo docker rmi -f ${image}:${TAG} 10.239.45.10/arda/${image}:${TAG} ${image}:latest

-  ipex-llm-finetune-qlora-cpu:
-    if: ${{ inputs.artifact == 'ipex-llm-finetune-qlora-cpu' || inputs.artifact == 'all' }}
+  ipex-llm-finetune-qlora-cpu-standalone:
+    if: ${{ inputs.artifact == 'ipex-llm-finetune-qlora-cpu-standalone' || inputs.artifact == 'all' }}
    runs-on: [self-hosted, Shire]
    steps:
@@ -81,12 +82,12 @@ jobs:
      - name: docker login
        run: |
          docker login -u ${DOCKERHUB_USERNAME} -p ${DOCKERHUB_PASSWORD}
-      - name: ipex-llm-finetune-qlora-cpu
+      - name: ipex-llm-finetune-qlora-cpu-standalone
        run: |
          echo "##############################################################"
-          echo "####### ipex-llm-finetune-qlora-cpu ########"
+          echo "####### ipex-llm-finetune-qlora-cpu-standalone ########"
          echo "##############################################################"
-          export image=intelanalytics/ipex-llm-finetune-qlora-cpu
+          export image=intelanalytics/ipex-llm-finetune-qlora-cpu-standalone
          cd docker/llm/finetune/qlora/cpu/docker
          sudo docker build \
            --no-cache=true \
@@ -102,6 +103,36 @@ jobs:
          sudo docker push ${image}:latest
          sudo docker rmi -f ${image}:${TAG} 10.239.45.10/arda/${image}:${TAG} ${image}:latest

+  ipex-llm-finetune-qlora-cpu-k8s:
+    if: ${{ inputs.artifact == 'ipex-llm-finetune-qlora-cpu-k8s' || inputs.artifact == 'all' }}
+    runs-on: [self-hosted, Shire]
+    steps:
+      - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # actions/checkout@v3
+      - name: docker login
+        run: |
+          docker login -u ${DOCKERHUB_USERNAME} -p ${DOCKERHUB_PASSWORD}
+      - name: ipex-llm-finetune-qlora-cpu-k8s
+        run: |
+          echo "##############################################################"
+          echo "####### ipex-llm-finetune-qlora-cpu-k8s ########"
+          echo "##############################################################"
+          export image=intelanalytics/ipex-llm-finetune-qlora-cpu-k8s
+          cd docker/llm/finetune/qlora/cpu/docker
+          sudo docker build \
+            --no-cache=true \
+            --build-arg http_proxy=${HTTP_PROXY} \
+            --build-arg https_proxy=${HTTPS_PROXY} \
+            --build-arg no_proxy=${NO_PROXY} \
+            -t ${image}:${TAG} -f ./Dockerfile.k8s .
+          sudo docker push ${image}:${TAG}
+          sudo docker tag ${image}:${TAG} 10.239.45.10/arda/${image}:${TAG}
+          sudo docker push 10.239.45.10/arda/${image}:${TAG}
+          # tag 'latest'
+          sudo docker tag ${image}:${TAG} ${image}:latest
+          sudo docker push ${image}:latest
+          sudo docker rmi -f ${image}:${TAG} 10.239.45.10/arda/${image}:${TAG} ${image}:latest
+
  ipex-llm-finetune-qlora-xpu:
    if: ${{ inputs.artifact == 'ipex-llm-finetune-qlora-xpu' || inputs.artifact == 'all' }}
    runs-on: [self-hosted, Shire]
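For reference, the renamed `ipex-llm-finetune-qlora-cpu-standalone` choice and the new `ipex-llm-finetune-qlora-cpu-k8s` job above are driven by a manual dispatch of this workflow. A minimal sketch using the GitHub CLI, assuming the workflow file is named `manually_build.yml` (the file name itself is not shown in this diff):

```bash
# Hypothetical manual dispatch; the artifact and tag values correspond to the
# workflow_dispatch inputs defined in the hunks above.
gh workflow run manually_build.yml \
  -f artifact=ipex-llm-finetune-qlora-cpu-k8s \
  -f tag=2.1.0-SNAPSHOT
```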


@@ -0,0 +1,54 @@
name: Python Style Check

# Cancel previous runs in the PR when you push new commits
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}
  cancel-in-progress: true

permissions:
  contents: read

# Controls when the action will run.
on:
  push:
    branches: [ main ]
    paths:
      - 'python/**'
      - '.github/workflows/python-style-check.yml'
  pull_request:
    branches: [ main ]
    paths:
      - 'python/**'
      - '.github/workflows/python-style-check.yml'
  # schedule:
  # - cron: '0 16 * * *'

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "build"
  style-check:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: [3.7]

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          pip install pycodestyle
          pip install pydocstyle
          pip install mypy==0.982
          pip install wheel
      - name: LLM style checking
        run: bash python/llm/dev/test/lint-python
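The job above only wires existing lint tooling into CI, so the same check can be reproduced locally before pushing. A minimal sketch that mirrors the workflow's steps, assuming a Python environment comparable to the `python-version: [3.7]` matrix entry (only mypy is pinned, as in the workflow):

```bash
# Install the same linters the CI job installs, then run the repository's
# lint script from the repo root.
pip install pycodestyle pydocstyle mypy==0.982 wheel
bash python/llm/dev/test/lint-python
```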


@@ -8,10 +8,10 @@ You can download directly from Dockerhub like:

```bash
# For standalone
-docker pull intelanalytics/ipex-llm-finetune-qlora-cpu-standalone:2.5.0-SNAPSHOT
+docker pull intelanalytics/ipex-llm-finetune-qlora-cpu-standalone:2.1.0-SNAPSHOT

# For k8s
-docker pull intelanalytics/ipex-llm-finetune-qlora-cpu-k8s:2.5.0-SNAPSHOT
+docker pull intelanalytics/ipex-llm-finetune-qlora-cpu-k8s:2.1.0-SNAPSHOT
```

Or build the image from source:
@@ -24,7 +24,7 @@ export HTTPS_PROXY=your_https_proxy
docker build \
  --build-arg http_proxy=${HTTP_PROXY} \
  --build-arg https_proxy=${HTTPS_PROXY} \
-  -t intelanalytics/ipex-llm-finetune-qlora-cpu-standalone:2.5.0-SNAPSHOT \
+  -t intelanalytics/ipex-llm-finetune-qlora-cpu-standalone:2.1.0-SNAPSHOT \
  -f ./Dockerfile .

# For k8s
@@ -34,7 +34,7 @@ export HTTPS_PROXY=your_https_proxy
docker build \
  --build-arg http_proxy=${HTTP_PROXY} \
  --build-arg https_proxy=${HTTPS_PROXY} \
-  -t intelanalytics/ipex-llm-finetune-qlora-cpu-k8s:2.5.0-SNAPSHOT \
+  -t intelanalytics/ipex-llm-finetune-qlora-cpu-k8s:2.1.0-SNAPSHOT \
  -f ./Dockerfile.k8s .
```
@@ -55,7 +55,7 @@ docker run -itd \
  -e https_proxy=${HTTPS_PROXY} \
  -v $BASE_MODE_PATH:/ipex_llm/model \
  -v $DATA_PATH:/ipex_llm/data/alpaca-cleaned \
-  intelanalytics/ipex-llm-finetune-qlora-cpu-standalone:2.5.0-SNAPSHOT
+  intelanalytics/ipex-llm-finetune-qlora-cpu-standalone:2.1.0-SNAPSHOT
```

The download and mount of base model and data to a docker container demonstrates a standard fine-tuning process. You can skip this step for a quick start, and in this way, the fine-tuning codes will automatically download the needed files:
@@ -69,7 +69,7 @@ docker run -itd \
  --name=ipex-llm-fintune-qlora-cpu \
  -e http_proxy=${HTTP_PROXY} \
  -e https_proxy=${HTTPS_PROXY} \
-  intelanalytics/ipex-llm-finetune-qlora-cpu-standalone:2.5.0-SNAPSHOT
+  intelanalytics/ipex-llm-finetune-qlora-cpu-standalone:2.1.0-SNAPSHOT
```

However, we do recommend you to handle them manually, because the automatical download can be blocked by Internet access and Huggingface authentication etc. according to different environment, and the manual method allows you to fine-tune in a custom way (with different base model and dataset).
@@ -130,7 +130,7 @@ docker run -itd \
  -e WORKER_COUNT_DOCKER=your_worker_count \
  -v your_downloaded_base_model_path:/ipex_llm/model \
  -v your_downloaded_data_path:/ipex_llm/data/alpaca_data_cleaned_archive.json \
-  intelanalytics/ipex-llm-finetune-qlora-cpu-standalone:2.5.0-SNAPSHOT
+  intelanalytics/ipex-llm-finetune-qlora-cpu-standalone:2.1.0-SNAPSHOT
```

Note that `STANDALONE_DOCKER` is set to **TRUE** here.


@@ -1,4 +1,4 @@
-imageName: intelanalytics/ipex-llm-finetune-qlora-cpu-k8s:2.5.0-SNAPSHOT
+imageName: intelanalytics/ipex-llm-finetune-qlora-cpu-k8s:2.1.0-SNAPSHOT
trainerNum: 2
microBatchSize: 8
enableGradientCheckpoint: false # true will save more memory but increase latency


@@ -21,7 +21,7 @@ SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
PYTHON_ROOT_DIR="$SCRIPT_DIR/.."
echo $PYTHON_ROOT_DIR
PATHS_TO_CHECK="$SCRIPT_DIR/../../src"
-PATTERNS_TO_EXCLUDE="__init__.py,log4Error.py,$SCRIPT_DIR/../../src/bigdl/llm/langchain/*,$SCRIPT_DIR/../../src/bigdl/llm/transformers/gguf/models/model_implement/yuan2/*"
+PATTERNS_TO_EXCLUDE="__init__.py,log4Error.py,$SCRIPT_DIR/../../src/ipex_llm/langchain/*,$SCRIPT_DIR/../../src/ipex_llm/transformers/gguf/models/model_implement/yuan2/*"
PEP8_REPORT_PATH="$PYTHON_ROOT_DIR/test/pep8-report.txt"
PYLINT_REPORT_PATH="$PYTHON_ROOT_DIR/test/pylint-report.txt"
PYLINT_INSTALL_INFO="$PYTHON_ROOT_DIR/test/pylint-info.txt"
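The change above simply repoints `PATTERNS_TO_EXCLUDE` from the old `bigdl/llm` layout to the new `ipex_llm` package so the excluded paths still match after the rename. The exact command that consumes these variables is not part of this hunk; the following is only a sketch of how such variables are typically passed to pycodestyle, not the script's literal invocation:

```bash
# Assumed usage: pycodestyle accepts comma-separated glob patterns via --exclude.
pycodestyle --exclude="$PATTERNS_TO_EXCLUDE" "$PATHS_TO_CHECK" > "$PEP8_REPORT_PATH"
```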


@@ -87,6 +87,8 @@ IQ1_S = ggml_tensor_qtype["gguf_iq1_s"]
#
# Note this format cannot be used directly in IPEX-LLM's mm_int4, which expects
# row major but packing two consecutive columns.
+
+
def q4_0_xpu_transpose(ggml_weight, weight_shape):
    from ipex_llm.transformers.low_bit_linear import get_block_size
    Q4_0 = get_block_size("sym_int4")


@@ -312,7 +312,7 @@ class _BaseAutoModelClass:
cpu_embedding = kwargs.get("cpu_embedding", False)
# for 2bit, default use embedding_quantization
if q_k in ["gguf_iq2_xxs", "gguf_iq2_xs", "gguf_iq1_s", "q2_k"] and \
        not cpu_embedding and embedding_qtype is None:
    embedding_qtype = "q2_k"
if imatrix_file is not None:
    imatrix_data = load_imatrix_data(imatrix_file)


@@ -227,8 +227,8 @@ def chatglm2_quantized_attention_forward_8eb45c(
key_layer = key_layer.transpose(0, 1)
query_layer_cur = query_layer[..., :rot_dim]
key_layer_cur = key_layer[..., :rot_dim]
-# ipex_llm's apply_rotary_embedding can change the origin storage, so query_layer will get
-# the result directly.
+# ipex_llm's apply_rotary_embedding can change the origin storage,
+# so query_layer will get the result directly.
torch.ops.torch_ipex.apply_rotary_embedding(query_layer_cur, sin, cos, query_layer_cur)
torch.ops.torch_ipex.apply_rotary_embedding(key_layer_cur, sin, cos, key_layer_cur)
query_layer = query_layer.transpose(0, 1)
@@ -367,8 +367,8 @@ def chatglm2_attention_forward_8eb45c(
key_layer = key_layer.transpose(0, 1)
query_layer_cur = query_layer[..., :rot_dim]
key_layer_cur = key_layer[..., :rot_dim]
-# ipex_llm's apply_rotary_embedding can change the origin storage, so query_layer will get
-# the result directly.
+# ipex_llm's apply_rotary_embedding can change the origin storage,
+# so query_layer will get the result directly.
torch.ops.torch_ipex.apply_rotary_embedding(query_layer_cur, sin, cos, query_layer_cur)
torch.ops.torch_ipex.apply_rotary_embedding(key_layer_cur, sin, cos, key_layer_cur)
query_layer = query_layer.transpose(0, 1)


@@ -64,7 +64,7 @@ from transformers import logging
logger = logging.get_logger(__name__)

def llama_decoding_fast_path_qtype_check(proj):
    # IQ2_XXS only can be used in Llama-like model
    qtype = getattr(proj, "qtype", None)
    return qtype in [SYM_INT4, FP8E5, IQ2_XXS, FP4]


@@ -136,7 +136,7 @@ def qwen_attention_forward_original(
device = hidden_states.device
# for flash attention
original_dtype = hidden_states.dtype
position_ids = rotary_pos_emb_list[-1]  # the last one is posisiton_ids
rotary_pos_emb_list = rotary_pos_emb_list[:-1]
use_fuse_rope = should_use_fuse_rope(self, hidden_states)
@@ -332,7 +332,7 @@ def qwen_attention_forward_quantized(
bsz, q_len, _ = hidden_states.size()
device = hidden_states.device
position_ids = rotary_pos_emb_list[-1]  # the last one is posisiton_ids
rotary_pos_emb_list = rotary_pos_emb_list[:-1]
use_fuse_rope = should_use_fuse_rope(self, hidden_states)


@@ -29,7 +29,7 @@ SILU = 0
GELU = 1

def decoding_fast_path_qtype_check(proj):
    qtype = getattr(proj, "qtype", None)
    return qtype in [SYM_INT4, FP8E5, FP4]
@@ -91,7 +91,7 @@ def use_quantize_kv_cache(linear: torch.nn.Module, x: torch.Tensor) -> bool:
def kv_cache_device_check(x: torch.Tensor) -> bool:
    return get_xpu_device_type(x) == "mtl" or \
-        ((get_xpu_device_type(x) == "arc" or get_xpu_device_type(x) == "flex") and \
+        ((get_xpu_device_type(x) == "arc" or get_xpu_device_type(x) == "flex") and
         1 < x.size(0) and x.size(0) <= 8)
@@ -330,8 +330,8 @@ def use_esimd_sdp(q_len, k_len, head_dim, query_states, attention_mask=None):
device_name = torch.xpu.get_device_name(query_states.device.index)
if device_name.startswith("Intel(R) Arc(TM) A") or \
   device_name.startswith("Intel(R) Data Center GPU Flex") or \
   device_name.startswith("Intel(R) Data Center GPU Max"):
    import linear_fp16_esimd
    if not hasattr(linear_fp16_esimd, "sdp_forward"):
        return False


@@ -43,7 +43,7 @@ from ipex_llm.vllm.core.policy import PolicyFactory
from ipex_llm.vllm.logger import init_logger
from ipex_llm.vllm.sequence import SequenceData, SequenceStatus
from ipex_llm.vllm.sequence import (Sequence, SequenceGroup,
                                    SequenceGroupMetadata)
from ipex_llm.utils.common import invalidInputError

logger = init_logger(__name__)


@@ -43,7 +43,7 @@ from ipex_llm.vllm.model_executor.input_metadata import InputMetadata
from ipex_llm.vllm.sampling_params import SamplingParams, SamplingType
from ipex_llm.vllm.sequence import (SamplerOutput, SequenceGroupMetadata,
                                    SequenceData, SequenceOutputs)

import time