[LLM] Add LLM pep8 coding style checking (#8233)

* add LLM pep8 coding checking

* resolve bugs in testing scripts and code style revision
This commit is contained in:
Pingchuan Ma (Henry) 2023-05-30 15:58:14 +08:00 committed by GitHub
parent 4638b85f3e
commit 1f913a6941
3 changed files with 2242 additions and 9 deletions

72
python/llm/dev/test/lint-python Executable file
View file

@ -0,0 +1,72 @@
#!/usr/bin/env bash
#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Originally from Spark
# Runs two checks over the LLM Python sources and reports them together:
#   1. a byte-compile pass (catches syntax errors), and
#   2. the vendored pep8 style checker.
# Exit status is 0 when both checks pass and 1 otherwise.

# Directory that contains this script; every other path is derived from it.
SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
PYTHON_ROOT_DIR="$SCRIPT_DIR/.."
echo "$PYTHON_ROOT_DIR"

# Source trees to lint. Kept in an array so that paths containing spaces are
# passed through intact and further directories can be appended safely.
PATHS_TO_CHECK=("$SCRIPT_DIR/../../src")

PEP8_REPORT_PATH="$PYTHON_ROOT_DIR/test/pep8-report.txt"

# The following settings are carried over from the Spark script this file was
# derived from; nothing below in this script references them.
PYLINT_REPORT_PATH="$PYTHON_ROOT_DIR/test/pylint-report.txt"
PYLINT_INSTALL_INFO="$PYTHON_ROOT_DIR/test/pylint-info.txt"
SPHINXBUILD=${SPHINXBUILD:=sphinx-build}
SPHINX_REPORT_PATH="$PYTHON_ROOT_DIR/test/sphinx-report.txt"

# All relative paths used below (e.g. the pep8 --config file) are resolved from
# here, so abort rather than lint/delete files relative to the wrong directory.
cd "$PYTHON_ROOT_DIR" || exit 1

# compileall: https://docs.python.org/2/library/compileall.html
# NOTE: -l tells compileall not to recurse into subdirectories.
# This truncates (">") the report file; the pep8 output is appended afterwards.
python -B -m compileall -q -l "${PATHS_TO_CHECK[@]}" > "$PEP8_REPORT_PATH"
compile_status=$?

PEP8_VERSION="1.7.0"
PEP8_SCRIPT_PATH="$PYTHON_ROOT_DIR/test/pep8-$PEP8_VERSION.py"
# Upstream location of the pep8 script; recorded for reference only, this
# script never downloads it.
PEP8_SCRIPT_REMOTE_PATH="https://raw.githubusercontent.com/jcrocholl/pep8/$PEP8_VERSION/pep8.py"
echo "PEP8_SCRIPT_PATH" "$PEP8_SCRIPT_PATH"

# Easy install pylint in /dev/pylint. To easy_install into a directory, the PYTHONPATH should
# be set to the directory.
# dev/pylint should be appended to the PATH variable as well.
# Jenkins by default installs the pylint3 version, so for now this just checks the code quality
# of python3.
export "PYTHONPATH=$PYTHON_ROOT_DIR/pylint"
export "PYLINT_HOME=$PYTHONPATH"
export "PATH=$PYTHONPATH:$PATH"

# There is no need to write this output to a file
#+ first, but we do so so that the check status can
#+ be output before the report, like with the
#+ scalastyle and RAT checks.
# NOTE(review): --config is a relative path, so it resolves against
# $PYTHON_ROOT_DIR (i.e. "$PYTHON_ROOT_DIR/dev/tox.ini") -- confirm that this
# is the intended location of tox.ini.
python "$PEP8_SCRIPT_PATH" --ignore=E402,E731,E241,W503,E226 --exclude=__init__.py,log4Error.py --config=dev/tox.ini "${PATHS_TO_CHECK[@]}" >> "$PEP8_REPORT_PATH"
pep8_status=$?

# Both the compile pass and the style pass must succeed.
if [ "$compile_status" -eq 0 ] && [ "$pep8_status" -eq 0 ]; then
    lint_status=0
else
    lint_status=1
fi

if [ "$lint_status" -ne 0 ]; then
    echo "PEP8 checks failed."
    cat "$PEP8_REPORT_PATH"
else
    echo "PEP8 checks passed."
fi

# The report is only a staging file; remove it in either case and make the
# script's exit status reflect the lint result rather than the status of rm.
rm -f "$PEP8_REPORT_PATH"
exit "$lint_status"

File diff suppressed because it is too large Load diff

View file

@ -38,15 +38,17 @@ _quantize_type = {"llama": _llama_quantize_type,
"bloomz": _bloomz_quantize_type, "bloomz": _bloomz_quantize_type,
"gptneox": _gptneox_quantize_type} "gptneox": _gptneox_quantize_type}
_valid_types = set(list(_llama_quantize_type.keys()) + list(_bloomz_quantize_type.keys()) + list(_gptneox_quantize_type.keys())) _valid_types = set(list(_llama_quantize_type.keys()) + list(_bloomz_quantize_type.keys()) +
list(_gptneox_quantize_type.keys()))
def quantize(input_path: str, output_path: str=None, model_family: str = 'llama', dtype: str='q4_0'): def quantize(input_path: str, output_path: str=None,
model_family: str = 'llama', dtype: str='q4_0'):
""" """
Quantize ggml file to lower precision. Quantize ggml file to lower precision.
:param input_path: Path of input ggml file, for example `./ggml-model-f16.bin`. :param input_path: Path of input ggml file, for example `./ggml-model-f16.bin`.
:param output_path: Save path of output quantized model. Default to `None`. :param output_path: Save path of output quantized model. Default to `None`.
If you don't specify this parameter, quantized model will be saved in If you don't specify this parameter, quantized model will be saved in
the same directory as the input and just replace precision with quantize_type the same directory as the input and just replace precision with quantize_type
like `./ggml-model-q4_0.bin`. like `./ggml-model-q4_0.bin`.
@ -60,7 +62,8 @@ def quantize(input_path: str, output_path: str=None, model_family: str = 'llama'
gptneox : "q4_0", "q4_1", "q4_2", "q5_0", "q5_1", "q8_0" gptneox : "q4_0", "q4_1", "q4_2", "q5_0", "q5_1", "q8_0"
""" """
invalidInputError(model_family in ['llama', 'bloomz', 'gptneox'], invalidInputError(model_family in ['llama', 'bloomz', 'gptneox'],
"Now we only support quantization of model family('llama', 'bloomz', 'gptneox')", "Now we only support quantization of model \
family('llama', 'bloomz', 'gptneox')",
"{} is not in the list.".format(model_family)) "{} is not in the list.".format(model_family))
invalidInputError(os.path.isfile(input_path), invalidInputError(os.path.isfile(input_path),
"The file {} was not found".format(input_path)) "The file {} was not found".format(input_path))
@ -69,11 +72,11 @@ def quantize(input_path: str, output_path: str=None, model_family: str = 'llama'
output_path = input_path.replace("f16", dtype) output_path = input_path.replace("f16", dtype)
# convert quantize type str into corresponding int value # convert quantize type str into corresponding int value
quantize_type_map = _quantize_type[model_family] quantize_type_map = _quantize_type[model_family]
invalidInputError(dtype in quantize_type_map, invalidInputError(dtype in quantize_type_map, "{0} model just accept {1} now, \
"{0} model just accept {1} now, but you pass in {2}.".format( but you pass in {2}.".format(
model_family, model_family,
list(quantize_type_map.keys()), list(quantize_type_map.keys()),
dtype)) dtype))
quantize_type = quantize_type_map[dtype] quantize_type = quantize_type_map[dtype]
quantize_args = "{0}/bin/quantize-{1} {2} {3} {4}".format(bin_dirname, quantize_args = "{0}/bin/quantize-{1} {2} {3} {4}".format(bin_dirname,
model_family, model_family,