diff --git a/python/llm/src/ipex_llm/serving/fastchat/model_worker.py b/python/llm/src/ipex_llm/serving/fastchat/model_worker.py
index c822769f..de8b78f6 100644
--- a/python/llm/src/ipex_llm/serving/fastchat/model_worker.py
+++ b/python/llm/src/ipex_llm/serving/fastchat/model_worker.py
@@ -470,7 +470,7 @@ if __name__ == "__main__":
     if args.gpus:
         invalidInputError(len(args.gpus.split(",")) > args.num_gpus,
                           f"Larger --num-gpus "
-                          "({args.num_gpus}) than --gpus {args.gpus}!")
+                          f"({args.num_gpus}) than --gpus {args.gpus}!")
         os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
 
     gptq_config = GptqConfig(
diff --git a/python/llm/src/ipex_llm/transformers/model.py b/python/llm/src/ipex_llm/transformers/model.py
index aa7d89c3..185c46b6 100644
--- a/python/llm/src/ipex_llm/transformers/model.py
+++ b/python/llm/src/ipex_llm/transformers/model.py
@@ -672,7 +672,7 @@ class _BaseAutoModelClass:
             else:
                 invalidInputError(False,
                                   f'`torch_dtype` can be either `torch.dtype` or `"auto"`,'
-                                  'but received {torch_dtype}')
+                                  f'but received {torch_dtype}')
             dtype_orig = model_class._set_default_torch_dtype(torch_dtype)
 
         # Pretrained Model
diff --git a/python/llm/src/ipex_llm/transformers/npu_model.py b/python/llm/src/ipex_llm/transformers/npu_model.py
index 725fff86..1b38bb2b 100644
--- a/python/llm/src/ipex_llm/transformers/npu_model.py
+++ b/python/llm/src/ipex_llm/transformers/npu_model.py
@@ -217,7 +217,7 @@ class _BaseAutoModelClass:
                 max_prompt_len < max_context_len,
                 (
                     f"max_prompt_len ({max_prompt_len}) should be less"
-                    " than max_context_len ({max_context_len})"
+                    f" than max_context_len ({max_context_len})"
                 ),
             )
             optimize_kwargs = {
@@ -553,7 +553,7 @@ class _BaseAutoModelClass:
             invalidInputError(
                 False,
                 f'`torch_dtype` can be either `torch.dtype` or `"auto"`,'
-                "but received {torch_dtype}",
+                f"but received {torch_dtype}",
             )
 
         dtype_orig = model_class._set_default_torch_dtype(torch_dtype)
@@ -588,7 +588,7 @@ class _BaseAutoModelClass:
                 max_prompt_len < max_context_len,
                 (
                     f"max_prompt_len ({max_prompt_len}) should be less"
-                    " than max_context_len ({max_context_len})"
+                    f" than max_context_len ({max_context_len})"
                 ),
             )
             from ipex_llm.transformers.npu_models.convert_mp import optimize_llm_pre
diff --git a/python/llm/src/ipex_llm/transformers/npu_models/phi3.py b/python/llm/src/ipex_llm/transformers/npu_models/phi3.py
index 6889c9ee..beff5dee 100644
--- a/python/llm/src/ipex_llm/transformers/npu_models/phi3.py
+++ b/python/llm/src/ipex_llm/transformers/npu_models/phi3.py
@@ -127,7 +127,7 @@ def phi3_attention_forward(
             invalidInputError(
                 False,
                 f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)},"
-                " but is {attention_mask.size()}"
+                f" but is {attention_mask.size()}"
             )
 
         attn_weights = attn_weights + attention_mask
diff --git a/python/llm/src/ipex_llm/transformers/utils.py b/python/llm/src/ipex_llm/transformers/utils.py
index 3f351440..bcdfc9ef 100644
--- a/python/llm/src/ipex_llm/transformers/utils.py
+++ b/python/llm/src/ipex_llm/transformers/utils.py
@@ -92,7 +92,7 @@ def load_state_dict(checkpoint_file: Union[str, os.PathLike]):
     except Exception as e:
         invalidInputError(False,
                           f"Unable to load weights"
-                          "from pytorch checkpoint file for '{checkpoint_file}' "
+                          f"from pytorch checkpoint file for '{checkpoint_file}' "
                           f"at '{checkpoint_file}'. ")
diff --git a/python/llm/src/ipex_llm/utils/lazy_load_torch.py b/python/llm/src/ipex_llm/utils/lazy_load_torch.py
index 5de30b94..b0cfed42 100644
--- a/python/llm/src/ipex_llm/utils/lazy_load_torch.py
+++ b/python/llm/src/ipex_llm/utils/lazy_load_torch.py
@@ -112,7 +112,7 @@ def _load(pickle_fp, map_location, picklemoudle, pickle_file='data.pkl', zip_fil
                 data = fp.read(size)
                 return torch.frombuffer(bytearray(data), dtype=dtype)
             description = f'storage data_type={data_type} ' \
-                          'path-in-zip={filename} path={self.zip_file.filename}'
+                          f'path-in-zip={filename} path={self.zip_file.filename}'
             return LazyStorage(load=load, kind=pid[1], description=description)
 
         @staticmethod