diff --git a/python/llm/src/ipex_llm/serving/fastchat/model_worker.py b/python/llm/src/ipex_llm/serving/fastchat/model_worker.py
index c822769f..de8b78f6 100644
--- a/python/llm/src/ipex_llm/serving/fastchat/model_worker.py
+++ b/python/llm/src/ipex_llm/serving/fastchat/model_worker.py
@@ -470,7 +470,7 @@ if __name__ == "__main__":
     if args.gpus:
         invalidInputError(len(args.gpus.split(",")) > args.num_gpus,
                           f"Larger --num-gpus "
-                          "({args.num_gpus}) than --gpus {args.gpus}!")
+                          f"({args.num_gpus}) than --gpus {args.gpus}!")
         os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
 
     gptq_config = GptqConfig(
diff --git a/python/llm/src/ipex_llm/transformers/model.py b/python/llm/src/ipex_llm/transformers/model.py
index aa7d89c3..185c46b6 100644
--- a/python/llm/src/ipex_llm/transformers/model.py
+++ b/python/llm/src/ipex_llm/transformers/model.py
@@ -672,7 +672,7 @@ class _BaseAutoModelClass:
             else:
                 invalidInputError(False,
                                   f'`torch_dtype` can be either `torch.dtype` or `"auto"`,'
-                                  'but received {torch_dtype}')
+                                  f'but received {torch_dtype}')
             dtype_orig = model_class._set_default_torch_dtype(torch_dtype)
 
         # Pretrained Model
diff --git a/python/llm/src/ipex_llm/transformers/npu_model.py b/python/llm/src/ipex_llm/transformers/npu_model.py
index 725fff86..1b38bb2b 100644
--- a/python/llm/src/ipex_llm/transformers/npu_model.py
+++ b/python/llm/src/ipex_llm/transformers/npu_model.py
@@ -217,7 +217,7 @@ class _BaseAutoModelClass:
                 max_prompt_len < max_context_len,
                 (
                     f"max_prompt_len ({max_prompt_len}) should be less"
-                    " than max_context_len ({max_context_len})"
+                    f" than max_context_len ({max_context_len})"
                 ),
             )
             optimize_kwargs = {
@@ -553,7 +553,7 @@ class _BaseAutoModelClass:
             invalidInputError(
                 False,
                 f'`torch_dtype` can be either `torch.dtype` or `"auto"`,'
-                "but received {torch_dtype}",
+                f"but received {torch_dtype}",
             )
 
         dtype_orig = model_class._set_default_torch_dtype(torch_dtype)
@@ -588,7 +588,7 @@ class _BaseAutoModelClass:
                 max_prompt_len < max_context_len,
                 (
                     f"max_prompt_len ({max_prompt_len}) should be less"
-                    " than max_context_len ({max_context_len})"
+                    f" than max_context_len ({max_context_len})"
                 ),
             )
             from ipex_llm.transformers.npu_models.convert_mp import optimize_llm_pre
diff --git a/python/llm/src/ipex_llm/transformers/npu_models/phi3.py b/python/llm/src/ipex_llm/transformers/npu_models/phi3.py
index 6889c9ee..beff5dee 100644
--- a/python/llm/src/ipex_llm/transformers/npu_models/phi3.py
+++ b/python/llm/src/ipex_llm/transformers/npu_models/phi3.py
@@ -127,7 +127,7 @@ def phi3_attention_forward(
             invalidInputError(
                 False,
                 f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)},"
-                " but is {attention_mask.size()}"
+                f" but is {attention_mask.size()}"
             )
 
         attn_weights = attn_weights + attention_mask
diff --git a/python/llm/src/ipex_llm/transformers/utils.py b/python/llm/src/ipex_llm/transformers/utils.py
index 3f351440..bcdfc9ef 100644
--- a/python/llm/src/ipex_llm/transformers/utils.py
+++ b/python/llm/src/ipex_llm/transformers/utils.py
@@ -92,7 +92,7 @@ def load_state_dict(checkpoint_file: Union[str, os.PathLike]):
     except Exception as e:
         invalidInputError(False,
                           f"Unable to load weights"
-                          "from pytorch checkpoint file for '{checkpoint_file}' "
+                          f"from pytorch checkpoint file for '{checkpoint_file}' "
                           f"at '{checkpoint_file}'. ")
diff --git a/python/llm/src/ipex_llm/utils/lazy_load_torch.py b/python/llm/src/ipex_llm/utils/lazy_load_torch.py
index 5de30b94..b0cfed42 100644
--- a/python/llm/src/ipex_llm/utils/lazy_load_torch.py
+++ b/python/llm/src/ipex_llm/utils/lazy_load_torch.py
@@ -112,7 +112,7 @@ def _load(pickle_fp, map_location, picklemoudle, pickle_file='data.pkl', zip_fil
                 data = fp.read(size)
                 return torch.frombuffer(bytearray(data), dtype=dtype)
             description = f'storage data_type={data_type} ' \
-                          'path-in-zip={filename} path={self.zip_file.filename}'
+                          f'path-in-zip={filename} path={self.zip_file.filename}'
             return LazyStorage(load=load, kind=pid[1], description=description)
 
         @staticmethod