diff --git a/python/llm/src/bigdl/llm/ggml/model/llama/llama_cpp.py b/python/llm/src/bigdl/llm/ggml/model/llama/llama_cpp.py index e9a84a94..979e83a0 100644 --- a/python/llm/src/bigdl/llm/ggml/model/llama/llama_cpp.py +++ b/python/llm/src/bigdl/llm/ggml/model/llama/llama_cpp.py @@ -80,6 +80,10 @@ def _load_shared_library(lib_base_name: str): cdll_args = dict() # type: ignore # Add the library directory to the DLL search path on Windows (if needed) if sys.platform == "win32" and sys.version_info >= (3, 8): + # On Windows, PyTorch and our native library load different OpenMP runtimes; set + # OMP_WAIT_POLICY=PASSIVE so idle OpenMP threads sleep instead of spin-waiting. + os.environ["OMP_WAIT_POLICY"] = "PASSIVE" + os.add_dll_directory(str(_base_path)) os.environ["PATH"] = str(_base_path) + ";" + os.environ["PATH"] if "CUDA_PATH" in os.environ: