enable inference mode for deepspeed tp serving (#11742)
parent 9e65cf00b3
commit 107f7aafd0
1 changed file with 3 additions and 1 deletion
@@ -121,6 +121,8 @@ def load_model(model_path, low_bit):
     current_accel = XPU_Accelerator()
     set_accelerator(current_accel)
 
+    model=model.eval()
+
     # Move model back to xpu
     model = model.to(f"xpu:{local_rank}")
     model = BenchmarkWrapper(model)
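The functional change is the added model=model.eval() call, made before the model is moved to the XPU device and wrapped with BenchmarkWrapper. As a minimal illustrative sketch in plain PyTorch (not code from this repo; the toy model and input are hypothetical), this is what eval mode changes for serving: modules such as Dropout switch to their deterministic inference behavior, and eval mode is commonly paired with torch.inference_mode() at call time to also skip autograd bookkeeping.

import torch
import torch.nn as nn

# Hypothetical toy model: dropout is stochastic in training mode, so two
# forward passes on the same input generally disagree.
model = nn.Sequential(nn.Linear(8, 8), nn.Dropout(p=0.5))
x = torch.ones(1, 8)

model.train()
print(torch.equal(model(x), model(x)))  # usually False: dropout masks differ

# What the patched load_model() now does before serving: switch to eval mode.
model = model.eval()
print(torch.equal(model(x), model(x)))  # True: dropout is a no-op in eval mode

# Often combined with inference_mode() during serving to disable autograd
# tracking as well, saving memory and per-call overhead.
with torch.inference_mode():
    y = model(x)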