From 19183ef4762e9566b804d7bf7027540fcf33ab09 Mon Sep 17 00:00:00 2001
From: SONG Ge <38711238+sgwhat@users.noreply.github.com>
Date: Thu, 1 Feb 2024 15:45:39 +0800
Subject: [PATCH] [WebUI] Reset bigdl-llm loader options with default value (#10064)

* reset bigdl-llm loader options with default value

* remove options which may be complex for naive users
---
 .../Text-Generation-WebUI/modules/loaders.py | 4 ++--
 .../Text-Generation-WebUI/modules/models.py | 4 ++--
 .../Text-Generation-WebUI/modules/shared.py | 14 +++++++-------
 .../example/Text-Generation-WebUI/modules/ui.py | 4 ++--
 .../Text-Generation-WebUI/modules/ui_model_menu.py | 4 ++--
 5 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/python/llm/example/Text-Generation-WebUI/modules/loaders.py b/python/llm/example/Text-Generation-WebUI/modules/loaders.py
index 3c0048ca..b32d9cae 100644
--- a/python/llm/example/Text-Generation-WebUI/modules/loaders.py
+++ b/python/llm/example/Text-Generation-WebUI/modules/loaders.py
@@ -133,9 +133,9 @@ loaders_and_params = OrderedDict({
'load_in_4bit',
'load_in_low_bit',
'optimize_model',
- 'modules_to_not_convert',
+ #'modules_to_not_convert',
'cpu_embedding',
- 'lightweight_bmm',
+ #'lightweight_bmm',
'trust_remote_code',
'use_cache',
],
diff --git a/python/llm/example/Text-Generation-WebUI/modules/models.py b/python/llm/example/Text-Generation-WebUI/modules/models.py
index 33843779..60a16b9e 100644
--- a/python/llm/example/Text-Generation-WebUI/modules/models.py
+++ b/python/llm/example/Text-Generation-WebUI/modules/models.py
@@ -362,9 +362,9 @@ def bigdl_llm_loader(model_name):
load_in_4bit=shared.args.load_in_4bit,
load_in_low_bit=shared.args.load_in_low_bit,
optimize_model=shared.args.optimize_model,
- modules_to_not_convert=shared.args.modules_to_not_convert,
+ #modules_to_not_convert=shared.args.modules_to_not_convert,
cpu_embedding=shared.args.cpu_embedding,
- lightweight_bmm=shared.args.lightweight_bmm,
+ #lightweight_bmm=shared.args.lightweight_bmm,
trust_remote_code=shared.args.trust_remote_code,
use_cache=shared.args.use_cache,
)
diff --git a/python/llm/example/Text-Generation-WebUI/modules/shared.py b/python/llm/example/Text-Generation-WebUI/modules/shared.py
index ce569319..53819b41 100644
--- a/python/llm/example/Text-Generation-WebUI/modules/shared.py
+++ b/python/llm/example/Text-Generation-WebUI/modules/shared.py
@@ -176,18 +176,18 @@ group.add_argument('--monkey-patch', action='store_true', help='Apply the monkey

# BigDL-LLM
group = parser.add_argument_group('BigDL-LLM')
-group.add_argument('--device', type=str, default='cpu', help='the device type, it could be CPU or GPU')
+group.add_argument('--device', type=str, default='GPU', help='the device type, it could be CPU or GPU')
group.add_argument('--load-in-4bit', action='store_true', default=False, help='boolean value, True means loading linear’s weight to symmetric int 4 if'\
'the model is a regular fp16/bf16/fp32 model, and to asymmetric int 4 if the model is GPTQ model.Default to be False')
group.add_argument('--load-in-low-bit', type=str, default=None, help='str value, options are sym_int4, asym_int4, sym_int5, asym_int5'\
', sym_int8, nf3, nf4, fp4, fp8, fp8_e4m3, fp8_e5m2, fp16 or bf16. sym_int4 means symmetric int 4, asym_int4 means asymmetric int 4,'\
'nf4 means 4-bit NormalFloat, etc. Relevant low bit optimizations will be applied to the model.')
-group.add_argument('--optimize-model', action='store_true', help='boolean value, Whether to further optimize the low_bit llm model.')
-group.add_argument('--modules-to-not-convert', type=str, default=None, help='list of str value, modules (nn.Module) that are skipped when conducting model optimizations.')
-group.add_argument('--cpu-embedding', action='store_true', help='Whether to replace the Embedding layer, may need to set it to `True` when running BigDL-LLM on GPU on Windows. Default to be `False`')
-group.add_argument('--lightweight-bmm', action='store_true', help='Whether to replace the torch.bmm ops, may need to set it to `True` when running BigDL-LLM on GPU on Windows.')
-group.add_argument('--use-cache', action='store_true', help='If use_cache is True, past key values are used to speed up decoding if applicable to model.')
-group.add_argument('--trust-remote-code', action='store_true', help='Set trust_remote_code=True while loading the model. Necessary for some models.')
+group.add_argument('--optimize-model', action='store_true', default=True, help='boolean value, Whether to further optimize the low_bit llm model.')
+#group.add_argument('--modules-to-not-convert', type=str, default=None, help='list of str value, modules (nn.Module) that are skipped when conducting model optimizations.')
+group.add_argument('--cpu-embedding', action='store_true', default=True, help='Whether to replace the Embedding layer, may need to set it to `True` when running BigDL-LLM on GPU on Windows. Default to be `False`')
+#group.add_argument('--lightweight-bmm', action='store_true', help='Whether to replace the torch.bmm ops, may need to set it to `True` when running BigDL-LLM on GPU on Windows.')
+group.add_argument('--use-cache', action='store_true', default=True, help='If use_cache is True, past key values are used to speed up decoding if applicable to model.')
+group.add_argument('--trust-remote-code', action='store_true', default=True, help='Set trust_remote_code=True while loading the model. Necessary for some models.')

# HQQ
group = parser.add_argument_group('HQQ')
diff --git a/python/llm/example/Text-Generation-WebUI/modules/ui.py b/python/llm/example/Text-Generation-WebUI/modules/ui.py
index 37b182d9..fa63d9f9 100644
--- a/python/llm/example/Text-Generation-WebUI/modules/ui.py
+++ b/python/llm/example/Text-Generation-WebUI/modules/ui.py
@@ -80,9 +80,9 @@ def list_model_elements():
'load_in_4bit',
'load_in_low_bit',
'optimize_model',
- 'modules_to_not_convert',
+ #'modules_to_not_convert',
'cpu_embedding',
- 'lightweight_bmm',
+ #'lightweight_bmm',
'use_cache',
'compute_dtype',
'quant_type',
diff --git a/python/llm/example/Text-Generation-WebUI/modules/ui_model_menu.py b/python/llm/example/Text-Generation-WebUI/modules/ui_model_menu.py
index 25aab074..9573b9ea 100644
--- a/python/llm/example/Text-Generation-WebUI/modules/ui_model_menu.py
+++ b/python/llm/example/Text-Generation-WebUI/modules/ui_model_menu.py
@@ -154,8 +154,8 @@ def create_ui():
shared.gradio['cpu_embedding'] = gr.Checkbox(label="cpu-embedding", value=shared.args.cpu_embedding, info="Whether to replace the Embedding layer.")
shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant)
shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='Split the model across multiple GPUs, comma-separated list of proportions, e.g. 18,17')
- shared.gradio['modules_to_not_convert'] = gr.Textbox(label="modules-to-not-convert", value=shared.args.modules_to_not_convert, info="modules (nn.Module) that are skipped when.")
- shared.gradio['lightweight_bmm'] = gr.Checkbox(label="lightweight-bmm", value=shared.args.lightweight_bmm, info="Whether to replace the torch.bmm ops.")
+ #shared.gradio['modules_to_not_convert'] = gr.Textbox(label="modules-to-not-convert", value=shared.args.modules_to_not_convert, info="modules (nn.Module) that are skipped when.")
+ #shared.gradio['lightweight_bmm'] = gr.Checkbox(label="lightweight-bmm", value=shared.args.lightweight_bmm, info="Whether to replace the torch.bmm ops.")
shared.gradio['trust_remote_code'] = gr.Checkbox(label="trust-remote-code", value=shared.args.trust_remote_code, info='To enable this option, start the web UI with the --trust-remote-code flag. It is necessary for some models.')
shared.gradio['use_cache'] = gr.Checkbox(label="use-cache", value=shared.args.use_cache, info="Wether to use past_key_values to speed up model decoding.")
shared.gradio['cfg_cache'] = gr.Checkbox(label="cfg-cache", value=shared.args.cfg_cache, info='Create an additional cache for CFG negative prompts.')
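
Note (not part of the patch): the new defaults in modules/shared.py rely on standard Python argparse semantics. An option declared with action='store_true' together with default=True evaluates to True whether or not the flag is passed, because store_true can only ever set the value to True. After this patch, --optimize-model, --cpu-embedding, --use-cache and --trust-remote-code are therefore always on and cannot be disabled from the command line, while --device still offers a real choice since it takes an explicit value. A minimal sketch of that behavior, assuming nothing beyond the standard library:

    import argparse

    parser = argparse.ArgumentParser()
    # Mirrors the patched definitions in modules/shared.py.
    parser.add_argument('--optimize-model', action='store_true', default=True)
    parser.add_argument('--device', type=str, default='GPU')

    print(parser.parse_args([]))
    # -> Namespace(optimize_model=True, device='GPU')
    print(parser.parse_args(['--optimize-model', '--device', 'CPU']))
    # -> Namespace(optimize_model=True, device='CPU')
    # Passing --optimize-model is a no-op (it was already True);
    # only the value-taking --device option can still be overridden.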