[WebUI] Reset bigdl-llm loader options with default value (#10064)
* Reset bigdl-llm loader options with default values
* Remove options which may be too complex for naive users
commit 19183ef476
parent 6e0f1a1e92

5 changed files with 15 additions and 15 deletions

@@ -133,9 +133,9 @@ loaders_and_params = OrderedDict({
         'load_in_4bit',
         'load_in_low_bit',
         'optimize_model',
-        'modules_to_not_convert',
+        #'modules_to_not_convert',
         'cpu_embedding',
-        'lightweight_bmm',
+        #'lightweight_bmm',
         'trust_remote_code',
         'use_cache',
     ],
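
For orientation: `loaders_and_params` is the table the web UI uses to decide which option widgets to show for each loader, so commenting an entry out is what hides it from naive users. Below is a minimal runnable sketch of that pattern; the `'BigDL-LLM'` key and the `visible_elements` helper are illustrative assumptions, not code copied from the project.

    from collections import OrderedDict

    # Hypothetical, trimmed-down version of the mapping changed above:
    # loader name -> names of the UI elements shown for that loader.
    loaders_and_params = OrderedDict({
        'BigDL-LLM': [
            'load_in_4bit',
            'load_in_low_bit',
            'optimize_model',
            'cpu_embedding',
            'trust_remote_code',
            'use_cache',
        ],
    })

    def visible_elements(loader):
        # Entries absent from the list stay hidden in the model tab.
        return loaders_and_params.get(loader, [])

    print(visible_elements('BigDL-LLM'))
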
@@ -362,9 +362,9 @@ def bigdl_llm_loader(model_name):
         load_in_4bit=shared.args.load_in_4bit,
         load_in_low_bit=shared.args.load_in_low_bit,
         optimize_model=shared.args.optimize_model,
-        modules_to_not_convert=shared.args.modules_to_not_convert,
+        #modules_to_not_convert=shared.args.modules_to_not_convert,
         cpu_embedding=shared.args.cpu_embedding,
-        lightweight_bmm=shared.args.lightweight_bmm,
+        #lightweight_bmm=shared.args.lightweight_bmm,
         trust_remote_code=shared.args.trust_remote_code,
         use_cache=shared.args.use_cache,
     )
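
The keyword arguments above are forwarded to BigDL-LLM's model constructor. A minimal sketch of that call follows, assuming the loader wraps `bigdl.llm.transformers.AutoModelForCausalLM`; the model path is a placeholder, literal values stand in for the `shared.args` fields, and `load_in_low_bit` is omitted since it is an alternative to `load_in_4bit`.

    from bigdl.llm.transformers import AutoModelForCausalLM

    # Sketch only, not the project's loader function.
    model = AutoModelForCausalLM.from_pretrained(
        'path/to/model',          # placeholder; the web UI passes the model dir
        load_in_4bit=True,        # shared.args.load_in_4bit
        optimize_model=True,      # shared.args.optimize_model
        cpu_embedding=True,       # shared.args.cpu_embedding
        trust_remote_code=True,   # shared.args.trust_remote_code
        use_cache=True,           # shared.args.use_cache
    )
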
@@ -176,18 +176,18 @@ group.add_argument('--monkey-patch', action='store_true', help='Apply the monkey
 
 # BigDL-LLM
 group = parser.add_argument_group('BigDL-LLM')
-group.add_argument('--device', type=str, default='cpu', help='the device type, it could be CPU or GPU')
+group.add_argument('--device', type=str, default='GPU', help='the device type, it could be CPU or GPU')
 group.add_argument('--load-in-4bit', action='store_true', default=False, help='boolean value, True means loading linear’s weight to symmetric int 4 if'\
     'the model is a regular fp16/bf16/fp32 model, and to asymmetric int 4 if the model is GPTQ model.Default to be False')
 group.add_argument('--load-in-low-bit', type=str, default=None, help='str value, options are sym_int4, asym_int4, sym_int5, asym_int5'\
     ', sym_int8, nf3, nf4, fp4, fp8, fp8_e4m3, fp8_e5m2, fp16 or bf16. sym_int4 means symmetric int 4, asym_int4 means asymmetric int 4,'\
     'nf4 means 4-bit NormalFloat, etc. Relevant low bit optimizations will be applied to the model.')
-group.add_argument('--optimize-model', action='store_true', help='boolean value, Whether to further optimize the low_bit llm model.')
+group.add_argument('--optimize-model', action='store_true', default=True, help='boolean value, Whether to further optimize the low_bit llm model.')
-group.add_argument('--modules-to-not-convert', type=str, default=None, help='list of str value, modules (nn.Module) that are skipped when conducting model optimizations.')
+#group.add_argument('--modules-to-not-convert', type=str, default=None, help='list of str value, modules (nn.Module) that are skipped when conducting model optimizations.')
-group.add_argument('--cpu-embedding', action='store_true', help='Whether to replace the Embedding layer, may need to set it to `True` when running BigDL-LLM on GPU on Windows. Default to be `False`')
+group.add_argument('--cpu-embedding', action='store_true', default=True, help='Whether to replace the Embedding layer, may need to set it to `True` when running BigDL-LLM on GPU on Windows. Default to be `False`')
-group.add_argument('--lightweight-bmm', action='store_true', help='Whether to replace the torch.bmm ops, may need to set it to `True` when running BigDL-LLM on GPU on Windows.')
+#group.add_argument('--lightweight-bmm', action='store_true', help='Whether to replace the torch.bmm ops, may need to set it to `True` when running BigDL-LLM on GPU on Windows.')
-group.add_argument('--use-cache', action='store_true', help='If use_cache is True, past key values are used to speed up decoding if applicable to model.')
+group.add_argument('--use-cache', action='store_true', default=True, help='If use_cache is True, past key values are used to speed up decoding if applicable to model.')
-group.add_argument('--trust-remote-code', action='store_true', help='Set trust_remote_code=True while loading the model. Necessary for some models.')
+group.add_argument('--trust-remote-code', action='store_true', default=True, help='Set trust_remote_code=True while loading the model. Necessary for some models.')
 
 # HQQ
 group = parser.add_argument_group('HQQ')
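
Note one consequence of the pattern introduced above: combining `action='store_true'` with `default=True` makes the option effectively always on, because the flag can only set it to True. A standalone demo of that argparse behavior:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--use-cache', action='store_true', default=True)

    # True with or without the flag; the flag cannot turn the option off.
    print(parser.parse_args([]).use_cache)               # True (default)
    print(parser.parse_args(['--use-cache']).use_cache)  # True (flag is a no-op)

Turning such an option off from the command line would require changing the default or adding a separate store_false flag; here the intent is simply to pin sensible defaults for naive users.
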
@@ -80,9 +80,9 @@ def list_model_elements():
         'load_in_4bit',
         'load_in_low_bit',
         'optimize_model',
-        'modules_to_not_convert',
+        #'modules_to_not_convert',
         'cpu_embedding',
-        'lightweight_bmm',
+        #'lightweight_bmm',
         'use_cache',
         'compute_dtype',
         'quant_type',
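
`list_model_elements()` returns the widget names kept in sync with `shared.args`, so removing the two entries also stops the UI from writing those values back. A hypothetical sketch of that round-trip; the `shared` stub and `apply_model_settings` helper are illustrative only, not the project's actual wiring.

    from types import SimpleNamespace

    shared = SimpleNamespace(args=SimpleNamespace())  # stand-in for modules.shared

    def list_model_elements():
        # Trimmed to match the list after this change.
        return ['load_in_4bit', 'load_in_low_bit', 'optimize_model',
                'cpu_embedding', 'use_cache']

    def apply_model_settings(state):
        # Copy each synced widget value back onto the CLI-args namespace.
        for name in list_model_elements():
            if name in state:
                setattr(shared.args, name, state[name])

    apply_model_settings({'load_in_4bit': True, 'use_cache': True})
    print(shared.args.load_in_4bit, shared.args.use_cache)
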
@@ -154,8 +154,8 @@ def create_ui():
         shared.gradio['cpu_embedding'] = gr.Checkbox(label="cpu-embedding", value=shared.args.cpu_embedding, info="Whether to replace the Embedding layer.")
         shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant)
         shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='Split the model across multiple GPUs, comma-separated list of proportions, e.g. 18,17')
-        shared.gradio['modules_to_not_convert'] = gr.Textbox(label="modules-to-not-convert", value=shared.args.modules_to_not_convert, info="modules (nn.Module) that are skipped when.")
+        #shared.gradio['modules_to_not_convert'] = gr.Textbox(label="modules-to-not-convert", value=shared.args.modules_to_not_convert, info="modules (nn.Module) that are skipped when.")
-        shared.gradio['lightweight_bmm'] = gr.Checkbox(label="lightweight-bmm", value=shared.args.lightweight_bmm, info="Whether to replace the torch.bmm ops.")
+        #shared.gradio['lightweight_bmm'] = gr.Checkbox(label="lightweight-bmm", value=shared.args.lightweight_bmm, info="Whether to replace the torch.bmm ops.")
         shared.gradio['trust_remote_code'] = gr.Checkbox(label="trust-remote-code", value=shared.args.trust_remote_code, info='To enable this option, start the web UI with the --trust-remote-code flag. It is necessary for some models.')
         shared.gradio['use_cache'] = gr.Checkbox(label="use-cache", value=shared.args.use_cache, info="Wether to use past_key_values to speed up model decoding.")
         shared.gradio['cfg_cache'] = gr.Checkbox(label="cfg-cache", value=shared.args.cfg_cache, info='Create an additional cache for CFG negative prompts.')
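
For reference, the surviving checkboxes follow the standard Gradio pattern. A self-contained sketch with hard-coded values in place of `shared.args` (widget set trimmed for brevity):

    import gradio as gr

    with gr.Blocks() as demo:
        cpu_embedding = gr.Checkbox(
            label="cpu-embedding", value=True,
            info="Whether to replace the Embedding layer.")
        use_cache = gr.Checkbox(
            label="use-cache", value=True,
            info="Whether to use past_key_values to speed up model decoding.")

    # demo.launch()  # uncomment to try the form locally
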