From 19183ef4762e9566b804d7bf7027540fcf33ab09 Mon Sep 17 00:00:00 2001
From: SONG Ge <38711238+sgwhat@users.noreply.github.com>
Date: Thu, 1 Feb 2024 15:45:39 +0800
Subject: [PATCH] [WebUI] Reset bigdl-llm loader options with default value (#10064)

* reset bigdl-llm loader options with default value

* remove options which may be complex for naive users
---
 .../Text-Generation-WebUI/modules/loaders.py | 4 ++--
 .../Text-Generation-WebUI/modules/models.py | 4 ++--
 .../Text-Generation-WebUI/modules/shared.py | 14 +++++++-------
 .../example/Text-Generation-WebUI/modules/ui.py | 4 ++--
 .../Text-Generation-WebUI/modules/ui_model_menu.py | 4 ++--
 5 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/python/llm/example/Text-Generation-WebUI/modules/loaders.py b/python/llm/example/Text-Generation-WebUI/modules/loaders.py
index 3c0048ca..b32d9cae 100644
--- a/python/llm/example/Text-Generation-WebUI/modules/loaders.py
+++ b/python/llm/example/Text-Generation-WebUI/modules/loaders.py
@@ -133,9 +133,9 @@ loaders_and_params = OrderedDict({
'load_in_4bit',
'load_in_low_bit',
'optimize_model',
- 'modules_to_not_convert',
+ #'modules_to_not_convert',
'cpu_embedding',
- 'lightweight_bmm',
+ #'lightweight_bmm',
'trust_remote_code',
'use_cache',
],
diff --git a/python/llm/example/Text-Generation-WebUI/modules/models.py b/python/llm/example/Text-Generation-WebUI/modules/models.py
index 33843779..60a16b9e 100644
--- a/python/llm/example/Text-Generation-WebUI/modules/models.py
+++ b/python/llm/example/Text-Generation-WebUI/modules/models.py
@@ -362,9 +362,9 @@ def bigdl_llm_loader(model_name):
load_in_4bit=shared.args.load_in_4bit,
load_in_low_bit=shared.args.load_in_low_bit,
optimize_model=shared.args.optimize_model,
- modules_to_not_convert=shared.args.modules_to_not_convert,
+ #modules_to_not_convert=shared.args.modules_to_not_convert,
cpu_embedding=shared.args.cpu_embedding,
- lightweight_bmm=shared.args.lightweight_bmm,
+ #lightweight_bmm=shared.args.lightweight_bmm,
trust_remote_code=shared.args.trust_remote_code,
use_cache=shared.args.use_cache,
)
diff --git a/python/llm/example/Text-Generation-WebUI/modules/shared.py b/python/llm/example/Text-Generation-WebUI/modules/shared.py
index ce569319..53819b41 100644
--- a/python/llm/example/Text-Generation-WebUI/modules/shared.py
+++ b/python/llm/example/Text-Generation-WebUI/modules/shared.py
@@ -176,18 +176,18 @@ group.add_argument('--monkey-patch', action='store_true', help='Apply the monkey

# BigDL-LLM
group = parser.add_argument_group('BigDL-LLM')
-group.add_argument('--device', type=str, default='cpu', help='the device type, it could be CPU or GPU')
+group.add_argument('--device', type=str, default='GPU', help='the device type, it could be CPU or GPU')
group.add_argument('--load-in-4bit', action='store_true', default=False, help='boolean value, True means loading linear’s weight to symmetric int 4 if'\
'the model is a regular fp16/bf16/fp32 model, and to asymmetric int 4 if the model is GPTQ model.Default to be False')
group.add_argument('--load-in-low-bit', type=str, default=None, help='str value, options are sym_int4, asym_int4, sym_int5, asym_int5'\
', sym_int8, nf3, nf4, fp4, fp8, fp8_e4m3, fp8_e5m2, fp16 or bf16. sym_int4 means symmetric int 4, asym_int4 means asymmetric int 4,'\
'nf4 means 4-bit NormalFloat, etc. Relevant low bit optimizations will be applied to the model.')
-group.add_argument('--optimize-model', action='store_true', help='boolean value, Whether to further optimize the low_bit llm model.')
-group.add_argument('--modules-to-not-convert', type=str, default=None, help='list of str value, modules (nn.Module) that are skipped when conducting model optimizations.')
-group.add_argument('--cpu-embedding', action='store_true', help='Whether to replace the Embedding layer, may need to set it to `True` when running BigDL-LLM on GPU on Windows. Default to be `False`')
-group.add_argument('--lightweight-bmm', action='store_true', help='Whether to replace the torch.bmm ops, may need to set it to `True` when running BigDL-LLM on GPU on Windows.')
-group.add_argument('--use-cache', action='store_true', help='If use_cache is True, past key values are used to speed up decoding if applicable to model.')
-group.add_argument('--trust-remote-code', action='store_true', help='Set trust_remote_code=True while loading the model. Necessary for some models.')
+group.add_argument('--optimize-model', action='store_true', default=True, help='boolean value, Whether to further optimize the low_bit llm model.')
+#group.add_argument('--modules-to-not-convert', type=str, default=None, help='list of str value, modules (nn.Module) that are skipped when conducting model optimizations.')
+group.add_argument('--cpu-embedding', action='store_true', default=True, help='Whether to replace the Embedding layer, may need to set it to `True` when running BigDL-LLM on GPU on Windows. Default to be `False`')
+#group.add_argument('--lightweight-bmm', action='store_true', help='Whether to replace the torch.bmm ops, may need to set it to `True` when running BigDL-LLM on GPU on Windows.')
+group.add_argument('--use-cache', action='store_true', default=True, help='If use_cache is True, past key values are used to speed up decoding if applicable to model.')
+group.add_argument('--trust-remote-code', action='store_true', default=True, help='Set trust_remote_code=True while loading the model. Necessary for some models.')

# HQQ
group = parser.add_argument_group('HQQ')
diff --git a/python/llm/example/Text-Generation-WebUI/modules/ui.py b/python/llm/example/Text-Generation-WebUI/modules/ui.py
index 37b182d9..fa63d9f9 100644
--- a/python/llm/example/Text-Generation-WebUI/modules/ui.py
+++ b/python/llm/example/Text-Generation-WebUI/modules/ui.py
@@ -80,9 +80,9 @@ def list_model_elements():
'load_in_4bit',
'load_in_low_bit',
'optimize_model',
- 'modules_to_not_convert',
+ #'modules_to_not_convert',
'cpu_embedding',
- 'lightweight_bmm',
+ #'lightweight_bmm',
'use_cache',
'compute_dtype',
'quant_type',
diff --git a/python/llm/example/Text-Generation-WebUI/modules/ui_model_menu.py b/python/llm/example/Text-Generation-WebUI/modules/ui_model_menu.py
index 25aab074..9573b9ea 100644
--- a/python/llm/example/Text-Generation-WebUI/modules/ui_model_menu.py
+++ b/python/llm/example/Text-Generation-WebUI/modules/ui_model_menu.py
@@ -154,8 +154,8 @@ def create_ui():
shared.gradio['cpu_embedding'] = gr.Checkbox(label="cpu-embedding", value=shared.args.cpu_embedding, info="Whether to replace the Embedding layer.")
shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant)
shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='Split the model across multiple GPUs, comma-separated list of proportions, e.g. 18,17')
- shared.gradio['modules_to_not_convert'] = gr.Textbox(label="modules-to-not-convert", value=shared.args.modules_to_not_convert, info="modules (nn.Module) that are skipped when.")
- shared.gradio['lightweight_bmm'] = gr.Checkbox(label="lightweight-bmm", value=shared.args.lightweight_bmm, info="Whether to replace the torch.bmm ops.")
+ #shared.gradio['modules_to_not_convert'] = gr.Textbox(label="modules-to-not-convert", value=shared.args.modules_to_not_convert, info="modules (nn.Module) that are skipped when.")
+ #shared.gradio['lightweight_bmm'] = gr.Checkbox(label="lightweight-bmm", value=shared.args.lightweight_bmm, info="Whether to replace the torch.bmm ops.")
shared.gradio['trust_remote_code'] = gr.Checkbox(label="trust-remote-code", value=shared.args.trust_remote_code, info='To enable this option, start the web UI with the --trust-remote-code flag. It is necessary for some models.')
shared.gradio['use_cache'] = gr.Checkbox(label="use-cache", value=shared.args.use_cache, info="Wether to use past_key_values to speed up model decoding.")
shared.gradio['cfg_cache'] = gr.Checkbox(label="cfg-cache", value=shared.args.cfg_cache, info='Create an additional cache for CFG negative prompts.')
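
Note (not part of the patch): the new defaults in modules/shared.py rely on standard Python argparse semantics. An option declared with action='store_true' together with default=True evaluates to True whether or not the flag is passed, because store_true can only ever set the value to True. After this patch, --optimize-model, --cpu-embedding, --use-cache and --trust-remote-code are therefore always on and cannot be disabled from the command line, while --device still offers a real choice since it takes an explicit value. A minimal sketch of that behavior, assuming nothing beyond the standard library:

    import argparse

    parser = argparse.ArgumentParser()
    # Mirrors the patched definitions in modules/shared.py.
    parser.add_argument('--optimize-model', action='store_true', default=True)
    parser.add_argument('--device', type=str, default='GPU')

    print(parser.parse_args([]))
    # -> Namespace(optimize_model=True, device='GPU')
    print(parser.parse_args(['--optimize-model', '--device', 'CPU']))
    # -> Namespace(optimize_model=True, device='CPU')
    # Passing --optimize-model is a no-op (it was already True);
    # only the value-taking --device option can still be overridden.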