[WebUI] Reset bigdl-llm loader options with default value (#10064)

* reset bigdl-llm loader options with default value

* remove options which maybe complex for naive users
This commit is contained in:
SONG Ge 2024-02-01 15:45:39 +08:00 committed by GitHub
parent 6e0f1a1e92
commit 19183ef476
5 changed files with 15 additions and 15 deletions

View file

@@ -133,9 +133,9 @@ loaders_and_params = OrderedDict({
'load_in_4bit', 'load_in_4bit',
'load_in_low_bit', 'load_in_low_bit',
'optimize_model', 'optimize_model',
'modules_to_not_convert', #'modules_to_not_convert',
'cpu_embedding', 'cpu_embedding',
'lightweight_bmm', #'lightweight_bmm',
'trust_remote_code', 'trust_remote_code',
'use_cache', 'use_cache',
], ],

View file

@@ -362,9 +362,9 @@ def bigdl_llm_loader(model_name):
load_in_4bit=shared.args.load_in_4bit, load_in_4bit=shared.args.load_in_4bit,
load_in_low_bit=shared.args.load_in_low_bit, load_in_low_bit=shared.args.load_in_low_bit,
optimize_model=shared.args.optimize_model, optimize_model=shared.args.optimize_model,
modules_to_not_convert=shared.args.modules_to_not_convert, #modules_to_not_convert=shared.args.modules_to_not_convert,
cpu_embedding=shared.args.cpu_embedding, cpu_embedding=shared.args.cpu_embedding,
lightweight_bmm=shared.args.lightweight_bmm, #lightweight_bmm=shared.args.lightweight_bmm,
trust_remote_code=shared.args.trust_remote_code, trust_remote_code=shared.args.trust_remote_code,
use_cache=shared.args.use_cache, use_cache=shared.args.use_cache,
) )

View file

@@ -176,18 +176,18 @@ group.add_argument('--monkey-patch', action='store_true', help='Apply the monkey
# BigDL-LLM # BigDL-LLM
group = parser.add_argument_group('BigDL-LLM') group = parser.add_argument_group('BigDL-LLM')
group.add_argument('--device', type=str, default='cpu', help='the device type, it could be CPU or GPU') group.add_argument('--device', type=str, default='GPU', help='the device type, it could be CPU or GPU')
group.add_argument('--load-in-4bit', action='store_true', default=False, help='boolean value, True means loading linears weight to symmetric int 4 if'\ group.add_argument('--load-in-4bit', action='store_true', default=False, help='boolean value, True means loading linears weight to symmetric int 4 if'\
'the model is a regular fp16/bf16/fp32 model, and to asymmetric int 4 if the model is GPTQ model.Default to be False') 'the model is a regular fp16/bf16/fp32 model, and to asymmetric int 4 if the model is GPTQ model.Default to be False')
group.add_argument('--load-in-low-bit', type=str, default=None, help='str value, options are sym_int4, asym_int4, sym_int5, asym_int5'\ group.add_argument('--load-in-low-bit', type=str, default=None, help='str value, options are sym_int4, asym_int4, sym_int5, asym_int5'\
', sym_int8, nf3, nf4, fp4, fp8, fp8_e4m3, fp8_e5m2, fp16 or bf16. sym_int4 means symmetric int 4, asym_int4 means asymmetric int 4,'\ ', sym_int8, nf3, nf4, fp4, fp8, fp8_e4m3, fp8_e5m2, fp16 or bf16. sym_int4 means symmetric int 4, asym_int4 means asymmetric int 4,'\
'nf4 means 4-bit NormalFloat, etc. Relevant low bit optimizations will be applied to the model.') 'nf4 means 4-bit NormalFloat, etc. Relevant low bit optimizations will be applied to the model.')
group.add_argument('--optimize-model', action='store_true', help='boolean value, Whether to further optimize the low_bit llm model.') group.add_argument('--optimize-model', action='store_true', default=True, help='boolean value, Whether to further optimize the low_bit llm model.')
group.add_argument('--modules-to-not-convert', type=str, default=None, help='list of str value, modules (nn.Module) that are skipped when conducting model optimizations.') #group.add_argument('--modules-to-not-convert', type=str, default=None, help='list of str value, modules (nn.Module) that are skipped when conducting model optimizations.')
group.add_argument('--cpu-embedding', action='store_true', help='Whether to replace the Embedding layer, may need to set it to `True` when running BigDL-LLM on GPU on Windows. Default to be `False`') group.add_argument('--cpu-embedding', action='store_true', default=True, help='Whether to replace the Embedding layer, may need to set it to `True` when running BigDL-LLM on GPU on Windows. Default to be `False`')
group.add_argument('--lightweight-bmm', action='store_true', help='Whether to replace the torch.bmm ops, may need to set it to `True` when running BigDL-LLM on GPU on Windows.') #group.add_argument('--lightweight-bmm', action='store_true', help='Whether to replace the torch.bmm ops, may need to set it to `True` when running BigDL-LLM on GPU on Windows.')
group.add_argument('--use-cache', action='store_true', help='If use_cache is True, past key values are used to speed up decoding if applicable to model.') group.add_argument('--use-cache', action='store_true', default=True, help='If use_cache is True, past key values are used to speed up decoding if applicable to model.')
group.add_argument('--trust-remote-code', action='store_true', help='Set trust_remote_code=True while loading the model. Necessary for some models.') group.add_argument('--trust-remote-code', action='store_true', default=True, help='Set trust_remote_code=True while loading the model. Necessary for some models.')
# HQQ # HQQ
group = parser.add_argument_group('HQQ') group = parser.add_argument_group('HQQ')

View file

@@ -80,9 +80,9 @@ def list_model_elements():
'load_in_4bit', 'load_in_4bit',
'load_in_low_bit', 'load_in_low_bit',
'optimize_model', 'optimize_model',
'modules_to_not_convert', #'modules_to_not_convert',
'cpu_embedding', 'cpu_embedding',
'lightweight_bmm', #'lightweight_bmm',
'use_cache', 'use_cache',
'compute_dtype', 'compute_dtype',
'quant_type', 'quant_type',

View file

@@ -154,8 +154,8 @@ def create_ui():
shared.gradio['cpu_embedding'] = gr.Checkbox(label="cpu-embedding", value=shared.args.cpu_embedding, info="Whether to replace the Embedding layer.") shared.gradio['cpu_embedding'] = gr.Checkbox(label="cpu-embedding", value=shared.args.cpu_embedding, info="Whether to replace the Embedding layer.")
shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant) shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant)
shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='Split the model across multiple GPUs, comma-separated list of proportions, e.g. 18,17') shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='Split the model across multiple GPUs, comma-separated list of proportions, e.g. 18,17')
shared.gradio['modules_to_not_convert'] = gr.Textbox(label="modules-to-not-convert", value=shared.args.modules_to_not_convert, info="modules (nn.Module) that are skipped when.") #shared.gradio['modules_to_not_convert'] = gr.Textbox(label="modules-to-not-convert", value=shared.args.modules_to_not_convert, info="modules (nn.Module) that are skipped when.")
shared.gradio['lightweight_bmm'] = gr.Checkbox(label="lightweight-bmm", value=shared.args.lightweight_bmm, info="Whether to replace the torch.bmm ops.") #shared.gradio['lightweight_bmm'] = gr.Checkbox(label="lightweight-bmm", value=shared.args.lightweight_bmm, info="Whether to replace the torch.bmm ops.")
shared.gradio['trust_remote_code'] = gr.Checkbox(label="trust-remote-code", value=shared.args.trust_remote_code, info='To enable this option, start the web UI with the --trust-remote-code flag. It is necessary for some models.') shared.gradio['trust_remote_code'] = gr.Checkbox(label="trust-remote-code", value=shared.args.trust_remote_code, info='To enable this option, start the web UI with the --trust-remote-code flag. It is necessary for some models.')
shared.gradio['use_cache'] = gr.Checkbox(label="use-cache", value=shared.args.use_cache, info="Wether to use past_key_values to speed up model decoding.") shared.gradio['use_cache'] = gr.Checkbox(label="use-cache", value=shared.args.use_cache, info="Wether to use past_key_values to speed up model decoding.")
shared.gradio['cfg_cache'] = gr.Checkbox(label="cfg-cache", value=shared.args.cfg_cache, info='Create an additional cache for CFG negative prompts.') shared.gradio['cfg_cache'] = gr.Checkbox(label="cfg-cache", value=shared.args.cfg_cache, info='Create an additional cache for CFG negative prompts.')