[WebUI] Reset bigdl-llm loader options with default value (#10064)
* Reset bigdl-llm loader options with default values
* Remove options which may be too complex for naive users
commit 19183ef476
parent 6e0f1a1e92

5 changed files with 15 additions and 15 deletions

@@ -133,9 +133,9 @@ loaders_and_params = OrderedDict({
         'load_in_4bit',
         'load_in_low_bit',
         'optimize_model',
-        'modules_to_not_convert',
+        #'modules_to_not_convert',
         'cpu_embedding',
-        'lightweight_bmm',
+        #'lightweight_bmm',
         'trust_remote_code',
         'use_cache',
     ],
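
For orientation: `loaders_and_params` is the table the web UI uses to decide which option widgets to show for each loader, so commenting an entry out is what hides it from naive users. Below is a minimal runnable sketch of that pattern; the `'BigDL-LLM'` key and the `visible_elements` helper are illustrative assumptions, not code copied from the project.

    from collections import OrderedDict

    # Hypothetical, trimmed-down version of the mapping changed above:
    # loader name -> names of the UI elements shown for that loader.
    loaders_and_params = OrderedDict({
        'BigDL-LLM': [
            'load_in_4bit',
            'load_in_low_bit',
            'optimize_model',
            'cpu_embedding',
            'trust_remote_code',
            'use_cache',
        ],
    })

    def visible_elements(loader):
        # Entries absent from the list stay hidden in the model tab.
        return loaders_and_params.get(loader, [])

    print(visible_elements('BigDL-LLM'))
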
@@ -362,9 +362,9 @@ def bigdl_llm_loader(model_name):
         load_in_4bit=shared.args.load_in_4bit,
         load_in_low_bit=shared.args.load_in_low_bit,
         optimize_model=shared.args.optimize_model,
-        modules_to_not_convert=shared.args.modules_to_not_convert,
+        #modules_to_not_convert=shared.args.modules_to_not_convert,
         cpu_embedding=shared.args.cpu_embedding,
-        lightweight_bmm=shared.args.lightweight_bmm,
+        #lightweight_bmm=shared.args.lightweight_bmm,
         trust_remote_code=shared.args.trust_remote_code,
         use_cache=shared.args.use_cache,
     )
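
The keyword arguments above are forwarded to BigDL-LLM's model constructor. A minimal sketch of that call follows, assuming the loader wraps `bigdl.llm.transformers.AutoModelForCausalLM`; the model path is a placeholder, literal values stand in for the `shared.args` fields, and `load_in_low_bit` is omitted since it is an alternative to `load_in_4bit`.

    from bigdl.llm.transformers import AutoModelForCausalLM

    # Sketch only, not the project's loader function.
    model = AutoModelForCausalLM.from_pretrained(
        'path/to/model',          # placeholder; the web UI passes the model dir
        load_in_4bit=True,        # shared.args.load_in_4bit
        optimize_model=True,      # shared.args.optimize_model
        cpu_embedding=True,       # shared.args.cpu_embedding
        trust_remote_code=True,   # shared.args.trust_remote_code
        use_cache=True,           # shared.args.use_cache
    )
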
@@ -176,18 +176,18 @@ group.add_argument('--monkey-patch', action='store_true', help='Apply the monkey
 
 # BigDL-LLM
 group = parser.add_argument_group('BigDL-LLM')
-group.add_argument('--device', type=str, default='cpu', help='the device type, it could be CPU or GPU')
+group.add_argument('--device', type=str, default='GPU', help='the device type, it could be CPU or GPU')
 group.add_argument('--load-in-4bit', action='store_true', default=False, help='boolean value, True means loading linear’s weight to symmetric int 4 if'\
     'the model is a regular fp16/bf16/fp32 model, and to asymmetric int 4 if the model is GPTQ model.Default to be False')
 group.add_argument('--load-in-low-bit', type=str, default=None, help='str value, options are sym_int4, asym_int4, sym_int5, asym_int5'\
     ', sym_int8, nf3, nf4, fp4, fp8, fp8_e4m3, fp8_e5m2, fp16 or bf16. sym_int4 means symmetric int 4, asym_int4 means asymmetric int 4,'\
     'nf4 means 4-bit NormalFloat, etc. Relevant low bit optimizations will be applied to the model.')
-group.add_argument('--optimize-model', action='store_true', help='boolean value, Whether to further optimize the low_bit llm model.')
+group.add_argument('--optimize-model', action='store_true', default=True, help='boolean value, Whether to further optimize the low_bit llm model.')
-group.add_argument('--modules-to-not-convert', type=str, default=None, help='list of str value, modules (nn.Module) that are skipped when conducting model optimizations.')
+#group.add_argument('--modules-to-not-convert', type=str, default=None, help='list of str value, modules (nn.Module) that are skipped when conducting model optimizations.')
-group.add_argument('--cpu-embedding', action='store_true', help='Whether to replace the Embedding layer, may need to set it to `True` when running BigDL-LLM on GPU on Windows. Default to be `False`')
+group.add_argument('--cpu-embedding', action='store_true', default=True, help='Whether to replace the Embedding layer, may need to set it to `True` when running BigDL-LLM on GPU on Windows. Default to be `False`')
-group.add_argument('--lightweight-bmm', action='store_true', help='Whether to replace the torch.bmm ops, may need to set it to `True` when running BigDL-LLM on GPU on Windows.')
+#group.add_argument('--lightweight-bmm', action='store_true', help='Whether to replace the torch.bmm ops, may need to set it to `True` when running BigDL-LLM on GPU on Windows.')
-group.add_argument('--use-cache', action='store_true', help='If use_cache is True, past key values are used to speed up decoding if applicable to model.')
+group.add_argument('--use-cache', action='store_true', default=True, help='If use_cache is True, past key values are used to speed up decoding if applicable to model.')
-group.add_argument('--trust-remote-code', action='store_true', help='Set trust_remote_code=True while loading the model. Necessary for some models.')
+group.add_argument('--trust-remote-code', action='store_true', default=True, help='Set trust_remote_code=True while loading the model. Necessary for some models.')
 
 # HQQ
 group = parser.add_argument_group('HQQ')
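
Note one consequence of the pattern introduced above: combining `action='store_true'` with `default=True` makes the option effectively always on, because the flag can only set it to True. A standalone demo of that argparse behavior:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--use-cache', action='store_true', default=True)

    # True with or without the flag; the flag cannot turn the option off.
    print(parser.parse_args([]).use_cache)               # True (default)
    print(parser.parse_args(['--use-cache']).use_cache)  # True (flag is a no-op)

Turning such an option off from the command line would require changing the default or adding a separate store_false flag; here the intent is simply to pin sensible defaults for naive users.
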
@@ -80,9 +80,9 @@ def list_model_elements():
         'load_in_4bit',
         'load_in_low_bit',
         'optimize_model',
-        'modules_to_not_convert',
+        #'modules_to_not_convert',
         'cpu_embedding',
-        'lightweight_bmm',
+        #'lightweight_bmm',
         'use_cache',
         'compute_dtype',
         'quant_type',
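
`list_model_elements()` returns the widget names kept in sync with `shared.args`, so removing the two entries also stops the UI from writing those values back. A hypothetical sketch of that round-trip; the `shared` stub and `apply_model_settings` helper are illustrative only, not the project's actual wiring.

    from types import SimpleNamespace

    shared = SimpleNamespace(args=SimpleNamespace())  # stand-in for modules.shared

    def list_model_elements():
        # Trimmed to match the list after this change.
        return ['load_in_4bit', 'load_in_low_bit', 'optimize_model',
                'cpu_embedding', 'use_cache']

    def apply_model_settings(state):
        # Copy each synced widget value back onto the CLI-args namespace.
        for name in list_model_elements():
            if name in state:
                setattr(shared.args, name, state[name])

    apply_model_settings({'load_in_4bit': True, 'use_cache': True})
    print(shared.args.load_in_4bit, shared.args.use_cache)
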
@@ -154,8 +154,8 @@ def create_ui():
         shared.gradio['cpu_embedding'] = gr.Checkbox(label="cpu-embedding", value=shared.args.cpu_embedding, info="Whether to replace the Embedding layer.")
         shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant)
         shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='Split the model across multiple GPUs, comma-separated list of proportions, e.g. 18,17')
-        shared.gradio['modules_to_not_convert'] = gr.Textbox(label="modules-to-not-convert", value=shared.args.modules_to_not_convert, info="modules (nn.Module) that are skipped when.")
+        #shared.gradio['modules_to_not_convert'] = gr.Textbox(label="modules-to-not-convert", value=shared.args.modules_to_not_convert, info="modules (nn.Module) that are skipped when.")
-        shared.gradio['lightweight_bmm'] = gr.Checkbox(label="lightweight-bmm", value=shared.args.lightweight_bmm, info="Whether to replace the torch.bmm ops.")
+        #shared.gradio['lightweight_bmm'] = gr.Checkbox(label="lightweight-bmm", value=shared.args.lightweight_bmm, info="Whether to replace the torch.bmm ops.")
         shared.gradio['trust_remote_code'] = gr.Checkbox(label="trust-remote-code", value=shared.args.trust_remote_code, info='To enable this option, start the web UI with the --trust-remote-code flag. It is necessary for some models.')
         shared.gradio['use_cache'] = gr.Checkbox(label="use-cache", value=shared.args.use_cache, info="Wether to use past_key_values to speed up model decoding.")
         shared.gradio['cfg_cache'] = gr.Checkbox(label="cfg-cache", value=shared.args.cfg_cache, info='Create an additional cache for CFG negative prompts.')
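
For reference, the surviving checkboxes follow the standard Gradio pattern. A self-contained sketch with hard-coded values in place of `shared.args` (widget set trimmed for brevity):

    import gradio as gr

    with gr.Blocks() as demo:
        cpu_embedding = gr.Checkbox(
            label="cpu-embedding", value=True,
            info="Whether to replace the Embedding layer.")
        use_cache = gr.Checkbox(
            label="use-cache", value=True,
            info="Whether to use past_key_values to speed up model decoding.")

    # demo.launch()  # uncomment to try the form locally
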