Fix null pointer dereference errors. (#11125)
* delete unused function on tgi_server
* update
* update
* fix style
parent 50ee004ac7
commit 4127b99ed6

4 changed files with 26 additions and 78 deletions
@@ -29,6 +29,7 @@ def _check_version(filename, flag="GLIBC"):
     if flag == "GLIBCXX":
         subfile = _check_glibcxx_version(filename)
     max_version = None
+    if subfile:
         for version_string in subfile.split():
             try:
                 version = Version(version_string.split("_")[1])
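Note on the hunk above: the lookup feeding `subfile` can evidently return None (or an empty string), so iterating over `subfile.split()` unguarded would raise. A minimal standalone sketch of the guarded pattern, assuming `Version` is `packaging.version.Version` and using a hypothetical `max_glibcxx_version` helper for illustration:

from packaging.version import Version

def max_glibcxx_version(subfile):
    # subfile: raw symbol dump, or None/"" when nothing was found;
    # the `if subfile:` guard lets the empty case fall through safely.
    max_version = None
    if subfile:
        for version_string in subfile.split():
            try:
                version = Version(version_string.split("_")[1])
            except (IndexError, ValueError):
                continue  # token is not a NAME_x.y versioned symbol
            if max_version is None or version > max_version:
                max_version = version
    return max_version

print(max_glibcxx_version(None))                           # None, no crash
print(max_glibcxx_version("GLIBCXX_3.4 GLIBCXX_3.4.29"))   # 3.4.29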
@@ -113,6 +113,7 @@ def greedy_generate(model, tokenizer, input_ids, past_key_values, max_gen_len, s

         if pred_token_idx == tokenizer.eos_token_id:
             break
+    if generated_text:
         print(" ".join(generated_text[pos:]).strip('\n<'), flush=True)
     return past_key_values

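The same guard applies to the final print in `greedy_generate`: the tail of `generated_text` is only flushed when the list is non-empty. A self-contained sketch (the `flush_tail` name and the sample values are illustrative only, not from the original):

def flush_tail(generated_text, pos):
    # Guard: an empty generated_text would otherwise print a blank line,
    # and a None value would raise TypeError on slicing.
    if generated_text:
        print(" ".join(generated_text[pos:]).strip('\n<'), flush=True)

flush_tail(["Hello", "world<"], 0)  # prints "Hello world"
flush_tail([], 0)                   # prints nothing
flush_tail(None, 0)                 # prints nothing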
@@ -607,65 +607,6 @@ async def chat_completion_stream_generator(
         yield f"data: {json.dumps(chunk.model_dump(exclude_unset=True), ensure_ascii=False)}\n\n"
     yield "data: [DONE]\n\n"

-async def generate_completion_stream_generator(
-    request: CompletionRequest, n: int, worker_addr: str
-):
-    model_name = request.model
-    id = f"cmpl-{shortuuid.random()}"
-    finish_stream_events = []
-    for text in request.prompt:
-        for i in range(n):
-            previous_text = ""
-            gen_params = await get_gen_params(
-                request.model,
-                worker_addr,
-                text,
-                temperature=request.temperature,
-                top_p=request.top_p,
-                top_k=request.top_k,
-                presence_penalty=request.presence_penalty,
-                frequency_penalty=request.frequency_penalty,
-                max_tokens=request.max_tokens,
-                logprobs=request.logprobs,
-                echo=request.echo,
-                stop=request.stop,
-            )
-            async for content in generate_completion_stream(gen_params, worker_addr):
-                if content["error_code"] != 0:
-                    yield f"data: {json.dumps(chunk.model_dump(exclude_unset=True), ensure_ascii=False)}\n\n"
-                    yield "data: [DONE]\n\n"
-                    return
-                decoded_unicode = content["text"].replace("\ufffd", "")
-                delta_text = decoded_unicode[len(previous_text) :]
-                previous_text = (
-                    decoded_unicode
-                    if len(decoded_unicode) > len(previous_text)
-                    else previous_text
-                )
-                # todo: index is not apparent
-                choice_data = CompletionResponseStreamChoice(
-                    index=i,
-                    text=delta_text,
-                    logprobs=create_openai_logprobs(content.get("logprobs", None)),
-                    finish_reason=content.get("finish_reason", None),
-                )
-                chunk = CompletionStreamResponse(
-                    id=id,
-                    object="text_completion",
-                    choices=[choice_data],
-                    model=model_name,
-                )
-                if len(delta_text) == 0:
-                    if content.get("finish_reason", None) is not None:
-                        finish_stream_events.append(chunk)
-                    continue
-                yield f"data: {json.dumps(chunk.model_dump(exclude_unset=True), ensure_ascii=False)}\n\n"
-    # There is not "content" field in the last delta message, so exclude_none to exclude field "content".
-    for finish_chunk in finish_stream_events:
-        yield f"data: {json.dumps(chunk.model_dump(exclude_unset=True), ensure_ascii=False)}\n\n"
-    yield "data: [DONE]\n\n"
-
-
 async def generate_completion_stream(payload: Dict[str, Any], worker_addr: str):
     controller_address = app_settings.controller_address
     async with httpx.AsyncClient() as client:
@@ -487,8 +487,13 @@ def replace_with_low_bit_linear_for_module(model, qtype, module_name=None,
         FP16Linear, BF16Linear
     has_been_replaced = False

+    splits = []
     if "." in module_name:
         splits = module_name.split(".")
+    if not splits:
+        invalidInputError(False,
+                          "Please provide a valid module_name with hierarchical structure")
+    else:
         parent_module = getattr(model, splits[0])

         if "lm_head" not in module_name:
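The final hunk pre-initialises `splits` and validates it: before this change, a `module_name` with no "." left `splits` unassigned, so `splits[0]` raised NameError. A hedged sketch of the same validation, with a plain ValueError standing in for ipex-llm's `invalidInputError` helper (an assumption about its behaviour) and a hypothetical `resolve_parent_module` name:

def resolve_parent_module(model, module_name):
    splits = []  # pre-initialise so the emptiness check below is always safe
    if "." in module_name:
        splits = module_name.split(".")
    if not splits:
        # stand-in for invalidInputError(False, ...)
        raise ValueError(
            "Please provide a valid module_name with hierarchical structure")
    return getattr(model, splits[0])

class Toy:
    lm_head = "head"

print(resolve_parent_module(Toy(), "lm_head.weight"))  # head
# resolve_parent_module(Toy(), "lm_head")  -> ValueError, not NameError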