# # Copyright 2016 The BigDL Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # =========================================================================== # # This file is adapted from # https://github.com/mit-han-lab/streaming-llm/blob/main/streaming_llm/utils.py # which is licensed under the MIT license: # # MIT License # # Copyright (c) 2023 MIT HAN Lab # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
import argparse
import json
import os
import os.path as osp
import shutil
import ssl
import urllib.request

import torch

# Import from the IPEX-LLM API instead of using the transformers API.
from ipex_llm.transformers import AutoModelForCausalLM
from transformers import LlamaTokenizer


def load(model_name_or_path):
    """Load a causal LM through IPEX-LLM together with its Llama tokenizer.

    Args:
        model_name_or_path (string): Value forwarded unchanged to both
            ``from_pretrained`` calls.

    Returns:
        tuple: ``(model, tokenizer)``. The model has been switched to eval
        mode and the tokenizer is guaranteed to have a ``pad_token_id``.
    """
    print(f"Loading model from {model_name_or_path} ...")
    # NOTE(review): comment inherited from the upstream streaming-llm code --
    # tensor parallelism reportedly runs into bugs with Falcon. Confirm
    # whether it is still relevant for this loader.
    tokenizer = LlamaTokenizer.from_pretrained(
        model_name_or_path,
        trust_remote_code=True,
    )
    # load_in_4bit=True gives a performance boost; optimize_model=False for now.
    # TODO align logics of optimize_model and streaming
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        load_in_4bit=True,
        optimize_model=False,
        trust_remote_code=True,
    )

    # Make sure a padding id exists: fall back to EOS, and to 0 when the
    # tokenizer defines no EOS id either.
    if tokenizer.pad_token_id is None:
        eos_token_id = tokenizer.eos_token_id
        tokenizer.pad_token_id = eos_token_id if eos_token_id is not None else 0

    model.eval()
    return model, tokenizer


def download_url(url: str, folder="folder"):
    """Download the content of a URL into a folder.

    Modified from
    https://github.com/pyg-team/pytorch_geometric/tree/master/torch_geometric

    The file name is the last path segment of ``url`` with any query string
    removed (a segment that itself starts with ``?`` is kept verbatim). When
    a file of that name already exists in ``folder`` it is reused and nothing
    is downloaded.

    Args:
        url (string): The url of target file.
        folder (string): The target folder; created if it does not exist.

    Returns:
        string: File path of downloaded files.

    Raises:
        ValueError: If no file name can be derived from ``url`` (for example
            when it ends with ``/``).
    """
    file = url.rpartition("/")[2]
    if not file.startswith("?"):
        file = file.split("?")[0]
    if not file:
        # The original indexed file[0] here and died with a bare IndexError.
        raise ValueError(f"Cannot derive a file name from url: {url!r}")

    path = osp.join(folder, file)
    if osp.exists(path):
        print(f"File {file} exists, use existing file.")
        return path

    print(f"Downloading {url}")
    os.makedirs(folder, exist_ok=True)

    # SECURITY NOTE: certificate verification is disabled here, so the
    # download is exposed to man-in-the-middle tampering. Kept for backward
    # compatibility; prefer ssl.create_default_context() where possible.
    ctx = ssl._create_unverified_context()

    # Stream into a temporary sibling file and rename it only on success, so
    # an interrupted download never leaves a truncated file that a later call
    # would mistake for a complete one.
    tmp_path = path + ".part"
    try:
        with urllib.request.urlopen(url, context=ctx) as response, \
                open(tmp_path, "wb") as f:
            shutil.copyfileobj(response, f)
        os.replace(tmp_path, path)
    finally:
        # After a successful os.replace the temporary file no longer exists,
        # so this only cleans up after a failure.
        if osp.exists(tmp_path):
            os.remove(tmp_path)

    return path


def load_jsonl(
    file_path,
):
    """Read a JSON Lines file into a list of decoded objects.

    Args:
        file_path (string): Path of the ``.jsonl`` file, read as UTF-8.

    Returns:
        list: One decoded JSON value per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        # Blank or whitespace-only lines (e.g. trailing newlines) are skipped
        # rather than fed to json.loads, which would reject them.
        return [json.loads(line) for line in f if line.strip()]