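#!/bin/bash
#
# Container entrypoint: with no arguments it hands control to an interactive
# bash under tini; otherwise it starts either the FastChat controller plus the
# OpenAI-compatible API server, or a BigDL-LLM worker, depending on --mode.
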
#######################################
# Print the command-line help and terminate the script.
# Globals:   none
# Arguments: none
# Outputs:   help text on stdout
# Returns:   never returns; always exits with status 1 (also for -h/--help)
#######################################
usage() {
  printf '%s\n' \
    "Usage: $0 [-m --mode <controller|worker>] [-h --help] [-w --worker <model_worker|vllm_worker>]" \
    "-h: Print help message." \
    "Controller mode reads the following env:" \
    "CONTROLLER_HOST (default: localhost)." \
    "CONTROLLER_PORT (default: 21001)." \
    "API_HOST (default: localhost)." \
    "API_PORT (default: 8000)." \
    "DISPATCH_METHOD (default: shortest_queue)." \
    "Worker mode reads the following env:" \
    "CONTROLLER_HOST (default: localhost)." \
    "CONTROLLER_PORT (default: 21001)." \
    "WORKER_HOST (default: localhost)." \
    "WORKER_PORT (default: 21002)." \
    "MODEL_PATH (default: empty)." \
    "STREAM_INTERVAL (default: 1)." \
    "OMP_NUM_THREADS (default: number of cores in the cpuset, when it can be determined)."
  exit 1
}

#######################################
# Count the CPUs named by a cpuset list string such as "0-3,8,10-11".
# Arguments: $1 - comma-separated list of CPU ids and inclusive "a-b" ranges
# Outputs:   the number of CPUs on stdout (0 for an empty list)
# Returns:   0
#######################################
_count_cpus_in_list() {
  local cpu_spec=$1
  local -a tokens=()
  local token first last
  local total=0

  # Drop newlines and any stray whitespace before splitting on commas.
  cpu_spec=${cpu_spec//[[:space:]]/}
  IFS=',' read -ra tokens <<< "$cpu_spec"

  for token in "${tokens[@]}"; do
    if [[ $token =~ ^([0-9]+)-([0-9]+)$ ]]; then
      # Range of CPUs; 10# forces base 10 so a leading zero is not read as octal.
      first=$((10#${BASH_REMATCH[1]}))
      last=$((10#${BASH_REMATCH[2]}))
      if (( last >= first )); then
        total=$((total + last - first + 1))
      fi
    elif [[ $token =~ ^[0-9]+$ ]]; then
      # Single CPU
      total=$((total + 1))
    fi
    # Anything else is malformed and is skipped rather than fed to arithmetic.
  done

  printf '%s\n' "$total"
}

#######################################
# Acquire the correct core count when the container is pinned via cpuset-cpus.
# The first candidate file that exists decides the result, even when it is
# empty (which yields 0); later candidates are then not consulted.
# Arguments: optional list of cpuset files to probe, in order. With no
#            arguments the two paths probed are
#            /sys/fs/cgroup/cpuset/cpuset.cpus and
#            /sys/fs/cgroup/cpuset.cpus (the latter is the file used by
#            Kubernetes core-binding).
# Outputs:   the core count on stdout, or -1 if none of the files exists
# Returns:   0
#######################################
calculate_total_cores() {
  local -a candidates=("$@")
  local cpuset_file cpuset_cpus

  if (( ${#candidates[@]} == 0 )); then
    candidates=(
      "/sys/fs/cgroup/cpuset/cpuset.cpus"
      "/sys/fs/cgroup/cpuset.cpus"
    )
  fi

  for cpuset_file in "${candidates[@]}"; do
    if [[ -f "$cpuset_file" ]]; then
      # An unreadable file is treated like an empty one (count 0).
      cpuset_cpus=$(<"$cpuset_file") || cpuset_cpus=""
      _count_cpus_in_list "$cpuset_cpus"
      return 0
    fi
  done

  echo -1
  return 0
}

# Default values. The host/port/path/dispatch/stream settings can be overridden
# later through the matching upper-case environment variables; mode and
# worker_type are set from the command-line options.
controller_host="localhost"
controller_port="21001"
api_host="localhost"
api_port="8000"
worker_host="localhost"
worker_port="21002"
model_path=""
mode=""
# Holds the caller-supplied OMP_NUM_THREADS, if any; empty means "not set".
omp_num_threads=""
dispatch_method="shortest_queue" # shortest_queue or lottery
stream_interval=1
worker_type="model_worker"

# Update rootCA config if needed
update-ca-certificates

# Remember the value of `OMP_NUM_THREADS` supplied by the caller so it can be
# restored after `bigdl-llm-init` has been sourced in worker mode.
# NOTE(review): presumably bigdl-llm-init may overwrite it - confirm.
if [[ -n "${OMP_NUM_THREADS}" ]]; then
  omp_num_threads="${OMP_NUM_THREADS}"
fi

# We do not have any arguments, just run bash
if [[ $# -eq 0 ]]; then
  echo "[INFO] no command is passed in"
  echo "[INFO] enter pass-through mode"
  # exec replaces this shell, so nothing below runs in pass-through mode.
  exec /usr/bin/tini -s -- "bash"
fi

# Parse command-line options
if ! options=$(getopt -o "m:hw:" --long "mode:,help,worker:" -n "$0" -- "$@"); then
  usage
fi
# getopt emits a shell-quoted argument list; eval is the documented way to
# load it back into the positional parameters.
eval set -- "$options"

while true; do
  case "$1" in
    -m|--mode)
      mode="$2"
      [[ "$mode" == "controller" || "$mode" == "worker" ]] || usage
      shift 2
      ;;
    -w|--worker)
      worker_type="$2"
      [[ "$worker_type" == "model_worker" || "$worker_type" == "vllm_worker" ]] || usage
      shift 2
      ;;
    -h|--help)
      usage
      ;;
    --)
      shift
      break
      ;;
    *)
      usage
      ;;
  esac
done

# Map the short worker name onto the Python module that implements it.
case "$worker_type" in
  model_worker)
    worker_type="bigdl.llm.serving.model_worker"
    ;;
  vllm_worker)
    worker_type="bigdl.llm.serving.vllm_worker"
    ;;
esac

# A non-empty environment variable overrides the corresponding default.
controller_host="${CONTROLLER_HOST:-$controller_host}"
controller_port="${CONTROLLER_PORT:-$controller_port}"
api_host="${API_HOST:-$api_host}"
api_port="${API_PORT:-$api_port}"
worker_host="${WORKER_HOST:-$worker_host}"
worker_port="${WORKER_PORT:-$worker_port}"
model_path="${MODEL_PATH:-$model_path}"
dispatch_method="${DISPATCH_METHOD:-$dispatch_method}"
stream_interval="${STREAM_INTERVAL:-$stream_interval}"

controller_address="http://$controller_host:$controller_port"

# Execute logic based on options
if [[ "$mode" == "controller" ]]; then
  # Logic for controller mode
  api_address="http://$api_host:$api_port"
  echo "Controller address: $controller_address"
  echo "OpenAI API address: $api_address"
  # Boot Controller in the background
  python3 -m fastchat.serve.controller \
    --host "$controller_host" \
    --port "$controller_port" \
    --dispatch-method "$dispatch_method" &
  # Boot openai api server in the foreground
  python3 -m fastchat.serve.openai_api_server \
    --host "$api_host" \
    --port "$api_port" \
    --controller-address "$controller_address"
else
  # Logic for non-controller(worker) mode; also taken when no --mode was given.
  worker_address="http://$worker_host:$worker_port"
  # Apply optimizations from bigdl-llm
  source bigdl-llm-init -t
  # First check whether the user has set OMP_NUM_THREADS themselves
  if [[ -n "${omp_num_threads}" ]]; then
    echo "Setting OMP_NUM_THREADS to its original value: $omp_num_threads"
    export OMP_NUM_THREADS="$omp_num_threads"
  else
    # Use calculate_total_cores to acquire cpuset settings and set
    # OMP_NUM_THREADS to the matching number of cores.
    cores=$(calculate_total_cores)
    if [[ "$cores" == -1 || "$cores" == 0 ]]; then
      echo "Failed to obtain the number of cores, will use the default settings OMP_NUM_THREADS=$OMP_NUM_THREADS"
    else
      echo "Setting OMP_NUM_THREADS to $cores"
      export OMP_NUM_THREADS="$cores"
    fi
  fi

  if [[ -z "${model_path}" ]]; then
    echo "Please set env MODEL_PATH used for worker"
    usage
  fi

  echo "Worker type: $worker_type"
  echo "Worker address: $worker_address"
  echo "Controller address: $controller_address"

  # Arguments shared by both worker implementations; kept in an array so that
  # values containing spaces (e.g. MODEL_PATH) stay single arguments.
  worker_args=(
    --model-path "$model_path"
    --device cpu
    --host "$worker_host"
    --port "$worker_port"
    --worker-address "$worker_address"
    --controller-address "$controller_address"
  )
  if [[ "$worker_type" == "bigdl.llm.serving.model_worker" ]]; then
    # Only model_worker is passed --stream-interval.
    python3 -m "$worker_type" "${worker_args[@]}" --stream-interval "$stream_interval"
  elif [[ "$worker_type" == "bigdl.llm.serving.vllm_worker" ]]; then
    python3 -m "$worker_type" "${worker_args[@]}"
  fi
fi

# Reached once the foreground server process has exited: replace this shell
# with a bash that reads commands from stdin (-s) with "bash" as its $1.
exec /usr/bin/bash -s -- "bash"