#!/bin/bash

# Copyright (c) 2020, NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are those
# of the authors and should not be interpreted as representing official policies,
# either expressed or implied, of the FreeBSD Project.

# Scratch directory for intermediate files; cleanup() removes it on exit.
TMP_DIR="$(mktemp -d)"

# Debug tracing stays off unless the caller already set DEBUG.
: "${DEBUG:=0}"

# Name of the register access tool (invoked as: <tool> <rshimN> <addr>.64 [value]).
BF_REG="bf-reg"

# FIFO path used by the 'ncpipe' remote mode.
RSHIM_PIPE="/tmp/rshim_pipe"

# Installation log; local mode later renames it per rshim device.
LOG_FILE="/tmp/bfb-install.log"

# Readiness flags checked by run_cmd before executing anything.
run_cmd_local_ready=0     # 1 once 'run_cmd local' may be used
run_cmd_remote_ready=0    # 1 once 'run_cmd remote' may be used

# Register addresses. These are the BF3 values; apply_chip_settings
# overrides them for BlueField-1 and BlueField-2.
RSH_BOOT_FIFO_COUNT="0x13001000"
RSH_BOOT_FIFO_DATA="0x13002000"
RSH_SCRATCHPAD2="0x13000c48"
RSH_SWINT="0x13000318"
RSH_BREADCRUMB1="0x13000518"

# Print the command line synopsis, one option per line, to stdout.
usage ()
{
  # One printf call; every argument becomes its own output line.
  printf '%s\n' \
    "Usage: $0 [options]" \
    "Options:" \
    "  -a, --activate <0|1>           Activate the upgrade." \
    "  -b, --bfb <bfb_file>           BFB image file to use." \
    "  -c, --config <config_file>     Optional configuration file." \
    "  -f, --rootfs <rootfs_file>     Optional rootfs file." \
    "  -h, --help                     Show help message." \
    "  -k, --keep-log                 Do not clear the log after reading during bfb install." \
    "  -l, --lfwp                     Enable LFWP upgrade." \
    "  -m, --remote-mode <mode>       Remote mode to use (scp, nc, ncpipe)." \
    "  -p, --pldm <pldm_file>         PLDM image for runtime upgrade." \
    "  -r, --rshim <device>           Rshim device, format [<ip>:<port>:]rshim<N>." \
    "  -R, --reverse-nc               Reverse netcat mode." \
    "  -u, --runtime                  Runtime upgrade (local rshim only)." \
    "  -v, --verbose                  Enable verbose output."
}

# Echo the arguments only when the global $verbose flag equals 1.
# Usage: echo_v "Your message"
echo_v() {
  # Stay silent (and succeed) unless verbose output was requested.
  [ "$verbose" -eq 1 ] || return 0
  echo "$@"
}

# Echo the arguments only when the global $DEBUG is non-empty and equals 1.
# Usage: echo_dbg "Your message"
echo_dbg() {
  # An empty DEBUG counts as "off".
  [ -n "$DEBUG" ] || return 0
  [ "$DEBUG" -eq 1 ] || return 0
  echo "$@"
}

# Run a command locally or remotely via SSH
#
# $1: mode (local or remote)
# $2: command (a single string, interpreted by 'sh -c' locally or by the
#     remote login shell over SSH)
#
# Global variables used:
#   $ip, $sudo_prefix, $run_cmd_local_ready, $run_cmd_remote_ready
#
# Output:
#   stdout carries only the command's own output (plus the echo_dbg trace
#   when debugging); all diagnostics of this function go to stderr so that
#   callers capturing the output with $(...) never receive an error message
#   as data.
#
# Returns:
#   the exit status of the executed command; 1 for an unknown mode.
#   Exits the shell with status 1 on missing arguments or when the requested
#   mode has not been marked ready yet.
#
# Example:
#   run_cmd local "ls -l"
#   run_cmd remote "ls -l"
run_cmd()
{
  if [ $# -lt 2 ]; then
    echo "Error: run_cmd() needs at least 2 arguments." >&2
    exit 1
  fi

  local mode=$1
  local command=$2

  echo_dbg "Running command in $mode mode: $command"

  case "$mode" in
    local)
      if [ "$run_cmd_local_ready" -eq 0 ]; then
        echo "Error: 'run_cmd local' are not ready to use" >&2
        exit 1
      fi
      # $sudo_prefix is deliberately unquoted: when it is empty it must
      # vanish from the command line instead of becoming an empty word.
      $sudo_prefix sh -c "$command"
      ;;
    remote)
      if [ "$run_cmd_remote_ready" -eq 0 ]; then
        echo "Error: 'run_cmd remote' are not ready to use" >&2
        exit 1
      fi
      # Execute the command over the network via SSH
      ssh "root@$ip" "$command"
      ;;
    *)
      echo "Error: invalid mode: $mode" >&2
      return 1
      ;;
  esac
}

# Run a command locally or remotely via SSH and exit on error with a custom
# message
#
# $1: mode (local or remote)
# $2: command
# $3: custom error message (optional, defaults to "Error: Command failed");
#     the failing command is always appended as ": [<command>]"
#
# The command itself is executed through run_cmd. On failure the message is
# written to stderr (so it is never mixed into captured command output) and
# the shell exits with the command's exit status. Note that when this
# function is called inside $(...), the exit only ends that subshell.
run_cmd_exit()
{
  if [ $# -lt 2 ]; then
    echo "Error: run_cmd_exit() requires at least 2 arguments." >&2
    exit 1
  fi

  local mode=$1
  local command=$2
  local error_msg="${3:-Error: Command failed}: [$command]"
  local retval

  run_cmd "$mode" "$command"
  retval=$?

  if [ "$retval" -ne 0 ]; then
    echo "$error_msg" >&2
    exit "$retval"
  fi
}

# Print the local source IP address used to reach the remote host
#
# Global variables used: $ip (address of the remote host)
#
# Output:
#   stdout: the address that follows "src" in the 'ip route get' output,
#           and nothing else. All diagnostics go to stderr so that
#           callers doing local_ip=$(get_local_ip) never receive an error
#           message as the address.
# Returns:
#   0 on success, 1 if $ip is empty, the route lookup reports an error, or
#   no "src" address is present.
get_local_ip()
{
  local -a route_lines
  local src_ip

  if [ -z "$ip" ]; then
    echo "The global variable \$ip is not defined." >&2
    return 1
  fi

  # Capture both stdout and stderr of the ip route get command
  readarray -t route_lines <<< "$(ip route get "$ip" 2>&1)"

  # Check for known error messages in the command output
  if echo "${route_lines[@]}" | grep -qi "error"; then
    echo "Error: Invalid IP address or routing error." >&2
    return 1
  fi

  # Parse the output for the local IP address, which is right after "src"
  src_ip=$(echo "${route_lines[@]}" | awk '/src/ {for(i=1;i<=NF;i++) if($i=="src") {print $(i+1); exit}}')

  # Check if a local IP was found
  if [ -z "$src_ip" ]; then
    echo "Failed to determine the local IP address." >&2
    return 1
  fi

  echo "$src_ip"
}

# Drain the rshim boot FIFO: keep reading BOOT_FIFO_DATA until
# BOOT_FIFO_COUNT reads back as zero.
#
# Global variables used:
#   $BF_REG, $rshim_node, $RSH_BOOT_FIFO_COUNT, $RSH_BOOT_FIFO_DATA
clear_boot_fifo()
{
  local dev pending

  dev=$(basename ${rshim_node})

  while :; do
    # The third field of the register tool's output is taken as the count.
    pending=$(${BF_REG} ${dev} ${RSH_BOOT_FIFO_COUNT}.64 | awk '{print $3}')
    if [ $((pending)) -eq 0 ]; then
      break
    fi
    # Reading the data register is what empties the FIFO; the value itself
    # is discarded.
    ${BF_REG} ${dev} ${RSH_BOOT_FIFO_DATA}.64 >/dev/null
  done
}

# Push the boot stream to rshim via local rshim
#
# Concatenates $bfb, the optional $cfg and the optional $rootfs (piped
# through $pv when set) into ${rshim_node}/boot.
#
# For a runtime upgrade ($runtime = 1) the push is bracketed by:
#   - BOOT_RESET_SKIP 1 written to ${rshim_node}/misc (and 0 afterwards),
#   - draining the boot FIFO,
#   - setting bit 2 of SCRATCHPAD2,
#   - writing 0x4 to SWINT when not in NIC mode ($nic_mode = 0).
#
# Global variables used:
#   $bfb, $cfg, $rootfs, $pv, $rshim_node, $runtime, $nic_mode,
#   $sudo_prefix, $BF_REG, $RSH_SCRATCHPAD2, $RSH_SWINT
#
# Returns:
#   0 on success, 1 if writing the boot stream failed (the error is
#   reported on stderr).
push_boot_stream_via_local_rshim()
{
  local sp2

  # Push the boot stream to local rshim
  echo "Pushing bfb${cfg:+ + cfg}${rootfs:+ + rootfs}"

  if [ $runtime -eq 1 ]; then
    # Skip reset when pushing bfb
    echo "BOOT_RESET_SKIP 1" > ${rshim_node}/misc

    # Clear the current boot fifo
    clear_boot_fifo

    # Set SP2.BIT2=1
    sp2=$(${BF_REG} $(basename ${rshim_node}) ${RSH_SCRATCHPAD2}.64 | awk '{print $3}')
    sp2=$((sp2 | 4))
    ${BF_REG} $(basename ${rshim_node}) ${RSH_SCRATCHPAD2}.64 $sp2 >/dev/null

    # Set SWINT2.BIT2 for DPU mode
    if [ ${nic_mode} -eq 0 ]; then
      ${BF_REG} $(basename ${rshim_node}) ${RSH_SWINT}.64 0x4 >/dev/null
    fi
  fi

  if ! $sudo_prefix sh -c "cat ${bfb} ${cfg:+$cfg} ${rootfs:+${rootfs}} ${pv:+| ${pv} | cat -} > ${rshim_node}/boot"; then
    echo "Error: Failed to push boot stream via local rshim" >&2
    # An explicit status is required: a bare 'return' would hand back the
    # (successful) status of the echo above.
    return 1
  fi

  if [ $runtime -eq 1 ]; then
    echo "BOOT_RESET_SKIP 0" > ${rshim_node}/misc
  fi
}

# Push the boot stream to rshim via remote rshim ("scp" remote mode)
#
# Despite the mode name, the data is streamed through an SSH session:
# $bfb, the optional $cfg and the optional $rootfs are concatenated (piped
# through $pv when set) into 'ssh root@$ip "cat > ${rshim_node}/boot"'.
#
# Global variables used
#   $bfb, $cfg, $rootfs, $pv, $ip, $rshim_node
#
# Returns:
#   0 on success, 1 if the transfer pipeline failed (the error is reported
#   on stderr).
push_boot_stream_via_remote_rshim_scp()
{
  # Push the boot stream to remote rshim via ssh copy
  echo "Pushing bfb${cfg:+ + cfg}${rootfs:+ + rootfs} to ${ip} via scp"
  if ! sh -c "cat ${bfb} ${cfg:+$cfg} ${rootfs:+${rootfs}} ${pv:+| ${pv} | cat -} | ssh root@$ip \"cat > ${rshim_node}/boot\""; then
    echo "Error: Failed to push boot stream via remote rshim with scp" >&2
    # An explicit status is required: a bare 'return' would hand back the
    # (successful) status of the echo above.
    return 1
  fi
}

# Push the boot stream to rshim via remote rshim with netcat
#
# Two directions are supported:
#   - $reverse_nc = 1: the remote host listens on $port and writes what it
#     receives to ${rshim_node}/boot; the local host connects and sends.
#   - otherwise: the local host listens on $port and sends; the remote host
#     connects back to the local address and writes to ${rshim_node}/boot.
#
# Global variables used:
#   $bfb, $cfg, $rootfs, $pv, $ip, $port, $rshim_node, $reverse_nc
#
# Any failing step terminates the script (run_cmd_exit, or an explicit exit
# when the local address cannot be determined).
push_boot_stream_via_remote_rshim_nc()
{
  local timeout=20   # in seconds
  local data start_nc_server_cmd nc_client_cmd local_ip

  data="${bfb} ${cfg:+$cfg} ${rootfs:+${rootfs}}"
  # Push the boot stream to remote rshim via netcat
  echo "Pushing $data to ${ip} via netcat $( [ "$reverse_nc" -eq 0 ] || echo '(in reverse mode)' )"

  if [ "$reverse_nc" -eq 1 ]; then
    # Remote as the netcat server and local as the client

    echo "Starting a netcat server on the remote host..."
    # We use nohup to keep the server running after the SSH session is closed.
    # We use dd instead of writing directly to the file to avoid the error of
    # "write: Interrupted system call"; The bs size doesn't matter.
    start_nc_server_cmd="nohup nc -l -p $port | dd bs=1M of=${rshim_node}/boot 2>/dev/null &"
    run_cmd_exit remote "$start_nc_server_cmd"
    sleep 3  # delay to make sure the server is ready
    wait_for_remote_process nc $timeout \
      "Error: Failed to start the remote netcat server"

    echo "Sending bitstream to a remote host with RSHIM..."
    nc_client_cmd="cat $data ${pv:+| ${pv} | cat -} | nc $ip $port"
    run_cmd_exit local "$nc_client_cmd"
  else
    # Local as the netcat server and remote as the client

    # Resolve the address the remote side must connect to before anything
    # is started, so a lookup failure leaves no listener behind and is not
    # passed on to nc as a bogus host name.
    if ! local_ip=$(get_local_ip) || [ -z "$local_ip" ]; then
      echo "Error: Failed to determine the local IP address" >&2
      exit 1
    fi

    echo "Starting a netcat server on the local host..."
    start_nc_server_cmd="cat $data ${pv:+| ${pv} | cat -} | nc --send-only -l -p $port &"
    run_cmd_exit local "$start_nc_server_cmd"
    sleep 3  # delay to make sure the server is ready

    echo "Sending bitstream from remote to local host with RSHIM..."
    # must put remote nc in the background to avoid blocking the script
    nc_client_cmd="nohup nc $local_ip $port 2>/dev/null | dd bs=1M of=${rshim_node}/boot &>/dev/null"
    run_cmd_exit remote "$nc_client_cmd" &
    wait_for_remote_process nc $timeout \
      "Error: Failed to start the remote netcat client"
  fi
}

# Run a script remotely via SSH to forward data from a network receiver (netcat
# server or client) to the rshim device node. This separate script is needed
# for improved performance
#
# The script text is fed to 'sh -s' on root@$ip through a here-document.
# The here-document delimiter is unquoted, so:
#   - ${RSHIM_PIPE:-...} and ${rshim_node:-...} are expanded LOCALLY, before
#     the text is sent;
#   - everything written as \$NAME is expanded on the remote side.
# Remotely, the script (re)creates the FIFO and runs a dd that copies from
# the FIFO to the rshim boot node; the call therefore blocks until that dd
# ends, which is why callers start this function in the background.
#
# Global variables used:
#   $ip, $RSHIM_PIPE, $rshim_node
run_pipe_to_rshim_script()
{
    # Execute script remotely via SSH
    # shellcheck disable=SC2087
    ssh root@$ip 'sh -s' << EOF
RSHIM_PIPE=${RSHIM_PIPE:-"/tmp/rshim_pipe"}
RSHIM_BOOT_NODE=${rshim_node:-"/dev/rshim0"}/boot
BLOCK_SIZE=2048000  # smaller block size performs worse

if [ -e "\$RSHIM_PIPE" ]; then
  rm \$RSHIM_PIPE
fi

mkfifo \$RSHIM_PIPE

if ! dd if=\$RSHIM_PIPE of=\$RSHIM_BOOT_NODE bs=\$BLOCK_SIZE; then
    echo "Error occurred in dd command"
    exit 1
fi
EOF
}

# Use SSH execution to check whether a process is running on the remote host
#
# $1: process name (matched exactly with 'pgrep -x')
# $2: timeout in seconds (optional, defaults to 10)
# $3: custom error message (optional)
#
# The remote side polls once per second under 'timeout'. If the process
# does not show up in time, run_cmd_exit reports the error message and
# terminates the script.
wait_for_remote_process()
{
  local default_timeout=10
  local process=$1
  local timeout=${2:-$default_timeout}
  local error_msg="${3:-Error: Time out waiting for launching process $process}"
  local cmd timeout_cmd

  cmd="while true; do pgrep -x $process >/dev/null && break; sleep 1; done"
  timeout_cmd="timeout $timeout sh -c \"$cmd\""

  run_cmd_exit remote "$timeout_cmd" "$error_msg"
}

# You can call execute_remote_script multiple times as needed in your script

# Push the boot stream to rshim via remote rshim with netcat and a persistent
# pipe
#
# A remote helper (run_pipe_to_rshim_script) copies from the FIFO
# $RSHIM_PIPE into the rshim boot node; netcat only has to deliver the data
# into that FIFO. Two directions are supported:
#   - $reverse_nc = 1: the remote host listens on $port; the local host
#     connects and sends.
#   - otherwise: the local host listens on $port and sends; the remote host
#     connects back to the local address.
#
# Global variables used:
#   $bfb, $cfg, $rootfs, $pv, $ip, $port, $reverse_nc, $RSHIM_PIPE
#   ($rshim_node is consumed by run_pipe_to_rshim_script)
#
# Any failing step terminates the script (run_cmd_exit, or an explicit exit
# when the local address cannot be determined).
push_boot_stream_via_remote_rshim_ncpipe()
{
  local timeout=20   # in seconds
  local data nc_server_cmd nc_client_cmd local_ip

  data="${bfb} ${cfg:+$cfg} ${rootfs:+${rootfs}}"

  if [ "$reverse_nc" -ne 1 ]; then
    # The remote client needs the local address. Resolve it before any
    # remote process is started, so a lookup failure leaves nothing behind
    # and is not passed on to nc as a bogus host name.
    if ! local_ip=$(get_local_ip) || [ -z "$local_ip" ]; then
      echo "Error: Failed to determine the local IP address" >&2
      exit 1
    fi
  fi

  echo "Starting the remote pipe-to-rshim process..."
  run_pipe_to_rshim_script &
  wait_for_remote_process dd $timeout \
    "Error: Failed to start the remote pipe-to-rshim process"

  if [ "$reverse_nc" -eq 1 ]; then
    # Reverse mode: the remote host is the netcat server and the local host
    # is the client.
    echo "Starting the remote netcat server"
    nc_server_cmd="nohup nc -l -p $port | dd bs=1M of=$RSHIM_PIPE 2>/dev/null &"
    run_cmd_exit remote "$nc_server_cmd"
    # It could be very slow to start the netcat server on the remote.
    wait_for_remote_process nc $timeout \
      "Error: Failed to start the remote netcat server"

    # Push the boot stream to remote rshim via netcat + persistent pipe
    echo "Pushing $data with nc + pipe to Remote"
    nc_client_cmd="cat $data ${pv:+| ${pv} | cat -} | nc $ip $port"
    run_cmd_exit local "$nc_client_cmd"
  else
    # Normal mode: the local host is the netcat server and the remote host
    # is the client.
    echo "Starting the local netcat server"
    nc_server_cmd="cat $data ${pv:+| ${pv} | cat -} | nc --send-only -l -p $port &"
    # No extra debug trace here: run_cmd already logs the command.
    run_cmd_exit local "$nc_server_cmd"

    echo "Starting remote netcat client to get data"
    nc_client_cmd="nohup nc $local_ip $port > $RSHIM_PIPE 2>/dev/null"
    # The remote client must run in the background to avoid blocking.
    run_cmd_exit remote "$nc_client_cmd" &
    wait_for_remote_process nc $timeout \
      "Error: Failed to start the remote netcat client"
  fi
}

# Dispatch the boot stream push to the transport selected on the command
# line.
#
# Global variables used:
#  $mode, $remote_mode
#
# Any $mode other than "local" is treated as remote; an unrecognised
# $remote_mode pushes nothing.
push_boot_stream()
{
  if [ "$mode" != "local" ]; then
    case "$remote_mode" in
      scp)    push_boot_stream_via_remote_rshim_scp ;;
      nc)     push_boot_stream_via_remote_rshim_nc ;;
      ncpipe) push_boot_stream_via_remote_rshim_ncpipe ;;
    esac
    return
  fi

  push_boot_stream_via_local_rshim
}

# Detect whether a local BlueField-3 device is running in NIC mode.
#
# Does nothing unless $mode is "local" and the DEV_INFO line of
# ${rshim_node}/misc mentions BlueField-3.
#
# Global variables used: $mode, $rshim_node, $verbose
# Global variables set:
#   $pcie_bd   - PCIe bus:device taken from the DEV_NAME line of misc
#   $num_ports - number of lspci entries for that device, excluding [0801]
#   $nic_mode  - 1 when in NIC mode, 0 otherwise
check_nic_mode()
{
  local misc_field offload_state

  if [ "$mode" != "local" ]; then
    return 0
  fi

  # Only needs to check for BF3.
  misc_field=$(cat ${rshim_node}/misc | grep DEV_INFO | grep BlueField-3)
  if [ -z "$misc_field" ]; then
    return 0
  fi

  # Get PCIE BDF: drop the first five characters of the device name, then
  # everything from the first dot onwards (the function number).
  misc_field=$(cat ${rshim_node}/misc | grep DEV_NAME | awk '{print $2}')
  misc_field=${misc_field:5}
  pcie_bd="${misc_field%%.*}"

  num_ports=$(lspci -nn -s $pcie_bd | grep -v '\[0801\]' | wc -l || { echo "Failed to query PCIe ports for $pcie_bd"; echo 0; })
  if [ "$num_ports" -eq 2 ] && [ $verbose -eq 1 ]; then
    echo "Detected dual-port device: ${pcie_bd}.0 and ${pcie_bd}.1"
  fi

  # Check NIC mode: either the firmware configuration reports the internal
  # CPU offload engine as DISABLED(1), or misc itself says "NIC mode".
  offload_state=$(mlxconfig -d ${pcie_bd}.0 -e q INTERNAL_CPU_OFFLOAD_ENGINE 2>/dev/null | grep INTERNAL_CPU_OFFLOAD_ENGINE | awk '{print $(NF-1)}')
  nic_mode=0
  if [ ."$offload_state" = ."DISABLED(1)" ] || grep -q "NIC mode" ${rshim_node}/misc; then
    echo "${rshim_node} (${pcie_bd}) is in NIC mode"
    nic_mode=1
  fi
}

# Wait for RSHIM to finish updating by monitoring keywords in the RSHIM log
#
# Polls ${rshim_node}/misc once per second (through run_cmd_exit, so it
# works for local and remote rshim), echoes new log text to the terminal,
# appends it to $LOG_FILE, and returns once a completion keyword shows up.
#
# Global variables used:
#   $mode, $rshim_node, $runtime, $nic_mode, $clear_on_read, $LOG_FILE
# Global variables set (not declared local):
#   $last, $finished, $last_len, $cur, $cur_len, $cmd_get_log, $sub_cur
wait_for_update_to_finish()
{
  # 'filter0' indicates bfb installation completion, thus CLEAR_ON_READ
  # can be disabled for next boot. 'filter' is related to specific mode
  # which might include extra configuration or booting, and is used as
  # the exit condition of the bfb-install script.
  local filter0 filter

  # Both filters are extended regular expressions, matched case-insensitively
  # (grep -Ei) below.
  filter0="Rebooting\.\.\.|finished|DPU is ready|Linux up|CRITICAL ERROR"

  if [ $runtime -eq 0 ]; then
    filter0="$filter0|In Enhanced NIC mode"
    if [ $nic_mode -eq 1 ]; then
      # In NIC mode only the NIC-mode message ends the wait.
      filter="In Enhanced NIC mode"
    else
      filter="$filter0"
    fi
  else
    # Runtime upgrade: a single dedicated completion message.
    filter0="Runtime upgrade finished"
    filter="$filter0"
  fi


  echo "Collecting BlueField booting status. Press Ctrl+C to stop…"

  # Enable CLEAR_ON_READ, so rshim log will be cleared after read.
  if [ $clear_on_read -eq 1 ]; then
    run_cmd_exit $mode "echo 'CLEAR_ON_READ 1' > ${rshim_node}/misc"
  fi

  # Set display level to 2 to show more information
  run_cmd_exit $mode "echo 'DISPLAY_LEVEL 2' > ${rshim_node}/misc"

  # Create log file with date
  echo "# $(date)" > ${LOG_FILE}

  # 'last' holds the log text of the previous poll; it is compared with the
  # current text to decide whether to print everything or only the new tail.
  last=""
  finished=0
  while [ $finished -eq 0 ]; do
    last_len=${#last}
    # Keep only the part of misc from the first line starting with " INFO"
    # to the end of the file.
    cmd_get_log="cat ${rshim_node}/misc | sed -n '/^ INFO/,\$p'"
    # NOTE(review): run_cmd_exit runs inside a command substitution here, so
    # its 'exit' on failure only ends the subshell, not this loop - confirm
    # that continuing to poll after a read failure is intended.
    cur=$(run_cmd_exit $mode "$cmd_get_log")
    cur_len=${#cur}

    sleep 1

    # Exit condition: the loop ends after this iteration's output is printed.
    if echo ${cur} | grep -Ei "$filter" >/dev/null; then
      finished=1
    fi

    if echo ${cur} | grep -Ei "$filter0" >/dev/null; then
      # Disable CLEAR_ON_READ.
      run_cmd_exit $mode "echo 'CLEAR_ON_READ 0' > ${rshim_node}/misc"
    fi

    # Overwrite if current length smaller than previous length.
    # (Also taken on the very first poll, when nothing was seen before.)
    # Blank lines are dropped from what is printed and logged.
    if [ ${last_len} -eq 0 -o ${last_len} -gt ${cur_len} ]; then
        echo "${cur}" | sed '/^[[:space:]]*$/d' | tee -a ${LOG_FILE}
      last="${cur}"
      continue
    fi

    # Overwrite if first portion does not match.
    # (dd copies the first ${last_len} bytes of the current text.)
    sub_cur=$(echo "${cur}" | dd bs=1 count=${last_len} 2>/dev/null)
    if [ "${sub_cur}" != "${last}" ]; then
      echo "${cur}" | sed '/^[[:space:]]*$/d' | tee -a ${LOG_FILE}
      last="${cur}"
      continue
    fi

    # Nothing if no update.
    # (When finished is already 1 the code falls through instead, so the
    # loop condition can end the wait.)
    if [ ${last_len} -eq ${cur_len} ]; then
      [ $finished -eq 0 ] && continue;
    fi

    # Print the diff.
    # (dd skips the ${last_len} bytes that were already shown.)
    echo "${cur}" | dd bs=1 skip=${last_len} 2>/dev/null | \
      sed '/^[[:space:]]*$/d' | tee -a ${LOG_FILE}

    last="${cur}"
  done

  # Disable CLEAR_ON_READ.
  run_cmd_exit $mode "echo 'CLEAR_ON_READ 0' > ${rshim_node}/misc"
}

# Select the register addresses that match the attached chip.
#
# Reads the DEV_INFO line of ${rshim_node}/misc (locally or remotely,
# depending on $mode), takes the first 11 characters of its second field
# as the chip family and, for BlueField-1 and BlueField-2, replaces the
# default RSH_* register addresses. Other chips keep the defaults.
#
# Global variables used: $mode, $rshim_node
# Global variables set:
#   $RSH_BOOT_FIFO_COUNT, $RSH_BOOT_FIFO_DATA, $RSH_SCRATCHPAD2,
#   $RSH_SWINT, $RSH_BREADCRUMB1
apply_chip_settings()
{
  local dev_info family

  dev_info=$(run_cmd_exit $mode "cat ${rshim_node}/misc | grep DEV_INFO")
  family=$(echo "${dev_info}" | awk '{print $2}')
  family=${family:0:11}

  case "${family}" in
    BlueField-1|BlueField-2)
      RSH_BOOT_FIFO_COUNT=0x00010488
      RSH_BOOT_FIFO_DATA=0x00010408
      RSH_SCRATCHPAD2=0x00010c28
      RSH_SWINT=0x00010318
      RSH_BREADCRUMB1=0x00010518
      ;;
  esac
}

# Clean up function whenever the script exits
#
# Installed as a trap handler. It:
#   - removes $TMP_DIR,
#   - reports "BlueField Update Failed" when the status in effect on entry
#     (the script's exit status when run from the EXIT trap) is non-zero,
#   - kills leftover netcat processes and removes the remote FIFO,
#   - for runtime upgrades, restores BOOT_RESET_SKIP, clears bit 2 of
#     SCRATCHPAD2 and re-raises SWINT when not in NIC mode,
#   - disables CLEAR_ON_READ,
#   - re-binds the PFs that were bound before a NIC-mode (non-runtime)
#     installation.
#
# A second invocation exits immediately with status 1.
#
# Global variables used:
#   $TMP_DIR, $cleanup_started, $run_cmd_local_ready, $run_cmd_remote_ready,
#   $remote_mode, $RSHIM_PIPE, $runtime, $rshim_node, $BF_REG,
#   $RSH_SCRATCHPAD2, $RSH_SWINT, $nic_mode, $mode, $pcie_bd, $verbose,
#   $pf0_bound, $pf1_bound
# shellcheck disable=SC2317
cleanup() {
  # Must stay the very first statement: any later command overwrites $?.
  local exit_status=$?
  local sp2 i bound_var

  # Remove the temp directory.
  if [ -n "$TMP_DIR" ]; then
    rm -rf -- "$TMP_DIR"
  fi

  # prevent cleanup from being called multiple times
  if [ "$cleanup_started" -eq 1 ]; then
    exit 1
  fi
  cleanup_started=1

  if [ "$exit_status" -ne 0 ]; then
    echo "BlueField Update Failed"
  fi

  # Kill all netcat related processes on both ends
  if [ "$run_cmd_local_ready" -eq 1 ]; then
    run_cmd local "pgrep -x nc >/dev/null && pgrep -x nc | xargs kill -9"
  fi
  if [ "$run_cmd_remote_ready" -eq 1 ]; then
    run_cmd remote "pgrep -x nc >/dev/null && pgrep -x nc | xargs kill -9"
    if [ "$remote_mode" == "nc" ] || [ "$remote_mode" == "ncpipe" ]; then
      # NOTE(review): nothing in this script starts a process named
      # pipe_to_rshim (the remote helper is fed to 'sh -s'), so this pgrep
      # may never match - confirm the intended process name.
      run_cmd remote \
        "pgrep pipe_to_rshim >/dev/null && pgrep pipe_to_rshim | xargs kill -9"
      run_cmd remote "rm -f $RSHIM_PIPE"
    fi
  fi

  if [ "$runtime" -eq 1 ]; then
    # Reset to default state.
    echo "BOOT_RESET_SKIP 0" > ${rshim_node}/misc

    # Cleanup SP2.BIT2.
    sp2=$(${BF_REG} $(basename ${rshim_node}) ${RSH_SCRATCHPAD2}.64 | awk '{print $3}')
    sp2=$((sp2 & ~4))
    ${BF_REG} $(basename ${rshim_node}) ${RSH_SCRATCHPAD2}.64 $sp2 >/dev/null

    # Set SWINT2.BIT2 for DPU mode
    if [ "${nic_mode}" -eq 0 ]; then
      ${BF_REG} $(basename ${rshim_node}) ${RSH_SWINT}.64 0x4 >/dev/null
    fi
  fi

  # Disable CLEAR_ON_READ.
  if [ "$mode" == "local" ] && [ "$run_cmd_local_ready" -eq 1 ]; then
    run_cmd_exit local "echo 'CLEAR_ON_READ 0' > ${rshim_node}/misc"
  fi
  if [ "$mode" == "remote" ] && [ "$run_cmd_remote_ready" -eq 1 ]; then
    run_cmd_exit remote "echo 'CLEAR_ON_READ 0' > ${rshim_node}/misc"
  fi

  # Restore the original binding states for PF0 and PF1
  if [ "$nic_mode" -eq 1 ] && [ -n "${pcie_bd}" ] && [ "${runtime}" -eq 0 ]; then
    for i in 0 1; do
      # pf0_bound / pf1_bound record whether the PF was bound beforehand.
      bound_var="pf${i}_bound"
      if [ "${!bound_var}" -eq 0 ]; then
        [ "$verbose" -eq 1 ] && echo "Re-binding: skipping originally unbound pf${i} (${pcie_bd}.${i})"
        continue
      fi
      if [ ! -e "/sys/bus/pci/drivers/mlx5_core/${pcie_bd}.${i}" ]; then
        echo "Binding pf${i} (${pcie_bd}.${i})"
        run_cmd_exit local "echo ${pcie_bd}.${i} > /sys/bus/pci/drivers/mlx5_core/bind"
      fi
    done
  fi
}

# Main

# Defaults for the remote transport.
default_remote_mode=scp
default_nc_port=9527    # default nc server port for nc* methods

# Command line state (filled in by the option parser).
bfb=
pldm=
cfg=
rootfs=
mode=local        # Values can be local or remote
remote_mode=      # Values can be scp, nc, or ncpipe
rshim=            # rshim device string, format [<ip>:<port>:]rshim<N>
runtime=0         # Values can be 0 or 1.
verbose=0         # Values can be 0 or 1.
reverse_nc=0      # Values can be 0 or 1.
clear_on_read=1   # Values can be 0 or 1.
num_bfb=0
num_rshim=0
max_bfb=1
max_rshim=1
lfwp=0
activate=

rshim_node=       # rshim device identifier, e.g. rshim0
ip=               # IP address for remote host
port=

cleanup_started=0
# cleanup runs once, from the EXIT trap. INT and TERM only call 'exit':
# that terminates the script (a handler that merely returns would let the
# interrupted script carry on) and triggers the EXIT trap, which sees the
# conventional 128+signal status.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

nic_mode=0     # Flag to indicate whether DPU in NIC mod or not
pcie_bd=""        # PCIE Bus-Device
pf0_bound=0       # PF0 is bound prior to the script run
pf1_bound=0       # PF1 is bound prior to the script run

# Parse the command line with getopt(1). getopt emits a shell-quoted
# string; it has to be re-read with a QUOTED eval so that arguments
# containing whitespace survive unchanged.
options=$(getopt -n bfb-install -o a:b:c:f:hklm:p:r:Ruv \
        -l activate:,bfb:,config:,rootfs:,help,keep-log,lfwp,remote-mode:,reverse-nc,rshim:,pldm:,runtime,verbose \
        -- "$@")
if [ $? != 0 ]; then echo "Command line error" >&2; exit 1; fi
eval set -- "$options"
while [ "$1" != -- ]; do
  case $1 in
    --activate|-a) shift; activate=$1 ;;
    # The bfb path is made absolute; repeated -b options are counted so
    # that duplicates can be rejected later.
    --bfb|-b) shift; bfb=$(readlink -f "$1"); num_bfb=$((num_bfb + 1)) ;;
    --config|-c) shift; cfg=$1 ;;
    --rootfs|-f) shift; rootfs=$1 ;;
    --help|-h) usage; exit 0 ;;
    --keep-log|-k) clear_on_read=0 ;;
    # LFWP implies a runtime upgrade.
    --lfwp|-l) lfwp=1; runtime=1 ;;
    --pldm|-p) shift; pldm=$(readlink -f "$1") ;;
    --remote-mode|-m) shift; remote_mode=$1 ;;
    --rshim|-r) shift; rshim=$1; num_rshim=$((num_rshim + 1)) ;;
    --reverse-nc|-R) reverse_nc=1 ;;
    --runtime|-u) runtime=1 ;;
    --verbose|-v) verbose=1 ;;
    --) shift; break;;
    *) echo "Error: Invalid argument: $1" >&2; usage >&2; exit 1 ;;
  esac
  shift
done

# Parameter checks

# Default activate to the lfwp value.
activate=${activate:-$lfwp}

# The checks run in a fixed order; the first one that fails decides which
# message is reported.
param_error=
if [ -z "${bfb}" -a -z "${pldm}" -a ${activate} -eq 0 ]; then
  # Nothing to install and no activation requested.
  param_error="Error: Need to provide either bfb or pldm file."
elif [ -n "${bfb}" -a -n "${pldm}" ]; then
  # The two image kinds are mutually exclusive.
  param_error="Error: Can't provide both bfb and pldm file."
elif [ -z "${rshim}" ]; then
  # An rshim device is always required.
  param_error="Error: Need to provide rshim device name."
elif [ ${num_bfb} -gt ${max_bfb} ]; then
  # -b given more often than allowed.
  param_error="Error: More than one bfb image provided"
elif [ ${num_rshim} -gt ${max_rshim} ]; then
  # -r given more often than allowed.
  param_error="Error: More than one rshim device provided"
fi

if [ -n "${param_error}" ]; then
  echo "${param_error}"
  usage >&2
  exit 1
fi

# Split the --rshim argument, format [<ip>:<port>:]rshim<N>.
# A colon anywhere in the argument selects remote mode.
case "$rshim" in
  *:*)
    mode=remote
    remote_mode=${remote_mode:-$default_remote_mode}
    ip=$(echo "$rshim" | cut -d':' -f1 | tr -d '\n')

    # Field 2 may be a port or the device; field 3, if present, is the
    # device.
    potential_port=$(echo "$rshim" | cut -s -d':' -f2)
    potential_rshim_node=$(echo "$rshim" | cut -s -d':' -f3)

    if [ -n "$potential_rshim_node" ]; then
      # <ip>:<port>:<device>
      port=$potential_port
      rshim_node=$potential_rshim_node
    elif echo "$potential_port" | grep -qE '^[[:digit:]]+$'; then
      # <ip>:<port> - a numeric second field is a port, which means the
      # device identifier is missing and the argument is malformed.
      port=$potential_port
      echo "Error: Missing rshim device identifier." >&2
      usage >&2
      exit 1
    else
      # <ip>:<device>
      rshim_node=$potential_port
    fi
    ;;
  *)
    # Local mode: the argument is the device itself.
    rshim_node=$rshim

    # Use one log file per rshim device.
    LOG_FILE=${LOG_FILE%.*}-$(basename $rshim_node).log
    ;;
esac

# We don't allow remote modes for local rshim
if [ "$mode" == "local" ] && [ -n "$remote_mode" ]; then
  echo "Error: Remote mode is not supported for local rshim."
  exit 1
fi

if [ "$mode" == "remote" ] ; then
  if [ "$runtime" -eq 1 ]; then
    echo "Error: runtime upgrade is only supported for local rshim."
    exit 1
  fi

  # convert potential host name to IP address
  resolved_ip=$(getent ahosts "$ip" | awk '{print $1}' | head -n 1)
  # A failed lookup yields an empty string. Stop here instead of carrying
  # an empty address into the checks and the ssh/nc commands that follow.
  if [ -z "$resolved_ip" ]; then
    echo "Error: Failed to resolve remote host: $ip" >&2
    exit 1
  fi
  ip=$resolved_ip

  # We don't allow localhost for remote modes
  if [ "$ip" == "127.0.0.1" ] || [ "$ip" == "::1" ]; then
    echo "Error: localhost is not supported for remote mode."
    exit 1
  fi

  # Check allowed remote modes
  if [ "$remote_mode" == "scp" ]; then
    # We don't support port selection for scp mode
    if [ -n "$port" ]; then
      echo "Error: Port selection is not supported for scp mode."
      usage >&2
      exit 1
    fi
  elif [ "$remote_mode" == "nc" ] || [ "$remote_mode" == "ncpipe" ]; then
    # netcat modes fall back to the default port and require a numeric one
    port=${port:-$default_nc_port}
    if ! echo "$port" | grep -qE '^[0-9]+$'; then
      echo "Error: Invalid port number: $port" >&2
      usage >&2
      exit 1
    fi
  else
    echo "Error: Invalid remote mode: $remote_mode"
    usage >&2
    exit 1
  fi
fi

# Check if rshim_node starts with "/" and add "/dev/" if not
if [[ "${rshim_node}" != /* ]]; then
  rshim_node="/dev/${rshim_node}"
fi

# In verbose mode, summarize what is about to happen. Optional items are
# listed only when they are set.
if [ $verbose -eq 1 ]; then
  echo "Updating BlueField with $mode RSHIM"
  echo "  BFB file: $bfb"
  if [ -n "$cfg" ]; then
    echo "  Config File:: $cfg"
  fi
  if [ -n "$rootfs" ]; then
    echo "  Rootfs File: $rootfs"
  fi
  if [ "$mode" = "remote" ]; then
    echo "  Remote Update Mode: $remote_mode"
    echo "  Remote Host IP: $ip"
    # Port and direction only matter for the netcat based modes.
    case "$remote_mode" in
      nc|ncpipe)
        if [ -n "$port" ]; then
          echo "  Remote port: $port"
        fi
        if [ "$reverse_nc" -eq 1 ]; then
          echo "  Using reverse netcat mode"
        fi
        ;;
    esac
  fi
  echo "  RSHIM Device Node: $rshim_node"
fi

# Root access check.
# The test is kept as a string because it is evaluated here with eval and
# can also be handed to run_cmd*, where it runs under $sudo_prefix.
check_root_cmd="[ \$(id -u) -eq 0 ]"
echo "Checking if local host has root access..."
if ! eval "$check_root_cmd"; then
  # Not root: from now on every 'run_cmd local' command is prefixed with
  # sudo.
  echo "  Warning: No host root access. Trying sudo"
  sudo_prefix="sudo"
fi

# From here on 'run_cmd local' / 'run_cmd_exit local' may be used; run_cmd
# refuses to run local commands while this flag is 0.
run_cmd_local_ready=1

# Setup checks

# Check BF chip version and adjust register offsets.
apply_chip_settings

# Check NIC mode: sets $nic_mode and $pcie_bd for a local BlueField-3.
# (The mlx5_core unbinding itself is not done by check_nic_mode.)
check_nic_mode

# Check PLDM and convert it into BFB.
#
# Two preparation paths, both producing the file that $bfb finally points
# to inside $TMP_DIR:
#   1. a PLDM package is unpacked and repackaged with mlx-mkbfb;
#   2. for a runtime upgrade, a bundle BFB is repacked into flat format.
if [ -n "${pldm}" ]; then
  if [ $mode == "remote" ] ; then
    echo "Error: pldm is only for local rshim."
    exit 1
  fi

  if [ ! -e "${pldm}" ]; then
    echo "Error: ${pldm} not found."
    exit 1
  fi

  if [ ! -d "${TMP_DIR}" ]; then
    echo "Error: TMP_DIR not found."
    exit 1
  fi

  # PLDM automatically indicate runtime.
  runtime=1

  pldm_nicfw=""
  pldm_bfb=""
  mkdir ${TMP_DIR}/pldm
  fwpkg_unpack.py --unpack --outdir ${TMP_DIR}/pldm ${pldm}
  # Classify every unpacked *.bin by its first four bytes.
  for i in ${TMP_DIR}/pldm/*.bin; do
    # An unmatched glob stays literal, so a non-existing "file" means that
    # nothing was unpacked.
    if [ ! -e "${i}" ]; then
      echo "Error: unable to unpack ${pldm}."
      exit 1
    fi

    sig=$(head -c 4 < ${i})

    if [ ."${sig:0:2}" = ."Bf" ]; then
      # Signature starting with "Bf": treated as the BFB component.
      pldm_bfb="${i}"
    elif [ ."${sig}" = ."MTFW" ]; then
      # Signature "MTFW": treated as the NIC firmware component.
      pldm_nicfw="${i}"
    else
      # Any other file is turned into a NIC firmware image: its first
      # 16 bytes are replaced by a 16-byte header that begins with "MTFW"
      # (dd skips one 16-byte block of the original).
      printf "\\x4d\\x54\\x46\\x57\\xab\\xcd\\xef\\x00\\xfa\\xde\\x12\\x34\\x56\\x78\\xde\\xad" \
        > ${TMP_DIR}/pldm/nicfw_header
      dd if="${i}" of=${TMP_DIR}/pldm/nicfw_body bs=16 skip=1
      cat ${TMP_DIR}/pldm/nicfw_header ${TMP_DIR}/pldm/nicfw_body > ${TMP_DIR}/pldm/nicfw.bin
      pldm_nicfw=${TMP_DIR}/pldm/nicfw.bin
    fi
  done

  if [ -z "${pldm_nicfw}" -a -z "${pldm_bfb}" ]; then
    echo "Error: unable to find bfb or nicfw in ${pldm}."
    exit 1
  fi

  # Build the final BFB; the NIC firmware and the config file are passed
  # only when present.
  if ! mlx-mkbfb ${pldm_nicfw:+--nicfw ${pldm_nicfw}} ${cfg:+--boot-args ${cfg}} ${pldm_bfb} ${TMP_DIR}/pldm/pldm.bfb; then
    echo "Error: unable to create bfb from pldm"
    exit 1
  fi

  # From here on the generated file is handled like a regular -b image.
  pldm=""
  bfb="${TMP_DIR}/pldm/pldm.bfb"
elif [ ${runtime} -eq 1 -a -e "${bfb}" ]; then
  # Convert bundle BFB to flat BFB if needed.
  # This conversion is only supported on PCIe host.
  # A bundle is recognised by "In-memory filesystem" in the 'mlx-mkbfb -d'
  # listing.
  is_bundle=$(mlx-mkbfb -d "${bfb}" | grep "In-memory filesystem")
  if [ -n "${is_bundle}" -a -n "$pcie_bd" ]; then
    echo "Convert $(basename "${bfb}") to flat format for runtime upgrade"
    if ! which flint &> /dev/null; then
      echo "Error: flint not found."
      exit 1
    fi
    # The PSID reported by flint is passed to the repack tool below.
    psid=$(flint -d "$pcie_bd".0 q | grep PSID | awk '{print $2}')
    if [ -z "${psid}" ]; then
      echo "Error: failed to get PSID."
      exit 1
    fi
    mkdir ${TMP_DIR}/bfb
    bfb-tool repack --bfb "${bfb}" --psid ${psid} \
      --output-dir ${TMP_DIR}/bfb --output-format flat \
      --output-bfb flat.bfb
    if [ $? -ne 0 ]; then
        echo "Error: Failed to create BFB in flat format"
        exit 1
    fi
    # The repacked image is expected at
    # <output-dir>/<bfb name without extension>/<psid>/flat.bfb.
    bfb=$(basename "${bfb}")
    bfb=${bfb%.*}
    bfb_path=${TMP_DIR}/bfb/"${bfb}"/${psid}
    bfb="${bfb_path}"/flat.bfb
    if [ ! -e "${bfb}" ]; then
        echo "Error: ${bfb} not found"
        exit 1
    fi

    # Replace config file if provided.
    if [ -n "${cfg}" -a -e "${cfg}" ]; then
      mlx-mkbfb --boot-args ${cfg} ${bfb} "${bfb_path}"/flat-cfg.bfb
      bfb="${bfb_path}"/flat-cfg.bfb
    fi
  fi
fi

# The image must exist, unless this run only activates (activate != 0).
if [ ! -e "${bfb}" -a ${activate} -eq 0 ]; then
  echo "Error: ${bfb} not found."
  exit 1
fi

# The optional inputs - rootfs first, then cfg - must exist when given.
for optional_input in "${rootfs}" "${cfg}"; do
  if [ -n "${optional_input}" ] && [ ! -e "${optional_input}" ]; then
    echo "Error: ${optional_input} not found."
    exit 1
  fi
done

# The rshim driver counts as present when the device's boot node exists.
rshim_check_cmd="[ -e ${rshim_node}/boot ]"

if [ $mode == "local" ]; then
  # Re-run the root test through run_cmd, i.e. under $sudo_prefix, to verify
  # that sudo actually grants root.
  run_cmd_exit local "$check_root_cmd" \
    "Error: current login does not have sudo"

  echo "Checking if rshim driver is running locally..."
  run_cmd_exit local "$rshim_check_cmd" \
    "Error: rshim driver not found at $rshim"
fi

if [ $mode == "remote" ]; then
  # Each step below terminates the script on failure.
  echo "Checking if remote host is reachable..."
  ping_cmd="ping -c 1 $ip >/dev/null 2>&1"
  run_cmd_exit local "$ping_cmd"

  echo "Checking if Remote has SSH server running..."
  run_cmd_exit local "nc -z $ip 22" \
    "Error: Remote does not have SSH server running"

  echo "Checking if Remote has password-less root SSH access..."
  # BatchMode makes ssh fail instead of prompting for a password.
  if ! ssh -o BatchMode=yes -o ConnectTimeout=5 root@$ip "exit"; then
    echo "Error: Remote does not have password-less (public key authentication) root SSH access"
    exit 1
  fi

  # From here on 'run_cmd remote' / 'run_cmd_exit remote' may be used.
  run_cmd_remote_ready=1

  echo "Checking if rshim driver is running remotely..."
  run_cmd_exit remote "$rshim_check_cmd" \
    "Error: remote rshim driver not found"

  echo "Lowering the priority of the remote rshim process..."
  # The \$ keeps the pgrep substitution from being expanded locally, so it
  # is evaluated on the remote host.
  run_cmd_exit remote "renice -n 19 -p \$(pgrep rshim)"  \
    "Error: Failed to lower the priority of the remote rshim process"

  if [ $remote_mode == "nc" ] || [ $remote_mode == "ncpipe" ]; then
    echo "Checking if local netcat is installed..."
    run_cmd_exit local "command -v nc > /dev/null" \
      "Error in $mode mode: Netcat is not installed locally"

    echo "Checking if remote netcat is installed..."
    run_cmd_exit remote "command -v nc > /dev/null" \
      "Error in $mode mode: Netcat is not installed remotely"

    # Try to test-connect the netcat port to see if it's available
    # (the port is considered free when the connection attempt FAILS,
    # hence the negation). The side that will listen is the one checked.
    if [ $reverse_nc -eq 1 ]; then
      echo "Checking if remote netcat port $port is available..."
      run_cmd_exit remote "! nc -z $ip $port" \
        "Error: remote netcat port $port is not available"
    else
      echo "Checking if local netcat port $port is available..."
      run_cmd_exit local "! nc -z localhost $port" \
        "Error: local netcat port $port is not available"
    fi
  fi
fi

# Check the pv tool.
# 'command -v' is a shell builtin, so the lookup does not depend on the
# external 'which' program being installed. When pv is missing, $pv stays
# empty and the push functions simply leave it out of their pipelines.
pv=$(command -v pv 2>/dev/null)
if [ -z "${pv}" ]; then
  echo "Warn: 'pv' command not found. Continue without showing BFB progress."
fi

# NIC mode, non-runtime installation on a known PCIe device: flag NIC mode
# in the breadcrumb register and unbind the PFs from mlx5_core. cleanup
# re-binds the ones recorded here as originally bound.
if [ ${nic_mode} -eq 1 -a -n "${pcie_bd}" -a ${runtime} -eq 0 ]; then
  # Set BREADCRUMB.BIT32 to indicate NIC mode.
  # (read-modify-write; the third field of the tool's output is taken as
  # the current value)
  breadcrumb1=$(${BF_REG} $(basename ${rshim_node}) ${RSH_BREADCRUMB1}.64 | awk '{print $3}')
  breadcrumb1=$((breadcrumb1 | (0x1 << 32)))
  breadcrumb1=$(printf "0x%x\n" $breadcrumb1)
  ${BF_REG} $(basename ${rshim_node}) ${RSH_BREADCRUMB1}.64 ${breadcrumb1} >/dev/null

  for i in 0 1; do
    # A PF without an entry under the mlx5_core driver directory is not
    # bound; leave it alone and keep its pfN_bound flag at 0.
    if [[ ! -e /sys/bus/pci/drivers/mlx5_core/${pcie_bd}.${i} ]]; then
      [ ${verbose} -eq 1 ] && echo "Unbinding: Skipping originally unbound pf${i} (${pcie_bd}.${i})"
      continue
    fi
    # Remember that this PF was bound (sets pf0_bound or pf1_bound).
    eval "pf${i}_bound=1"
    echo "Unbinding pf${i} (${pcie_bd}.${i})"
    run_cmd_exit local "echo ${pcie_bd}.${i} > /sys/bus/pci/drivers/mlx5_core/unbind"
  done
fi

# Reactivate NIC_FW if runtime but not LFWP.
# Only attempted when a PCIe device was identified ($pcie_bd is set).
if [ ${lfwp} -eq 0 -a -n "${pcie_bd}" -a ${runtime} -eq 1 ]; then
  if which flint &> /dev/null; then
    # Suppress errors if already activated.
    flint -d ${pcie_bd}.0 ir >&/dev/null
  else
    echo "Flint not found. Skip NIC_FW reactivation."
  fi
fi

# Push BFB and wait for result.
# Skipped when no image file exists (e.g. an activation-only run). The
# return status of push_boot_stream is not checked here.
if [ -e "${bfb}" ]; then
  push_boot_stream
  wait_for_update_to_finish
fi

# LFWP activation on PCIe host.
# Runs only when both --lfwp and activation are in effect; requires a known
# PCIe device and the mlxfwreset tool.
if [ ${lfwp} -eq 1 -a ${activate} -eq 1 ]; then
  if [ -z "${pcie_bd}" ]; then
    echo "ERROR: Failed to activate LFWP, PCIe device not found."
    exit 1
  fi

  if ! which mlxfwreset &> /dev/null; then
    echo "ERROR: Failed to activate LFWP, mlxfwreset not found."
    exit 1
  fi

  # Best-effort to check and apply L0 reset.
  # Support is detected by looking for the word "-Supported" on the
  # live-Patch line of the query output. A failed reset is reported but
  # does not change the script's exit status.
  if (mlxfwreset -d ${pcie_bd}.0 q | grep live-Patch | grep -qw "\-Supported"); then
    echo "Live Patch NIC Firmware reset is supported."
    msg=$(mlxfwreset -d ${pcie_bd}.0 -y -l 0 r 2>&1)
    if [ $? -ne 0 ]; then
      echo "ERROR: Live Patch NIC Firmware reset failed. $msg"
    else
      echo "Live Patch NIC Firmware reset done"
    fi
  else
    echo "Live Patch NIC Firmware reset not supported."
  fi
fi
