# ex:ts=4:sw=4:sts=4:expandtab:set syntax=sh
#
# Copyright (c) 2013 Mellanox Technologies. All rights reserved.
# Copyright (c) 2010 QLogic Corporation. All rights reserved.
#
# This Software is licensed under one of the following licenses:
#
# 1) under the terms of the "Common Public License 1.0" a copy of which is
#    available from the Open Source Initiative, see
#    http://www.opensource.org/licenses/cpl.php.
#
# 2) under the terms of the "The BSD License" a copy of which is
#    available from the Open Source Initiative, see
#    http://www.opensource.org/licenses/bsd-license.php.
#
# 3) under the terms of the "GNU General Public License (GPL) Version 2" a
#    copy of which is available from the Open Source Initiative, see
#    http://www.opensource.org/licenses/gpl-license.php.
#
# Licensee has the right to choose one of the above licenses.
#
# Redistributions of source code must retain the above copyright
# notice and one of the license notices.
#
# Redistributions in binary form must reproduce both the above copyright
# notice, one of the license notices in the documentation
# and/or other materials provided with the distribution.

# Send a message to syslog with the "openibd: " prefix.
# Arguments: $@ - words of the message; they are joined with single spaces.
log_msg()
{
    # "$*" keeps the prefix and the whole message in one logger argument.
    # With "$@" only the first word was glued to the prefix and every
    # further word was handed to logger as a separate argument.
    logger -i "openibd: $*"
}

# Delete /var/run/mlx_os_booting; anything rm prints is discarded.
cleanup()
{
    local marker=/var/run/mlx_os_booting

    /bin/rm -f "$marker" > /dev/null 2>&1
}

# Locate and source the openibd configuration file.
# Globals:
#   OPENIBD_CONFIG (read/written) - path of the file; defaults to
#                                   /etc/infiniband/openib.conf
#   CONFIG (written)              - copy of OPENIBD_CONFIG
#   LANG (exported)               - forced to "C"
# Prints a notice and exits the shell with status 0 when the file is missing.
read_config() {
    # config: /etc/infiniband/openib.conf
    OPENIBD_CONFIG=${OPENIBD_CONFIG:-"/etc/infiniband/openib.conf"}
    CONFIG=$OPENIBD_CONFIG
    export LANG="C"

    # The path is quoted so that a location containing whitespace still
    # reaches test and "." as a single word.
    if [ ! -f "$CONFIG" ]; then
        echo No InfiniBand configuration found
        exit 0
    fi

    . "$CONFIG"
}

# Change the working directory to /etc/infiniband.
# Returns the exit status of cd.
set_wd() {
    local conf_dir=/etc/infiniband

    cd "$conf_dir"
}

# Set up the defaults used by the rest of the script: extend PATH, source
# /etc/profile.d/ofed.sh when it exists, give every tunable that is unset or
# empty its default value, and reset the internal state variables.
init_environment() {
    PATH=$PATH:/sbin:/usr/bin:/lib/udev
    if [ -e /etc/profile.d/ofed.sh ]; then
        . /etc/profile.d/ofed.sh
    fi

    # Allow calling the service script with the option 'stop' for unloading the driver stack.
    # This flag should be disabled when the OS root file system is on remote storage.
    : "${ALLOW_STOP:=yes}"

    # Run the service script with force mode to enable loading the driver stack even
    # if the available modules were not installed by MLNX_OFED package.
    : "${FORCE_MODE:=no}"

    : "${RUN_SYSCTL:=no}"

    # Hook scripts; each keeps a value that is already set
    : "${OPENIBD_PRE_START:=/etc/infiniband/pre-start-hook.sh}"
    : "${OPENIBD_POST_START:=/etc/infiniband/post-start-hook.sh}"
    : "${OPENIBD_PRE_STOP:=/etc/infiniband/pre-stop-hook.sh}"
    : "${OPENIBD_POST_STOP:=/etc/infiniband/post-stop-hook.sh}"

    # Internal state, always reset
    max_ports_num_in_hca=0
    FORCE=0
    WARNED_INBOX_LOAD=0

    # Fixed file locations
    ipoib_ha_pidfile=/var/run/ipoib_ha.pid
    srp_daemon_pidfile=/var/run/srp_daemon.pid
    _truescale=/etc/infiniband/truescale.cmds
}

# Only needed on Xenserver, but harmless elsewhere.
# Stores the paths of the XenServer helper programs in the globals XE,
# INTERFACE_RENAME and INTERFACE_RECONFIGURE.
init_environment_xenserver() {
    local xensource=/opt/xensource

    INTERFACE_RENAME=/etc/sysconfig/network-scripts/interface-rename.py
    INTERFACE_RECONFIGURE=$xensource/libexec/interface-reconfigure
    XE=$xensource/bin/xe
}

# Store the current time, in seconds since the epoch as printed by
# "date +%s", in the global start_time with all whitespace removed.
set_start_time() {
    local now

    now=$(date +%s)
    start_time=${now//[[:space:]]/}
}

# Decide whether the script runs in "manual" or "auto" mode and leave early
# when an automatic start is not wanted.
# Arguments:
#   $1 - optional word of the form <name>=<bootID>; only the text after the
#        last '=' is used
# Globals written: RUNMODE, last_bootID, and ONBOOT (forced to "yes" in
#                  manual mode)
# Globals read:    ONBOOT, start_time, CONFIG, INIT_VERSION
# Files written:   /var/run/openibd.bootid, /var/run/mlx_os_booting
# Calls cleanup and "exit 0" when ONBOOT is not "yes", and possibly in the
# SuSE-specific check at the end.
set_run_mode() {
    local systemd_auto bootID base link

    # Only use ONBOOT option if called by a runlevel directory.
    # Therefore determine the base, follow a runlevel link name ...
    systemd_auto=0
    # Keep only what follows the last '=' of $1
    bootID=${1##*=}
    if [ "X$bootID" != "X" ]; then
        # Read the boot ID stored by the previous run, then store the new one
        last_bootID=$(cat /var/run/openibd.bootid 2>/dev/null)
        echo $bootID > /var/run/openibd.bootid
        if [ "X$last_bootID" == "Xmanual" ]; then
        log_msg "first manual run after installation"
        elif [[ "X$last_bootID" == "X" || "X$last_bootID" != "X$bootID" ]]; then
        # No stored boot ID, or a different one: force auto mode below
        systemd_auto=1
        fi
    fi

    # base is the script name without its directory; link is base with the
    # shortest leading part that ends in S or K plus two digits removed.
    base=${0##*/}
    link=${base#*[SK][0-9][0-9]}
    # ... and compare them
    # Manual mode needs all three: no S/K prefix was stripped, the script was
    # not run as /etc/rc.d/init.d/openibd, and systemd_auto is still 0.
    if [[ $link == $base && "$0" != "/etc/rc.d/init.d/openibd" && $systemd_auto -eq 0 ]] ; then
        RUNMODE=manual
        ONBOOT=yes
        log_msg "running in manual mode"
    else
        RUNMODE=auto
        log_msg "running in auto mode"
        # Record the start time in the marker file that cleanup removes
        echo "$start_time" 2>/dev/null > /var/run/mlx_os_booting
    fi

    # Check if OpenIB configured to start automatically
    if [ "X${ONBOOT}" != "Xyes" ]; then
        log_msg "running in auto mode and ONBOOT=no --> exiting"
        cleanup
        exit 0
    fi

    # Existed even in the deb script:
    if ( grep -i 'SuSE Linux' /etc/issue /etc/os-release >/dev/null 2>&1 ); then
        if [ -n "$INIT_VERSION" ] ; then
        # MODE=onboot
            # NOTE(review): this branch depends on the exit status of
            # "grep -L", which is believed to differ between GNU grep
            # releases - confirm on the supported SuSE versions.
            if LANG=C egrep -L "^ONBOOT=['\"]?[Nn][Oo]['\"]?" ${CONFIG} > /dev/null ; then
                cleanup
                exit 0
            fi
        fi
    fi
}

# Choose the modprobe and ip commands used by the rest of the script.
# Globals written:
#   modprobe - /sbin/modprobe, with --allow-unsupported-modules appended when
#              "modprobe -c" reports "allow_unsupported_modules 0"
#   ip       - /sbin/ip or /bin/ip when one of them exists, otherwise "ip"
set_modprobe_ip() {
    local candidate

    modprobe=/sbin/modprobe
    # Allow unsupported modules, if disallowed by current configuration
    ${modprobe} -c | grep -q '^allow_unsupported_modules  *0' \
        && modprobe="${modprobe} --allow-unsupported-modules"

    # Fall back to a PATH lookup unless a well-known location exists
    ip=ip
    for candidate in /sbin/ip /bin/ip; do
        if [ -e "$candidate" ]; then
            ip=$candidate
            break
        fi
    done
}

# Record the requested action and derive the force flag.
# Arguments:
#   $1 - action name, optionally prefixed with "force-"
# Globals written:
#   ORIG_ACTION - the action exactly as given
#   ACTION      - the action with a leading "force-" removed
#   FORCE       - set to 1 for a "force-" action or when FORCE_MODE is "yes"
set_action() {
    ACTION=$1
    ORIG_ACTION=$ACTION

    # Match the prefix only. The former unanchored regex "force-.*" also
    # fired for actions that merely contain "force-" somewhere, setting
    # FORCE although no prefix could be stripped.
    if [[ "$ACTION" == force-* ]]; then
        FORCE=1
        ACTION="${ACTION#force-}"
    fi

    if [ "X${FORCE_MODE}" == "Xyes" ]; then
        FORCE=1
    fi
}

# Tell whether output goes to a serial console.
# Returns 0 when CONSOLETYPE is "serial" or when tty reports ttyS0,
# 1 otherwise.
is_serial()
{
    if [ "$CONSOLETYPE" = 'serial' ]; then
        return 0
    fi
    # tty prints the full device path (for example /dev/ttyS0), so the bare
    # name alone could never match; the bare form is kept for compatibility.
    case "$(tty)" in
        /dev/ttyS0 | ttyS0)
            return 0
            ;;
    esac
    return 1
}

# Prepare the variables used by the echo_* status helpers.
# COLUMNS gets the value 80 when unset or empty. When BOOTUP already has a
# value nothing else is touched. Otherwise BOOTUP becomes "serial" with empty
# cursor/colour commands when is_serial succeeds, or "color" with the escape
# sequence commands below.
init_color_print() {
    # Get a sane screen width
    : "${COLUMNS:=80}"

    # Read in our configuration
    [ -n "${BOOTUP:-}" ] && return

    # Copied from old initscripts package of RHEL:
    RES_COL=60
    LOGLEVEL=1
    if is_serial; then
        BOOTUP=serial
        MOVE_TO_COL=
        SETCOLOR_SUCCESS=
        SETCOLOR_FAILURE=
        SETCOLOR_WARNING=
        SETCOLOR_NORMAL=
    else
        BOOTUP=color
        MOVE_TO_COL="echo -en \\033[${RES_COL}G"
        SETCOLOR_SUCCESS="echo -en \\033[1;32m"
        SETCOLOR_FAILURE="echo -en \\033[1;31m"
        SETCOLOR_WARNING="echo -en \\033[1;33m"
        SETCOLOR_NORMAL="echo -en \\033[0;39m"
    fi
}

# Print the message followed by an "[  OK  ]" tag and a carriage return.
# Arguments: $@ - message words
# Globals read: BOOTUP, MOVE_TO_COL, SETCOLOR_SUCCESS, SETCOLOR_NORMAL
# Returns 0.
echo_success() {
  # Quoted so the message is printed verbatim: no pathname expansion of
  # characters such as '*' and no collapsing of repeated spaces.
  echo -n "$@"
  [ "$BOOTUP" = "color" ] && $MOVE_TO_COL
  echo -n "[  "
  [ "$BOOTUP" = "color" ] && $SETCOLOR_SUCCESS
  echo -n $"OK"
  [ "$BOOTUP" = "color" ] && $SETCOLOR_NORMAL
  echo -n "  ]"
  echo -e "\r"
  return 0
}

# Print the message followed by a "[  done  ]" tag and a carriage return.
# Arguments: $@ - message words
# Globals read: BOOTUP, MOVE_TO_COL, SETCOLOR_NORMAL
# Returns 0.
echo_done() {
  # Quoted so the message is printed verbatim: no pathname expansion of
  # characters such as '*' and no collapsing of repeated spaces.
  echo -n "$@"
  [ "$BOOTUP" = "color" ] && $MOVE_TO_COL
  echo -n "[  "
  [ "$BOOTUP" = "color" ] && $SETCOLOR_NORMAL
  echo -n $"done"
  [ "$BOOTUP" = "color" ] && $SETCOLOR_NORMAL
  echo -n "  ]"
  echo -e "\r"
  return 0
}

# Print the message followed by a "[FAILED]" tag and a carriage return.
# Arguments: $@ - message words
# Globals read: BOOTUP, MOVE_TO_COL, SETCOLOR_FAILURE, SETCOLOR_NORMAL
# Returns 1.
echo_failure() {
  # Quoted so the message is printed verbatim: no pathname expansion of
  # characters such as '*' and no collapsing of repeated spaces.
  echo -n "$@"
  [ "$BOOTUP" = "color" ] && $MOVE_TO_COL
  echo -n "["
  [ "$BOOTUP" = "color" ] && $SETCOLOR_FAILURE
  echo -n $"FAILED"
  [ "$BOOTUP" = "color" ] && $SETCOLOR_NORMAL
  echo -n "]"
  echo -e "\r"
  return 1
}

# Print the message followed by a "[WARNING]" tag and a carriage return.
# Arguments: $@ - message words
# Globals read: BOOTUP, MOVE_TO_COL, SETCOLOR_WARNING, SETCOLOR_NORMAL
# Returns 1.
echo_warning() {
  # Quoted so the message is printed verbatim: no pathname expansion of
  # characters such as '*' and no collapsing of repeated spaces.
  echo -n "$@"
  [ "$BOOTUP" = "color" ] && $MOVE_TO_COL
  echo -n "["
  [ "$BOOTUP" = "color" ] && $SETCOLOR_WARNING
  echo -n $"WARNING"
  [ "$BOOTUP" = "color" ] && $SETCOLOR_NORMAL
  echo -n "]"
  echo -e "\r"
  return 1
}

# Print the total number of entries found in the ports directories of all
# devices under /sys/class/infiniband.
# Globals written: sysdir, hcas, hca; max_ports_num_in_hca is raised to the
# largest per-device count seen.
count_ib_ports()
{
    local total=0
    local nports=0

    sysdir=/sys/class/infiniband
    hcas=$(/bin/ls -1 ${sysdir} 2> /dev/null)
    for hca in $hcas; do
        nports=$(/bin/ls -1 ${sysdir}/${hca}/ports 2> /dev/null | wc -l)
        if [ "$nports" -gt "$max_ports_num_in_hca" ]; then
            max_ports_num_in_hca=$nports
        fi
        total=$(( total + nports ))
    done

    echo $total
}

# Deb only
# Collect the network configuration files into the global conf_files:
# /etc/network/interfaces plus the files named on its non-comment lines
# that contain the word "source".
# NOTE: this file defines get_interface_conf_files a second time further
# down. The shell keeps the most recent definition, so once the whole file
# has been read callers get that later version, which prints the list
# instead of leaving it in a global.
get_interface_conf_files()
{
    conf_files=/etc/network/interfaces
    if (grep -w source /etc/network/interfaces 2>/dev/null | grep -qvE "^\s*#" 2>/dev/null); then
        # get absolute file paths
        # The unquoted substitution is split into words (and glob-expanded),
        # so the loop sees the "source" keyword and each path separately.
        for line in $(grep -w source /etc/network/interfaces 2>/dev/null | grep -vE "^\s*#" 2>/dev/null);
        do
            # Last whitespace-separated field of the current word
            ff=$(echo "$line" | awk '{print $NF}')

            # check if it's absolute path
            if [ -f "$ff" ]; then
                conf_files="$conf_files $ff"
                continue
            fi

            # check if it's relative path
            # (ls prints nothing for it, so retry below /etc/network)
            if [ -z "$(ls $ff 2>/dev/null)" ]; then
                ff="/etc/network/$ff"
            fi

            # support wildcards
            for file in $(ls -1 $ff 2>/dev/null)
            do
                if [ -f "$file" ]; then
                    conf_files="$conf_files $file"
                fi
            done
        done
    fi
}

# Report whether the dependency list of a kernel module mentions mlx_compat.
# Arguments: $1 - module name
# Prints "yes" and returns 0 when "modinfo -Fdepends" succeeds and its output
# contains "mlx_compat"; prints "no" and returns 1 otherwise.
check_mlnx_ofed_module() {
    local deps

    if deps=$(modinfo -Fdepends "$1" 2>/dev/null); then
        case "$deps" in
            *mlx_compat*)
                echo "yes"
                return 0
                ;;
        esac
    fi
    echo "no"
    return 1
}

# This involves running code. Don't do that unless running 'start'
# Every *_LOAD variable listed below that is unset or empty receives the
# answer ("yes" or "no") printed by check_mlnx_ofed_module for its module;
# variables that already hold a value are left alone.
set_module_load_defaults() {
    local pair var module

    for pair in \
        MLX5_LOAD:mlx5_core \
        UMAD_LOAD:ib_umad \
        UVERBS_LOAD:ib_uverbs \
        IPOIB_LOAD:ib_ipoib \
        RDMA_CM_LOAD:rdma_cm \
        RDMA_UCM_LOAD:rdma_ucm
    do
        var=${pair%%:*}
        module=${pair#*:}
        [ -n "${!var}" ] && continue
        printf -v "$var" '%s' "$(check_mlnx_ofed_module $module || :)"
    done
}

# FIXME: it's trivial to cache the results of those two,
# but let's keep things simple for now.
# Succeeds when /etc/os-release has a line that starts with "ID=" and ends
# with "debian". Returns grep's exit status.
is_deb() {
    local os_release=/etc/os-release

    grep -q -e '^ID=.*debian$' "$os_release" 2> /dev/null
}

# Succeeds when /etc/os-release contains the text "suse".
# Returns grep's exit status.
is_sles() {
    local os_release=/etc/os-release

    grep -q -e suse "$os_release" 2> /dev/null
}

# deb-specific
# Print, on one line separated by spaces, /etc/network/interfaces followed by
# the files named on its non-comment lines that contain the word "source".
# All working variables are local, so nothing leaks into the caller.
get_interface_conf_files()
{
    local conf_files line ff file

    conf_files=/etc/network/interfaces
    if (grep -w source /etc/network/interfaces 2>/dev/null | grep -qvE "^\s*#" 2>/dev/null); then
        # get absolute file paths
        # The substitution is left unquoted on purpose: it is split into
        # words (and glob-expanded), so the loop sees the "source" keyword
        # and every path as separate items.
        for line in $(grep -w source /etc/network/interfaces 2>/dev/null | grep -vE "^\s*#" 2>/dev/null)
        do
            ff=$(echo "$line" | awk '{print $NF}')

            # check if it's absolute path
            if [ -f "$ff" ]; then
                conf_files="$conf_files $ff"
                continue
            fi

            # check if it's relative path
            if [ -z "$(ls $ff 2>/dev/null)" ]; then
                ff="/etc/network/$ff"
            fi

            # support wildcards
            for file in $(ls -1 $ff 2>/dev/null)
            do
                if [ -f "$file" ]; then
                    conf_files="$conf_files $file"
                fi
            done
        done
    fi
    echo "$conf_files"
}

# Print the interface names found in the distribution's static network
# configuration, one per line.
# On Debian (is_deb) the second field of every "iface" line in the files
# reported by get_interface_conf_files is used; elsewhere the values of the
# NAME= and DEVICE= entries in the ifcfg-* files are used, taken from
# /etc/sysconfig/network on SLES (is_sles) and from
# /etc/sysconfig/network-scripts otherwise.
# Double quotes, single quotes, '|' and backslashes are removed.
get_static_interfaces() {
    local ifcfg_dir iface_files

    if ! is_deb; then
        ifcfg_dir="/etc/sysconfig/network-scripts"
        is_sles && ifcfg_dir="/etc/sysconfig/network"
        grep -E "NAME=|DEVICE=" $ifcfg_dir/ifcfg-* 2>/dev/null \
            | cut -d'=' -f'2' | tr -d "\"|\'"
        return
    fi

    iface_files=$(get_interface_conf_files)
    grep -w "^iface" $iface_files 2>/dev/null \
        | cut -d" " -f"2" | tr -d "\"|\'"
}

# In manual run mode, write the kernel boot ID (dashes removed) to
# /var/run/mlx_ifc-<name>.bootid for every name printed by
# get_static_interfaces, and to /var/run/mlx_ifc.manual.
# Does nothing unless RUNMODE is "manual".
set_static_ifaces_bootid_files() {
    local curr_bootid

    [ "X$RUNMODE" = "Xmanual" ] || return 0

    curr_bootid=$(sed -e 's/-//g' /proc/sys/kernel/random/boot_id 2>/dev/null)
    for i in $(get_static_interfaces); do
        echo $curr_bootid 2>/dev/null > /var/run/mlx_ifc-${i}.bootid
    done
    echo $curr_bootid 2>/dev/null > /var/run/mlx_ifc.manual
}

# Build the module lists used when starting, stopping and reporting status.
# Globals read:    SDP_LOAD, IPOIB_LOAD, SRP_LOAD, RDMA_CM_LOAD, RDMA_UCM_LOAD
# Globals written: POST_LOAD_MODULES, IPOIB, IPOIB_LOAD (forced to "yes" when
#                  SDP is requested), UNLOAD_MODULES, STATUS_MODULES
set_modules_to_load() {
    # Setting OpenIB start parameters
    POST_LOAD_MODULES=""
    IPOIB=0

    if [[ "${SDP_LOAD}" == "yes" ]]; then
        POST_LOAD_MODULES+=" ib_sdp"
        IPOIB_LOAD="yes"
    fi
    [[ "${IPOIB_LOAD}" == "yes" ]] && IPOIB=1
    [[ "${SRP_LOAD}" == "yes" ]] && POST_LOAD_MODULES+=" ib_srp"
    [[ "${RDMA_CM_LOAD}" == "yes" ]] && POST_LOAD_MODULES+=" rdma_cm"
    [[ "${RDMA_UCM_LOAD}" == "yes" ]] && POST_LOAD_MODULES+=" rdma_ucm"

    UNLOAD_MODULES="ib_mthca mlx5_fpga_tools mlx5_ib mlx5_core mlx4_ib ib_ipath ipath_core ib_ehca iw_nes cxgb3i iw_cxgb3 cxgb3 iw_cxgb4 cxgb4i cxgb4"
    UNLOAD_MODULES+=" ib_qib mana_ib rnbd_client rnbd_server"
    UNLOAD_MODULES+=" eth_ipoib ib_ipoib mlx4_vnic ib_madeye ib_rds hns_roce"
    UNLOAD_MODULES+=" rds_rdma rds_tcp rds ib_ucm kdapl ib_srp_target scsi_target ib_srp ib_iser ib_sdp"
    UNLOAD_MODULES+=" rdma_ucm rdma_cm iw_cm ib_cm ib_local_sa findex"
    UNLOAD_MODULES+=" auxiliary mlxdevm mlx5_vdpa mlx5_vfio_pci"
    UNLOAD_MODULES+=" mlx5_fwctl fwctl mlx5_dpll"
    UNLOAD_MODULES+=" ib_sa ib_uverbs ib_umad ib_mad ib_core ib_addr ib_netlink rdma_rxe mlxfw vfio_mdev"

    STATUS_MODULES="rdma_ucm ib_srp ib_sdp rdma_cm ib_ipoib mlx5_core mlx5_ib ib_uverbs ib_umad ib_cm ib_core mlxfw"

    # These two are only handled when check_mlnx_ofed_module accepts them
    if check_mlnx_ofed_module scsi_transport_srp >/dev/null; then
        UNLOAD_MODULES+=" scsi_transport_srp"
        STATUS_MODULES+=" scsi_transport_srp"
    fi
    if check_mlnx_ofed_module cls_flower >/dev/null; then
        UNLOAD_MODULES+=" cls_flower"
    fi
}

# List the entries of /sys/class/net whose names start with "ib".
# Side effect: the working directory of the calling shell becomes
# /sys/class/net (unless the function runs in a subshell).
# Returns cd's status when the directory cannot be entered, otherwise the
# status of ls.
get_interfaces()
{
    # Stop when cd fails; otherwise "ib*" would be matched against whatever
    # directory happens to be current.
    cd /sys/class/net || return
    /bin/ls -d ib* 2> /dev/null
}

# Print the names of the network interfaces whose PCI vendor ID is 0x15b3,
# as one line in which every name is preceded by a space.
# Arguments:
#   $1 - optional kernel module name; when given, only interfaces whose
#        device/driver/module link resolves to that name are listed
get_mlx_en_interfaces()
{
    local found netdev

    found=""
    for netdev in /sys/class/net/*; do
        grep 0x15b3 ${netdev}/device/vendor > /dev/null 2>&1 || continue
        #  FIXME: This condition is only used by XenServer
        # code. However it makes sense to check here for
        # mlx5_core, right?
        if [ -n "$1" ] \
            && [ "$(basename $(readlink -f ${netdev}/device/driver/module))" != "$1" ]; then
            continue
        fi
        found="$found ${netdev##*/}"
    done
    echo "$found"
}

# Print the last field of every line starting with "uuid" in the output of
# "$XE pif-list device=<$1>".
xe_get_uuid()
{
    $XE pif-list device=$1 2> /dev/null | awk '/^uuid/ {print $NF}'
}

# Run "$XE pif-forget uuid=<$1>" with all output discarded.
# Returns the status of that command.
xe_pif_forget()
{
    local pif_uuid=$1

    $XE pif-forget uuid=$pif_uuid &> /dev/null
}

# Print the last field of every line starting with "uuid" in the output of
# "$XE network-list bridge=<$1>".
xe_get_network_uuid()
{
    $XE network-list bridge=$1 2> /dev/null | awk '/^uuid/ {print $NF}'
}

# Print the distinct last fields of the lines containing the word
# "network-uuid" in the output of "$XE pif-list device=<$1>", sorted.
xe_get_net_uuid_by_device()
{
    local device=$1

    $XE pif-list device=$device 2> /dev/null \
        | grep -w "network-uuid" \
        | awk '{print $NF}' \
        | sort -n \
        | uniq
}

# Run "$XE network-destroy uuid=<$1>" with all output discarded.
# Returns the status of that command.
xe_network_destroy()
{
    local network_uuid=$1

    $XE network-destroy uuid=$network_uuid &> /dev/null
}

# Rename "side" interfaces and drop the matching XenServer objects.
# When the interface list from get_mlx_en_interfaces contains the word "side"
# and $INTERFACE_RENAME is executable, it is run with --rename. Afterwards,
# for every listed interface, the PIFs reported by "$XE pif-list" on lines
# containing both "side" and the interface name are forgotten, and the
# networks reported by "$XE network-list" on lines containing both "brside"
# and the interface name are destroyed.
# The function sleeps for about four seconds in total.
xe_remove_side_interfaces()
{
    local mlx_en_interfaces i side_i

    sleep 2

    # get_mlx_en_interfaces prints its result and keeps its own variable
    # local, so the list has to be captured here; a bare call left the list
    # empty and wrote the names to stdout.
    mlx_en_interfaces=$(get_mlx_en_interfaces)
    # Rename side interfaces
    if (echo $mlx_en_interfaces | grep -wq side); then
        if [ -x "$INTERFACE_RENAME" ]; then
            $INTERFACE_RENAME --rename > /dev/null 2>&1
        fi
    fi

    sleep 1

    # Re-read mlx4_en interfaces
    mlx_en_interfaces=$(get_mlx_en_interfaces)
    for i in $mlx_en_interfaces
    do
        for side_i in $($XE pif-list 2> /dev/null | grep -w side | grep -w $i | awk '{print $NF}')
        do
            xe_pif_forget $(xe_get_uuid $side_i)
        done
        for side_i in $($XE network-list 2> /dev/null | grep -w brside | grep -w $i | awk '{print $NF}')
        do
            xe_network_destroy $(xe_get_network_uuid $side_i)
        done
    done

    sleep 1
}

# Run "$XE pif-unplug" and then "$XE pif-plug" for uuid=<$1>, discarding all
# output. Returns the status of the pif-plug call.
xe_replug_pif()
{
    local operation

    for operation in pif-unplug pif-plug; do
        $XE $operation uuid=$1 > /dev/null 2>&1
    done
}

# Print the last field of every line containing the word "bridge" in the
# output of "$XE network-list uuid=<$1>".
xe_get_bridge()
{
    local network_uuid=$1

    $XE network-list uuid=$network_uuid 2> /dev/null \
        | grep -w bridge \
        | awk '{print $NF}'
}

# Destroy the bond with uuid $1 and create it again with the same MAC
# address, mode, network and slave PIFs.
# Globals written: bond_master_uuid, bond_mode, bond_pif_uuids, bond_mac,
#                  bond_network_uuid
# Returns the status of "$XE bond-create".
xe_rebuild_bond()
{
    local bond_params master_params slaves

    # Query each parameter list once and pick the fields from the result
    bond_params=$($XE bond-param-list uuid=$1 2> /dev/null)
    bond_master_uuid=$(echo "$bond_params" | grep -w master | awk '{print $NF}')
    bond_mode=$(echo "$bond_params" | grep -w mode | awk '{print $NF}')

    # Turn the "a; b; c" list after the first colon into "a,b,c". Every
    # separator is converted; replacing only the first ';' and the first
    # space produced a broken list for bonds with more than two slaves.
    slaves=$(echo "$bond_params" | grep slaves | cut -d : -f 2- | tr -d ';')
    bond_pif_uuids=$(echo $slaves | tr ' ' ',')

    master_params=$($XE pif-param-list uuid=$bond_master_uuid 2> /dev/null)
    bond_mac=$(echo "$master_params" | grep MAC | awk '{print $NF}')
    bond_network_uuid=$(echo "$master_params" | grep network-uuid | awk '{print $NF}')

    $XE bond-destroy uuid=$1 2> /dev/null
    $XE bond-create  mac=$bond_mac mode=$bond_mode network-uuid=$bond_network_uuid pif-uuids=$bond_pif_uuids > /dev/null 2>&1
}

# Rebuild every XenServer bond that has one of the Mellanox interfaces as a
# slave.
# Arguments:
#   $1 - kernel module name passed on to get_mlx_en_interfaces
xe_bond_recover()
{
    local mlx_en_interfaces bond_uuid i uuid_i

    # get_mlx_en_interfaces prints its result and keeps its own variable
    # local, so the list has to be captured here; a bare call left the list
    # empty, which meant no bond was ever rebuilt.
    mlx_en_interfaces=$(get_mlx_en_interfaces $1)
    for bond_uuid in $($XE bond-list 2> /dev/null | grep "^uuid" | awk '{print $NF}')
    do
        for i in $mlx_en_interfaces
        do
            for uuid_i in $(xe_get_uuid $i)
            do
                if ($XE bond-list uuid=$bond_uuid 2> /dev/null | grep -w slaves | grep -wq $uuid_i); then
                    xe_rebuild_bond $bond_uuid
                    break
                fi
            done
        done
    done
}

# Returns PCI IDs of virtual functions used by Xen virtual machines
# Prints one PCI ID per line. An ID is printed when both it and the ID paired
# with it in a running VM's "pci:" parameter appear among the Mellanox lines
# of "lspci -D". Prints nothing when running $XE without arguments fails.
get_xen_vm_vf_pcis() {
    local pcis pci lspci_output uuid p_pci_id v_pci_id

    if ! $XE >/dev/null 2>&1; then
        return
    fi

    lspci_output=$(lspci -D | grep Mellanox)
    # try Xen's xe instead
    pcis=$(
        for uuid in $(
            $XE vm-list power-state=running | awk '/^uuid/ {print $5}'
        )
        do
            $XE vm-param-list uuid=$uuid | awk '/ pci:/{print $6}'
        done
    )
    for pci in $pcis; do
        # Strip the leading "<n>/" of both entries, separate them with a
        # space and drop the trailing ';'
        echo $pci | sed -e 's|[^/]*/||' -e 's|,[^/]*/| |' -e 's|;$||'
    done \
    | while read p_pci_id v_pci_id; do
        # grep -E replaces the obsolescent egrep command, and -c counts the
        # matching lines directly
        if [ "$(echo "$lspci_output" | grep -cE "^($p_pci_id|$v_pci_id) ")" -eq 2 ]; then
            echo "$v_pci_id"
        fi
    done
}

# Returns 0 when the word $1 appears in the output of /sbin/lsmod and the
# exit status of grep otherwise.
# $1 is used as a grep pattern and is matched against the whole line.
is_module()
{
    /sbin/lsmod | grep -w "$1" &> /dev/null
}

# Load one kernel module of the driver stack with ${modprobe}.
# Arguments: $1 - module name
# A module that check_mlnx_ofed_module does not accept is refused with a
# failure message (return 1) unless FORCE is 1; with FORCE=1 a warning is
# printed the first time only (tracked in WARNED_INBOX_LOAD) and loading goes
# ahead.
# On success ARE_MODULES_LOADED becomes "yes" and MODULES_LOADED_STATUS "0".
# Returns 0 on success, 1 for a refused module, otherwise modprobe's status.
load_module()
{
    local module=$1
    local status=0
    local ofed_built
    local module_path

    ofed_built=$(check_mlnx_ofed_module $module || :)

    # Refuse inbox or missing modules unless forced
    if [ "$ofed_built" != 'yes' ] && [ "$FORCE" != 1 ]; then
        module_path=$(modinfo -n "$module" 2>/dev/null)
        if [ -e "$module_path" ]; then
            echo_failure "Avoid loading inbox module: $module"
        else
            echo_failure "Module not found: $module"
        fi
        return 1
    fi

    # Forced load of an inbox module: warn once
    if [ "$ofed_built" != 'yes' ] && [ "$WARNED_INBOX_LOAD" = 0 ]; then
        WARNED_INBOX_LOAD=1
        echo_warning "Loading inbox modules ($module)"
        # But still load the modules
    fi

    ${modprobe} $module > /dev/null 2>&1
    status=$?
    if [ $status -ne 0 ]; then
        echo_failure "Failed loading kernel module $module: "
        log_msg "ERROR: Failed loading kernel module $module."
        return $status
    fi

    ARE_MODULES_LOADED="yes"
    MODULES_LOADED_STATUS="0"
    return 0
}

# Load an arbitrary external module w/o OFED-related checks
# Arguments: $1 - module name handed to ${modprobe}
# All output is discarded; returns the status of ${modprobe}.
load_module_external()
{
    ${modprobe} $1 &> /dev/null
}

# Return module's refcnt
# Arguments: $1 - module name
# The value of /sys/module/<$1>/refcnt becomes the return status, capped at
# 255. Returns 1 when the file cannot be read or holds no number.
is_ref()
{
    local refcnt

    refcnt=$(cat /sys/module/"$1"/refcnt 2> /dev/null) || return 1
    case "$refcnt" in
        '' | *[!0-9]*)
            return 1
            ;;
    esac
    # A return status is taken modulo 256, so an uncapped refcnt of 256 would
    # come back as 0 and look like "no references".
    if [ "$refcnt" -gt 255 ]; then
        return 255
    fi
    return "$refcnt"
}

# Print the OFED build ID and, for each PCI device of vendor 15b3 (except
# device ID 5a46), the firmware version and build date read with mstmread.
# Globals written: INFO, OFEDHOME, MREAD, vendor, slots and the per-device
#                  working variables
# Returns 1 when no executable mstmread is found.
get_sw_fw_info()
{
    INFO=/etc/infiniband/info
    OFEDHOME="/usr/local"
    if [ -x ${INFO} ]; then
        OFEDHOME=$(${INFO} | grep -w prefix | cut -d '=' -f 2)
    fi
    MREAD=$(command -v mstmread 2> /dev/null)

    # Get OFED Build id
    if [ -r ${OFEDHOME}/BUILD_ID ]; then
        echo  "Software"
        echo  "-------------------------------------"
        printf "Build ID:\n"
        cat ${OFEDHOME}/BUILD_ID
        echo  "-------------------------------------"
    fi

    # Get FW version
    # MREAD must be quoted: with an empty value the unquoted test collapses
    # to "[ ! -x ]", which is false, and the loop below would then run with
    # an empty command.
    if [ ! -x "${MREAD}" ]; then
        return 1
    fi

    vendor="15b3"
    slots=$(lspci -n -d "${vendor}:" 2> /dev/null | grep -v "5a46" | cut -d ' ' -f 1)
    for mst_device in $slots
    do
        # Raw register reads; the value follows the ':' in mstmread's output
        major=$($MREAD ${mst_device} 0x82478 2> /dev/null | cut -d ':' -f 2)
        subminor__minor=$($MREAD ${mst_device} 0x8247c 2> /dev/null | cut -d ':' -f 2)
        ftime=$($MREAD ${mst_device} 0x82480 2> /dev/null | cut -d ':' -f 2)
        fdate=$($MREAD ${mst_device} 0x82484 2> /dev/null | cut -d ':' -f 2)

        # Pick the digits after the "x" of each value
        major=$(echo -n $major | cut -d x -f 2 | cut -b 4)
        subminor__minor1=$(echo -n $subminor__minor | cut -d x -f 2 | cut -b 3,4)
        subminor__minor2=$(echo -n $subminor__minor | cut -d x -f 2 | cut -b 5,6,7,8)
        echo
        echo "Device ${mst_device} Info:"
        echo "Firmware:"

        # Values are passed as arguments, never as part of the format string
        printf '\tVersion:\t%s.%s.%s\n' "$major" "$subminor__minor1" "$subminor__minor2"

        day=$(echo -n $fdate | cut -d x -f 2 | cut -b 7,8)
        month=$(echo -n $fdate | cut -d x -f 2 | cut -b 5,6)
        year=$(echo -n $fdate | cut -d x -f 2 | cut -b 1,2,3,4)
        hour=$(echo -n $ftime | cut -d x -f 2 | cut -b 5,6)
        min=$(echo -n $ftime | cut -d x -f 2 | cut -b 3,4)
        sec=$(echo -n $ftime | cut -d x -f 2 | cut -b 1,2)

        printf '\tDate:\t%s/%s/%s %s:%s:%s\n' "$day" "$month" "$year" "$hour" "$min" "$sec"
    done
}

# Create debug info
# When one of the sysinfo-snapshot tools is installed, only a hint to run it
# is printed. Otherwise a report on the host, the PCI devices, loaded
# modules, kernel messages and processes is written to
# /tmp/ib_debug_info.log (global DEBUG_INFO), followed by a hint to attach
# that file to a support issue.
get_debug_info()
{
    local snapshot_tool

    trap '' 2 9 15
    for snapshot_tool in /usr/sbin/sysinfo-snapshot.py /usr/sbin/sysinfo-snapshot.sh; do
        if [ -x "$snapshot_tool" ]; then
            echo
            echo "Please run $snapshot_tool to collect the debug information"
            echo "and open an issue in the http://support.mellanox.com/SupportWeb/service_center/SelfService"
            echo
            return
        fi
    done

    DEBUG_INFO=/tmp/ib_debug_info.log
    /bin/rm -f $DEBUG_INFO
    touch $DEBUG_INFO
    # Everything printed inside the group is appended to the report file
    {
        echo "Hostname: $(hostname -s)"
        test -e /etc/issue && echo "OS: $(cat /etc/issue)"
        test -e /etc/os-release && echo "OS: $(cat /etc/os-release)"
        echo "Current kernel: $(uname -r)"
        echo "Architecture: $(uname -m)"
        which gcc &>/dev/null && echo "GCC version: $(gcc --version)"
        echo "CPU: $(cat /proc/cpuinfo | /bin/grep -E "model name|arch" | head -1)"
        echo "$(cat /proc/meminfo | /bin/grep "MemTotal")"
        echo "Chipset: $(/sbin/lspci 2> /dev/null | head -1 | cut -d ':' -f 2-)"

        echo
        get_sw_fw_info
        echo

        echo
        echo "############# LSPCI ##############"
        /sbin/lspci 2> /dev/null

        echo
        echo "############# LSPCI -N ##############"
        /sbin/lspci -n 2> /dev/null

        echo
        echo "############# LSMOD ##############"
        /sbin/lsmod

        echo
        echo "############# DMESG ##############"
        /bin/dmesg

        if [ -r /var/log/messages ]; then
            echo
            echo "############# Messages ##############"
            tail -50 /var/log/messages
        fi

        echo
        echo "############# Running Processes ##############"
        /bin/ps -ef
        echo "##############################################"
    } >> $DEBUG_INFO

    echo
    echo "Please open an issue in the http://support.mellanox.com/SupportWeb/service_center/SelfService and attach $DEBUG_INFO"
    echo
}

# Messes with host name, and being called unconditionally
# Write "<NODE_DESC> HCA-<n>" to the node_desc file of every device under
# /sys/class/infiniband, n counting the directory entries from 1.
# Before that: sleep NODE_DESC_TIME_BEFORE_UPDATE seconds (default 10), then
# re-read $CONFIG once per second while NODE_DESC is "localhost", for at most
# NODE_DESC_UPDATE_TIMEOUT seconds (default 120).
# NODE_DESC falls back to "hostname -s" when the configuration leaves it
# empty.
ib_set_node_desc()
{
    # "local -i" is the valid spelling. The former "local declare -i ..."
    # created a stray local variable named "declare" and made bash report
    # that "-i" is not a valid identifier.
    local -i UPDATE_TIMEOUT=${NODE_DESC_UPDATE_TIMEOUT:-120}
    local -i hca_id=1

    # Wait while node's hostname is set
    NODE_DESC_TIME_BEFORE_UPDATE=${NODE_DESC_TIME_BEFORE_UPDATE:-10}
    sleep $NODE_DESC_TIME_BEFORE_UPDATE
    # Reread NODE_DESC value
    . $CONFIG
    NODE_DESC=${NODE_DESC:-$(hostname -s)}
    while [ "${NODE_DESC}" == "localhost" ] && [ $UPDATE_TIMEOUT -gt 0 ]; do
        sleep 1
        . $CONFIG
        NODE_DESC=${NODE_DESC:-$(hostname -s)}
        UPDATE_TIMEOUT=$(( UPDATE_TIMEOUT - 1 ))
    done
    # Add node description to sysfs
    ibsysdir="/sys/class/infiniband"
    if [ -d ${ibsysdir} ]; then
        for hca in ${ibsysdir}/*
        do
            if [ -e ${hca}/node_desc ]; then
                log_msg "Set node_desc for $(basename $hca): ${NODE_DESC} HCA-${hca_id}"
                echo -n "${NODE_DESC} HCA-${hca_id}" >> ${hca}/node_desc
            fi
            hca_id=$(( hca_id + 1 ))
        done
    fi
}

# Returns 0 when /usr/bin/lscpu reports CPU family 6 and model 62,
# 1 otherwise.
# Globals written: cpu_family, cpu_model (the lscpu values without spaces)
is_ivyb()
{
    cpu_family=$(/usr/bin/lscpu 2>&1 | grep "CPU family" | cut -d':' -f 2 | sed -e 's/ //g')
    cpu_model=$(/usr/bin/lscpu 2>&1 | grep "Model:" | cut -d':' -f 2 | sed -e 's/ //g')

    if [ "${cpu_family}_${cpu_model}" = "6_62" ]; then
        return 0
    fi
    return 1
}

# On machines accepted by is_ivyb, set the 16-bit registers at 0x858 and
# 0x85C of every PCI device with ID 8086:0e28 to 0xffff when they read as
# 0000. Does nothing on other machines.
ivyb_fix_sb_registers() {
    local slot_list slot reg

    is_ivyb || return 0

    # Clear SB registers on IvyB machines
    slot_list=$(lspci -n | grep -w '8086:0e28' | cut -d ' ' -f 1)
    for slot in $slot_list; do
        for reg in 0x858.W 0x85C.W; do
            if [ "0x$(setpci -s $slot $reg)" == "0x0000" ]; then
                setpci -s $slot $reg=0xffff
            fi
        done
    done
}

# Tell whether a Mellanox virtual function is in use by a virtual machine.
# Returns 0 when a VF is found in use, 1 otherwise.
# Without a working virsh the answer comes from get_xen_vm_vf_pcis; with
# virsh, the "address domain" lines of every mlx4_core/mlx5_core PCI node
# device are matched against the XML of each running or paused guest.
# Globals written besides the declared locals: k, OIFS, NIFS; IFS is
# restored before every exit path of the inner loop.
is_active_vf()
{
    local xen_pcis domRegEx f g

    # test if have ConnectX with VFs
    # if not, no need to proceed further. Return 1 (no VFs active)
    lspci | grep Mellanox | grep Virtual > /dev/null
    if [ $? -ne 0 ] ; then
        # No VFs activated
        return 1
    fi

    # test for virsh
    virsh -v > /dev/null 2> /dev/null
    if [ $? -ne 0 ] ; then
        # No virsh
        # Fall back to the Xen tooling: any PCI ID printed means "active"
        xen_pcis=$(get_xen_vm_vf_pcis)
        if [ "$xen_pcis" != "" ]; then
            return 0
        fi
        return 1
    fi

    # test if running virsh by mistake on a guest
    virsh sysinfo > /dev/null 2> /dev/null
    if [ $? -ne 0 ] ; then
        # virsh running on a guest
        return 1
    fi

    #
    # for all devices using mlx4_core|mlx5_core, see if any have active VFs
    #
    for k in $(virsh nodedev-list 2>/dev/null | grep pci)
    do
        # Ignore none Mellanox devices
        if ! (virsh nodedev-dumpxml $k 2>/dev/null | grep -Eq "mlx4_core|mlx5_core"); then
            continue
        fi

        # get all domains of this device
        # domRegEx becomes an alternation ("a|b|c") of the device's
        # "address domain" lines with leading whitespace removed
        domRegEx=
        # Split the command output on newlines only, so every XML line is
        # one loop item; the default IFS is put back inside the loop body
        OIFS="${IFS}"
        NIFS=$'\n'
        IFS="${NIFS}"
        for f in $(virsh -d 4 nodedev-dumpxml $k 2>/dev/null | grep "address domain")
        do
            IFS="${OIFS}"
            f=$(echo "$f" | sed -e 's/^\s*//g')
            if [ "X$f" == "X" ]; then
                IFS="${NIFS}"
                continue
            fi
            if [ "X$domRegEx" == "X" ]; then
                domRegEx=$f
            else
                domRegEx="$domRegEx|$f"
            fi
            IFS="${NIFS}"
        done
        IFS="${OIFS}"

        # Nothing to match for this device
        if [ "X$domRegEx" == "X" ]; then
            continue
        fi

        # for all running VMs
        for g in $(virsh list 2>/dev/null | grep -E  "running|paused" | awk '{ print $2 }')
        do
            if (virsh dumpxml "$g" 2>/dev/null | grep "address domain" | grep -qE "$domRegEx"); then
                # There are active virtual functions
                return 0
            fi
        done
    done

    # NO GUESTS
    return 1
}

# Run the Mellanox firmware updater, if installed, and log the outcome.
# The EXIT_STATUS value and the "Updating FW ... Done" marker are read from
# /tmp/mlnx_fw_update.log. Sleeps five seconds before starting the updater.
run_fw_updater()
{
    local FWRC
    local updater=/opt/mellanox/mlnx-fw-updater/mlnx_fw_updater.pl
    local update_log=/tmp/mlnx_fw_update.log

    if [ ! -x "$updater" ]; then
        log_msg "fw_updater: $updater doesn't exist!"
        return
    fi

    sleep 5
    log_msg "fw_updater: running $updater ..."
    "$updater" >/dev/null 2>&1
    FWRC=$(grep EXIT_STATUS: "$update_log" 2>/dev/null | cut -d":" -f"2" | sed -r -e 's/\s//g')
    log_msg "fw_updater: RC $FWRC , log file: $update_log"

    if ! (grep -qE "Updating FW.*Done" "$update_log" 2>/dev/null); then
        log_msg "fw_updater: Didn't detect new devices with old firmware."
    else
        log_msg "fw_updater: Firmware was updated. Please reboot your system for the changes to take effect."
    fi
}

# Module parameter values printed by the kernel can differ from what is
# written in the conf files. Convert a value to the form the kernel prints,
# so that the two can be compared.
# Arguments:
#   $1 - module name
#   $2 - parameter name
#   $3 - value to convert
# The type is taken from the parameter's description in "modinfo -p":
# integer types are printed in decimal, bool values become Y or N, and
# anything else is printed unchanged.
convert_mod_param()
{
    local mod=$1 param_name=$2 val=$3
    local paramdesc

    paramdesc=$(modinfo -p "$mod" | grep "^$param_name:" | cut -d: -f2-)

    if [[ $paramdesc == *"(int)"* || $paramdesc == *"(uint)"* \
        || $paramdesc == *"(long)"* || $paramdesc == *"(ulong)"* \
        || $paramdesc == *"(short)"* || $paramdesc == *"(ushort)"* ]]; then
        val=$(printf "%d" "$val")
    elif [[ $paramdesc == *"(bool)"* ]]; then
        case "$val" in
            0 | n | N) val=N ;;
            1 | y | Y) val=Y ;;
        esac
    fi

    echo $val
}

# Return 0 (true) if an inbox module or any other module of a non-matching
# version is loaded, 1 otherwise.
# For every loaded module whose name matches the list below, the srcversion
# under /sys/module is compared with the one reported by modinfo; the first
# mismatch is logged and ends the search.
are_old_modules_loaded() {
    local candidates mod running_srcver installed_srcver
    local name_re='^be2net|^cxgb|^mlx|^iw_nes|^iw_cxgb|^ib_qib|^ib_mthca|^ocrdma|^ib_ipoib|^ib_srp|^ib_iser|^ib_uverbs|^ib_addr|^ib_mad|^ib_sa|^iw_cm|^ib_core|^mlxfw|^ib_ucm|^ib_cm|^rdma_ucm|^ib_umad|^rdma_cm|^compat|^ib_netlink|^rdma_rxe'

    # W/A: inbox drivers are loaded at boot instead of new ones
    candidates=$(/sbin/lsmod 2>/dev/null | grep -E "$name_re" | awk '{print $1}')
    for mod in $candidates; do
        running_srcver=$(/bin/cat /sys/module/$mod/srcversion 2>/dev/null)
        installed_srcver=$(/sbin/modinfo -Fsrcversion $mod 2>/dev/null)
        [ "X$running_srcver" = "X$installed_srcver" ] && continue
        log_msg "start(): Detected loaded old version of module '$mod'."
        return 0
    done
    return 1
}

# Compare the parameters of loaded modules with the "options ... mlx ..."
# lines found in /etc/modprobe.d/*.conf. At the first parameter whose live
# value under /sys/module differs from the configured one, log the mismatch,
# call stop and end the scan; in auto run mode the per-interface bootid files
# under /var/run are removed as well.
# NOTE(review): stop is not defined in this part of the file - presumably it
# unloads the driver stack; confirm against its definition.
# NOTE(review): real_value is not in the local list below, so it is left
# behind as a global.
check_loaded_modules_parameters() {
    local goFlag OIFS NIFS line curr_mod item param conf_value curr_value 

    # W/A: modules loaded from initrd without taking new params from /etc/modprobe.d/
    goFlag=1
    # Split the grep output on newlines only, so each options line is one
    # loop item; the default IFS is put back inside the loop body
    OIFS="${IFS}"
    NIFS=$'\n'
    IFS="${NIFS}"
    for line in $(grep -rE "options.*mlx" /etc/modprobe.d/*.conf 2>/dev/null | grep -v ":#" | cut -d":" -f"2-" | uniq)
    do
        IFS="${OIFS}"
        # Module name: awk prints field number NR, which is the first field
        # here because the input is a single line
        curr_mod=$(echo $line | sed -r -e 's/.*options //g' | awk '{print $NR}')
        if ! is_module $curr_mod; then
            continue
        fi
        # Walk over the name=value items that follow the module name
        for item in $(echo $line | sed -r -e "s/.*options\s*${curr_mod}//g")
        do
            param=${item%=*}
            conf_value=${item##*=}
            real_value=$(cat /sys/module/${curr_mod}/parameters/${param} 2>/dev/null)
            # Bring both values to the same form before comparing them
            conf_value=$(convert_mod_param $curr_mod $param $conf_value)
            real_value=$(convert_mod_param $curr_mod $param $real_value)
            if [ "X$conf_value" != "X$real_value" ]; then
                log_msg "start(): Detected '$curr_mod' loaded with '$param=$real_value' instead of '$param=$conf_value' as configured under /etc/modprobe.d/, calling stop..."
                goFlag=0
                stop
                # cleanup bootid files for all interfaces to honor ONBOOT in conf file.
                if [ "X$RUNMODE" == "Xauto" ]; then
                    /bin/rm -f /var/run/mlx_ifc-*.bootid &>/dev/null
                fi
                break
            fi
        done
        # A mismatch was handled: leave the outer loop too
        if [ $goFlag -ne 1 ]; then
            break
        fi
        IFS="${NIFS}"
    done
    IFS="${OIFS}"
}

# On machines accepted by is_ivyb, set the 16-bit registers at 0x858 and
# 0x85C of every PCI device with ID 8086:0e28 to 0xffff when they read as
# 0000. Does nothing on other machines.
# The device list and the register reads use /sbin/lspci and /sbin/setpci;
# the writes use setpci from PATH.
clear_ivyb_sb_registers() {
    local slot_list slot reg

    is_ivyb || return 0

    # Clear SB registers on IvyB machines
    slot_list=$(/sbin/lspci -n | grep -w '8086:0e28' | cut -d ' ' -f 1)
    for slot in $slot_list; do
        for reg in 0x858.W 0x85C.W; do
            if [ "0x$(/sbin/setpci -s $slot $reg)" == "0x0000" ]; then
                setpci -s $slot $reg=0xffff
            fi
        done
    done
}

# XenServer only: after the driver is loaded, remove stale "side"
# interfaces, re-plug the PIF of every mlx5_core interface and rebuild the
# bonds that use them. Does nothing when $XE is not executable.
start_xen_interfaces() {
    local mlx_en_interfaces i

    if [ ! -x "$XE" ]; then
        return
    fi

    xe_remove_side_interfaces
    # get_mlx_en_interfaces prints its result and keeps its own variable
    # local, so the list has to be captured here; a bare call left the list
    # empty, which meant no PIF was ever re-plugged.
    mlx_en_interfaces=$(get_mlx_en_interfaces mlx5_core)
    if [ -n "$mlx_en_interfaces" ]; then
        for i in $mlx_en_interfaces
        do
            xe_replug_pif $(xe_get_uuid $i)
        done
    fi
    xe_bond_recover mlx5_core
}

# Load the mlx5 driver modules when MLX5_LOAD is "yes".
# Loads mlx5_ib and then mlx5_core via load_module; a failure of either one
# is reported with echo_failure and makes the function return 1.
# When ENABLE_FW_TRACER is "yes", enables the first mlx5 trace event
# (mlx5_fw, then fw_tracer) whose "enable" file exists.
# Returns: 0 when skipped or when both modules loaded, 1 on a load failure.
load_mlx5() {
    [ "X${MLX5_LOAD}" == "Xyes" ] || return 0

    if ! load_module mlx5_ib; then
        echo_failure $"Loading Mellanox MLX5_IB HCA driver: "
        return 1
    fi

    if ! load_module mlx5_core; then
        echo_failure $"Loading Mellanox MLX5 HCA driver: "
        return 1
    fi

    # enable FW tracing
    if [ "X${ENABLE_FW_TRACER}" == "Xyes" ]; then
        for d in mlx5_fw fw_tracer; do
            [ -f /sys/kernel/debug/tracing/events/mlx5/$d/enable ] || continue
            echo 1 > /sys/kernel/debug/tracing/events/mlx5/$d/enable 2>/dev/null
            # Only the first event directory found is enabled.
            break
        done
    fi

    start_xen_interfaces
    return 0
}

# Load ESP Offload kernel modules for Innova IPsec when ESP_OFFLOAD_LOAD
# is "yes".
# A failure to load esp4_offload is an error (echo_failure, return 1);
# a failure to load esp6_offload only produces a warning.
# Returns: 0 when skipped or when esp4_offload loaded, 1 otherwise.
load_esp_offload() {
    [ "X${ESP_OFFLOAD_LOAD}" == "Xyes" ] || return 0

    if ! load_module_external esp4_offload; then
        echo_failure $"Loading ESP Offload for IPv4 module: "
        return 1
    fi

    # The IPv6 module is best-effort.
    load_module_external esp6_offload \
        || echo_warning $"Loading ESP Offload for IPv6 module: "

    return 0
}

# Load ib_ipoib unless IPOIB is not 1.
# After loading, if the module's send_queue_size parameter is above 1024 and
# lspci lists a 15b3:1011 device, log and print a notice asking for the
# parameter to be lowered. The notice does not change the return value.
# Returns: 0 when skipped, otherwise the status of "load_module ib_ipoib".
load_ipoib() {
    local my_rc

    if [ $IPOIB -ne 1 ]; then
        return 0
    fi

    load_module ib_ipoib
    my_rc=$?

    ipoib_send_queue_size=$(cat /sys/module/ib_ipoib/parameters/send_queue_size 2> /dev/null)
    if [ ! -z $ipoib_send_queue_size ] \
        && [ $ipoib_send_queue_size -gt 1024 ] \
        && (lspci -n | grep -qw 15b3:1011); then
        log_msg "IPoIB: Failed to bring up interface for Connect-IB device"
        log_msg "Please set ib_ipoib send_queue_size to be <= 1024 and restart driver"
        echo_failure $"Loading IPoIB driver for Connect-IB device:"
        echo "Please set ib_ipoib send_queue_size to be <= 1024 and restart driver"
    fi

    return $my_rc
}

# Set MAC address of PF via ECPF.
#
# SMARTNIC_PF_MAC_CONF="[<bdf1>-<MAC1>] [<bdf2>-<MAC2>] ..."
#
# For each "<bdf>-<MAC>" entry, write <MAC> to the smart_nic/pf/mac attribute
# of every net device found under /sys/bus/pci/devices/<bdf>/net that exposes
# that attribute. Entries whose PCI device directory does not exist are
# logged and skipped (they do not affect the return value).
#
# Globals:  SMARTNIC_PF_MAC_CONF (read), CONFIG (read, for the log message)
# Returns:  0 if nothing was configured or every write succeeded,
#           1 if at least one write failed.
set_macs_of_pf() {
    # All working variables are local so the caller's (global) loop
    # variables such as "i" or "mac" are not clobbered.
    local rc mac_conf bdf mac netdev_dir netdev mac_attr

    if [ "X${SMARTNIC_PF_MAC_CONF}" = "X" ]; then
        return 0
    fi

    rc=0

    # Intentionally unquoted: the setting is a whitespace-separated list.
    for mac_conf in ${SMARTNIC_PF_MAC_CONF}
    do
        bdf=${mac_conf%%-*}
        mac=${mac_conf##*-}
        if [ ! -d "/sys/bus/pci/devices/${bdf}/" ]; then
            log_msg "No such device: ${bdf}."
            log_msg "Check SMARTNIC_PF_MAC_CONF value in the $CONFIG"
            continue
        fi

        # Glob instead of parsing ls output; when nothing matches, the
        # literal pattern fails the -e test below and is skipped.
        for netdev_dir in "/sys/bus/pci/devices/${bdf}/net"/*
        do
            mac_attr="${netdev_dir}/smart_nic/pf/mac"
            [ -e "${mac_attr}" ] || continue
            netdev=${netdev_dir##*/}

            if echo "$mac" > "${mac_attr}"; then
                log_msg "PF MAC is set to $mac via ECPF $netdev"
            else
                log_msg "ERROR: Failed to set MAC $mac via ECPF $netdev"
                rc=1
            fi
        done
    done

    return $rc
}

# Load the modules listed in POST_LOAD_MODULES, one load_module call each.
#
# For ib_srp, additionally start the srp_daemon systemd unit when
# SRP_DAEMON_ENABLE is "yes".
#
# The status of each load_module call is captured immediately after the call,
# so that the follow-up SRP daemon handling cannot mask a failed load, and a
# failure of any module is remembered rather than being overwritten by a later
# successful one.
#
# Globals:  POST_LOAD_MODULES (read), SRP_DAEMON_ENABLE (read)
# Returns:  0 if every module loaded, otherwise the status of the last
#           load_module call that failed.
post_load_modules() {
    local rc mod mod_rc

    # Load configured modules
    rc=0
    for mod in $POST_LOAD_MODULES
    do
        load_module "$mod"
        mod_rc=$?

        # Start SRP daemon if needed
        if [ "$mod" = "ib_srp" ] && [ "X${SRP_DAEMON_ENABLE}" == "Xyes" ]; then
            systemctl start srp_daemon
        fi

        if [ $mod_rc -ne 0 ]; then
            echo_failure "Loading $mod"
            rc=$mod_rc
        fi
    done
    return $rc
}

# Raise the scheduling priority (renice -19) of the root-owned ib_mad<N>
# processes when RENICE_IB_MAD is "yes".
#
# Builds the process-name list ib_mad1 .. ib_mad<max_ports_num_in_hca>, polls
# pidof until at least as many root-owned matches exist as count_ib_ports
# reports (giving up after the retry limit), then renices every matching PID
# that is owned by root.
#
# Globals:  RENICE_IB_MAD (read), max_ports_num_in_hca (read)
renice_ib_mad() {
    local ports_num list_of_ibmads ib_mad_pids num_of_root_ibmad_procs get_pid_retries

    if [ X${RENICE_IB_MAD} != "Xyes" ]; then
        return
    fi

    # Set max_ports_num_in_hca variable
    # NOTE(review): count_ib_ports runs inside a command substitution (a
    # subshell), so a variable it assigns would not be visible here;
    # max_ports_num_in_hca presumably has to be set elsewhere - confirm.
    ports_num=`count_ib_ports`
    list_of_ibmads=""
    for (( i=1 ; $i <= ${max_ports_num_in_hca} ; i++ ))
    do
            list_of_ibmads="${list_of_ibmads} ib_mad${i}"
    done

    ib_mad_pids=($(pidof ${list_of_ibmads} 2> /dev/null))
    # Count how many of the found PIDs belong to user root.
    num_of_root_ibmad_procs=$(/bin/ps h -o user -p ${ib_mad_pids[*]} | grep -w root | wc -l)
    get_pid_retries=0
    while [ ${num_of_root_ibmad_procs} -lt $ports_num ]
    do
        # Wait maximum for 5 sec to get ib_mad process pid
        # (the loop gives up once more than 10 retries of 500000 usec each
        # have been made)
        if [ $get_pid_retries -gt 10 ]; then
                echo Failed to get $ports_num ib_mad PIDs to renice. Got ${num_of_root_ibmad_procs}.
                break
        fi
        usleep 500000
        ib_mad_pids=($(pidof ${list_of_ibmads} 2> /dev/null))
        num_of_root_ibmad_procs=$(/bin/ps h -o user -p ${ib_mad_pids[*]} | grep -w root | wc -l)
        let get_pid_retries++
    done
    # Renice only the PIDs whose owner is exactly "root"; errors are ignored.
    for ib_mad_pid in ${ib_mad_pids[*]}
    do
        if [ "$(/bin/ps -p ${ib_mad_pid} h -o user 2> /dev/null)" == "root" ]; then
                renice -19 ${ib_mad_pid} > /dev/null 2>&1
        fi
    done
}

# Bring up the driver stack.
#
# Sequence: refuse to run with active virtual functions; call stop first when
# are_old_modules_loaded succeeds; verify loaded module parameters; load the
# mlx5 / ESP offload / umad / uverbs / IPoIB modules; apply PF MAC settings;
# then, only if everything so far succeeded, load the post-load modules and
# run the optional tuning helpers.
#
# Exits the script with status 1 (after cleanup) when virtual functions are
# active or when the accumulated load status is non-zero.
# Returns: the accumulated status (RC), which at that point can only have
#          been raised by post_load_modules.
start()
{
    local RC
    RC=0
    # Starts as "1" and is added to RC below; it is not assigned again in this
    # function. NOTE(review): presumably a helper such as load_module resets
    # it to 0 once a module has been loaded - confirm.
    MODULES_LOADED_STATUS="1"

    if is_active_vf; then
        echo "There are active virtual functions. Cannot continue..."
        cleanup
        exit 1
    fi
    if are_old_modules_loaded; then
        log_msg "Calling stop..."
        stop
        # cleanup bootid files for all interfaces to honor ONBOOT in conf file.
        if [ "X$RUNMODE" == "Xauto" ]; then
            /bin/rm -f /var/run/mlx_ifc-*.bootid &>/dev/null
        fi
    fi

    check_loaded_modules_parameters
    clear_ivyb_sb_registers

    # Each step's exit status is summed into RC; any non-zero sum is a failure.
    load_mlx5
    RC=$[ $RC + $? ]

    load_esp_offload
    RC=$[ $RC + $? ]

    # Runs in the background; its output and status are discarded.
    ib_set_node_desc > /dev/null 2>&1 &

    if [ "X${UMAD_LOAD}" == "Xyes" ]; then
        load_module ib_umad
        RC=$[ $RC + $? ]
    fi

    if [ "X${UVERBS_LOAD}" == "Xyes" ]; then
        load_module ib_uverbs
        RC=$[ $RC + $? ]
    fi

    load_ipoib
    RC=$[ $RC + $? ]

    set_macs_of_pf
    RC=$[ $RC + $? ]

    RC=$[ $RC + $MODULES_LOADED_STATUS ]
    if [ "$MODULES_LOADED_STATUS" != "0" ]; then
        echo_failure "No HCA kernel modules loaded: "
    fi

    # A failure up to this point is fatal: collect debug info and exit.
    if [ $RC -eq 0 ]; then
        echo_success $"Loading HCA driver and Access Layer: "
    else
        echo_failure $"Loading HCA driver and Access Layer: "
        get_debug_info
        cleanup
        exit 1
    fi

    post_load_modules
    RC=$[ $RC + $? ]

    renice_ib_mad

    # Optional tuning helpers; each runs only if installed and enabled.
    if  [ -x /sbin/sysctl_perf_tuning ] && [ "X${RUN_SYSCTL}" == "Xyes" ]; then
        /sbin/sysctl_perf_tuning load
    fi

    if [ -x /usr/sbin/mlnx_affinity ] && [ "X${RUN_AFFINITY_TUNER}" == "Xyes" ];then
        /usr/sbin/mlnx_affinity start > /dev/null 2>&1
    fi

    if [ -x /usr/sbin/mlnx_tune ] && [ "X${RUN_MLNX_TUNE}" == "Xyes" ];then
        /usr/sbin/mlnx_tune > /dev/null 2>&1
    fi

    # send SIGHUP to irqbalance so that it will rescan the irqs
    irqbalance_pid=$(ps -C irqbalance -o pid= 2>/dev/null)
    if [ "X${irqbalance_pid}" != "X" ]; then
        kill -s SIGHUP ${irqbalance_pid} >/dev/null 2>&1
    fi

    if [ ! -z "$POST_START_DELAY" ] && [ $POST_START_DELAY -gt 0 ]; then
        sleep $POST_START_DELAY
    fi

    # W/A for ib_ipoib getting loaded in the middle of openibd stop
    # (this re-enables the "alias netdev-ib" line that unset_netdev_alias
    # comments out during stop)
    if (grep -q "^#alias netdev-ib" /etc/modprobe.d/ib_ipoib.conf); then
            sed -r -i -e "s/(^#)(alias netdev-ib.*)/\2/" /etc/modprobe.d/ib_ipoib.conf
    fi

    # Firmware updater runs in the background, only in "auto" run mode.
    if [[ "X$RUN_FW_UPDATER_ONBOOT" == "Xyes" && "X$RUNMODE" == "Xauto" ]]; then
            run_fw_updater >/dev/null 2>&1 &
    fi

    /bin/rm -f /var/run/mlx_os_booting &>/dev/null

    return $RC
}

# Shared retry budget for unload_rec: decremented once per one-second retry
# of an mlx* module in "auto" run mode.
UNLOAD_REC_TIMEOUT=100
# Recursively unload module $1.
#
# First tries "modprobe -r". If that fails, the "is in use by" list printed by
# rmmod is parsed and each user module is handled: it is unloaded recursively
# when it appears in UNLOAD_MODULES, when FORCE is non-zero, or when RUNMODE is
# "auto"; otherwise rm_mod is called on $1 (rm_mod exits the script when
# rmmod fails).
#
# If $1 is still loaded afterwards: in "auto" run mode, mlx* modules are
# retried after a one-second sleep while UNLOAD_REC_TIMEOUT is positive; in
# every other case rm_mod is called.
#
# Globals:  modprobe, UNLOAD_MODULES, FORCE, RUNMODE (read);
#           UNLOAD_REC_TIMEOUT (read and decremented)
unload_rec()
{
        local mod=$1
        shift

        if is_module $mod ; then
                ${modprobe} -r $mod >/dev/null 2>&1
                if [ $? -ne 0 ];then
                        # Extract the comma/space separated module names that follow
                        # "is in use by" in rmmod's error output.
                        for dep in `/sbin/rmmod $mod 2>&1 | grep "is in use by" | sed -r -e 's/.*use by[:]* //g' | sed -e 's/,/ /g'`
                        do
                                # if $dep was not loaded by openibd, don't unload it; fail with error.
                                # unless force option was given or OS is booting
                                # NOTE(review): "grep -q $dep" is a substring match against
                                # the UNLOAD_MODULES list, not a whole-word match.
                                if ! `echo $UNLOAD_MODULES | grep -q $dep` && [ $FORCE -eq 0 ] && [ "X$RUNMODE" != "Xauto" ]; then
                                        rm_mod $mod
                                else
                                        unload_rec $dep
                                fi
                        done
                fi
                if is_module $mod ; then
                        if [ "X$RUNMODE" == "Xauto" ] && [ $UNLOAD_REC_TIMEOUT -gt 0 ]; then
                                case "$mod" in
                                        mlx*)
                                        let UNLOAD_REC_TIMEOUT--
                                        sleep 1
                                        unload_rec $mod
                                        ;;
                                        *)
                                        rm_mod $mod
                                        ;;
                                esac
                        else
                                rm_mod $mod
                        fi
                fi
        fi
}

# Remove module $1 with rmmod; any failure is fatal.
#
# On failure: reports through echo_failure, prints whatever rmmod wrote,
# prints the positional parameter that is $2 after the initial shift (if
# set), runs cleanup and exits the script with status 1.
#
# Globals:  unload_log (written: combined stdout/stderr of rmmod)
rm_mod()
{
        local mod=$1
        shift

        # Success path: nothing else to do.
        if unload_log=$(/sbin/rmmod $mod 2>&1); then
                return 0
        fi

        echo_failure $"Unloading $mod"
        [ -z "${unload_log}" ] || echo $unload_log
        # get_debug_info
        [ ! -z $2 ] && echo $2
        cleanup
        exit 1
}

# Unload module $1 if it is currently loaded (per is_module).
#
# ib_qib gets extra teardown first (the ${_truescale} script is sourced with
# "stop", and /ipathfs is unmounted and removed when present). ib_qib and the
# listed HCA/iWARP drivers are followed by a two-second sleep. Every other
# module is given a second unload_rec attempt if the first one reports
# failure or leaves the module loaded.
unload()
{
    local mod=$1
    # Declared local so that the unload_log assignment made by rm_mod, when
    # reached from here, stays confined to this call.
    local unload_log

    is_module $mod || return 0

    case $mod in
        ib_qib)
            if [ -s ${_truescale} ]; then
                . ${_truescale} stop
            fi

            if [ -d /ipathfs ]; then
                umount /ipathfs
                rmdir /ipathfs
            fi

            unload_rec $mod
            sleep 2
            ;;
        ib_ipath | ib_mthca | mlx4_ib | mlx5_ib | ib_ehca | iw_cxgb3 | iw_cxgb4 | iw_nes)
            # ib_ipath: infinipath depends on modprobe.conf remove rule
            unload_rec $mod
            sleep 2
            ;;
        *)
            # Try rmmod if modprobe failed: case that previous installation
            # included more IB modules.
            if ! unload_rec $mod || is_module $mod; then
                unload_rec $mod
            fi
            ;;
    esac
}

# Refuse to stop if not running automatically (on boot) and some conditions are met
#
# In "auto" run mode this returns 0 immediately. Otherwise every check below
# is evaluated (none of them short-circuits the others) so that all blocking
# conditions are reported at once; if any of them triggered, a summary is
# printed, cleanup is run and the script exits with status 1.
#
# Side effect: active ibacm, srp_daemon and ibacm.socket systemd units are
# stopped before the process checks run.
#
# Globals:  RUNMODE, ORIG_ACTION (read)
check_if_ok_to_stop() {
    local blocking_modules cannot_continue
    if [ "X$RUNMODE" = "Xauto" ]; then
        return 0
    fi

    cannot_continue=0
    blocking_modules=""

    # Check if Lustre is loaded
    if ( grep -q "ko2iblnd" /proc/modules ); then
        echo "Please stop Lustre services before unloading the Infiniband stack."
        cannot_continue=1
    fi

    if is_active_vf; then
        echo "There are active virtual functions. Cannot continue..."
        cannot_continue=1
    fi

    # Check if applications which use infiniband are running
    for serv in ibacm srp_daemon ibacm.socket
    do
            if systemctl is-active --quiet $serv 2>/dev/null; then
                    systemctl stop $serv
            fi
    done

    local apps="opensm osmtest ibbs ibns ibacm"
    local pid
    for app in $apps
    do
        if ( /usr/bin/pgrep $app > /dev/null 2>&1 ); then
            echo "Please stop \"$app\" and all applications running over InfiniBand."
            cannot_continue=1
        fi
    done

    # Lookup for remaining applications using infiniband devices
    # Each entry is "<command> <pid> <user> <path>", de-duplicated.
    local entries
    if [ -d /dev/infiniband ]; then
        entries=$(lsof +c 0 -a +d /dev/infiniband 2>/dev/null | grep -v "^COMMAND" | \
        awk '{print $1 " " $2 " " $3 " " $NF}' | sort -u)
    fi
    if [ -n "$entries" ]; then
        cannot_continue=1
        echo
        echo "Please stop the following applications still using Infiniband devices:"

        while IFS= read -r entry; do
            app=$(echo "$entry" | cut -f1 -d' ')
            pid=$(echo "$entry" | cut -f2 -d' ')
            owner=$(echo "$entry" | cut -f3 -d' ')
            device=$(echo "$entry" | cut -f4 -d' ' | awk -F/ '{print $NF}')

            echo "$app($pid) user $owner is using device $device"
        done <<< "$entries"
        echo
    fi

    # Check if open-iscsi is running and if there are open iSER sessions
    if [ $(pidof iscsid | wc -w) -gt 0 ]; then
            iser_session_cnt=$(iscsiadm -m session 2>&1 | grep -c "^iser")

            if [ $iser_session_cnt -gt 0 ]; then
                    echo "Please logout from all open-iscsi over iSER sessions"
                    cannot_continue=1
            fi
    fi

    # Check for any multipath devices running over SRP devices
    # (a block device with entries under /sys/block/<dev>/holders counts as in use)
    if is_module ib_srp; then
        for f in `/bin/ls /sys/class/scsi_host`; do
            if [ -f /sys/class/scsi_host/$f/local_ib_port ]; then
                for i in `/bin/ls /sys/class/scsi_host/$f/device/target*/*/block* | awk -F: '{print $NF}'`
                do
                    holders=`ls /sys/block/$i/holders 2> /dev/null`
                    if [ -n "$holders" ]; then
                        cannot_continue=1
                        blocking_modules="${blocking_modules} ib_srp"
                        echo "Please flush multipath devices running over SRP devices"
                        # Leaves only the inner loop; later SCSI hosts are still examined.
                        break
                    fi
                done
            fi
        done
    fi

    # Storage/RPC modules that must be unloaded by the administrator first.
    for mod in ib_isert nvme_rdma nvmet_rdma rpcrdma xprtrdma ib_srpt; do
        if is_module $mod; then
            if is_ref $mod; then
                # A misleading name. If we got here: refcnt=0
                continue
            fi
            cannot_continue=1
            blocking_modules="${blocking_modules} $mod"
            case "$mod" in
            ib_isert) echo "Please close all isert sessions and unload 'ib_isert' module.";;
            nvme_rdma) echo "Please close all nvme sessions and unload 'nvme_rdma' module.";;
            nvmet_rdma) echo "Please close all nvmet sessions and unload 'nvmet_rdma' module.";;
            rpcrdma | xprtrdma | ib_srpt)
                echo "Please make sure module '$mod' is not in use and unload it."
                ;;
            esac
        fi
    done

    # Final verdict: print the summary and abort the whole script.
    if [ $cannot_continue -eq 1 ]; then
        echo
        echo "Error: Cannot unload the Infiniband driver stack due to the above issue(s)!"
        if [ "X${blocking_modules}" != "X" ]; then
            echo
            echo "To unload the blocking modules, you can run:"
            echo "# modprobe -rv ${blocking_modules}"
        fi
        echo
        echo "Once the above issue(s) resolved, run:"
        echo "# $0 $ORIG_ACTION"
        cleanup
        exit 1
    fi
}

# Explicitly shut down bonds to infiniband devices. Because?
# W/A for http://bugs.openfabrics.org/bugzilla/show_bug.cgi?id=2259
# still needed?
#
# For every bond listed in /sys/class/net/bonding_masters whose interface
# type is 32 (ARPHRD_INFINIBAND in the kernel's if_arp.h), detach all of its
# slaves and then delete the bond itself.
stop_ib_bonds() {
    local bond if_type slave

    for bond in $(cat /sys/class/net/bonding_masters 2> /dev/null) ; do
        if_type=$(cat "/sys/class/net/$bond/type" 2> /dev/null)
        # String comparison: an unreadable/empty type (e.g. the bond vanished
        # meanwhile) is skipped quietly instead of making "[" print an error.
        [ "$if_type" = "32" ] || continue

        for slave in $(cat "/sys/class/net/$bond/bonding/slaves" 2> /dev/null) ; do
            echo "-$slave" > "/sys/class/net/$bond/bonding/slaves"
        done
        echo "-$bond" > /sys/class/net/bonding_masters
    done
}

# W/A for ib_ipoib getting loaded in the middle of openibd stop:
# comment out (prefix with "#") every line of /etc/modprobe.d/ib_ipoib.conf
# that starts with "alias netdev-ib". The file is only rewritten when such a
# line is present.
unset_netdev_alias() {
    local ipoib_conf=/etc/modprobe.d/ib_ipoib.conf

    grep -q "^alias netdev-ib" $ipoib_conf || return 0
    sed -r -i -e "s/(^alias netdev-ib.*)/#\1/" $ipoib_conf
}

# Unload mlx4_vnic if it is loaded.
# Module last supported in mlnx_ofed_4_1. This can probably be dropped.
unload_mlx4_vnic() {
    is_module mlx4_vnic || return 0
    unload mlx4_vnic
}

# Stop IPoIB HA daemon if running.
#
# Reads the first line of $ipoib_ha_pidfile, keeps the tokens that are purely
# numeric and have a /proc/<pid> directory, and removes the pid file. If any
# PID was kept, those processes are sent SIGKILL, followed by whatever
# "pidof -x mcasthandle" reports.
stop_ipoib_ha() {
    local ipoib_ha_pids line mcastpid p

    if [ -f $ipoib_ha_pidfile ]; then
        read line < $ipoib_ha_pidfile
        for p in $line ; do
            # Stripping all digits must leave nothing, and the process must exist.
            if [ -z "${p//[0-9]/}" ] && [ -d "/proc/$p" ]; then
                ipoib_ha_pids="$ipoib_ha_pids $p"
            fi
        done
        /bin/rm -f $ipoib_ha_pidfile
    fi

    # No live daemon PID collected: leave mcasthandle alone as well.
    [ -n "${ipoib_ha_pids:-}" ] || return 0

    kill -9 ${ipoib_ha_pids} > /dev/null 2>&1

    mcastpid=$(pidof -x mcasthandle)
    [ -z "${mcastpid:-}" ] || kill -9 ${mcastpid} > /dev/null 2>&1
}

# Stop srp_daemon. Is it still needed? What about systemd service?
#
# Does nothing when pgrep finds no srp_daemon process. Otherwise the daemon is
# stopped through /etc/init.d/srpd when that script exists, or else by sending
# SIGTERM to the PIDs found and removing $srp_daemon_pidfile.
#
# Globals:  srp_daemon_pidfile (read)
stop_srp_daemon() {
    local srp_daemon_pids

    srp_daemon_pids=$(pgrep srp_daemon)
    # Bail out only when there is nothing to stop. (The previous test was
    # inverted: it returned when PIDs were found and ran the stop path with
    # an empty PID list otherwise.)
    if [ -z "${srp_daemon_pids:-}" ]; then
        return 0
    fi

    if [ -e /etc/init.d/srpd ]; then
        /etc/init.d/srpd stop > /dev/null 2>&1
    else
        # Unquoted on purpose: pgrep may report several PIDs.
        kill -15 ${srp_daemon_pids} > /dev/null 2>&1
        if [ -n "${srp_daemon_pidfile:-}" ] && [ -f "$srp_daemon_pidfile" ]; then
            /bin/rm -f "$srp_daemon_pidfile"
        fi
    fi
}

# Run "/etc/init.d/qlgc_vnic stop" when /sys/class/infiniband_qlgc_vnic/
# exists and the init script is executable. The script's stdout is discarded;
# its stderr is sent to this function's stdout.
stop_qlgc_vnic() {
    [ -d /sys/class/infiniband_qlgc_vnic/ ] || return 0

    if [ -x /etc/init.d/qlgc_vnic ]; then
        /etc/init.d/qlgc_vnic stop 2>&1 1>/dev/null
    fi
}

# Stop mlx4_fc: run "/sbin/mlxfc stop" when /sbin/mlxfc exists and the
# mlx4_fc module is loaded.
# mlxfc was removed after 4.9. Still needed?
stop_mlxfc() {
    [ -f /sbin/mlxfc ] || return 0

    if is_module mlx4_fc; then
        /sbin/mlxfc stop
    fi
}

# Unload modules listed in UNLOAD_MODULES, potentially
# recursively (but recursion can only span modules listed there).
# The list is processed in order, one "unload" call per entry.
unload_all_modules() {
    for mod in $UNLOAD_MODULES; do unload $mod; done
}

# Unload mlx4_core if it's still loaded. Do we really still need it?
#
# mlx4_core is unloaded directly when is_ref succeeds for it or when mlx4_en
# is not loaded. Otherwise mlx4_en is unloaded first (followed by mlx4_core),
# except when a 0x15b3 ethN device is present and MLX4_EN_LOAD is not "yes";
# in that case only a hint is printed and both modules stay loaded.
unload_mlx4_core() {
        is_module mlx4_core || return 0

        if is_ref mlx4_core || ! is_module mlx4_en; then
            unload mlx4_core
            return
        fi

        # Unload mlx4_en if one or more of the following cases takes place:
        # - No MLX4 eth devices present
        # - mlx4_en module was not loaded by the openibd script
        if (grep 0x15b3 /sys/class/net/eth*/device/vendor > /dev/null 2>&1) && [ "X$MLX4_EN_LOAD" != "Xyes" ]; then
            echo "MLX4_EN module is loaded and in use."
            echo "To unload MLX4_EN run: 'modprobe -r mlx4_en mlx4_core'"
            return
        fi

        # W/A for XenServer: comment out the mlx4_en alias while unloading ...
        if [ -e /etc/modprobe.conf ]; then
            perl -ni -e "s@\s*(alias.*mlx4_en)@# \$1@;print" /etc/modprobe.conf 2> /dev/null
        fi

        unload mlx4_en

        # ... and restore it afterwards.
        if [ -e /etc/modprobe.conf ]; then
            perl -ni -e "s@\s*#\s*(alias.*mlx4_en)@\$1@;print" /etc/modprobe.conf 2> /dev/null
        fi

        unload mlx4_core
}

# Unload the compatibility module (mlx_compat, or its pre-4.0 name "compat").
#
# mlx_compat is left loaded when nvme holds it and nvme's refcnt is non-zero;
# in that case a warning is printed, plus a second one when the loaded
# module's srcversion differs from the one modinfo reports.
unload_mlx_compat() {
    local loaded_srcver curr_srcver

    # Try the old name of this module. This name is pre-4.0. We can probably
    # drop this one:
    if is_module compat && (grep -q mlnx /sys/module/compat/parameters/* 2>/dev/null); then
        unload compat
    fi

    is_module mlx_compat || return 0

    # This is a big hack. Any better alternative than to look for nvme
    # that late?
    if [ ! -d /sys/module/mlx_compat/holders/nvme ]; then
        unload mlx_compat
        return
    fi

    # nvme holds mlx_compat but is itself idle: take both down.
    if [ $(cat /sys/module/nvme/refcnt) -eq 0 ]; then
        unload nvme
        unload mlx_compat
        return
    fi

    echo_warning $"mlx_compat is used by NVME. Leaving it loaded."
    loaded_srcver=$(/bin/cat /sys/module/mlx_compat/srcversion 2>/dev/null)
    curr_srcver=$(/sbin/modinfo mlx_compat 2>/dev/null | grep srcversion | awk '{print $NF}')
    if [ "X$loaded_srcver" != "X$curr_srcver" ]; then
        echo_warning $"Detected driver update. To load the new driver version reboot is required."
    fi
}

# Unload memtrack if it is loaded.
# Doesn't memtrack depend on mlx_compat? Or is it an issue of circular
# dependency? Handle it in the unload functions?
unload_memtrack() {
    is_module memtrack || return 0
    unload memtrack
}

# Run "/sbin/sysctl_perf_tuning unload" when the tool is executable and
# RUN_SYSCTL is "yes".
# sysctl_perf_tuning was removed in MLNX_OFED 5.4. Remove?
stop_sysctl_perf_tuning() {
    [ -x /sbin/sysctl_perf_tuning ] || return 0
    [ "X${RUN_SYSCTL}" == "Xyes" ] || return 0

    /sbin/sysctl_perf_tuning unload
}

# Run "/usr/sbin/mlnx_affinity stop" (output discarded) when the tool is
# executable and RUN_AFFINITY_TUNER is "yes".
stop_mlnx_affinity() {
    [ -x /usr/sbin/mlnx_affinity ] || return 0
    [ "X${RUN_AFFINITY_TUNER}" == "Xyes" ] || return 0

    /usr/sbin/mlnx_affinity stop > /dev/null 2>&1
}

# Tear down the driver stack.
#
# Runs the stop steps below strictly in the listed order, then removes
# /dev/infiniband, reports success and pauses for one second.
# check_if_ok_to_stop comes first and may exit the script before anything is
# touched.
stop()
{
    local stop_step

    for stop_step in \
        check_if_ok_to_stop \
        stop_ib_bonds \
        unset_netdev_alias \
        unload_mlx4_vnic \
        stop_ipoib_ha \
        stop_srp_daemon \
        stop_qlgc_vnic \
        stop_mlxfc \
        unload_all_modules \
        unload_mlx4_core \
        unload_mlx_compat \
        unload_memtrack \
        stop_sysctl_perf_tuning \
        stop_mlnx_affinity
    do
        "$stop_step"
    done

    # Open question carried over from the original: why is this removal needed?
    /bin/rm -rf /dev/infiniband
    echo_success $"Unloading HCA driver: "
    # Open question carried over from the original: why is this pause needed?
    sleep 1
}

# Print a human-readable summary of the driver state:
#   - whether mlx5_core is loaded,
#   - configured and currently UP IPoIB interfaces (when ib_ipoib is loaded),
#   - configured and currently UP Mellanox EN interfaces (when mlx5_core is
#     loaded),
#   - which of the STATUS_MODULES are loaded.
#
# Globals:  NETWORK_CONF_DIR, ip, STATUS_MODULES (read)
# Returns:  RC - set to 1 when mlx5_core is not loaded; see the review note
#           in the IPoIB loop for how it can be overwritten.
status()
{
    local RC=0 interfaces mlx_en_interfaces

    if is_module mlx5_core; then
        echo
        echo "  HCA driver loaded"
        echo
    else
        echo
        echo $"HCA driver is not loaded"
        echo
        RC=1
    fi

    if is_module ib_ipoib; then
       interfaces=`get_interfaces`
       if [ -n "$interfaces" ]; then
           echo $"Configured IPoIB devices:"
           echo $interfaces
           echo
           echo $"Currently active IPoIB devices:"

           for i in $interfaces
           do
                # Only interfaces that have an ifcfg-<name> file are reported.
                if [[ ! -e ${NETWORK_CONF_DIR}/ifcfg-${i} ]]; then
                    continue
                fi
                # Prints the interface name when its link flags end in "UP>".
                echo `${ip} -o link show $i | awk -F ": " '/UP>/ { print $2 }'`
                # NOTE(review): $? here is the status of the echo above, not of
                # the ip/awk pipeline, so this normally resets RC to 0 - confirm
                # whether that is intended.
                RC=$?
           done
       fi
    fi

    if is_module mlx5_core; then
       mlx_en_interfaces=`get_mlx_en_interfaces`
       if [ -n "$mlx_en_interfaces" ]; then
           echo $"Configured Mellanox EN devices:"
           for iface in $mlx_en_interfaces
           do
                # Names starting with "ib" are left out of the EN list.
                case $iface in
                        ib*)
                        continue
                        ;;
                        *)
                        echo $iface
                        ;;
                esac
           done
           echo
           echo $"Currently active Mellanox devices:"

           for i in $mlx_en_interfaces
           do
                echo `${ip} -o link show $i | awk -F ": " '/UP>/ { print $2 }'`
           done
       fi
    fi

    echo

    local cnt=0

    # The heading is printed once, just before the first loaded module.
    for mod in  $STATUS_MODULES
    do
        if is_module $mod; then
                [ $cnt -eq 0 ] && echo "The following OFED modules are loaded:" && echo
                let cnt++
                echo "  $mod"
        fi
    done

    echo

    return $RC
}

# Signal handler: print a notice instead of letting the script be
# interrupted.
#
# When the action is "stop" and more than 5 seconds have passed since
# start_time, the notice hints that applications may still be using the
# modules; in every other case it just asks the user to wait.
#
# Globals:  start_time, ACTION (read); run_time (written)
# Returns:  0 always.
trap_handler()
{
    # Seconds elapsed since start_time.
    let run_time=$(date +%s | tr -d '[:space:]')-${start_time}

    if ! { [ $run_time -gt 5 ] && [ "$ACTION" == "stop" ]; }; then
        printf "\nPlease wait ...\n"
        return 0
    fi

    printf "\nProbably some application are still using InfiniBand modules...\n"
    return 0
}

# Install trap_handler for SIGINT and SIGTERM.
# SIGKILL (signal 9) is deliberately not listed: it cannot be caught, and
# POSIX leaves the result of trying to trap it undefined.
trap_errors() {
    trap 'trap_handler' INT TERM
}

# Derive the FORCE flag.
#
# An ACTION containing "force-" (e.g. "force-restart") sets FORCE=1 and has
# the first "force-" removed from it; FORCE_MODE="yes" sets FORCE=1 as well.
# FORCE is left untouched otherwise.
#
# Globals:  ACTION (read, possibly rewritten), FORCE_MODE (read),
#           FORCE (written)
set_force() {
    if [[ "$ACTION" =~ force-.* ]]; then
        FORCE=1
        # Strip the first "force-" in the shell itself rather than piping an
        # unquoted echo through sed.
        ACTION=${ACTION/force-/}
    fi

    if [ "X${FORCE_MODE}" == "Xyes" ]; then
        FORCE=1
    fi
}

# Run the program whose path is stored in the variable named by $1.
# Nothing is run (and 0 is returned) when that variable is empty/unset or
# does not point at an executable file.
# Returns: the exit status of the program when it was run.
run_var() {
    [[ -n "${!1}" && -x "${!1}" ]] || return 0
    "${!1}"
}

# Run "start" wrapped by the optional OPENIBD_PRE_START / OPENIBD_POST_START
# hook programs, adding start's exit status to the global RC.
do_start() {
    local start_rc

    run_var OPENIBD_PRE_START

    start
    start_rc=$?
    RC=$(( $RC + start_rc ))

    run_var OPENIBD_POST_START
}

# Run "stop" wrapped by the optional OPENIBD_PRE_STOP / OPENIBD_POST_STOP
# hook programs, adding stop's exit status to the global RC.
do_stop() {
    local stop_rc

    run_var OPENIBD_PRE_STOP

    stop
    stop_rc=$?
    RC=$(( $RC + stop_rc ))

    run_var OPENIBD_POST_STOP
}

# Should get the global parameters
#
# One-time initialisation: runs the setup helpers below in a fixed order.
# Arguments:
#   $1 - forwarded to set_action
#   $2 - forwarded to set_run_mode
# NOTE(review): most helpers are defined outside this part of the file; the
# ordering presumably matters (e.g. set_force reads the ACTION value and so
# runs after set_action) - keep it unless the helpers are checked.
global_init() {
    # Sources the configuration file (exits when it is missing).
    read_config
    # Changes the working directory to /etc/infiniband.
    set_wd
    init_environment
    init_environment_xenserver
    set_start_time
    set_run_mode "$2"
    set_modprobe_ip
    set_action "$1"
    init_color_print
    set_static_ifaces_bootid_files
    set_modules_to_load
    # Installs trap_handler as the signal handler.
    trap_errors
    # Derives FORCE from ACTION / FORCE_MODE.
    set_force
}
