"""
Copyright © 2019-2022 NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
 
This software product is a proprietary product of Nvidia Corporation and its affiliates
(the "Company") and all right, title, and interest in and to the software
product, including all associated intellectual property rights, are and
shall remain exclusively with the Company.

This software product is governed by the End User License Agreement
provided with the software product.
"""
# This script collects inventory data (OS, HW, packages, cpu, etc.).
# Used by CollectX python provider which calls 'collect' function.
# This script runs Linux commands on the host and parses their output into a data structure.

import os
import re
import csv
import sys
import json
import time
import shutil
import hashlib
import logging
import subprocess
from enum import Enum
from datetime import datetime
from collections import namedtuple


# options - names of the configuration keys this script recognizes.
# a human-readable description of every option is kept in OPTIONS_NAME_TO_DESCRIPTION.
GRPC_OPTION           = "grpc"            # collect data using gRPC connection
ABSOLUTE_OPTION       = "absolute"        # pass collected data as is - no diffs calculation
TRANSFORM_TEXT_OPTION = "transform-text"  # set transform mode to string and not an integer
LEGACY_MODE_OPTION    = "legacy-mode"     # support legacy format - few fields as JSON strings,
                                          # "version" field in "Node" message and "aid" in header
                                          # use "message_type" in both message header and content
                                          # timestamp as seconds
LOG_LEVEL_OPTION      = "log-level"       # syslog levels - debug>=7, info>=6, warning>=4, error>=3
DEVICE_TYPE_OPTION    = "device-type"     # set "device_type" in various fields

# collector sampling interval options.
# each value is an integer followed by a unit suffix [s/m/h/d], e.g. "30s" or "6h"
PACKAGE_INFO_SAMPLING_OPTION = "package-info-sampling"
RESUTIL_SAMPLING_OPTION = "resource-utilization-sampling"
NODE_SAMPLING_OPTION = "node-sampling"
INVENTORY_SAMPLING_OPTION = "inventory-sampling"
HW_COUNTERS_SAMPLING_OPTION = "hw-counters-sampling"
AMBER_INFO_SAMPLING_OPTION = "amber-info-sampling"
CABLES_COUNTERS_SAMPLING_OPTION = "cables-counters-sampling"
PCIE_COUNTERS_SAMPLING_OPTION = "pcie-counters-sampling"
NET_IF_SAMPLING_OPTION = "net-if-sampling"
NET_IF_STATS_SAMPLING_OPTION = "net-if-stats-sampling"
DOM_SAMPLING_OPTION = "dom-sampling"
CONTAINERS_SAMPLING_OPTION = "containers-sampling"
IB_DEVICES_SAMPLING_OPTION = "ib-devices-sampling-interval"
ROCE_SAMPLING_OPTION = "roce-sampling-interval"

# maps every option name defined above to a human-readable description of that option.
# NOTE(review): presumably used to show help/usage text - the consumer is not visible here.
OPTIONS_NAME_TO_DESCRIPTION = {
    GRPC_OPTION: "enable host connection using gRPC - in case of non privileged container",
    ABSOLUTE_OPTION: "get absolute data, do not calculate diffs from previous iterations",
    TRANSFORM_TEXT_OPTION: "use verbose value for 'trans_mode' - full/partial",
    LEGACY_MODE_OPTION: "enable legacy format",
    LOG_LEVEL_OPTION: "set logging level debug >= 7, info >= 6, warning >= 4, error >= 3",
    DEVICE_TYPE_OPTION: "set value of device_type key in various message",
    PACKAGE_INFO_SAMPLING_OPTION: "set collection sampling time interval of package info message. use [s/m/h/d] suffix",
    RESUTIL_SAMPLING_OPTION: "set collection sampling time interval of resource utilization message. use [s/m/h/d] suffix",
    NODE_SAMPLING_OPTION: "set collection sampling time interval of node message. use [s/m/h/d] suffix",
    INVENTORY_SAMPLING_OPTION: "set collection sampling time interval of inventory message. use [s/m/h/d] suffix",
    HW_COUNTERS_SAMPLING_OPTION: "set collection sampling time interval of HW counters message. use [s/m/h/d] suffix",
    AMBER_INFO_SAMPLING_OPTION: "set collection sampling time interval of amBER info message. use [s/m/h/d] suffix",
    CABLES_COUNTERS_SAMPLING_OPTION: "set collection sampling time interval of cables counters message. use [s/m/h/d] suffix",
    PCIE_COUNTERS_SAMPLING_OPTION: "set collection sampling time interval of PCIE counters message. use [s/m/h/d] suffix",
    NET_IF_SAMPLING_OPTION: "set collection sampling time interval of network interfaces message. use [s/m/h/d] suffix",
    NET_IF_STATS_SAMPLING_OPTION: "set collection sampling time interval of network interfaces statistics message. use [s/m/h/d] suffix",
    DOM_SAMPLING_OPTION: "set collection sampling time interval of dom message. use [s/m/h/d] suffix",
    CONTAINERS_SAMPLING_OPTION: "set collection sampling time interval of container message. use [s/m/h/d] suffix",
    IB_DEVICES_SAMPLING_OPTION: "set collection sampling time interval of ib-devices message. use [s/m/h/d] suffix",
    ROCE_SAMPLING_OPTION: "set collection sampling time interval of roce message. use [s/m/h/d] suffix",
}

# prefix of a known process that runs on DPU itself
DPU_JOB_PREFIX = "kube-proxy"
# default user of ConnectionHandler; "sudo" is prepended to privileged commands for any other user
USER = "root"
# compared against members of the OS enum by the collectors.
# NOTE(review): presumably assigned during initialization - the assignment is not visible here
CURRENT_OS = None
SCRIPT_TAG = "[collect_inventory]"
# placeholder value stored for every key that could not be collected
NA = "N/A"
# keys of the dictionary returned by the collectors' collect() method
MESSAGE = "message"
MESSAGE_TYPE = "message_type"
TRANS_MODE = "trans_mode"
# possible values of the TRANS_MODE key
FULL_TRANSFORM = 0
PARTIAL_TRANSFORM = 1
# when True, PackInfoCollector reports only the differences from its previous collection
DIFF_ENABLED = True
# NOTE(review): presumably maps a section name to its collector object - populated outside this view
SECTION_TO_COLLECTOR = {}
# module load time; NodeCollector reports it under the "lastboot" key
SCRIPT_START_TIME = time.time()
# when True, the legacy output format is used (see get_json_data and GeneralDataCollector)
LEGACY_MODE    = False
LOGGER = logging.getLogger('collect_inventory')
# key that holds the device type in message metadata
DEVICE_TYPE = "device_type"

def arg_to_bool(arg):
    """
    convert a loosely typed argument to a boolean
    strings are True only for "t", "true" or "1" (case-insensitive),
    any other value follows the regular python truthiness
    """
    if isinstance(arg, str):
        return arg.lower() in ("t", "true", "1")
    if isinstance(arg, bool):
        return arg
    return True if arg else False

def bool_from_dict(d, arg, default):
    """
    :param d: dictionary to read from
    :param arg: key to look up in d
    :param default: value to use when the key is missing
    returns the found value (or the default) converted by arg_to_bool
    """
    return arg_to_bool(d.get(arg, default))

def get_na_dict(keys):
    """
    build a dictionary that maps each of the given keys to NA
    """
    return {name: NA for name in keys}

def log_not_collected(keys):
    """
    log a warning for every key that was not collected
    :param keys: a single key name (string) or an iterable of key names
    """
    names = [keys] if isinstance(keys, str) else keys
    for name in names:
        LOGGER.warning(f"{name} not collected")

def log_centos_keys_missing(keys):
    """
    standard warning for keys that are not collected on CentOS
    """
    message = f"Can't collect {keys} - not supported for CentOS"
    LOGGER.warning(message)

def handle_not_collected(key_arg, data):
    """
    log the keys that were not collected and mark them as NA in data
    :param key_arg: key name (string) or list of key names
    :param data: data dictionary, updated in place
    returns the same data dictionary
    """
    log_not_collected(key_arg)
    if isinstance(key_arg, str):
        data[key_arg] = NA
    elif isinstance(key_arg, list):
        for name in key_arg:
            data[name] = NA
    return data

# legacy function - for some clients, the output is expected to be JSON string
def get_json_data(val, key):
    """
    return val in the format that matches the LEGACY_MODE value.
    legacy function: some users expect several fields as a JSON string
    instead of a standard python object.
    :param val: value to convert
    :param key: name of the field, used for logging only
    returns val unchanged when LEGACY_MODE is off, otherwise its JSON string
    (or NA when it can not be serialized)
    """
    if not LEGACY_MODE:
        return val
    try:
        return json.dumps(val)
    except Exception as err:
        LOGGER.error(f"Setting '{key}'='{NA}'. error: {err}")
        return NA

def add_output_from_json_command(cmd, data, key):
    """
    run cmd (return code is ignored), parse its output as JSON and store it in data[key]
    data[key] is set to NA when the output can not be parsed
    returns True if the output was parsed, False otherwise
    """
    output = ConnectionHandler.get_command_output(cmd, ignore_rc=True)
    try:
        parsed = json.loads(output, strict=False)
    except Exception:
        LOGGER.warning("couldn't parse json returned from command '%s'", cmd)
        parsed = NA
        parsed_ok = False
    else:
        parsed_ok = True
    data[key] = parsed
    return parsed_ok

def __set_bfb_and_doca_version():
    """
    read /etc/mlnx-release and cache its content as the BFB version.
    the DOCA version is extracted from the same text when it is present.
    """
    release_txt = ConnectionHandler.get_command_output("cat /etc/mlnx-release")
    if not release_txt:
        return
    get_bfb_version.version = release_txt
    doca_match = re.search(r"DOCA_v{0,1}([0-9\.]+)", release_txt)
    if doca_match:
        get_doca_version.version = doca_match.group(1)

def get_bfb_version():
    """
    returns the BFB version (None when it can not be read)
    """
    if getattr(get_bfb_version, "version", None) is None:
        # the value is cached on the function object since BFB can't change while script is running
        get_bfb_version.version = None
        __set_bfb_and_doca_version()
    return get_bfb_version.version

def get_doca_version():
    """
    returns the DOCA version (None when it can not be read)
    on DPU BFB, the DOCA version is in a specific file with a known structure
    """
    if getattr(get_doca_version, "version", None) is None:
        # the value is cached on the function object since BFB can't change while script is running
        get_doca_version.version = None
        __set_bfb_and_doca_version()
    return get_doca_version.version

def get_mst_devices():
    """
    returns a list of mst devices (paths under /dev/mst), taken from 'mst status -v'.
    a non-empty result is cached on the function object; while the cached list
    is empty the command runs again on every call.
    """
    if not getattr(get_mst_devices, "devices", None):
        devices = []
        output = ConnectionHandler.get_command_output("mst status -v")
        if output:
            # raw string - "\S" is an invalid escape sequence in a regular string literal
            mst_dev_re = re.compile(r"/dev/mst/\S+")
            # at most one device (the first match) is taken from every line
            matches = (mst_dev_re.search(line) for line in output.splitlines())
            devices = [match.group(0) for match in matches if match]
        get_mst_devices.devices = devices
    return get_mst_devices.devices

def get_network_interfaces():
    """
    returns the parsed JSON output of 'ip --json link show'
    an empty list is returned when the command has no output or the output is not valid JSON
    """
    cmd = "ip --json link show"
    output = ConnectionHandler.get_command_output(cmd)
    if not output:
        return []
    try:
        return json.loads(output)
    except Exception as err:
        LOGGER.error(err)
        LOGGER.error(f"could not run cmd {cmd}")
        return []


def parse_ethtool_output(cmd, out, key, ifname):
    """
    run cmd and store its "name: value" lines as a dictionary in out[key]
    :param cmd: command to run
    :param out: output dictionary, updated in place
    :param key: key in out that receives the parsed dictionary
    :param ifname: interface name, used for logging only
    the first output line is skipped. out[key] is set to NA when nothing was parsed
    """
    raw_data = ConnectionHandler.get_command_output(cmd)
    body_lines = raw_data.split('\n')[1:] if raw_data else []
    if body_lines:
        parsed = out[key] = {}
        for line in body_lines:
            name, colon, value = line.partition(":")
            if colon:
                parsed[name.strip()] = value.strip()
            else:
                LOGGER.warning("(%s) ignoring unexpected line: '%s'", key, line)
    if not out.get(key):  # missing or empty
        out[key] = NA
    if out[key] == NA:
        LOGGER.warning("cannot collect %s %s info", ifname, key)

def get_hca_buses():
    """
    returns a list of PCI addresses (domain:bus:device.function) of the
    non-virtual Mellanox Ethernet/Infiniband/Network devices listed by lspci.
    an empty list is returned when the command fails.
    """
    cmd = 'lspci -D |grep -i Mellanox | egrep -i "(Ethernet|Infiniband|Network)" | grep -iv Virtual'
    res = ConnectionHandler.get_command_output(cmd=cmd)
    if res is None:
        return []
    # the dot between device and function is escaped so it matches a literal '.' only
    return re.findall(r'([0-9a-f]{4}:[0-9a-f]{2}:[0-9a-f]{2}\.[0-9a-f]+)\s+', res)

def get_map_bus_interface():
    """
    returns a dictionary that maps a PCI bus to one network interface of that bus:
    the first listed interface whose 'ethtool -i' driver is mlx<N>_core/en/ib.
    buses without such an interface are not included.
    """
    bus_to_interface = {}
    driver_re = re.compile(r'driver:\s*(mlx\d+_(core|en|ib))')
    for bus in get_hca_buses():
        escaped_bus = bus.replace(r':', r'\:')
        listing = ConnectionHandler.get_command_output(cmd=f'ls /sys/bus/pci/devices/{escaped_bus}/net/')
        if listing is None:
            continue
        # listing example :
        # ls /sys/bus/pci/devices/0000\:03\:00.0/net/
        # en3f0pf0sf0  p0  pf0hpf
        for ifname in re.findall(r'\w+', listing):
            driver_info = ConnectionHandler.get_command_output(cmd=f'ethtool -i {ifname}')
            if driver_info is not None and driver_re.search(driver_info):
                bus_to_interface[bus] = ifname
                break
    return bus_to_interface

def get_map_bus_rdma_and_interface():
    """
    returns a list of dictionaries with the keys "bus", "rdma" and "interface",
    one for every bus/interface pair that has an rdma device
    """
    mappings = []
    rdma_re = re.compile(r'(mlx\d+_\d+)')
    for bus, interface in get_map_bus_interface().items():
        listing = ConnectionHandler.get_command_output(cmd=f'ls /sys/class/net/{interface}/device/infiniband/')
        if listing is None:
            continue
        # listing example :
        # ls /sys/class/net/p0/device/infiniband/
        # mlx5_0
        found = rdma_re.search(listing)
        if found:
            mappings.append({"bus": bus, "rdma": found.group(1), "interface": interface})
    return mappings

def get_mlxconfig_query(bus):
    """
    returns the output of 'mlxconfig -d <bus> q', or an empty string when the command fails
    """
    res = ConnectionHandler.get_command_output(cmd=f'mlxconfig -d {bus} q')
    return '' if res is None else res

class PrivilegedExecuterException(Exception):
    """
    raised when the DPE client used for the gRPC connection can not be created
    """

class ConnectionHandler:
    """
    runs shell commands either locally or on the host through gRPC.
    usually this script runs on a container as part of DTS,
    hence a connection to host is needed.
    supported connection type is gRPC.
    a single shared handler is kept on the class - see set()
    """
    _handler = None

    def __init__(self, grpc=False, user=USER):
        """
        :param grpc: run commands through the DPE gRPC client instead of a local subprocess
        :param user: user that runs the commands (affects "sudo" usage)
        raises PrivilegedExecuterException when grpc is set and the client can not be created
        """
        self._use_grpc = grpc
        self._user = user
        self._grpc_client = None
        if not self._use_grpc:
            return
        self._grpc_client = self._set_dpe_client()
        LOGGER.info("Connected to DPE")

    def _set_dpe_client(self):
        """
        import the DPE client and return a new client object
        """
        # add dpe_client dir to PYTHONPATH
        clx_root = os.getenv('CLX_ROOT', "/opt/mellanox/collectx")
        sys.path.append(os.path.join(clx_root, "services", "dpe", "client"))
        try:
            from dpe_client import DpeClient
            return DpeClient()
        except Exception:
            raise PrivilegedExecuterException("Failed to connect. Please check DPE is active.")

    @classmethod
    def get_command_output(cls, cmd, sudo_required=False, ignore_rc=False):
        """
        run cmd through the shared handler and return its output
        :param cmd: shell command to run
        :param sudo_required: the command needs root privileges
        :param ignore_rc: return the output even if cmd returned an error code
        returns None if the handler is not set, or if cmd returned an error code
        (unless ignore_rc is set)
        """
        handler = cls._handler
        if handler is None:
            LOGGER.error("connection handler not set")
            return None

        if handler._use_grpc:
            # get output through grpc
            command_result = handler._grpc_client.run_command(cmd, sudo_required)
            if command_result.rc == 0 or ignore_rc:
                return command_result.output.strip()
            return None

        if sudo_required and handler._user != "root":
            cmd = "sudo " + cmd
        status, txt = subprocess.getstatusoutput(cmd)
        if status == 0 or ignore_rc:
            return txt
        return None

    @classmethod
    def set(cls, grpc=False):
        """
        create the shared handler. does nothing if a handler already exists
        """
        if cls._handler is not None:
            return
        cls._handler = ConnectionHandler(grpc=grpc)

class OS(Enum):
    """
    operating system types that the collectors distinguish between
    """
    UBUNTU = 1  # packages are queried with dpkg-query
    CENT = 2    # some keys are not collected on this OS

class TimerException(Exception):
    """
    raised by Timer when the given interval string is invalid
    """

class Timer:
    """
    Timing functionalities - All collectors have a collection time interval
    """
    # number of seconds in every supported unit suffix
    _UNIT_TO_SECONDS = {
        "s": 1,
        "m": 60,
        "h": 60 * 60,
        "d": 60 * 60 * 24,
    }

    def __init__(self, interval):
        """
        interval is a string of an integer with last char of units
        s - seconds
        m - minutes
        h - hours
        d - days
        raises TimerException if interval is invalid
        """
        # -1 makes passed() return True until the first reset()
        self._last_ts = -1
        self._interval = -1
        self.set_interval(interval)

    def set_interval(self, interval):
        """
        set the interval from a string such as "30s" or "6h" (unit is case-insensitive)
        raises TimerException if interval is invalid; in that case the
        previous interval is left untouched
        """
        try:
            count = int(interval[:-1])
            units = interval[-1].lower()
        except (TypeError, ValueError, IndexError, AttributeError) as err:
            # not a string, empty, or the numeric part is not an integer
            raise TimerException("invalid input") from err
        seconds_per_unit = self._UNIT_TO_SECONDS.get(units)
        if seconds_per_unit is None:
            raise TimerException("invalid input")
        # assign only after the whole input was validated
        self._interval = count * seconds_per_unit

    def passed(self):
        """
        returns True if at least one interval passed since the last reset()
        """
        now = time.time()
        return now >= self._last_ts + self._interval

    def reset(self):
        """
        start a new interval from the current time
        """
        self._last_ts = time.time()

class TimerCollector:
    """
    base class for collectors that collect once in a fixed time interval.
    subclasses must implement _collect()
    """
    def __init__(self, interval, post_collection_func=None):
        """
        interval: sampling interval string, see Timer
        post_collection_func: to run on collected data - returns resolved data dict with metadata
        """
        self._timer = Timer(interval)
        self._post_collection_func = post_collection_func or None

    def collect(self, section_name):
        """
        main method to run collection and do post collection processing.
        returns an empty dictionary when the interval did not pass yet or nothing
        was collected. otherwise returns the header metadata dictionary, with the
        collected message(s) as a list under the MESSAGE key.
        """
        if not self._timer.passed():
            return {}
        self._timer.reset()

        msg = self._collect(GeneralDataCollector.get_general_metadata())
        if not msg:
            return {}

        metadata = GeneralDataCollector.get_header_metadata()
        post_process = self._post_collection_func
        if post_process:
            msg, metadata = post_process(msg, metadata, section_name)
        else:
            metadata[TRANS_MODE] = FULL_TRANSFORM
            metadata[MESSAGE_TYPE] = section_name
            if LEGACY_MODE and isinstance(msg, dict):
                msg[MESSAGE_TYPE] = section_name

        # post processing may leave nothing to send
        if not msg:
            return {}
        metadata[MESSAGE] = msg if isinstance(msg, list) else [msg]
        return metadata

    def _collect(self, gm):
        """
        collect the data. gm is the general metadata dictionary of this collection
        """
        raise NotImplementedError("Must override _collect")


class GeneralDataCollector():
    """
    Simple utility collector to pass general metadata for all messages.
    some fields are required per message and some are per data item
    """
    GENERAL_METADATA_KEYS = {"timestamp", "hostname"}
    device_type = "dpu"
    hostname = NA

    @classmethod
    def set_device_type(cls, device_type):
        """
        set the device type that is reported in the metadata
        """
        cls.device_type = device_type

    @classmethod
    def get_header_metadata(cls):
        """
        metadata for message headers
        """
        # "aid" is an identifier index. here using machine hostname
        id_key = "aid" if LEGACY_MODE else "hostname"
        header = {}
        header[id_key] = cls.hostname
        header[TRANS_MODE] = PARTIAL_TRANSFORM
        header[DEVICE_TYPE] = cls.device_type
        return header

    @classmethod
    def get_general_metadata(cls):
        """
        same keys as in GeneralDataCollector.GENERAL_METADATA_KEYS
        legacy mode: timestamp is time.time() as is, and the device type is added
        otherwise: timestamp is time.time() multiplied by 1e6 and rounded
        """
        now = time.time()
        if LEGACY_MODE:
            return {
                "timestamp": now,
                "hostname": cls.hostname,
                DEVICE_TYPE: cls.device_type,
            }
        return {
            "timestamp": round(now * 1e6),
            "hostname": cls.hostname,
        }

    @classmethod
    def set_hostname(cls):
        """
        read the hostname with the 'hostname' command, unless it was already set
        returns False if the command failed, True otherwise
        """
        if cls.hostname is not NA:
            return True
        txt = ConnectionHandler.get_command_output("hostname")
        if txt is None:
            return False
        cls.hostname = txt
        return True

class HashCollector():
    """
    general class to pass a hash value of another collector in a separate message.
    this is useful for reasons of data caching.
    sampling rate depends on a separate entity, i.e., the one invoking self.update_from_str()
    """
    def __init__(self):
        self._val = ""
        self._should_send = False

    def update_from_str(self, txt):
        """
        store the md5 hex digest of txt and mark it to be sent on the next collect()
        """
        self._should_send = True
        digest = hashlib.md5(txt.encode())
        self._val = digest.hexdigest()

    def collect(self, section_name):
        """
        returns a message with the stored hash if it was updated since the
        previous call, otherwise an empty dictionary
        """
        if not self._should_send:
            return {}
        self._should_send = False

        data = GeneralDataCollector.get_header_metadata()
        data[TRANS_MODE] = FULL_TRANSFORM
        msg = GeneralDataCollector.get_general_metadata()
        data[MESSAGE_TYPE] = section_name
        msg["hash"] = self._val
        data[MESSAGE] = [msg]
        return data



class PackInfoCollector(TimerCollector):
    """
    Collector for installed packages information
    """
    def __init__(self, config):
        """
        config: options dictionary. PACKAGE_INFO_SAMPLING_OPTION sets the sampling interval
        (default is 6 hours)
        """
        sampling_interval = config.get(PACKAGE_INFO_SAMPLING_OPTION, "6h")
        super().__init__(sampling_interval, self._post_collection_impl)
        self._collected = {}      # package name -> package dictionary already sent
        self._msg_metadata = {}   # general metadata of the current collection
        self._is_first_collection = True
        self._hash_collector = None

    def set_hash_collector(self, collector):
        """
        collector: object with 'update_from_str' method, which gets the raw packages text
        on every collection
        """
        self._hash_collector = collector

    def _post_collection_impl(self, packages, metadata, section_name):
        """
        message logic is to send all packages on first iteration (FULL_TRANSFORM),
        and then send only diffs (PARTIAL_TRANSFORM)
        when DIFF_ENABLED is off, all packages are sent every time (FULL_TRANSFORM)
        returns the packages to send and the updated metadata
        """
        names = set()
        resolved = []
        metadata[MESSAGE_TYPE] = section_name
        if not packages:
            return {}, metadata

        if not DIFF_ENABLED:
            metadata[TRANS_MODE] = FULL_TRANSFORM
        elif self._is_first_collection:
            self._is_first_collection = False
            # FULL_TRANSFORM is required only on first collection
            metadata[TRANS_MODE] = FULL_TRANSFORM

        # find new packages
        # NOTE(review): a version change of an already known package is not
        # reported as a diff - confirm this is intended
        for pckg in packages:
            name = pckg["package_name"]
            if LEGACY_MODE:
                pckg[MESSAGE_TYPE] = section_name
            if DIFF_ENABLED:
                names.add(name)
                if name not in self._collected:
                    resolved.append(pckg)
                    self._collected[name] = pckg
            else:
                # diff is not allowed - no need to track on diffs
                resolved.append(pckg)
        if DIFF_ENABLED:
            # find removed packages
            removed = set(self._collected.keys()) - names
            for r in removed:
                pckg_dict = self._collected[r]
                pckg_dict["active"] = False
                pckg_dict["deleted"] = True
                pckg_dict.update(self._msg_metadata)
                resolved.append(pckg_dict)
                del self._collected[r]
        return resolved, metadata

    def _collect(self, gm):
        """
        query the package manager and return a list of package dictionaries.
        the query prints one JSON object per package in every line.
        returns an empty list if the query failed
        """
        self._msg_metadata = gm

        # collect default dictionaries as string
        if CURRENT_OS == OS.UBUNTU:
            if DIFF_ENABLED:
                cmd = """dpkg-query --show -f \'{"package_name": "${Package}","version": "${Version}","status": "${Status}", "active": true, "deleted": false}\n\'"""
            else:
                cmd = """dpkg-query --show -f \'{"package_name": "${Package}","version": "${Version}","status": "${Status}"}\n\'"""
        else:
            # braces are escaped with a backslash in the command itself, hence the
            # explicit "\\" - a bare "\{" is an invalid python escape sequence
            if DIFF_ENABLED:
                cmd = """rpm -qa --queryformat '\\{"package_name": "%{NAME}", "version": "%{VERSION}", "status": "installed", "active": true, "deleted": false\\}\n'"""
            else:
                cmd = """rpm -qa --queryformat '\\{"package_name": "%{NAME}", "version": "%{VERSION}", "status": "installed"\\}\n'"""
        txt = ConnectionHandler.get_command_output(cmd)
        if txt is None:
            log_not_collected("packages information")
            return []
        if self._hash_collector:
            self._hash_collector.update_from_str(txt)

        # convert json string to dictionary and add boolean flags
        dicts_list = []
        for pckg_str in txt.split('\n'):
            if not pckg_str.strip():
                # blank line (e.g. empty output) - nothing to parse
                continue
            try:
                as_dict = json.loads(pckg_str)
            except ValueError as err:
                # one malformed line must not drop all other packages
                LOGGER.warning(f"ignoring unexpected package line '{pckg_str}': {err}")
                continue
            if LEGACY_MODE:
                as_dict.update(self._msg_metadata)
            dicts_list.append(as_dict)
        return dicts_list

class ResUtilCollector(TimerCollector):
    """
    Resource utilization collector
    (cpu, memory and disk utilization, read-only state of root filesystem,
    processes list and services list)
    """
    def __init__(self, config):
        """
        config: options dictionary. RESUTIL_SAMPLING_OPTION sets the sampling interval
        (default is 30 seconds)
        """
        sampling_interval = config.get(RESUTIL_SAMPLING_OPTION, "30s")
        super().__init__(sampling_interval)

    @staticmethod
    def _run_one_subprocess_float(cmd, data, key):
        """
        run cmd, convert its output to float and store it in data[key]
        the key is marked as not collected when the command fails or its
        output is not a number
        returns data
        """
        val = None
        txt = ConnectionHandler.get_command_output(cmd)
        if txt:
            try:
                val = float(txt)
            except ValueError:
                # unexpected output must not abort collection of the other keys
                LOGGER.warning(f"({key}) ignoring non numeric output: '{txt}'")
        if val is None:
            return handle_not_collected(key, data)
        if LEGACY_MODE:
            # convert range from [0,1] to [0,100]
            val *= 100
        data[key] = val
        return data

    def get_cpu_utilization(self, data, key):
        """
        store cpu utilization in data[key], based on the '%Cpu' line of 'top'
        """
        cmd = "top -bn 2 -d 0.01 | grep '^%Cpu' | tail -n 1 | awk '{print ($2+$4+$6)/100}'"
        return self._run_one_subprocess_float(cmd, data, key)

    def get_memory_utilization(self, data, key):
        """
        store memory utilization in data[key], based on the 'Mem' line of 'free'
        """
        cmd = """free -t -m | grep -i "^mem" | awk '{print $3/$2}'"""
        return self._run_one_subprocess_float(cmd, data, key)

    @staticmethod
    def _get_disk_utilization_dict(name):
        """
        name: path to pass to os.statvfs
        returns a dictionary with the keys "percent", "total" and "used"
        """
        # copied from psutil module to reduce modules (function - disk_usage)
        # https://github.com/giampaolo/psutil
        # method contains many optional errors - wrap call with "try-except"
        # (e.g. the command output is None when the command fails)
        txt = ConnectionHandler.get_command_output(f"python3 -c \"import os; print(os.statvfs('{name}'))\"")
        # the printed result looks like "os.statvfs_result(f_bsize=4096, f_frsize=4096, ...)"
        start = txt.find('(')
        end = txt.find(')')
        txt = txt[start + 1:end]
        keys = []
        values = []
        tokens = txt.split(", ")
        for tok in tokens:
            split = tok.split('=')
            keys.append(split[0])
            values.append(int(split[1]))
        st_type = namedtuple("st", keys)
        st = st_type(*values)

        # Total space which is only available to root (unless changed
        # at system level).
        total = (st.f_blocks * st.f_frsize)
        # Remaining free space usable by root.
        avail_to_root = (st.f_bfree * st.f_frsize)
        # Remaining free space usable by user.
        avail_to_user = (st.f_bavail * st.f_frsize)
        # Total space being used in general.
        used = (total - avail_to_root)
        # Total space which is available to user (same as 'total' but
        # for the user).
        total_user = used + avail_to_user

        if total_user == 0:
            res = 0.0
        else:
            res = 100 * used / total_user
        ret = {
            "percent": res,
            "total": total,
            "used": used
        }
        return ret

    @staticmethod
    def _get_disk_partitions():
        """
        returns a list of the device names that appear in the tables printed by 'fdisk -l'
        an empty list is returned when the command fails
        """
        ret = []
        txt = ConnectionHandler.get_command_output("fdisk -l", True)
        if txt is None:
            LOGGER.error(f"could not list disk partitions: {txt}")
            return ret
        txt = txt.split('\n')
        in_table = False
        for line in txt:
            if in_table:
                if line:
                    # first column of a table row is the device name
                    ret.append(line.split()[0])
                else:
                    # an empty line ends the table
                    in_table = False
            else:
                # a table starts after its header line
                line = line.strip()
                in_table = line.startswith("Device ")
        return ret

    @staticmethod
    def _get_mount_points(partitions):
        """
        partitions: list of device names
        returns a dictionary that maps a device name to its mount point.
        devices without a mount point that starts with '/' are not included
        """
        mnt_points = {}
        for prt in partitions:
            cmd = f"lsblk {prt} --output MOUNTPOINT"
            txt = ConnectionHandler.get_command_output(cmd)
            if txt is None:
                LOGGER.error(f"could not run command '{cmd}': {txt}")
                continue
            # expected output is a title line and one mount point line
            txt = txt.split('\n')
            if len(txt) != 2:
                LOGGER.error(f"could not find {prt} mount point")
                continue
            mnt_p = txt[1]
            if mnt_p and mnt_p[0] == '/':
                mnt_points[prt] = mnt_p
        return mnt_points

    def get_disk_utilization(self, data, key):
        """
        store in data[key] a dictionary that maps a partition to its utilization dictionary.
        only the root directory is checked when no mounted partition is found
        """
        partitions = self._get_disk_partitions()
        mnt_points = self._get_mount_points(partitions)
        if not mnt_points:
            LOGGER.warning("reading disk utilization for root only")
            mnt_points = {'/': '/'}
        dud = {}
        for p, mnt_pt in mnt_points.items():
            try:
                res = self._get_disk_utilization_dict(mnt_pt)
                if res:
                    dud[p] = res
            except Exception:
                LOGGER.error(f"Could not collect disk utilization info for disk {mnt_pt}")

        if dud:
            data[key] = dud
        else:
            data = handle_not_collected(key, data)
        return data

    @staticmethod
    def get_disk_read_only(data, key):
        """
        store in data[key] True if the mount options of '/' start with "ro", False otherwise
        """
        cmd = "findmnt / -n -o FS-OPTIONS"
        txt = ConnectionHandler.get_command_output(cmd)
        if txt:
            data[key] = txt.startswith("ro")
        else:
            data = handle_not_collected(key, data)
        return data

    @staticmethod
    def get_processes_list(data, key):
        """
        store in data[key] a list of dictionaries, one per process, as listed by 'top'.
        dictionary keys are the column names of the processes table.
        data[key] is NA when the list can not be collected
        """
        data[key] = NA
        output = ConnectionHandler.get_command_output("top -bn1 -o PID")
        if output:
            # 1. create list of strings
            # the processes table is the last part of the output, so scan from the end
            lines = output.split('\n')
            processes = []
            for l in reversed(lines):
                l = l.strip()
                if not l:
                    # empty line => went over all processes
                    break
                processes.append(l)

            # 2. convert strings to dictionaries
            if processes:
                title = processes[-1]  # last item is table columns names
                if title.startswith("PID"):
                    data[key] = []
                    title_tokens = title.split()
                    for p in processes[:-1]:
                        # last column may contain spaces - keep it as one token
                        tokens = p.split(maxsplit=len(title_tokens) - 1)
                        if len(tokens) < len(title_tokens):
                            LOGGER.warning(f"({key}) ignoring unexpected line: {p}")
                            continue
                        d = {}
                        for i, k in enumerate(title_tokens):
                            d[k] = tokens[i]
                        if d:
                            data[key].append(d)
                    if not data[key]:
                        data[key] = NA
                else:
                    LOGGER.error("unexpected output while parsing processes list")
        if data[key] == NA:
            log_not_collected(key)
        return data

    @staticmethod
    def get_services_list(data, key):
        """
        store in data[key] a dictionary that maps a service name to its status,
        as listed by 'service --status-all'. not supported for CentOS.
        data[key] is NA when the list can not be collected
        """
        data[key] = NA
        if CURRENT_OS == OS.CENT:
            log_centos_keys_missing(key)
            return data
        output = ConnectionHandler.get_command_output("service --status-all")
        if output:
            data[key] = {}
            lines = output.split('\n')
            for l in lines:
                # expected line format is "[ <status> ]  <service name>"
                split = l.split(']', 1)
                if len(split) != 2:
                    LOGGER.warning(f"({key}) ignoring unexpected line: {l}")
                    continue
                v = split[0].replace("[", "").strip()  # status
                k = split[1].strip()                   # service name
                data[key][k] = v
            if not data[key]:
                data[key] = NA
        if data[key] == NA:
            log_not_collected(key)
        return data

    def _collect(self, gm):
        """
        collect all resource utilization keys into the general metadata dictionary
        """
        data = gm
        key_to_func = {
            "cpu_utilization": self.get_cpu_utilization,
            "mem_utilization": self.get_memory_utilization,
            "disk_utilization": self.get_disk_utilization,
            "is_disk_read_only": self.get_disk_read_only,
            "processes": self.get_processes_list,
            "services":  self.get_services_list,
            }
        for key, func in key_to_func.items():
            data = func(data, key)
        return data

class NodeCollector(TimerCollector):
    """
    Collector for node status (uptime, version, ntp status)
    """
    def __init__(self, config):
        """
        Read the sampling interval from config (NODE_SAMPLING_OPTION, default "30s")
        and resolve the host IPv4 address once, at construction time.
        """
        sampling_interval = config.get(NODE_SAMPLING_OPTION, "30s")
        super().__init__(sampling_interval)
        self._ipv4 = NA  # replaced by _set_ipv4() when an address is found
        self._set_ipv4()

    def _set_ipv4(self):
        """
        Cache the first IPv4 address printed by 'hostname -i' in self._ipv4.

        self._ipv4 is left untouched, and the field is logged as not collected,
        when the command gives no output or the output holds no IPv4 address.
        """
        txt = ConnectionHandler.get_command_output("hostname -i")
        match = re.search(r"(\d+\.\d+\.\d+\.\d+)", txt) if txt else None
        if match:
            self._ipv4 = match.group(1)
        else:
            # also covers a failed command, which used to pass silently
            log_not_collected("ipv4")

    def _collect(self, gm):
        """
        Fill the given dictionary with the node status fields and return it.

        'lastboot' and 'ipv4' are taken from values that are already known,
        all other fields are added by the getters below, each receiving and returning the dictionary.
        """
        gm["lastboot"] = SCRIPT_START_TIME
        gm["ipv4"] = self._ipv4
        getters = (
            self._get_sys_uptime,
            self._get_version,
            self._get_ntp_state,
            self._get_ntp_details,
            self._get_lldpad_details,
            self._get_lldp_neighbor_info,
        )
        collected = gm
        for getter in getters:
            collected = getter(collected)
        return collected

    @staticmethod
    def _get_sys_uptime(data):
        """
        Set data["sys_uptime"] to the timestamp parsed from the output of 'uptime -s'.

        The output is expected in "%Y-%m-%d %H:%M:%S" format. When the command gives no output,
        or the output cannot be parsed, the key is handled as not collected
        instead of raising and aborting the collection of the other node fields.
        """
        key = "sys_uptime"
        txt = ConnectionHandler.get_command_output("uptime -s")
        if txt:
            try:
                # strip - surrounding whitespace/newline must not fail the parsing
                boot_time = datetime.strptime(txt.strip(), "%Y-%m-%d %H:%M:%S")
            except ValueError:
                LOGGER.error(f"unexpected output of uptime: '{txt}'")
            else:
                data[key] = boot_time.timestamp()
                return data
        return handle_not_collected(key, data)

    @staticmethod
    def _get_ntp_state(data):
        """
        Set data["ntp_state"] to "yes" when 'ntpstat' returned an output, otherwise to "no".
        """
        ntpstat_output = ConnectionHandler.get_command_output("ntpstat")
        data["ntp_state"] = "no" if ntpstat_output is None else "yes"
        return data

    @staticmethod
    def _get_ntp_details(data):
        """
        Set data["ntp_details"] to a list of dictionaries parsed from '/usr/bin/ntpq -p'.

        The first output line gives the column names, the second line is skipped and every
        following line becomes a {column: value} dictionary. Lines whose number of tokens
        differs from the number of columns are ignored.
        The value is NA on CentOS, or when nothing could be parsed.
        """
        key = "ntp_details"
        data[key] = NA
        if CURRENT_OS == OS.CENT:
            log_centos_keys_missing(key)
            return data
        output = ConnectionHandler.get_command_output("/usr/bin/ntpq -p")
        rows = output.split('\n') if output else []
        if len(rows) > 2:
            columns = rows[0].strip().split()
            peers = []
            for row in rows[2:]:
                fields = row.strip().split()
                if len(fields) == len(columns):
                    peers.append(dict(zip(columns, fields)))
                else:
                    LOGGER.warning(f"({key}) ignoring unexpected line: '{row}'")
            if peers:
                data[key] = peers
        if data[key] == NA:
            log_not_collected(key)
        return data

    @staticmethod
    def _get_version(data):
        """
        In legacy mode only - set data["version"] to the DOCA version.

        The value is NA on CentOS or when the version could not be found.
        When not in legacy mode, the dictionary is returned as is.
        """
        if not LEGACY_MODE:
            return data
        key = "version"
        data[key] = NA
        if CURRENT_OS == OS.CENT:
            log_centos_keys_missing(key)
            return data
        version = get_doca_version()
        if version:
            data[key] = version
        else:
            log_not_collected(key)
        return data

    @staticmethod
    def _get_lldpad_details(data):
        """
        Set data["lldpad_details"] to the properties printed by 'systemctl show lldpad'.

        Every 'name=value' output line becomes an entry of the dictionary (split on the first '=').
        The value is NA when the command gives no output or no line could be parsed.
        """
        key = "lldpad_details"
        data[key] = NA
        output = ConnectionHandler.get_command_output("systemctl show lldpad")
        properties = {}
        for line in (output.split('\n') if output else ()):
            name, separator, value = line.partition("=")
            if separator:
                properties[name.strip()] = value.strip()
            else:
                LOGGER.warning(f"({key}) ignoring unexpected line: '{line}'")
        if properties:
            data[key] = properties
        if data[key] == NA:
            log_not_collected(key)
        return data

    @staticmethod
    def _get_lldp_neighbor_info(data):
        """
        Set data["lldp_neighbor_info"] to a list with one dictionary per UP ethernet interface.

        Interfaces are taken from 'ip -json link show'. For each one 'lldptool -t -n -i <ifname>'
        is executed and its output is parsed: a line that does not start with a tab is a key,
        the tab-indented lines that follow it are its values.
        The values of a key are stored as:
          - a dictionary, when all of them look like "name: value" (also when there are no values)
          - a string, when there is a single value
          - a list of strings otherwise
        Each interface dictionary also holds its "ifname".
        The value is NA when the interfaces could not be listed or no interface gave an output.
        """
        key = "lldp_neighbor_info"
        interfaces = ConnectionHandler.get_command_output("ip -json link show")
        try:
            interfaces = json.loads(interfaces)
        except Exception:
            # covers both a failed command (not a string) and a malformed JSON
            log_not_collected(key)
            data[key] = NA
            return data

        kv_regex = re.compile(r".*: .*")  # "key: value" regex

        def resolve_values(values):
            # convert the raw value lines of a single key to their final form
            if all(kv_regex.match(value) for value in values):
                sub_dict = {}
                for kv_pair in values:
                    new_k, new_v = kv_pair.split(": ", 1)
                    sub_dict[new_k.strip()] = new_v.strip()
                return sub_dict
            if len(values) == 1:
                return values[0].strip()
            return values

        neighbors = []
        for dev in interfaces:
            ifname = dev.get("ifname", "")
            flags = dev.get("flags", [])
            link_type = dev.get("link_type", "")
            # run lldptool on up ethernet interfaces
            if not (ifname and "UP" in flags and "ether" in link_type):
                continue
            txt = ConnectionHandler.get_command_output(f"lldptool -t -n -i {ifname}")
            if not txt:
                continue
            interface_dict = {}
            entry = []  # first item is key, next elements are values
            for line in txt.split("\n"):
                if not line.strip():
                    # a blank line is neither a key nor a value
                    continue
                if not line.startswith("\t") and entry:
                    # line is a key - add entry from prior iterations to interface_dict
                    interface_dict[entry[0]] = resolve_values(entry[1:])
                    entry = []
                entry.append(line.strip())
            if entry:
                # no key follows the last entry, so it must be added here
                interface_dict[entry[0]] = resolve_values(entry[1:])
            interface_dict["ifname"] = ifname
            neighbors.append(interface_dict)

        if neighbors:
            data[key] = neighbors
        else:
            log_not_collected(key)
            data[key] = NA
        return data

class InventoryCollector(TimerCollector):
    """
    Collector for inventory data - CPU, ASIC, memory, disk, OS, FW
    """
    def __init__(self, config):
        """
        Read the sampling interval from config (INVENTORY_SAMPLING_OPTION, default "1d")
        and register self._post_collection with the base class.
        """
        sampling_interval = config.get(INVENTORY_SAMPLING_OPTION, "1d")
        super().__init__(sampling_interval, self._post_collection)
        # common PCI for bluefield. is updated while parsing lspci
        self._pci_dev = ""
        # data of the most recent collection - used by _post_collection to find diffs
        self._recent = {}
        # optional object, updated with the collected data as a JSON string (see _collect)
        self._hash_collector = None
    
    def set_hash_collector(self, hash_collector):
        """
        Set the object whose update_from_str() is called with the JSON dump of each collection.
        """
        self._hash_collector = hash_collector

    def _post_collection(self, dd, metadata, section_name):
        """
        Decide what should be sent for a freshly collected inventory dictionary.

        metadata[MESSAGE_TYPE] is always set to section_name (in legacy mode dd gets it as well).
        Returns a (message, metadata) tuple, where message is:
          - dd as is, when diffs are disabled (metadata[TRANS_MODE] is set to FULL_TRANSFORM)
          - dd as is, when nothing is cached yet in self._recent (a shallow copy of dd is cached)
          - otherwise, a copy of the updated cache if at least one key that is not a general
            metadata key was changed, or an empty dictionary if nothing was changed
        """
        # the cache is kept as a shallow copy since the calling function to this method changes the output
        metadata[MESSAGE_TYPE] = section_name
        if LEGACY_MODE:
            dd[MESSAGE_TYPE] = section_name
        if not DIFF_ENABLED:
            # diffs are not allowed - no need to compare data with prior collection
            metadata[TRANS_MODE] = FULL_TRANSFORM
            return dd, metadata
        if not self._recent:
            self._recent = dd.copy()
            return dd, metadata
        diff = False
        for key, new in dd.items():
            old = self._recent.get(key, None)
            if old != new and key not in GeneralDataCollector.GENERAL_METADATA_KEYS:
                # diff is only when looking at the collected data that is not general data
                # i.e. timestamp is always different from the timestamp in self._recent
                self._recent[key] = new
                diff = True
        msg = {}
        if diff:
            # refresh the general metadata in the cache before sending it
            for k in GeneralDataCollector.GENERAL_METADATA_KEYS:
                self._recent[k] = dd.get(k, NA)
            msg = self._recent.copy()
        return msg, metadata

    def _collect(self, gm):
        data = {}
        resolved = gm
        collect_methods = [
            self._get_cpu_info,
            self._get_os_info,
            self._get_asic_info,
            self._get_lshw_info,
            self._get_dmidecode_info,
            self._get_memory_total_size,
            self._get_version_info,
            self._get_fwmanager_info,
            self._get_label_revision,
            self._get_disk_total_size,
            self._get_disk_data,
            self._get_ambiguos_cmd_fields,
            self._get_pci_info,
            self._get_ofed_version,
            self._get_bfb_version,
            ]
        for method in collect_methods:
            try:
                ret = method()
                data.update(ret)
            except Exception as err:
                LOGGER.error(f"an error occurred while collecting inventory data: {err}")
        if self._hash_collector:
            self._hash_collector.update_from_str(json.dumps(data))
        resolved.update(data)
        return resolved

    def _get_cpu_info(self):
        data = {}
        data = self._read_lscpu(data)
        data = self._read_cpu_max_freq(data)
        return data

    @staticmethod
    def _read_lscpu(data):
        """
        Add "cpu_arch", "cpu_nos" and "cpu_model" to data, parsed from the output of 'lscpu'.

        A field is taken from the first line that starts (case insensitive) with its token,
        and its value is everything after the first ':' of that line.
        Fields that were not found are NA and are logged as not collected.
        """
        key_to_token = {
            "cpu_arch": "architecture:",
            "cpu_nos": "socket(s):",
            "cpu_model": "model name:",
        }
        keys = list(key_to_token)
        data.update(get_na_dict(keys))
        txt = ConnectionHandler.get_command_output("lscpu")
        if txt is None:
            log_not_collected(keys)
            return data
        pending = dict(key_to_token)  # fields that were not found yet
        for line in txt.split('\n'):
            if not pending:
                break
            line = line.strip()
            line_lower = line.lower()
            matched = next((k for k, token in pending.items() if line_lower.startswith(token)), None)
            if matched is not None:
                # partition and not split - the value itself may contain ':'
                data[matched] = line.partition(":")[2].strip()
                del pending[matched]
        if pending:
            log_not_collected(list(pending))
        return data

    @staticmethod
    def _read_cpu_max_freq(data):
        """
        Set data["cpu_max_freq"] to the value of the first "Max Speed:" line of dmidecode.

        The value is NA when the command fails or when its output is not a single 'name: value' pair.
        """
        key = "cpu_max_freq"
        data[key] = NA
        txt = ConnectionHandler.get_command_output('dmidecode -t 4 output 2> /dev/null | grep "Max Speed:" | head -1', True)
        if txt is None:
            log_not_collected(key)
            return data
        parts = txt.split(':')
        if len(parts) == 2:
            data[key] = parts[1].strip()
        else:
            LOGGER.error("unexpected output of dmidecode")
            log_not_collected(key)
        return data

    @staticmethod
    def _get_os_info():
        """
        Return "os_name", "os_version" and "os_version_id", parsed from /etc/os-release.

        A field is taken from the first 'NAME=value' line whose name equals (case insensitive)
        its token. The value is everything after the first '=', without surrounding double quotes.
        Fields that were not found are NA.
        """
        key_to_token = {
            "os_name": "name",
            "os_version": "version",
            "os_version_id": "version_id"
        }
        keys = list(key_to_token)
        data = get_na_dict(keys)
        file_name = "/etc/os-release"
        txt = ConnectionHandler.get_command_output("cat /etc/os-release")
        if not txt:
            LOGGER.error(f"could not parse {file_name}")
            log_not_collected(keys)
            return data
        token_to_key = {token: key for key, token in key_to_token.items()}  # fields not found yet
        for line in txt.split("\n"):
            # partition and not split - the value itself may contain '='
            name, separator, value = line.strip().partition("=")
            if not separator:
                continue
            key = token_to_key.pop(name.lower(), None)
            if key is not None:
                data[key] = value.strip('"')
                if not token_to_key:
                    break
        return data

    def _get_asic_info(self):
        """
        Return "asic_model", "asic_vendor" and "asic_serial_number" of the BlueField controller.

        Vendor and model are parsed from the first BlueField controller line of lspci,
        and the first token of that line is kept in self._pci_dev for the following queries.
        The serial number is parsed from the SN line of 'lspci -vv' for that device.
        Fields that could not be parsed are NA and are logged as not collected.
        All fields are NA on CentOS.
        """
        keys = ["asic_model", "asic_vendor", "asic_serial_number"]
        data = get_na_dict(keys)
        if CURRENT_OS == OS.CENT:
            log_centos_keys_missing(keys)
            return data
        txt = ConnectionHandler.get_command_output("lspci | grep controller | grep BlueField")

        # line format is "<port_number> Ethernet controller: <Vendor> MTXXXX <model_name>"
        tokens = txt.split('\n')[0].split() if txt else []
        if not tokens:
            log_not_collected(keys)
            return data
        self._pci_dev = tokens[0]
        start = 0   # index of the first vendor token (after the last token with ':')
        middle = 0  # index of the MTXXXX token, which separates vendor from model
        for idx, tok in enumerate(tokens):
            if tok.startswith("MT"):
                middle = idx
            if ':' in tok:
                start = idx + 1
        if middle and start <= middle:
            data["asic_vendor"] = ' '.join(tokens[start:middle]) or NA
            data["asic_model"] = ' '.join(tokens[middle + 1:]) or NA
        else:
            # without the MTXXXX separator, vendor and model cannot be told apart
            LOGGER.error("error while parsing asic info")
            log_not_collected(["asic_model", "asic_vendor"])

        # get DPU serial number
        key = "asic_serial_number"
        txt = ConnectionHandler.get_command_output(f"lspci -vv -s {self._pci_dev} | grep SN")
        split = txt.split(":") if txt else []
        if len(split) == 2:
            data[key] = split[1].strip()
        else:
            log_not_collected(key)
        return data

    def _get_lshw_info(self):
        """
        Return "memory_data" and "cpu_data", taken from the JSON output of 'lshw -c <class> -json'.

        A field is NA when the command fails, its output is not a valid JSON
        or the conversion by get_json_data fails.
        """
        data = {}
        for key, class_name in (("memory_data", "memory"), ("cpu_data", "processor")):
            try:
                raw = ConnectionHandler.get_command_output(f"lshw -c {class_name} -json", True)
                data[key] = get_json_data(json.loads(raw), key)
            except Exception:
                log_not_collected(key)
                data[key] = NA
        return data

    @staticmethod
    def _get_fwmanager_info():
        """
        Return "asic_core_bw", "asic_ports" and "asic_data".

        The first two are parsed from the "Description:" lines of mlxfwmanager: "asic_ports" is
        the text before the first ';' without the 'Description:' prefix, and "asic_core_bw" is the
        first word of that text that contains "GbE". Parsing stops at the first line that has such a word.
        "asic_data" is always the result of get_json_data for an empty dictionary.
        """
        keys = ["asic_core_bw", "asic_ports"]
        data = get_na_dict(keys)
        data["asic_data"] = get_json_data({}, "asic_data")  # required by user
        txt = ConnectionHandler.get_command_output("mlxfwmanager | grep Description:", True)
        if txt is None:
            log_not_collected(keys)
            return data
        for line in txt.split('\n'):
            words = line.split(';')[0].strip().split()  # words before first ';'
            try:
                data["asic_ports"] = ' '.join(words[1:]).strip()  # after 'Description:' prefix
                bandwidth = next((word for word in words if "GbE" in word), None)
                if bandwidth is not None:
                    data["asic_core_bw"] = bandwidth
                    return data
            except Exception:
                LOGGER.error("unexpected output of mlxfwmanager")
                log_not_collected(keys)
        return data

    @staticmethod
    def _get_label_revision():
        """
        Return "platform_label_revision", parsed from the first line of 'mst status'
        that contains "revision" (the text between its first and second ':').

        The value is NA when the command fails, or there is no such line or it has no ':'.
        """
        key = "platform_label_revision"
        data = {key: NA}
        txt = ConnectionHandler.get_command_output("mst status", True)
        if txt is None:
            log_not_collected(key)
            return data
        revision_line = next((ln for ln in txt.split("\n") if "revision" in ln), None)
        if revision_line is not None:
            try:
                data[key] = revision_line.split(":")[1].strip()
            except Exception:
                log_not_collected(key)
        return data

    @classmethod
    def _get_dmidecode_info(cls):
        """
        Return "platform_vendor", "platform_model" and "platform_serial_number",
        each one taken from the output of 'dmidecode -s <keyword>'.

        A field is NA when its command gives no output.
        The vendor value is passed through _fix_platform_vendor_value.
        """
        keyword_by_key = {
            "platform_vendor": "system-manufacturer",
            "platform_model": "system-product-name",
            "platform_serial_number": "system-serial-number",
        }
        data = get_na_dict(list(keyword_by_key.keys()))
        for key, keyword in keyword_by_key.items():
            value = ConnectionHandler.get_command_output("dmidecode -s " + keyword, True)
            if not value:
                log_not_collected(key)
                continue
            data[key] = value
        return cls._fix_platform_vendor_value(data, "platform_vendor")

    @staticmethod
    def _fix_platform_vendor_value(data, key):
        # in case platform_vendor is a website, extract domain only
        # https://www.mellanox.com -> mellanox
        vendor = data.get(key, "")
        if vendor:
            start = vendor.find("www.")
            end = vendor.find(".com")
            if end > start > 0:
                data[key] = vendor[start + 4:end]
        return data

    @staticmethod
    def _get_disk_total_size():
        """
        Return "disk_total_size" - the total size of the '/' file system, formatted as "<size> GB"
        with 2 digits after the decimal point. The value is NA when the size cannot be read.
        """
        key = "disk_total_size"
        data = {key: NA}
        try:
            total_gb = shutil.disk_usage('/').total / (1024**3)  # bytes to GB
        except Exception:
            log_not_collected(key)
        else:
            data[key] = f"{total_gb:.2f} GB"
        return data

    @staticmethod
    def _get_disk_data():
        """
        Return "disk_data" - the devices listed by lsblk, converted by get_json_data.

        Each non-empty output line becomes a dictionary with the fields name, size, d_type,
        vendor, transport, rev and model. A field that has no (non-empty) value in the line is NA.
        Devices that have "boot" in their name are skipped.
        The value is NA when the command fails or no device was found.
        """
        key = "disk_data"
        data = {key: NA}
        txt = ConnectionHandler.get_command_output("lsblk -d -P -o name,size,type,vendor,tran,rev,model")
        if txt is None:
            log_not_collected(key)
            return data
        key_to_pattern = {
            "name":      re.compile(r"NAME\=\"([^\"]+)\""),
            "size":      re.compile(r"SIZE\=\"([^\"]+)\""),
            "d_type":    re.compile(r"TYPE\=\"([^\"]+)\""),
            "vendor":    re.compile(r"VENDOR\=\"([^\"]+)\""),
            "transport": re.compile(r"TRAN\=\"([^\"]+)\""),
            "rev":       re.compile(r"REV\=\"([^\"]+)\""),
            "model":     re.compile(r"MODEL\=\"([^\"]+)\""),
        }
        name_pattern = key_to_pattern["name"]
        devices = []
        for line in txt.split("\n"):
            if not line:
                continue
            device_name = name_pattern.search(line)
            if device_name and "boot" in device_name.group(1):  # skip boot devices
                continue
            device = {}
            for field, pattern in key_to_pattern.items():
                found = pattern.search(line)
                device[field] = found.group(1).strip() if found else NA
            devices.append(device)
        if devices:
            data[key] = get_json_data(devices, key)
        else:
            log_not_collected(key)
        return data

    @staticmethod
    def _get_memory_total_size():
        """
        Return "memory_total_size", parsed from the MemTotal line of /proc/meminfo and
        formatted as "<size> MB" with 2 digits after the decimal point.

        The unit of the line (kB, GB or bytes, case insensitive) is converted to megabytes,
        a value with any other unit is used as is.
        The value is NA when the file cannot be read or parsed, or has no MemTotal line.
        """
        key = "memory_total_size"
        data = {key: NA}
        file_name = "/proc/meminfo"
        try:
            with open(file_name, "r") as fptr:
                for line in fptr:
                    line = line.lower()
                    if not line.startswith("memtotal"):
                        continue
                    # this line is usually the first line in this file
                    fields = line.split()
                    value = int(fields[1])
                    unit = fields[2]  # already in lower case
                    # convert to megabytes
                    if unit == "kb":
                        value /= 1024
                    elif unit == "gb":
                        value *= 1024
                    elif unit in ("b", "bytes"):
                        value /= 1024 * 1024
                    data[key] = f"{value:.2f} MB"
                    break
                else:
                    # the whole file was read without finding the line
                    log_not_collected(key)
        except Exception:
            LOGGER.error(f"could not parse {file_name}")
            log_not_collected(key)
        return data

    @staticmethod
    def _get_version_info():
        """
        Return "agent_version" - the DOCA version, or NA on CentOS or when the version was not found.
        """
        key = "agent_version"
        if CURRENT_OS == OS.CENT:
            log_centos_keys_missing(key)
            return {key: NA}
        version = get_doca_version()
        if not version:
            log_not_collected(key)
            return {key: NA}
        return {key: version}

    def _get_ambiguos_cmd_fields(self):
        """
        Return "asic_model_id", "platform_base_mac", "platform_part_number" and "fw_version",
        parsed from the output of '<cmd> -d <pci device> q full'.

        some fields can be found using command from 2 different packages
        MST which is open source or another one that isn't
        in each one the cmd is a bit different - 'flint' is used if found, otherwise 'mstflint'.
        The base MAC is reformatted to the colon separated notation.
        Fields that were not found are NA. All fields are NA on CentOS.
        """
        keys = ["asic_model_id", "platform_base_mac", "platform_part_number", "fw_version"]
        data = get_na_dict(keys)
        if CURRENT_OS == OS.CENT:
            log_centos_keys_missing(keys)
            return data
        cmd = None
        for candidate in ("flint", "mstflint"):
            if ConnectionHandler.get_command_output(f"which {candidate}") is not None:
                cmd = candidate
                break
        if cmd is None:
            log_not_collected(keys)
            return data
        txt = ConnectionHandler.get_command_output(f"{cmd} -d {self._pci_dev} q full", True)
        if txt is None:
            log_not_collected(keys)
            return data
        for line in txt.split('\n'):
            low = line.lower()
            tokens = line.split()
            if low.startswith("psid:"):
                data["asic_model_id"] = tokens[-1].strip()
            elif low.startswith("base mac"):
                if len(tokens) < 3:
                    # a line without a value must not fail the parsing of the other fields
                    LOGGER.warning(f"unexpected base MAC line: '{line}'")
                    log_not_collected("platform_base_mac")
                    continue
                raw_mac = tokens[2].strip()
                if len(raw_mac) % 2 == 0:
                    # add colons for MAC address notation
                    pairs = [raw_mac[i:i + 2] for i in range(0, len(raw_mac), 2)]
                    data["platform_base_mac"] = ':'.join(pairs)
                else:
                    LOGGER.warning(f"MAC address should have even number of characters. got {raw_mac}")
                    log_not_collected("platform_base_mac")
            elif low.startswith("part number:"):
                data["platform_part_number"] = tokens[-1].strip()
            elif low.startswith("fw version:"):
                data["fw_version"] = tokens[-1].strip()
        return data
    
    def _get_pci_info(self):
        """
        Return "pci_info" - a list with one dictionary per device listed by 'lspci -nvmm'.

        The output holds a record per device, records are separated by blank lines and every
        record line is 'name: value' (split on the first ':').
        The value is NA when the command gives no output or no record could be parsed.
        """
        key = "pci_info"
        data = {key: NA}
        output = ConnectionHandler.get_command_output("lspci -nvmm")
        if not output:
            log_not_collected(key)
            return data
        devices = []
        record = {}
        for line in output.split('\n'):
            if not line.strip():
                # blank line - end of the current record
                if record:
                    devices.append(record)
                    record = {}
                continue
            name, separator, value = line.partition(":")
            if separator:
                record[name.strip()] = value.strip()
            else:
                LOGGER.warning(f"({key}) ignoring unexpected line: '{line}'")
        if record:
            # the last record is not necessarily followed by a blank line
            devices.append(record)
        if devices:
            data[key] = devices
        else:
            log_not_collected(key)
        return data

    @classmethod
    def _get_ofed_version(cls):
        """
        Return "ofed_version" - the output of 'ofed_info -s', or NA when the command returned None.
        """
        output = ConnectionHandler.get_command_output("ofed_info -s")
        return {"ofed_version": NA if output is None else output}

    @classmethod
    def _get_bfb_version(cls):
        """
        Return "bfb_version" - the value given by get_bfb_version(), or NA when it returned None.
        """
        # the module level get_bfb_version function is called here, not this method
        value = get_bfb_version()
        if value is None:
            value = NA
        return {"bfb_version": value}

class HwCountersCollector(TimerCollector):
    """
    Collector for HW counters
    """
    def __init__(self, config):
        """
        Read the sampling interval from config (HW_COUNTERS_SAMPLING_OPTION, default "5m").
        """
        super().__init__(config.get(HW_COUNTERS_SAMPLING_OPTION, "5m"))

    def _collect(self, gm):
        """
        Add the JSON output of mlxlink for each MST device to the given dictionary,
        using the device as the key. Returns an empty dictionary when no MST device is found.
        """
        mst_devices = get_mst_devices()
        if not mst_devices:
            LOGGER.error("cannot collect HW counters - no MST devices found")
            return {}
        for device in mst_devices:
            collected = add_output_from_json_command(f"mlxlink -d {device} -e -m -c --json", gm, device)
            if not collected:
                log_not_collected(f"HW counters ({device})")
        return gm

class AmberInfoCollector(TimerCollector):
    """
    Collector for amber info (mlxlink -d DEV --amber_collect FILE)
    """
    def __init__(self, config):
        """
        Read the sampling interval from config (AMBER_INFO_SAMPLING_OPTION, default "1h").
        """
        sampling_interval = config.get(AMBER_INFO_SAMPLING_OPTION, "1h")
        super().__init__(sampling_interval)

    def _collect(self, gm):
        """
        Add the amber info of each MST device to the given dictionary, using the device as the key.
        Returns an empty dictionary when no MST device is found.
        """
        mst_devices = get_mst_devices()
        if not mst_devices:
            LOGGER.error("cannot collect amBER info - no MST devices found")
            return {}
        data = gm
        for mst_dev in mst_devices:
            data[mst_dev] = self._get_amber_info(mst_dev)
        return data

    @staticmethod
    def _get_amber_info(mst_dev):
        """
        Return the amber info of the given MST device as a list of dictionaries, one per CSV line.

        mlxlink writes the info to a CSV file, which is then printed and removed.
        The first CSV line gives the field names. Empty values and values of missing fields are dropped.
        Returns NA when there is no output, the CSV cannot be parsed or it has no data lines.
        """
        # NOTE(review): the file path is fixed, so it is shared by all devices and all runs
        csv_file = "/tmp/amber_inventory.csv"
        cmd = f"mlxlink -d {mst_dev} --amber_collect {csv_file}"
        ConnectionHandler.get_command_output(cmd)
        cmd = f"cat {csv_file} && rm -f {csv_file}"
        output = ConnectionHandler.get_command_output(cmd)
        if not output:
            log_not_collected(f"amber info ({mst_dev})")
            return NA
        resolved_lines = []
        try:
            # the reader is lazy - parsing errors are raised while iterating, not on creation,
            # so the iteration must be inside the try block as well
            reader = csv.DictReader(output.splitlines(), delimiter=',', restval=NA)
            for raw_line_dict in reader:
                resolved = {k: v for k, v in raw_line_dict.items() if v != NA and v and k}
                resolved_lines.append(resolved)
        except Exception as err:
            LOGGER.error("could not parse CSV string: %s", err)
            log_not_collected(f"amber info ({mst_dev})")
            return NA
        if not resolved_lines:
            LOGGER.warning("no lines found in CSV")
            log_not_collected(f"amber info ({mst_dev})")
            return NA
        return resolved_lines

class CablesCountersCollector(TimerCollector):
    """
    Collector for cables counters
    """
    def __init__(self, config):
        """
        Read the sampling interval from config (CABLES_COUNTERS_SAMPLING_OPTION, default "5m").
        """
        super().__init__(config.get(CABLES_COUNTERS_SAMPLING_OPTION, "5m"))

    def _collect(self, gm):
        """
        Add a dictionary per MST device to the given dictionary, using the device as the key.
        The device dictionary holds the JSON output of the mlxlink cable commands below.
        Returns an empty dictionary when no MST device is found.
        """
        mst_devices = get_mst_devices()
        if not mst_devices:
            LOGGER.error("cannot collect cables info - no MST devices found")
            return {}
        placeholder = "$MST_DEV"  # replaced with the device in each command
        templates = {
            "cables_counters": f"mlxlink -d {placeholder} --cable --dump --json",
            "cables_ddm_counters": f"mlxlink -d {placeholder} --cable --ddm --json",
        }
        for device in mst_devices:
            device_counters = {}
            gm[device] = device_counters
            for key, template in templates.items():
                cmd = template.replace(placeholder, device)
                if not add_output_from_json_command(cmd, device_counters, key):
                    log_not_collected(f"{key} ({device})")
        return gm

class PCIECountersCollector(TimerCollector):
    """
    Collector for PCIe counters
    """
    def __init__(self, config):
        """
        Read the sampling interval from config (PCIE_COUNTERS_SAMPLING_OPTION, default "5m").
        """
        super().__init__(config.get(PCIE_COUNTERS_SAMPLING_OPTION, "5m"))

    def _collect(self, gm):
        """
        Add the JSON output of the mlxlink PCIE command for each MST device to the given
        dictionary, using the device as the key. Returns an empty dictionary when no MST device is found.
        """
        mst_devices = get_mst_devices()
        if not mst_devices:
            LOGGER.error("cannot collect PCIe counters - no MST devices found")
            return {}
        for device in mst_devices:
            collected = add_output_from_json_command(
                f"mlxlink -d {device} --port_type PCIE -c -e --json", gm, device)
            if not collected:
                log_not_collected(f"PCIE counters ({device})")
        return gm

class NetIfCollector(TimerCollector):
    """
    Collector for Network Interface information
    """
    def __init__(self, config):
        """
        Read the sampling interval from config (NET_IF_SAMPLING_OPTION, default "30s")
        and register self._post_collection_impl with the base class.
        """
        sampling_interval = config.get(NET_IF_SAMPLING_OPTION, "30s")
        super().__init__(sampling_interval, self._post_collection_impl)
        # last sent link dictionary per interface name - used to find diffs
        self._collected = {}
        # the dictionary given to the last _collect call
        self._msg_metadata = {}
        # the first collection is sent as FULL_TRANSFORM
        self._is_first_collection = True

    def _post_collection_impl(self, links, metadata, section_name):
        """
        message logic is to send all packages on first iteration (FULL_TRANSFORM),
        and then send only diffs (PARTIAL_TRANSFORM)

        links is the list of link dictionaries that was returned by _collect.
        Returns a (message, metadata) tuple, metadata[MESSAGE_TYPE] is always set to section_name:
          - nothing was collected: the message is an empty dictionary
          - diffs are disabled: the message holds all links, with FULL_TRANSFORM
          - otherwise: the message holds the new links, the links whose oper_state was changed
            and the links that no longer exist (marked with 'deleted': True)
        """
        metadata[MESSAGE_TYPE] = section_name
        # _collect returns its metadata dictionary (not a list of links) when nothing was collected
        if not links or isinstance(links, dict):
            return {}, metadata

        if not DIFF_ENABLED:
            # diff is not allowed - no need to track diffs
            metadata[TRANS_MODE] = FULL_TRANSFORM
            return list(links), metadata

        if self._is_first_collection:
            self._is_first_collection = False
            # FULL_TRANSFORM is required only on first collection
            metadata[TRANS_MODE] = FULL_TRANSFORM

        resolved = []
        ifnames = set()
        for link in links:
            ifname = link['ifname']
            link[MESSAGE_TYPE] = section_name
            ifnames.add(ifname)
            previous = self._collected.get(ifname)
            # send new links and links with oper_state modification
            if previous is None or previous.get("oper_state") != link.get("oper_state"):
                resolved.append(link)
                self._collected[ifname] = link

        # find removed
        for removed in set(self._collected) - ifnames:
            link_dict = self._collected.pop(removed)
            link_dict['deleted'] = True
            if LEGACY_MODE:
                link_dict.update(self._msg_metadata)
            resolved.append(link_dict)
        return resolved, metadata

    def _get_network_interfaces(self, gm):
        """
        Return a list with a dictionary per network interface, based on get_network_interfaces().

        Each dictionary is extended with: 'FEC' (ethernet interfaces only, parsed from ethtool),
        'oper_state', 'deleted', 'down_reason' and 'vrf', and the MAC address is stored
        under 'address' ('mac_address' in legacy mode).
        In legacy mode only ethernet interfaces are returned, and each dictionary also gets
        'kind' and the content of gm.
        Interfaces without a name are skipped, since the diff logic identifies links by name.
        Returns an empty dictionary on CentOS or when no interface is found.
        """
        if CURRENT_OS == OS.CENT:
            log_centos_keys_missing("network-interfaces")
            return {}

        all_interfaces = get_network_interfaces()
        if not all_interfaces:
            log_not_collected("network-interfaces")
            return {}

        address_key = "mac_address" if LEGACY_MODE else "address"
        resolved = []
        for j in all_interfaces:
            is_ethernet = "ether" in j.get("link_type", "").lower()
            if LEGACY_MODE and not is_ethernet:
                continue
            ifname = j.get("ifname", None)
            if ifname is None:
                LOGGER.warning("ignoring network interface without a name")
                continue
            if is_ethernet:
                # add FEC settings
                j["FEC"] = NA
                parse_ethtool_output(f"ethtool --show-fec {ifname}", j, "FEC", ifname)

            # not every interface has these fields - a missing one must not fail the collection
            mac_address = j.pop("address", NA)
            oper_state = j.pop("operstate", NA)
            j[address_key] = mac_address
            j['oper_state'] = oper_state
            j['deleted'] = False
            j['down_reason'] = "no_carrier" if 'NO-CARRIER' in j.get('flags', []) else NA
            j['vrf'] = "default"
            if LEGACY_MODE:
                j['kind'] = gm.get(DEVICE_TYPE, NA)
                j.update(gm)
            resolved.append(j)

        return resolved

    def _collect(self, gm):
        self._msg_metadata = gm

        # collect default dictionaries as string
        data = self._get_network_interfaces(gm)
        if data is None or len(data) == 0:
            log_not_collected("Network interfaces information")
            return gm

        return data

class NetIfStatsCollector(TimerCollector):
    """
    Collector for Network Interface Statistics information
    """

    # (output field, key inside the "rx" section of 'stats64'), in output order
    _RX_COUNTERS = (
        ('rx_bytes', 'bytes'),
        ('rx_packets', 'packets'),
        ('rx_drop', 'dropped'),
        ('rx_multicast', 'multicast'),
        ('rx_errs', 'errors'),
        ('rx_frame_errors', 'frame_errors'),
        ('rx_length_errors', 'length_errors'),
        ('rx_crc_errors', 'crc_errors'),
        ('rx_fifo_errors', 'fifo_errors'),
    )
    # (output field, key inside the "tx" section of 'stats64'), in output order
    _TX_COUNTERS = (
        ('tx_bytes', 'bytes'),
        ('tx_packets', 'packets'),
        ('tx_colls', 'collisions'),
        ('tx_drop', 'dropped'),
        ('tx_errs', 'errors'),
        ('tx_carrier_errors', 'carrier_errors'),
        ('tx_aborted_errors', 'aborted_errors'),
    )

    def __init__(self, config):
        sampling_interval = config.get(NET_IF_STATS_SAMPLING_OPTION, "30s")
        super().__init__(sampling_interval, self._post_collection_impl)
        self._collected = {}  # last reported entry per interface name
        self._msg_metadata = {}
        self._is_first_collection = True

    @staticmethod
    def _has_changed(previous, current):
        """
        Tell whether any field of `current`, except "timestamp", differs
        from the value stored in `previous`.
        """
        return any(
            key != "timestamp" and current[key] != previous.get(key)
            for key in current
        )

    def _post_collection_impl(self, links, metadata, section_name):
        """
        message logic is to send all links on first iteration (FULL_TRANSFORM),
        and then send only diffs (PARTIAL_TRANSFORM)

        :param links: list of interface dictionaries produced by `_collect`
        :param metadata: message metadata, updated in place
        :param section_name: value stored as the message type
        :return: tuple of (entries to send, metadata)
        """
        resolved = []
        metadata[MESSAGE_TYPE] = section_name
        # `_collect` hands back the metadata dictionary when nothing was
        # collected; only a non-empty list of links can be processed here
        if not links or not isinstance(links, list):
            return {}, metadata

        if not DIFF_ENABLED:
            metadata[TRANS_MODE] = FULL_TRANSFORM
        elif self._is_first_collection:
            self._is_first_collection = False
            # FULL_TRANSFORM is required only on first collection
            metadata[TRANS_MODE] = FULL_TRANSFORM

        ifnames = set()
        for link in links:
            ifname = link['ifname']
            if not DIFF_ENABLED:
                # diff is not allowed - no need to track diffs
                resolved.append(link)
                continue

            link[MESSAGE_TYPE] = section_name
            ifnames.add(ifname)
            previous = self._collected.get(ifname)
            # report new interfaces and interfaces with any changed field
            if previous is None or self._has_changed(previous, link):
                resolved.append(link)
                self._collected[ifname] = link

        if DIFF_ENABLED:
            # find removed
            for r in set(self._collected) - ifnames:
                link_dict = self._collected.pop(r)
                link_dict['deleted'] = True
                if LEGACY_MODE:
                    link_dict.update(self._msg_metadata)
                resolved.append(link_dict)
        return resolved, metadata

    def _get_network_interfaces_stats(self, gm):
        """
        Run 'ip -s -s --json link show' and flatten the counters of every
        ethernet link into top-level "rx_*" / "tx_*" fields.

        :param gm: general metadata dictionary, merged into each entry in legacy mode
        :return: list of per-interface dictionaries; an empty container when
                 nothing could be collected
        """
        if CURRENT_OS == OS.CENT:
            log_centos_keys_missing("network interfaces stats")
            return {}

        data = {}
        cmd = "ip -s -s --json link show"
        output = ConnectionHandler.get_command_output(cmd)
        if output:
            try:
                data = json.loads(output)
            except (TypeError, ValueError) as err:
                LOGGER.error(err)
        if not data:
            log_not_collected("network-interfaces-stats")
            return data

        resolved = []
        for j in data:
            if "ether" not in j.get('link_type', ""):
                continue
            # a link without counters is reported with NA values instead of
            # failing the whole collection
            stats = j.pop('stats64', {})
            j['deleted'] = False

            j['rx_broadcast'] = 0
            j['rx_frame'] = 0
            rx_stats = stats.get('rx', {})
            for field, source in self._RX_COUNTERS:
                j[field] = rx_stats.get(source, NA)

            j['tx_broadcast'] = 0
            j['tx_carrier'] = 0
            j['tx_multicast'] = 0
            tx_stats = stats.get('tx', {})
            for field, source in self._TX_COUNTERS:
                j[field] = tx_stats.get(source, NA)

            if LEGACY_MODE:
                j.update(gm)
            resolved.append(j)

        return resolved

    def _collect(self, gm):
        """
        Store the message metadata and gather the interface statistics.

        :param gm: general metadata dictionary
        :return: the collected statistics, or `gm` itself when nothing was collected
        """
        self._msg_metadata = gm
        data = self._get_network_interfaces_stats(gm)
        if not data:
            log_not_collected("Network Interfaces statistics information")
            return gm

        return data

class DomCollector(TimerCollector):
    """
    Collector for Dom information
    """

    def __init__(self, config):
        sampling_interval = config.get(DOM_SAMPLING_OPTION, "1h")
        super().__init__(sampling_interval, self._post_collection_impl)
        self._collected = {}  # entries already reported, keyed by interface name
        self._msg_metadata = {}
        self._is_first_collection = True

    def _post_collection_impl(self, links, metadata, section_name):
        """
        message logic is to send all entries on first iteration (FULL_TRANSFORM),
        and then send only diffs (PARTIAL_TRANSFORM)

        :param links: dictionary produced by `_collect`, keyed by interface name
        :param metadata: message metadata, updated in place
        :param section_name: value stored as the message type
        :return: tuple of (dictionary of entries to send, metadata)
        """
        metadata[MESSAGE_TYPE] = section_name
        if not links:
            return {}, metadata

        if not DIFF_ENABLED:
            # no diff tracking - pass the collected data as is
            metadata[TRANS_MODE] = FULL_TRANSFORM
            return links, metadata

        if self._is_first_collection:
            self._is_first_collection = False
            # FULL_TRANSFORM is required only on first collection
            metadata[TRANS_MODE] = FULL_TRANSFORM

        ifnames = set()
        resolved = {}
        for ifname, link in links.items():
            # values that are not dictionaries are not DOM entries - skip them
            if not isinstance(link, dict):
                continue
            link[MESSAGE_TYPE] = section_name
            ifnames.add(ifname)
            if ifname not in self._collected:
                resolved[ifname] = link
                self._collected[ifname] = link

        # find removed
        for r in set(self._collected) - ifnames:
            link_dict = self._collected.pop(r)
            link_dict['deleted'] = True
            if LEGACY_MODE:
                link_dict.update(self._msg_metadata)
            # `resolved` is keyed by interface name, like the entries added above
            resolved[r] = link_dict
        return resolved, metadata

    def _get_dom(self):
        """
        Run 'ethtool -m' on every ethernet interface and gather the results.

        :return: dictionary with the parsed output stored under each interface
                 name; interfaces whose value is NA are left out
        """
        all_interfaces = get_network_interfaces()
        if not all_interfaces:
            log_not_collected("network-interfaces")
            return {}
        resolved = {}
        # add DOM if exists
        for j in all_interfaces:
            if "ether" not in j.get("link_type", "").lower():
                continue
            ifname = j.get("ifname", None)
            if ifname is None:
                continue
            data = {}
            cmd = f"ethtool -m {ifname}"
            parse_ethtool_output(cmd, data, ifname, ifname)
            if LEGACY_MODE:
                data.update(self._msg_metadata)
            if data.get(ifname, NA) != NA:
                resolved.update(data)
        return resolved

    def _collect(self, gm):
        """
        Store the message metadata and gather the DOM information.

        :param gm: general metadata dictionary
        :return: dictionary of DOM data; empty on CentOS or when nothing was collected
        """
        if CURRENT_OS == OS.CENT:
            log_centos_keys_missing("network-interfaces")
            return {}
        self._msg_metadata = gm
        data = self._get_dom()
        if not data:
            log_not_collected("DOM information")
        return data

class ContainersCollector(TimerCollector):
    """
    Collector for Containers information
    """

    def __init__(self, config):
        # NOTE(review): the default uses an upper-case "S" while
        # NetIfStatsCollector uses "30s" - confirm the interval parser accepts both
        sampling_interval = config.get(CONTAINERS_SAMPLING_OPTION, "30S")
        super().__init__(sampling_interval, self._post_collection_impl)
        self._collected = {}  # containers already reported, keyed by container id
        self._msg_metadata = {}
        self._is_first_collection = True

    def _post_collection_impl(self, containers, metadata, section_name):
        """
        message logic is to send all containers on first iteration (FULL_TRANSFORM),
        and then send only diffs (PARTIAL_TRANSFORM)

        :param containers: list of container dictionaries produced by `_collect`
        :param metadata: message metadata, updated in place
        :param section_name: value stored as the message type
        :return: tuple of (entries to send, metadata)
        """
        id_set = set()
        resolved = []
        metadata[MESSAGE_TYPE] = section_name
        if not isinstance(containers, list):
            # no containers found
            return {}, metadata

        if not DIFF_ENABLED:
            metadata[TRANS_MODE] = FULL_TRANSFORM
        elif self._is_first_collection:
            self._is_first_collection = False
            # FULL_TRANSFORM is required only on first collection
            metadata[TRANS_MODE] = FULL_TRANSFORM

        for container in containers:
            cid = container.get('id')
            if not DIFF_ENABLED:
                # diff is not allowed - no need to track diffs
                resolved.append(container)
                continue

            container[MESSAGE_TYPE] = section_name
            id_set.add(cid)
            if cid not in self._collected:
                resolved.append(container)
                # key by the container id - keying by the builtin `id` made
                # every new container show up as removed right away
                self._collected[cid] = container

        if DIFF_ENABLED:
            # find removed
            for r in set(self._collected) - id_set:
                container_dict = self._collected.pop(r)
                container_dict['deleted'] = True
                if LEGACY_MODE:
                    container_dict.update(self._msg_metadata)
                resolved.append(container_dict)
        return resolved, metadata

    @staticmethod
    def _parse_container_cpu(info):
        """
        Extract the cpu limits from the "info" section of 'crictl inspect'.

        :return: the "runtimeSpec" cpu dictionary when present; otherwise a
                 dictionary with "period"/"quota"/"shares" built from the
                 "config" resources; None when neither is available
        """
        # first option
        try:
            return info["runtimeSpec"]["linux"]["resources"]["cpu"]
        except Exception:
            pass

        # second option
        try:
            resources = info["config"]["linux"]["resources"]
        except Exception:
            return None
        cpu_period = resources.get("cpu_period", None)
        cpu_quota = resources.get("cpu_quota", None)
        cpu_shares = resources.get("cpu_shares", None)
        if not (cpu_shares or cpu_quota or cpu_period):
            return None
        return {
            "period": cpu_period if cpu_period else NA,
            "quota": cpu_quota if cpu_quota else NA,
            "shares": cpu_shares if cpu_shares else NA,
        }

    @staticmethod
    def _parse_container_memory(info):
        """
        Extract the memory limit from the "info" section of 'crictl inspect'.

        :return: the limit value, or None when it is not present
        """
        # first option
        try:
            return info["config"]["linux"]["resources"]["memory_limit_in_bytes"]
        except Exception:
            pass
        # second option
        try:
            return info["runtimeSpec"]["linux"]["resources"]["memory"]["limit"]
        except Exception:
            return None

    @staticmethod
    def _parse_container_resources(id):
        """
        Read the cpu and memory limits of a container using 'crictl inspect'.

        :param id: container id
        :return: dictionary with "cpu" and "memory" (NA for a missing one),
                 or None when neither could be read
        """
        cmd = f"crictl inspect {id}"
        output = ConnectionHandler.get_command_output(cmd)
        try:
            inspect = json.loads(output)
        except Exception:
            LOGGER.warning("unexpected output from '%s'", cmd)
            return None
        info = inspect.get("info", None)
        if info is None:
            return None
        cpu = ContainersCollector._parse_container_cpu(info)
        if cpu is None:
            LOGGER.warning("could not get container ID %s cpu info", id)
        memory = ContainersCollector._parse_container_memory(info)
        if memory is None:
            LOGGER.warning("could not get container ID %s memory info", id)
        if not (cpu or memory):
            return None
        return {
            "cpu": cpu if cpu else NA,
            "memory": memory if memory else NA,
        }

    @staticmethod
    def _parse_container_cpu_utilisation(id):
        """
        Read the cpu utilisation of a container from 'crictl stats'.

        :param id: container id
        :return: second column of the second output line, or None on failure
        """
        cmd = f"crictl stats {id}"
        output = ConnectionHandler.get_command_output(cmd)
        try:
            # first line is skipped; the value is the second column of the next line
            return output.split('\n')[1].split()[1]
        except Exception:
            LOGGER.warning("could not get '%s' container cpu utilisation", cmd)
            return None

    @staticmethod
    def _parse_container_stats(id):
        """
        Read the statistics of a container from 'crictl stats -o json' and add
        a "utilisation" field to its "cpu" section.

        :param id: container id
        :return: the first entry of "stats", or None on failure
        """
        cmd = f"crictl stats -o json {id}"
        output = ConnectionHandler.get_command_output(cmd)
        try:
            as_json = json.loads(output)
            stats = as_json["stats"][0]
            cpu_data = stats['cpu']
            cpu_data['utilisation'] = ContainersCollector._parse_container_cpu_utilisation(id)
        except Exception:
            LOGGER.warning("could not get '%s' container cpu data", cmd)
            stats = None
        return stats

    def _parse_container(self, id, info):
        """
        Build the reported record of a single container.

        Resources and statistics are queried only for running containers;
        for any other state the defaults (NA / 0) are reported.

        :param id: container id
        :param info: container entry taken from 'crictl ps' output
        :return: flat dictionary describing the container
        """
        state = info.get('state', "CONTAINER_UNKNOWN")
        resources = {}
        stats = {}
        if state == 'CONTAINER_RUNNING':
            resources = ContainersCollector._parse_container_resources(id) or {}
            stats = ContainersCollector._parse_container_stats(id) or {}

        cpu = resources.get('cpu', {})
        stats_cpu = stats.get('cpu', {})
        memory = stats.get('memory', {}).get('workingSetBytes', {})
        disk = stats.get('writableLayer', {}).get('usedBytes', {})

        data = {
            "id": id,
            "name": info.get('metadata', {}).get('name', NA),
            "state": state.lower(),
            "deleted": False,
            "cpu_shares": cpu.get('shares', NA),
            "cpu_period": cpu.get('period', NA),
            "cpu_quota": cpu.get('quota', NA),
            "cpu_utilisation": stats_cpu.get('utilisation', NA),
            "memory_limit": resources.get('memory', NA),
            "memory_used": memory.get('value', 0),
            "disk_used": disk.get('value', 0)
        }
        if LEGACY_MODE:
            data[DEVICE_TYPE] = self._msg_metadata.get(DEVICE_TYPE, NA)
        return data

    def _get_containers(self):
        """
        List all containers using 'crictl ps -a -o json' and parse each one.

        :return: list of container records; a dictionary with the
                 "containers" key set to NA when the output cannot be parsed
        """
        key = "containers"
        cmd = "crictl ps -a -o json"
        output = ConnectionHandler.get_command_output(cmd)
        try:
            containers = json.loads(output)
        except Exception:
            LOGGER.warning("unexpected output from '%s'", cmd)
            log_not_collected(key)
            return {key: NA}
        data = []
        for con in containers.get("containers", []):
            cid = con.get("id", None)
            if cid is None:
                LOGGER.warning("'id' key is missing in '%s' output", cmd)
                continue
            parsed = self._parse_container(cid, con)
            if LEGACY_MODE:
                parsed.update(self._msg_metadata)
            data.append(parsed)
        if not data:
            log_not_collected(key)
        return data

    def _collect(self, gm):
        """
        Store the message metadata and gather the containers information.

        :param gm: general metadata dictionary
        :return: the collected data, or `gm` itself when nothing was collected
        """
        self._msg_metadata = gm
        data = self._get_containers()
        if not data:
            log_not_collected("Containers information")
            return gm
        return data

class IbDevicesCollector(TimerCollector):
    """
    Collector for ib devices info
    """
    def __init__(self, config):
        sampling_interval = config.get(IB_DEVICES_SAMPLING_OPTION, "20m")
        super().__init__(sampling_interval)

    def _collect(self, gm):
        """
        Gather the 'ibstat' information and attach it to the metadata.

        :param gm: general metadata dictionary, updated in place
        :return: `gm` with a "devices" entry, or an empty dict when nothing was parsed
        """
        ip_addr_info = self.get_ibstat_info()
        if not ip_addr_info:
            LOGGER.error("cannot collect ibstat ")
            return {}
        data = gm
        data["devices"] = ip_addr_info
        return data

    @classmethod
    def get_ibstat_info(cls):
        """
        Parse the output of 'ibstat' into a nested dictionary.

        A line without ':' starts a new device, a line of the form "Port ...:"
        starts a port section of the current device, and a "field: value" line
        is stored under the current port, or under the device itself when no
        port section has started yet.

        :return: dictionary of devices; empty when the command gave no output
        """
        data = {}
        rdma_device = None  # no device header seen yet
        port = ""
        output = ConnectionHandler.get_command_output('ibstat')
        if output is None:
            return data

        for line in output.splitlines():
            line = line.strip()
            if not line:
                # a blank line is not a device header
                continue
            res = line.split(':')
            if len(res) == 1:
                match = re.search(r'(mlx\d+_\d+)', res[0])
                rdma_device = match.group(1) if match else res[0]
                data[rdma_device] = {}
                port = ""
                continue
            if len(res) != 2 or rdma_device is None:
                # unexpected line shape, or data preceding any device header
                continue
            if res[1] == '' and 'Port' in res[0]:
                port = res[0].strip()
                data[rdma_device][port] = {}
                continue
            field = res[0].strip()
            field_value = res[1].strip()
            if port:
                data[rdma_device][port][field] = field_value
            else:
                data[rdma_device][field] = field_value
        return data


class RoCECollector(TimerCollector):
    """
    Collector for RoCE config info
    """

    def __init__(self, config):
        sampling_interval = config.get(ROCE_SAMPLING_OPTION, "1h")
        super().__init__(sampling_interval)

    def _collect(self, gm):
        """
        Gather the RoCE configuration and attach it to the metadata.

        :param gm: general metadata dictionary, updated in place
        :return: `gm` with a "roce_info" entry, or an empty dict when nothing was collected
        """
        roce_info = self.get_roce_config_info()
        if not roce_info:
            LOGGER.error("cannot collect roce config info ")
            return {}
        data = gm
        data["roce_info"] = roce_info
        return data

    @classmethod
    def get_roce_config_info(cls):
        """
        Build one record per entry returned by `get_map_bus_rdma_and_interface`.

        Entries without a bus, or whose bus has no ".<digits>" part, are
        skipped, so the result never contains empty records.

        :return: list of per-port dictionaries
        """
        data = []
        for info in get_map_bus_rdma_and_interface():
            bus = info.get('bus', '')
            rdma_device = info.get('rdma', '')
            interface = info.get('interface', '')
            if not bus:
                continue
            match = re.search(r'\.(\d+)', bus)
            if not match:
                continue

            # port number is the number following the '.' in the bus, plus one
            port = str(int(match.group(1)) + 1)
            port_data_info = {}
            res = get_mlxconfig_query(bus=bus)
            if res:
                # cnp priority
                port_data_info["CNP_Priority"] = cls.get_cnp_priority(
                    port=port, interface=interface, mlxconfig_output=res)
                # ECN DCQCN
                port_data_info["ECN_DCQCN"] = cls.get_ecn_dcqcn(
                    port=port, interface=interface, mlxconfig_output=res)
            # roce mode
            port_data_info["RoCE_Mode"] = cls.get_roce_mode(rdma_device=rdma_device)
            # roce tos
            port_data_info["RoCE_TOS"] = cls.get_roce_tos(rdma_device=rdma_device)
            # DSCP
            port_data_info["DSCP"] = {
                "traffic_class": cls.get_dscp_traffic_class(rdma_device=rdma_device)}

            port_data_info.update(
                {"bus": bus, "rdma_device": rdma_device, "port": port, "interface": interface})
            data.append(port_data_info)
        return data

    @classmethod
    def get_roce_mode(cls, rdma_device):
        """
        :return: stripped output of 'cma_roce_mode' for the device, '' when there is none
        """
        res = ConnectionHandler.get_command_output(cmd=f'cma_roce_mode -d {rdma_device}')
        return '' if res is None else res.strip()

    @classmethod
    def get_roce_tos(cls, rdma_device):
        """
        :return: stripped output of 'cma_roce_tos' for the device, '' when there is none
        """
        res = ConnectionHandler.get_command_output(cmd=f'cma_roce_tos -d {rdma_device}')
        return '' if res is None else res.strip()

    @classmethod
    def get_dscp_traffic_class(cls, rdma_device):
        """
        :return: lines of the device 'tc/1/traffic_class' sysfs file, '' when
                 the file could not be read
        """
        res = ConnectionHandler.get_command_output(cmd=f'cat /sys/class/infiniband/{rdma_device}/tc/1/traffic_class')
        return '' if res is None else res.splitlines()

    @classmethod
    def get_cnp_priority(cls, port, interface, mlxconfig_output):
        """
        Collect the CNP priority settings of a port.

        :param port: port number, used as the suffix of the mlxconfig field names
        :param interface: network interface name, used for the sysfs paths
        :param mlxconfig_output: text returned by the mlxconfig query
        :return: dictionary holding only the values that were found
        """
        data = {}
        for field in (f'CNP_DSCP_P{port}', f'CNP_802P_PRIO_P{port}'):
            match = re.search(rf'{field}\s+(\S+)', mlxconfig_output)
            if match:
                data[field] = match.group(1)

        dscp_cnp_data = {"ecn_roce_np_cnp_dscp": f'/sys/class/net/{interface}/ecn/roce_np/cnp_dscp',
                         "ecn_roce_np_cnp_802p_prio": f'/sys/class/net/{interface}/ecn/roce_np/cnp_802p_prio'}
        for k, p in dscp_cnp_data.items():
            res = ConnectionHandler.get_command_output(cmd=f'cat {p}')
            if res is not None:
                data[f'{k}'] = res.strip()
        return data

    @classmethod
    def get_ecn_dcqcn(cls, port, interface, mlxconfig_output):
        """
        Collect the ECN settings of a port.

        :param port: port number, used as the suffix of the mlxconfig field name
        :param interface: network interface name, used for the sysfs paths
        :param mlxconfig_output: text returned by the mlxconfig query
        :return: dictionary holding only the values that were found
        """
        data = {}

        field = f'ROCE_CC_PRIO_MASK_P{port}'
        match = re.search(rf'{field}\s+(\S+)', mlxconfig_output)
        if match:
            data[field] = match.group(1)

        ecn_dcqcn_np_rp_data = {"ecn_roce_np_enable": f'/sys/class/net/{interface}/ecn/roce_np/enable/',
                                "ecn_roce_rp_enable": f'/sys/class/net/{interface}/ecn/roce_rp/enable/'}
        for k, p in ecn_dcqcn_np_rp_data.items():
            # files named 0..7 are read from each "enable" directory
            for i in range(8):
                res = ConnectionHandler.get_command_output(cmd=f'cat {p}{i}')
                if res is not None:
                    data[f'{k}_{i}'] = res.strip()
        return data


class MainCollector:
    """
    Runs every registered collector and accumulates what they return
    """
    def __init__(self):
        self._collected_data = []

    def collect(self):
        """
        Call `collect` on each collector in SECTION_TO_COLLECTOR, passing its
        section name. A failure of one collector is logged and does not stop
        the others.

        :return: list of the non-empty results gathered by this instance
        """
        gathered = self._collected_data
        for section, collector in SECTION_TO_COLLECTOR.items():
            try:
                section_data = collector.collect(section)
                if not section_data:
                    continue
                gathered.append(section_data)
            except Exception as err:
                LOGGER.error(f"while collecting {section} data: {err}")
        return gathered

def _parse_disabled_sections(raw):
    """
    Turn the value of the "disabled" option into a set of lower-case names.

    A string is split on every non-word character, so any separator works;
    any other iterable is taken item by item.
    """
    if not raw:
        return set()
    if isinstance(raw, str):
        return set(re.findall(r"\w+", raw.lower()))
    return {str(item).strip().lower() for item in raw}


def set_section_to_collector(config):
    """
    Fill SECTION_TO_COLLECTOR with every collector that is not disabled.

    Section names listed under the "disabled" option are compared as whole,
    case-insensitive names, so disabling one section never disables another
    whose name is contained in it (e.g. "netifstats" vs "netif").
    A hash collector is kept only together with the collector it belongs to.

    :param config: configuration dictionary
    """
    global SECTION_TO_COLLECTOR
    all_section_to_collector = {
        "Inventory": InventoryCollector(config),
        "Node": NodeCollector(config),
        "PackageInfo": PackInfoCollector(config),
        "ResourceUtil": ResUtilCollector(config),
        "PackageInfoHash": HashCollector(),
        "InventoryHash": HashCollector(),
        "HwCounters": HwCountersCollector(config),
        "AmberInfo": AmberInfoCollector(config),
        "CablesCounters": CablesCountersCollector(config),
        "PCIECounters": PCIECountersCollector(config),
        "NetIf": NetIfCollector(config),
        "NetIfStats": NetIfStatsCollector(config),
        "DOM": DomCollector(config),
        "Containers": ContainersCollector(config),
        "IBDevices": IbDevicesCollector(config),
        "RoCE": RoCECollector(config),
    }
    disabled_components = _parse_disabled_sections(config.get("disabled", ""))
    for section, collector in all_section_to_collector.items():
        if section.lower() in disabled_components:
            continue
        SECTION_TO_COLLECTOR[section] = collector

    # link package info collector with package info hash collector
    package_info = SECTION_TO_COLLECTOR.get("PackageInfo", None)
    package_info_hasher = SECTION_TO_COLLECTOR.get("PackageInfoHash", None)
    if package_info and package_info_hasher:
        package_info.set_hash_collector(package_info_hasher)
    elif package_info_hasher and package_info is None:
        # a hash collector without its data collector has nothing to hash
        del SECTION_TO_COLLECTOR["PackageInfoHash"]

    # link inventory collector with inventory hash collector
    inventory = SECTION_TO_COLLECTOR.get("Inventory", None)
    inventory_hasher = SECTION_TO_COLLECTOR.get("InventoryHash", None)
    if inventory and inventory_hasher:
        inventory.set_hash_collector(inventory_hasher)
    elif inventory_hasher and inventory is None:
        del SECTION_TO_COLLECTOR["InventoryHash"]

def set_transform(config):
    """
    Switch the transform mode values to their text form ("full" / "partial")
    when the transform-text option is enabled; otherwise leave them untouched.
    """
    global FULL_TRANSFORM, PARTIAL_TRANSFORM
    if not bool_from_dict(config, TRANSFORM_TEXT_OPTION, False):
        return
    FULL_TRANSFORM = "full"
    PARTIAL_TRANSFORM = "partial"

def set_absolute_collection(config):
    """
    Turn diff calculation off when the absolute option is enabled;
    otherwise leave DIFF_ENABLED untouched.
    """
    global DIFF_ENABLED
    if not bool_from_dict(config, ABSOLUTE_OPTION, False):
        return
    DIFF_ENABLED = False

def set_output_format(config):
    """
    Set LEGACY_MODE from the legacy-mode option; the current value is
    passed to `bool_from_dict` as the default.
    """
    global LEGACY_MODE
    current_mode = LEGACY_MODE
    LEGACY_MODE = bool_from_dict(config, LEGACY_MODE_OPTION, current_mode)

def set_logging(config):
    """
    Configure the root logger from the syslog-style log-level option.

    Mapping: >=7 debug, >=6 info, >=4 warning, >=3 error, anything lower
    critical. A missing or non-numeric value falls back to 3 (error).

    :param config: configuration dictionary
    """
    try:
        num = int(config.get(LOG_LEVEL_OPTION, 3))
    except (TypeError, ValueError):
        num = 3

    if num >= 7:
        level = logging.DEBUG
    elif num >= 6:
        level = logging.INFO
    elif num >= 4:
        level = logging.WARNING
    elif num >= 3:
        level = logging.ERROR
    else:
        # NOTSET on the root logger lets every message through, which is the
        # opposite of what the most restrictive levels ask for
        level = logging.CRITICAL
    logging.basicConfig(level=level,format='%(asctime)s   %(name)-17s   %(levelname)-8s   %(message)s')

def set_device_type(config):
    """
    Pass the device-type option on to GeneralDataCollector when it is present
    in the configuration.
    """
    requested_type = config.get(DEVICE_TYPE_OPTION, None)
    if requested_type is None:
        return
    GeneralDataCollector.set_device_type(requested_type)

def init(config):
    """
    Prepare the module for collection: logging, output options, command
    connection, host name, collectors and operating system detection.

    :param config: configuration dictionary
    :return: True when everything is ready, False otherwise (missing root
             privileges, connection failure, unknown host name or
             unsupported OS)
    """
    global CURRENT_OS
    set_logging(config)
    if os.geteuid() != 0:
        LOGGER.error("Missing root privileges. Please rerun script as root")
        return False
    set_transform(config)
    set_absolute_collection(config)
    set_output_format(config)
    set_device_type(config)
    grpc = bool_from_dict(config, GRPC_OPTION, False)
    try:
        ConnectionHandler.set(grpc=grpc)
    except PrivilegedExecuterException as pe_exp:
        LOGGER.error(str(pe_exp))
        return False
    if not GeneralDataCollector.set_hostname():
        LOGGER.error("Cannot set node name")
        return False
    if not SECTION_TO_COLLECTOR:
        set_section_to_collector(config)
    if CURRENT_OS:
        # already detected by an earlier call - nothing left to do
        return True

    txt = ConnectionHandler.get_command_output("cat /etc/os-release")
    for line in (txt or "").split('\n'):
        low = line.lower()
        if not low.startswith("id="):
            continue
        if "ubuntu" in low:
            CURRENT_OS = OS.UBUNTU
        elif "cent" in low or "rhel" in low:
            CURRENT_OS = OS.CENT
        else:
            # the "id=" line names a distribution that is not handled
            break
        return True
    LOGGER.error("Unsupported OS")
    return False

def collect(config):
    """
    Entry point: initialise the module if the OS is not known yet, then run
    all collectors.

    :param config: configuration dictionary
    :return: list returned by MainCollector.collect, or an empty dict when
             initialisation fails
    """
    needs_init = CURRENT_OS is None
    if needs_init and not init(config):
        return {}
    return MainCollector().collect()


def get_tag():
    """
    :return: the constant tag string "inventory"
    """
    tag = "inventory"
    return tag

def get_options_description():
    """
    :return: the module-level OPTIONS_NAME_TO_DESCRIPTION object
    """
    # reading a module-level name needs no 'global' declaration
    return OPTIONS_NAME_TO_DESCRIPTION

if __name__ == "__main__":
    # debug run: most verbose logging, absolute (no diff) collection
    debug_config = {"log-level": 7, "absolute": True}
    collected = collect(debug_config)
    print(json.dumps(collected))
