// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: BSD-3-Clause

#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
#include <stdio.h>

#ifndef VFIO_MLX5_H
#define VFIO_MLX5_H

/**
 * @def VFIO_MLX5_MAX_DEVICES
 * @brief Upper bound for the max_devices argument of vfio_mlx5_init().
 */
#define VFIO_MLX5_MAX_DEVICES 8

/**
 * @struct vfio_mlx5_handle
 * @brief Opaque handle for VFIO MLX5 context.
 *
 * Only declared here; callers hold it by pointer, as returned by
 * vfio_mlx5_init() or vfio_mlx5_resume().
 */
struct vfio_mlx5_handle;

/**
 * @struct vfio_mlx5_dev
 * @brief Opaque handle for a single mlx5 device (PF).
 *
 * Only declared here; callers hold it by pointer, as returned by
 * vfio_mlx5_device_add() or vfio_mlx5_dev_get().
 *
 * Declared explicitly so the tag has file scope regardless of the order in
 * which the prototypes below use it.
 */
struct vfio_mlx5_dev;

/**
 * @brief Initialize mlx5 handle and persistent storage memory layout.
 *
 * @param persistent_storage Persistent storage for all devices managed by
 *        this handle.
 * @param storage_len Length of the persistent storage.
 *        NOTE(review): unit is presumably bytes - confirm.
 * @param iova IOVA, base DMA address of the whole persistent storage.
 * @param max_devices Maximum number of expected devices, up to
 *        VFIO_MLX5_MAX_DEVICES.
 *
 * @return vfio mlx5 handle.
 *         NOTE(review): the failure value is not stated here; presumably
 *         NULL, as documented for vfio_mlx5_resume() - confirm.
 */
struct vfio_mlx5_handle *vfio_mlx5_init(void *persistent_storage,
					size_t storage_len, uint64_t iova,
					unsigned int max_devices);

/**
 * @brief Uninitialize the handle
 *
 * Note: All devices must have been removed (see vfio_mlx5_device_del())
 *       prior to calling this function.
 *
 * @param vmh vfio mlx5 handle to uninitialize
 */
void vfio_mlx5_uninit(struct vfio_mlx5_handle *vmh);

/**
 * @brief Add a mlx5 device (PF)
 *
 * Note: The device must already be bound to the vfio driver.
 *       This function adds the device and boots it up; the device
 *       will be activated and ready to use.
 *       Make sure to set up and poll the VFIO MSI-X interrupt eventfd for
 *       vector 0 (async events vector of mlx5) and call
 *       vfio_mlx5_events_process() on every interrupt; alternatively,
 *       just call vfio_mlx5_events_process() periodically.
 *
 * @param vmh mlx5 context to add the device to
 * @param bdf VFIO device PCI BDF
 * @param device_fd VFIO device file descriptor
 * @param num_vfs Number of VFs to enable
 *
 * @return vfio mlx5 device handle
 *         NOTE(review): the failure value is not stated here; presumably
 *         NULL - confirm.
 */
struct vfio_mlx5_dev *vfio_mlx5_device_add(struct vfio_mlx5_handle *vmh,
					   const char *bdf, int device_fd,
					   int num_vfs);

/**
 * @brief Delete a device (PF) from the mlx5 context
 *
 * @param dev mlx5 device to delete, as returned by vfio_mlx5_device_add()
 *        NOTE(review): whether the handle may still be used after this
 *        call is not stated here; presumably not - confirm.
 */
void vfio_mlx5_device_del(struct vfio_mlx5_dev *dev);

/**
 * @def VFIO_MLX5_IRQ_VEC_IDX
 * @brief The device MSI-X vector index for page management events queue.
 *        If interrupt/epoll mode is used, this vector must be set up and polled
 *        for events.
 */
#define VFIO_MLX5_IRQ_VEC_IDX 0

/**
 * @brief Process async events
 *
 * Handles device page management async events.
 *
 * Polling mode: can be called periodically to process events, without
 *     setting up an eventfd or epoll for the MSI-X vector.
 *
 * Interrupt mode: must be called on every MSI-X interrupt generated on vfio's
 *     eventfd for vector `VFIO_MLX5_IRQ_VEC_IDX`.
 *
 * Note: calling this function upon receiving an interrupt is mandatory,
 *       otherwise the driver will not be able to process further events
 *       as the device will not generate new interrupts until armed.
 *
 * @param dev mlx5 device to process events for
 * @return 0 or the number of processed events on success,
 *         < 0 on catastrophic error:
 *             -EIO: PCI communication error
 */
int vfio_mlx5_events_process(struct vfio_mlx5_dev *dev);

/**
 * @enum mlx5_fwp_event
 * @brief Firmware pages events
 *
 * Identifies the events of the firmware pages management flow, i.e. the
 * steps of page allocation and transfer between the driver and the firmware.
 * The numeric values are spelled out explicitly; FWP_EVENT_MAX reserves room
 * for future events (ABI).
 */
enum mlx5_fwp_event {
	/* Driver gives pages to FW at boot */
	FWP_EVENT_GIVE_BOOT = 0,
	/* FW requests the driver to give pages */
	FWP_EVENT_FW_REQ_GIVE = 1,
	/* Driver gives pages to FW */
	FWP_EVENT_GIVE = 2,
	/* Driver gives only part of the pages to FW */
	FWP_EVENT_GIVE_PARTIAL = 3,
	/* Driver successfully gave pages to FW */
	FWP_EVENT_GIVE_SUCCESS = 4,
	/* Driver failed to give pages to FW */
	FWP_EVENT_GIVE_ERROR = 5,
	/* Driver can't give pages to FW */
	FWP_EVENT_CANT_GIVE = 6,
	/* Driver's can't-give notification to FW succeeded */
	FWP_EVENT_CANT_GIVE_SUCCESS = 7,
	/* Driver failed to notify FW that it can't give pages */
	FWP_EVENT_CANT_GIVE_ERROR = 8,
	/* FW requests the driver to take pages */
	FWP_EVENT_FW_REQ_TAKE = 9,
	/* Driver takes pages from FW */
	FWP_EVENT_TAKE = 10,
	/* Driver successfully took pages from FW */
	FWP_EVENT_TAKE_SUCCESS = 11,
	/* Driver failed to take pages from FW */
	FWP_EVENT_TAKE_ERROR = 12,
	/* Driver drops a FW request */
	FWP_EVENT_FW_REQ_DROP_ERROR = 13,
	/* Driver is busy, request pending in fifo */
	FWP_EVENT_FW_REQ_BUSY = 14,

	/* reminder: update fwpages_event_names[] */

	/* Number of events currently supported (follows the last event) */
	FWP_EVENT_COUNT,
	/* ABI: room for future expansion */
	FWP_EVENT_MAX = 32,
};

/**
 * @typedef mlx5_pg_events_t
 * @brief Firmware pages events array
 *
 * This array holds the count of each event defined in enum mlx5_fwp_event
 * (presumably indexed by the enumerator value - confirm).
 * It is sized by FWP_EVENT_MAX rather than FWP_EVENT_COUNT, so its size does
 * not change when new events are added.
 *
 * Note: being an array typedef, a function parameter of this type is
 *       adjusted to a pointer (uint64_t *), as for any C array.
 */
typedef uint64_t mlx5_pg_events_t[FWP_EVENT_MAX];

/**
 * @enum cmd_events
 * @brief Command events
 *
 * ABI: do not change the order (or value) of existing entries.
 * CMD_EVENT_MAX reserves room for future events.
 */
enum cmd_events {
	/* Busy poll command execution on dev add/del */
	CMD_EVENT_EXEC = 0,
	/* Post command to device */
	CMD_EVENT_POST = 1,
	/* Command completion */
	CMD_EVENT_COMP = 2,
	/* Error posting command */
	CMD_EVENT_POST_ERROR = 3,
	/* Error completing command */
	CMD_EVENT_COMP_ERROR = 4,
	/* Command completed but failed at FW level */
	CMD_EVENT_COMP_OUT_FAIL = 5,
	/* Command timeout */
	CMD_EVENT_TIMEOUT_ERROR = 6,
	/* Command in/out grow (alloc extra pages) */
	CMD_EVENT_GROW = 7,
	/* Failed to grow command in/out */
	CMD_EVENT_GROW_FAIL = 8,
	/* Failed to copy command in/out */
	CMD_EVENT_COPY_ERROR = 9,
	/* Trying to post a command to a busy slot */
	CMD_EVENT_BUSY_ERROR = 10,

	/* reminder: update cmd_event_names[] in cmd.c */

	/* Number of events currently defined (follows the last event) */
	CMD_EVENT_COUNT,
	/* Room for future events */
	CMD_EVENT_MAX = 32,
};

/**
 * @typedef mlx5_cmd_stats_t
 * @brief Command statistics array
 *
 * This array holds the count of each event defined in enum cmd_events
 * (presumably indexed by the enumerator value - confirm).
 * It is sized by CMD_EVENT_MAX rather than CMD_EVENT_COUNT, so its size does
 * not change when new events are added.
 *
 * Note: being an array typedef, a function parameter of this type is
 *       adjusted to a pointer (uint64_t *), as for any C array.
 */
typedef uint64_t mlx5_cmd_stats_t[CMD_EVENT_MAX];

/**
 * @typedef mlx5_pg_alloc_stats_t
 * @brief Page allocation statistics
 *
 * This structure holds the statistics related to the internal page allocator.
 * All members are uint64_t; the trailing reserved[] array leaves room for
 * additional fields.
 */
typedef struct {
	uint64_t total_pages; /* Total pages managed by the allocator */
	uint64_t free_pages; /* Pages available for allocation */
	uint64_t allocs; /* Allocations made by the allocator */
	uint64_t frees; /* Frees made by the allocator */
	uint64_t double_frees; /* Double frees detected */
	uint64_t allocs_failed; /* Allocations failed due to no free pages */
	uint64_t reserved[8]; /* room for future stats */
} mlx5_pg_alloc_stats_t;

/**
 * @struct mlx5_health_record
 * @brief Internal FW health record/snapshot
 *
 * This structure holds the internal FW health record/snapshot for a device.
 * It is embedded in struct mlx5_dev_stats (health_rec).
 *
 * NOTE(review): the meaning of the individual fields is not described in
 * this header; the names suggest they mirror the device's health/assert
 * information - confirm against the implementation.
 */
struct mlx5_health_record {
	/* NOTE(review): presumably the time the record was captured; clock and
	 * unit are not visible here - confirm */
	uint64_t time_stamp;
	uint64_t hw_timestamp; /* mlx5_bar->timer_h/l */
	uint32_t sensor;
	uint32_t assert_var[5];
	uint32_t assert_exit_ptr;
	uint32_t assert_callra;
	uint32_t time;
	uint32_t hw_id;
	uint32_t severity;
	uint32_t rfr;
	uint32_t irisc_index;
	uint32_t synd;
	uint32_t ext_synd;
	uint32_t fw_ver;
	uint32_t reserved[8]; /* room for future fields */
};

/**
 * @struct mlx5_dev_info
 * @brief Basic per-device information
 *
 * Reported as the dev_info member of struct mlx5_dev_stats.
 *
 * NOTE(review): the members are not naturally packed (uint8_t followed by
 * uint16_t, and uint16_t followed by uint32_t), so the compiler is expected
 * to insert implicit padding; confirm this is acceptable if the layout is
 * treated as ABI.
 */
struct mlx5_dev_info {
	/* Device state. NOTE(review): the set of state values is not defined
	 * in this header - confirm where they are documented */
	uint8_t state;
	/* NOTE(review): presumably the index reported by
	 * vfio_mlx5_dev_index() - confirm */
	uint16_t index;
	/* NOTE(review): presumably the number of VFs requested in
	 * vfio_mlx5_device_add() - confirm */
	uint16_t num_vfs;
	uint32_t reserved[30]; /* room for future fields */
};

/**
 * @struct mlx5_dev_stats
 * @brief Device stats structure
 *
 * This structure holds device statistics including page events and
 * allocation stats, command stats and the FW health record.
 * It is filled by vfio_mlx5_dev_stats().
 */
struct mlx5_dev_stats {
	struct mlx5_dev_info dev_info; /* Basic device information */
	mlx5_pg_events_t page_events; /* Firmware pages event counters */
	mlx5_pg_alloc_stats_t page_stats; /* Internal page allocator stats */
	mlx5_cmd_stats_t cmd_stats; /* Command event counters */
	uint64_t firmware_pages; /* Pages held/used in firmware */
	uint64_t driver_pages; /* Pages used by driver */
	struct mlx5_health_record health_rec; /* FW health record/snapshot */
};

/**
 * @brief Get statistics from a device (PF), and poll health record
 *
 * @param dev vfio mlx5 device to get stats from
 * @param stats Pointer to the mlx5_dev_stats structure to fill with stats.
 *        The filled contents are always valid, even on error.
 *
 * @return 0 on success, < 0 on error (e.g. health check failed)
 */
int vfio_mlx5_dev_stats(struct vfio_mlx5_dev *dev,
			struct mlx5_dev_stats *stats);

/**
 * @brief Dump statistics from a device (PF) on library's 'outfd'
 *
 * NOTE(review): 'outfd' presumably refers to the output stream configured
 * with vfio_mlx5_log_set() (stdout by default) - confirm.
 *
 * @param dev vfio mlx5 device to dump stats from
 * @param stats Pointer to mlx5_dev_stats structure to dump.
 *        If NULL, get current stats from device and dump them.
 */
void vfio_mlx5_dev_stats_dump(struct vfio_mlx5_dev *dev,
			      struct mlx5_dev_stats *stats);

/**
 * @enum vfio_mlx5_log_level
 * @brief Logging levels for VFIO MLX5
 *
 * This enum defines the logging levels for the VFIO MLX5 library, passed to
 * vfio_mlx5_log_set(). The values are spaced in steps of 10, leaving gaps
 * between the named levels.
 *
 * NOTE(review): presumably a message is emitted when its level is <= the
 * configured level (higher value = more verbose) - confirm.
 */
enum vfio_mlx5_log_level {
	MLX5_LOG_LVL_NONE = 0, /* No logging */
	MLX5_LOG_LVL_CRIT = 10, /* Critical logging */
	MLX5_LOG_LVL_ERR = 20, /* Error logging */
	MLX5_LOG_LVL_WARN = 30, /* Warning logging */
	MLX5_LOG_LVL_INFO = 40, /* Informational logging */
	MLX5_LOG_LVL_DEBUG = 50, /* Debug logging */
	MLX5_LOG_LVL_TRACE = 60, /* Trace logging */
	MLX5_LOG_LVL_MAX = 90, /* Maximum log level */
};

/**
 * @brief Set logging (Optional)
 *
 * This function allows setting a custom logging level and custom output
 * streams (FILE pointers).
 * The library starts by default with logging level MLX5_LOG_LVL_INFO
 * and outputs logs to stdout and stderr.
 *
 * @param level Logging level to set, one of enum vfio_mlx5_log_level
 * @param outf Output FILE stream for logging
 * @param errf Error FILE stream for logging
 *        NOTE(review): behavior when outf or errf is NULL is not stated
 *        here - confirm.
 */
void vfio_mlx5_log_set(enum vfio_mlx5_log_level level, FILE *outf, FILE *errf);

/**
 * @brief Suspend a device (PF)
 *
 * Note: Per-device thread safe.
 *
 * @param dev mlx5 device to suspend
 *
 * @return 0 on success, < 0 on error
 */
int vfio_mlx5_device_suspend(struct vfio_mlx5_dev *dev);

/**
 * @brief Suspend the mlx5 context
 *
 * Note: This function will suspend all non-suspended devices in the context
 *       (devices can also be suspended individually with
 *       vfio_mlx5_device_suspend()).
 *
 * @param vmh mlx5 context to suspend
 *
 * @return 0 on success, < 0 on error
 */
int vfio_mlx5_suspend(struct vfio_mlx5_handle *vmh);

/**
 * @brief Resume the mlx5 context
 *
 * Note: Doesn't resume devices; use vfio_mlx5_device_resume() for each
 *       device.
 *
 * @param persist_storage Persistent storage to resume the context from.
 *        NOTE(review): presumably the same persistent storage that was
 *        given to vfio_mlx5_init() - confirm.
 *
 * @return vfio mlx5 handle on success, NULL on error
 */
struct vfio_mlx5_handle *vfio_mlx5_resume(void *persist_storage);

/**
 * @brief Resume a device (PF) from the mlx5 context
 *
 * Note: Per-device thread safe
 *
 * @param vmh mlx5 context the device is resumed from.
 *        NOTE(review): presumably the handle returned by
 *        vfio_mlx5_resume() - confirm.
 * @param dev mlx5 device to resume.
 *        NOTE(review): presumably obtained with vfio_mlx5_dev_get() on the
 *        resumed context - confirm.
 * @param device_fd (new) VFIO device file descriptor
 *
 * @return 0 on success, < 0 on error
 */
int vfio_mlx5_device_resume(struct vfio_mlx5_handle *vmh,
			    struct vfio_mlx5_dev *dev, int device_fd);

/**
 * @brief Get a device from the mlx5 context
 *
 * Note: Must be used after vfio_mlx5_resume() to get the new device handle
 *       from the new context.
 *
 * @param vmh mlx5 context to get the device from
 * @param index The index of the device to get.
 *        NOTE(review): presumably the value reported by
 *        vfio_mlx5_dev_index() - confirm.
 *
 * @return vfio mlx5 device handle
 *         NOTE(review): the result for an index with no device is not
 *         stated here; presumably NULL - confirm.
 */
struct vfio_mlx5_dev *vfio_mlx5_dev_get(struct vfio_mlx5_handle *vmh,
					uint32_t index);

/**
 * @brief Get a device's unique index from a handle
 *
 * @param dev mlx5 device handle (not modified: taken as pointer to const)
 * @return unsigned int, index of the device
 */
unsigned int vfio_mlx5_dev_index(const struct vfio_mlx5_dev *dev);

#endif
