// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: BSD-3-Clause

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <stdbool.h>
#include <time.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <sys/epoll.h>
#include <linux/vfio.h>
#include <sched.h>
#include "pagealloc.h"
#include "util/mmio.h"
#include "util/util.h"
#include "fwpages.h"
#include "dev.h"
#include "cmd.h"
#include "ifc.h"

/*
 * cmd_blk_foreach - walk every command block of an in-place linked list.
 * @blk_ptr: iteration cursor, an lvalue of type struct mlx5_cmd_block *
 * @blocks:  pointer to the struct mlx5_cmd_blocks that owns the chain
 * @dev:     pointer to the device structure, used for IOVA -> vaddr lookups
 *
 * The walk starts at blocks->frst_blk_iova (used as-is) and follows the
 * big-endian 'next' IOVA of each block until a block with next == 0 is hit.
 *
 * Note: 'next' of the current block is read AFTER the loop body has run, so
 * the body must not invalidate the memory that backs the current block.
 *
 * Example usage:
 *   struct mlx5_cmd_block *blk;
 *   cmd_blk_foreach(blk, blocks, dev) {
 *       // use blk
 *   }
 */
#define cmd_blk_foreach(blk_ptr, blocks, dev)                         \
	for (blk_ptr = (struct mlx5_cmd_block *)iova2vaddr(           \
		     dev, (blocks)->frst_blk_iova);                   \
	     blk_ptr != NULL;                                         \
	     blk_ptr = (blk_ptr->next != 0) ?                         \
			       (struct mlx5_cmd_block *)iova2vaddr(   \
				       dev, be64toh(blk_ptr->next)) : \
			       NULL)

/*
 * Allocate one adapter page and hand it back zero-filled.
 * @dev:      device whose page allocator is used
 * @blk_iova: out, receives the IOVA of the newly allocated page
 *
 * Returns the virtual address of the page (i.e. of the first command block
 * placed in it), or NULL when the page allocator reports an error.
 */
static struct mlx5_cmd_block *page_alloc_get_block(struct vfio_mlx5_dev *dev,
						   uint64_t *blk_iova)
{
	struct mlx5_cmd_block *first;
	int err = mlx5_vfio_page_alloc(dev->page_alloc, blk_iova);

	if (err != 0) {
		dev_err(dev,
			"Failed to allocate page for command block, err(%d)",
			err);
		return NULL;
	}

	/* the whole page is cleared, so every block carved from it is zeroed */
	first = iova2vaddr(dev, *blk_iova);
	memset(first, 0, MLX5_ADAPTER_PAGE_SIZE);
	return first;
}

/* distance in bytes between two consecutive command blocks inside one page */
#define BLOCK_STRIDE_SIZE (1024)

/*
 * Return every page that backs a command block chain to the page allocator.
 * @dev:    device owning the page allocator
 * @blocks: chain to release; its num_blocks is reset to 0
 *
 * Several blocks share one page, and only the page-aligned block identifies
 * the page to free. The chain lives inside the pages being released, so a
 * page is handed back only once the walk has moved on to the next page (or
 * has finished); this way neither the loop body nor the iterator ever reads
 * a block whose page was already freed.
 */
static void mlx5_vfio_cmd_blocks_free(struct vfio_mlx5_dev *dev,
				      struct mlx5_cmd_blocks *blocks)
{
	struct mlx5_cmd_block *block;
	uint64_t pending_iova = 0;
	bool have_pending = false;

	cmd_blk_foreach(block, blocks, dev)
	{
		uint64_t iova = vaddr2iova(dev, block);

		/* not the first block of its page, nothing to free here */
		if (iova & (MLX5_ADAPTER_PAGE_SIZE - 1))
			continue;

		/* entered a new page: the previous one is fully traversed */
		if (have_pending)
			mlx5_vfio_page_free(dev->page_alloc, pending_iova);

		pending_iova = iova;
		have_pending = true;
	}

	/* the last page is released after the iterator read its final 'next' */
	if (have_pending)
		mlx5_vfio_page_free(dev->page_alloc, pending_iova);

	dev_dbg(dev, "Freed command blocks, num blocks %u", blocks->num_blocks);
	blocks->num_blocks = 0;
}

/*
 * Build a chain of command blocks.
 * @dev:        device owning the page allocator
 * @blocks:     chain descriptor to fill (frst_blk_iova and num_blocks)
 * @num_blocks: number of blocks requested
 *
 * Blocks are placed BLOCK_STRIDE_SIZE bytes apart inside a page; a new page
 * is allocated whenever the next stride would land on a page boundary. Each
 * block stores its index in block_num and the IOVA of its successor in next,
 * both big-endian. The first page is always allocated, even for
 * num_blocks == 0.
 *
 * Returns 0 on success or -ENOMEM when a page cannot be allocated, in which
 * case every page obtained so far is released and num_blocks is set to 0.
 */
static int mlx5_vfio_cmd_blocks_alloc(struct vfio_mlx5_dev *dev,
				      struct mlx5_cmd_blocks *blocks,
				      uint32_t num_blocks)
{
	struct mlx5_cmd_block *prev_block;
	struct mlx5_cmd_block *block;
	uint64_t pending_iova = 0;
	bool have_pending = false;
	uint32_t npages = 0;
	uint64_t iova;
	uint32_t i;

	prev_block = page_alloc_get_block(dev, &blocks->frst_blk_iova);
	if (!prev_block) {
		dev_err(dev, "Failed to allocate first command block");
		return -ENOMEM;
	}
	npages++;
	iova = blocks->frst_blk_iova;

	for (i = 1; i < num_blocks; i++) {
		if ((iova + BLOCK_STRIDE_SIZE) & (MLX5_ADAPTER_PAGE_SIZE - 1)) {
			/* the next stride still lies inside the current page */
			iova += BLOCK_STRIDE_SIZE;
			block = iova2vaddr(dev, iova);
		} else {
			/* current page is exhausted, continue on a fresh one */
			block = page_alloc_get_block(dev, &iova);
			if (!block) {
				dev_err(dev,
					"Failed to allocate command block %u of %u",
					i + 1, num_blocks);
				goto err_alloc;
			}
			npages++;
		}

		block->block_num = htobe32(i);
		prev_block->next = htobe64(iova);
		prev_block = block;
	}

	dev_dbg(dev, "Allocated %u command blocks, iova %lx npages %u",
		num_blocks, blocks->frst_blk_iova, npages);
	blocks->num_blocks = num_blocks;
	return 0;

err_alloc:
	/*
	 * Unwind the partial chain. A page is freed only after the walk has
	 * left it, so the iterator never reads 'next' from a released page.
	 */
	cmd_blk_foreach(block, blocks, dev)
	{
		iova = vaddr2iova(dev, block);
		if (iova & (MLX5_ADAPTER_PAGE_SIZE - 1))
			continue;

		if (have_pending)
			mlx5_vfio_page_free(dev->page_alloc, pending_iova);

		pending_iova = iova;
		have_pending = true;
	}

	if (have_pending)
		mlx5_vfio_page_free(dev->page_alloc, pending_iova);

	blocks->num_blocks = 0;
	return -ENOMEM;
}

/*
 * Number of chained data blocks needed for a message of msg_len bytes.
 * Up to 16 bytes of the message are not counted (they travel inline in the
 * command layout, see cmd_msg_size()); the remainder is rounded up to whole
 * MLX5_CMD_DATA_BLOCK_SIZE chunks.
 */
static int mlx5_msg_num_blocks(uint32_t msg_len)
{
	const int total = msg_len;
	const int inline_len = min_t(int, 16, total);

	return DIV_ROUND_UP(total - inline_len, MLX5_CMD_DATA_BLOCK_SIZE);
}

/* Just for verbosity, this is always true as it is a HW requirement */
_Static_assert(sizeof(((struct mlx5_cmd_layout *)0)->in) ==
		       sizeof(((struct mlx5_cmd_layout *)0)->out),
	       "Size of 'in' and 'out' fields must be equal");

/*
 * Capacity in bytes of a command slot direction (in or out): the inline
 * area of the command layout plus MLX5_CMD_DATA_BLOCK_SIZE bytes for every
 * block currently chained in @blocks.
 */
static size_t cmd_msg_size(const struct mlx5_cmd_blocks *blocks)
{
	const size_t inline_len = sizeof(((struct mlx5_cmd_layout *)0)->in);
	const size_t chained_len =
		(blocks->num_blocks) * MLX5_CMD_DATA_BLOCK_SIZE;

	return inline_len + chained_len;
}

/*
 * Extend a command block chain so that it can carry new_size bytes.
 * @dev:      device owning the page allocator
 * @blocks:   existing chain; grown in place
 * @new_size: total message size (inline part included) that must fit
 *
 * The additional blocks are allocated as a separate chain, appended after
 * the current last block, and renumbered so that block_num continues from
 * the existing count. Nothing is changed when the chain is already large
 * enough.
 *
 * NOTE(review): assumes the existing chain has at least one block, as the
 * tail lookup dereferences the cursor unconditionally - confirm with callers.
 *
 * Returns 0 on success (or when no growth is needed), otherwise the error
 * reported by mlx5_vfio_cmd_blocks_alloc().
 */
static int mlx5_vfio_cmd_grow(struct vfio_mlx5_dev *dev,
			      struct mlx5_cmd_blocks *blocks, size_t new_size)
{
	struct mlx5_cmd_blocks new_blocks = { 0 };
	struct mlx5_cmd_block *block;
	uint32_t new_num_blocks;
	uint32_t delta_blocks;
	uint32_t block_idx;
	int err;

	new_num_blocks = mlx5_msg_num_blocks(new_size);

	if (new_num_blocks <= blocks->num_blocks)
		return 0; /* no need to grow */

	delta_blocks = new_num_blocks - blocks->num_blocks;

	cmd_event_dbg(
		dev, GROW,
		"from %zu to %zu bytes curr_blocks %u, additional blocks needed %u",
		cmd_msg_size(blocks), new_size, blocks->num_blocks,
		delta_blocks);

	/* allocate the new blocks on the side new_blocks variable */
	err = mlx5_vfio_cmd_blocks_alloc(dev, &new_blocks, delta_blocks);
	if (err) {
		cmd_event_wrn(
			dev, GROW_FAIL,
			"Failed to allocate %u new command blocks, err(%d)",
			delta_blocks, err);
		return err;
	}

	/* find the last block of the existing blocks */
	cmd_blk_foreach(block, blocks, dev)
	{
		if (block->next == 0)
			break;
	}

	/* link the last block of the existing blocks to the new blocks */
	block->next = htobe64(new_blocks.frst_blk_iova);

	/* update the block index of the new blocks */
	block_idx = blocks->num_blocks;
	cmd_blk_foreach(block, &new_blocks, dev)
	{
		block->block_num = htobe32(block_idx);
		block_idx++;
	}

	blocks->num_blocks = new_num_blocks;
	return 0;
}

/*
 * Locate the command layout entry of @slot inside the command queue page.
 * Entries are (1 << dev->cmd.log_stride) bytes apart, starting at the
 * virtual address that corresponds to dev->cmd.iova.
 */
static inline struct mlx5_cmd_layout *
mlx5_cmd_slot_layout(struct vfio_mlx5_dev *dev, unsigned int slot)
{
	char *queue_base = iova2vaddr(dev, dev->cmd.iova);
	unsigned int entry_off = slot * (1 << dev->cmd.log_stride);

	return (struct mlx5_cmd_layout *)(queue_base + entry_off);
}

/*
 * Translate a command status code into 0 or a negative errno value.
 * Statuses that are not listed map to -EIO.
 * Never return -EAGAIN in here.
 */
static int mlx5_cmd_status_to_err(uint8_t status)
{
	switch (status) {
	case MLX5_CMD_STAT_OK:
		return 0;

	case MLX5_CMD_STAT_RES_BUSY:
		return -EBUSY;

	case MLX5_CMD_STAT_LIM_ERR:
	case MLX5_CMD_STAT_NO_RES_ERR:
		return -ENOMEM;

	case MLX5_CMD_STAT_BAD_OP_ERR:
	case MLX5_CMD_STAT_BAD_PARAM_ERR:
	case MLX5_CMD_STAT_BAD_RES_ERR:
	case MLX5_CMD_STAT_BAD_RES_STATE_ERR:
	case MLX5_CMD_STAT_IX_ERR:
	case MLX5_CMD_STAT_BAD_QP_STATE_ERR:
	case MLX5_CMD_STAT_BAD_PKT_ERR:
	case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR:
		return -EINVAL;

	case MLX5_CMD_STAT_INT_ERR:
	case MLX5_CMD_STAT_BAD_SYS_STATE_ERR:
	case MLX5_CMD_STAT_BAD_INP_LEN_ERR:
	case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR:
	default:
		return -EIO;
	}
}

/*
 * Human readable name of a command status code, for log messages.
 * Returns "unknown status" for codes that are not listed.
 */
static const char *cmd_status_str(uint8_t status)
{
	const char *text = "unknown status";

	switch (status) {
	case MLX5_CMD_STAT_OK:
		text = "OK";
		break;
	case MLX5_CMD_STAT_INT_ERR:
		text = "internal error";
		break;
	case MLX5_CMD_STAT_BAD_OP_ERR:
		text = "bad operation";
		break;
	case MLX5_CMD_STAT_BAD_PARAM_ERR:
		text = "bad parameter";
		break;
	case MLX5_CMD_STAT_BAD_SYS_STATE_ERR:
		text = "bad system state";
		break;
	case MLX5_CMD_STAT_BAD_RES_ERR:
		text = "bad resource";
		break;
	case MLX5_CMD_STAT_RES_BUSY:
		text = "resource busy";
		break;
	case MLX5_CMD_STAT_LIM_ERR:
		text = "limits exceeded";
		break;
	case MLX5_CMD_STAT_BAD_RES_STATE_ERR:
		text = "bad resource state";
		break;
	case MLX5_CMD_STAT_IX_ERR:
		text = "bad index";
		break;
	case MLX5_CMD_STAT_NO_RES_ERR:
		text = "no resources";
		break;
	case MLX5_CMD_STAT_BAD_INP_LEN_ERR:
		text = "bad input length";
		break;
	case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR:
		text = "bad output length";
		break;
	case MLX5_CMD_STAT_BAD_QP_STATE_ERR:
		text = "bad QP state";
		break;
	case MLX5_CMD_STAT_BAD_PKT_ERR:
		text = "bad packet (discarded)";
		break;
	case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR:
		text = "bad size too many outstanding CQEs";
		break;
	default:
		break;
	}

	return text;
}

#define CMD_STR_SZ 32

static const char *cmd_str(const void *in, char *buf)
{
	uint32_t opcode = DEVX_GET(mbox_in, in, opcode);
	uint32_t op_mod = DEVX_GET(mbox_in, in, op_mod);

	snprintf(buf, CMD_STR_SZ, "opcode(0x%x), op_mod(0x%x)", opcode, op_mod);
	return buf;
}

static int mlx5_vfio_cmd_check(struct vfio_mlx5_dev *dev, const void *in,
			       const void *out)
{
	uint32_t syndrome;
	uint16_t opcode;
	uint16_t op_mod;
	uint8_t status;
	int ret;

	mlx5_cmd_mbox_status(out, &status, &syndrome);
	if (!status)
		return 0;

	opcode = DEVX_GET(mbox_in, in, opcode);
	op_mod = DEVX_GET(mbox_in, in, op_mod);

	ret = mlx5_cmd_status_to_err(status);

	cmd_event_wrn(
		dev, COMP_OUT_FAIL,
		"opcode(0x%x), op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x), ret(%d)",
		opcode, op_mod, cmd_status_str(status), status, syndrome, ret);

	return ret;
}

/*
 * Copy a caller buffer into the input side of a command slot: the leading
 * bytes go to the inline 'in' area of the command layout, the rest is
 * spread over the chained input blocks, MLX5_CMD_DATA_BLOCK_SIZE bytes each.
 *
 * assumption: size is always <= cmd_msg_size() of the slot's input chain;
 * bytes beyond the end of the chain are not copied.
 */
static void mlx5_cmd_copy_to_in(struct vfio_mlx5_dev *dev, void *from,
				size_t size, int slot)
{
	const struct mlx5_cmd_blocks *chain = &dev->cmd.cmds[slot].in;
	struct mlx5_cmd_layout *cmd_lay = mlx5_cmd_slot_layout(dev, slot);
	struct mlx5_cmd_block *blk;
	const char *src = from;
	size_t chunk;

	/* inline part first */
	chunk = min_t(size_t, size, sizeof(cmd_lay->in));
	memcpy(cmd_lay->in, src, chunk);
	src += chunk;
	size -= chunk;

	/* then one chunk per chained block until the input is exhausted */
	cmd_blk_foreach(blk, chain, dev)
	{
		if (size == 0)
			break;

		chunk = min_t(size_t, size, MLX5_CMD_DATA_BLOCK_SIZE);
		memcpy(blk->data, src, chunk);
		src += chunk;
		size -= chunk;
	}
}

/*
 * Stage the input of a command in its slot.
 * @dev:      device
 * @cmd_slot: slot that will carry the command
 * @in:       caller's input mailbox
 * @ilen:     length of @in in bytes
 *
 * Grows the slot's input chain when @ilen does not fit, copies the mailbox
 * into the slot and records the big-endian length in the command layout.
 *
 * Returns 0 on success or the error from mlx5_vfio_cmd_grow(), which logs
 * the failure itself.
 */
static int mlx5_vfio_cmd_prep_in(struct vfio_mlx5_dev *dev,
				 struct mlx5_vfio_cmd_slot *cmd_slot, void *in,
				 size_t ilen)
{
	struct mlx5_cmd_layout *cmd_lay =
		mlx5_cmd_slot_layout(dev, cmd_slot->slot);
	struct mlx5_cmd_blocks *in_chain = &cmd_slot->in;
	int err = 0;

	if (ilen > cmd_msg_size(in_chain))
		err = mlx5_vfio_cmd_grow(dev, in_chain, ilen);
	if (err)
		return err;

	mlx5_cmd_copy_to_in(dev, in, ilen, cmd_slot->slot);
	cmd_lay->ilen = htobe32(ilen);

	return 0;
}

/*
 * Prepare the output side of a command slot for a reply of @olen bytes.
 *
 * Records the big-endian output length in the command layout, clears the
 * inline 'out' area and the data of as many existing chained blocks as the
 * reply needs, then grows the chain when @olen exceeds its capacity.
 *
 * Returns 0 on success or the error from mlx5_vfio_cmd_grow().
 */
static int mlx5_vfio_cmd_prep_out(struct vfio_mlx5_dev *dev,
				  struct mlx5_vfio_cmd_slot *cmd_slot,
				  size_t olen)
{
	struct mlx5_cmd_layout *cmd_lay =
		mlx5_cmd_slot_layout(dev, cmd_slot->slot);
	struct mlx5_cmd_blocks *out_chain = &cmd_slot->out;
	struct mlx5_cmd_block *blk;
	uint32_t remaining;

	cmd_lay->olen = htobe32(olen);
	memset(cmd_lay->out, 0, sizeof(cmd_lay->out));

	/* zeroing output message */
	remaining = mlx5_msg_num_blocks(olen);
	cmd_blk_foreach(blk, out_chain, dev)
	{
		if (remaining == 0)
			break;

		remaining--;
		memset(blk->data, 0, MLX5_CMD_DATA_BLOCK_SIZE);
	}

	if (olen <= cmd_msg_size(out_chain))
		return 0;

	/* new blocks will be zeroed on grow */
	return mlx5_vfio_cmd_grow(dev, out_chain, olen);
}

/* Release the input and output block chains of command slot @slot. */
static void mlx5_vfio_cmd_slot_free(struct vfio_mlx5_dev *dev,
				    unsigned int slot)
{
	struct mlx5_cmd_blocks *in_chain = &dev->cmd.cmds[slot].in;
	struct mlx5_cmd_blocks *out_chain = &dev->cmd.cmds[slot].out;

	mlx5_vfio_cmd_blocks_free(dev, in_chain);
	mlx5_vfio_cmd_blocks_free(dev, out_chain);
}

/* on boot FW requests 6553 pages which requires 52440 bytes for manage_pages_in
 * (6553 * 8 + 16), we allocate 65536 bytes by default for the commands in/out
 * to avoid unnecessary "cmd slot" enlargement on boot.
 */
#define DEFAULT_CMD_SIZE 65536 /* 64K */

/*
 * Set up command slot @slot: allocate input and output block chains sized
 * for DEFAULT_CMD_SIZE and point the slot's command layout entry at them.
 *
 * Returns 0 on success or the allocation error; on failure nothing stays
 * allocated for the slot.
 */
static int mlx5_vfio_cmd_slot_setup(struct vfio_mlx5_dev *dev,
				    unsigned int slot)
{
	struct mlx5_vfio_cmd_slot *cmd_slot = &dev->cmd.cmds[slot];
	int nblocks = mlx5_msg_num_blocks(DEFAULT_CMD_SIZE);
	struct mlx5_cmd_layout *cmd_lay;
	int err;

	err = mlx5_vfio_cmd_blocks_alloc(dev, &cmd_slot->in, nblocks);
	if (err)
		return err;

	err = mlx5_vfio_cmd_blocks_alloc(dev, &cmd_slot->out, nblocks);
	if (err) {
		/* undo the input chain allocated above */
		mlx5_vfio_cmd_blocks_free(dev, &cmd_slot->in);
		return err;
	}

	cmd_slot->slot = slot;

	cmd_lay = mlx5_cmd_slot_layout(dev, slot);
	cmd_lay->type = MLX5_PCI_CMD_XPORT;
	cmd_lay->iptr = htobe64(cmd_slot->in.frst_blk_iova);
	cmd_lay->optr = htobe64(cmd_slot->out.frst_blk_iova);

	dev_info(dev, "cmd slot[%u] sz_in(%zu) sz_out(%zu)", slot,
		 cmd_msg_size(&cmd_slot->in), cmd_msg_size(&cmd_slot->out));

	return 0;
}

/*
 * Bring up the command interface of the device.
 *
 * Reads the command interface revision and the queue geometry (log_sz and
 * log_stride) from BAR0, allocates and zeroes the command queue page,
 * programs its address into the device and sets up the generic and the
 * page-request command slots.
 *
 * Returns 0 on success, -EINVAL when the revision is not 5, when the queue
 * has fewer than CMD_SLOT_MAX entries or when the queue does not fit in one
 * adapter page, or the error of the failing page/slot allocation.
 */
int mlx5_vfio_cmd_interface_init(struct vfio_mlx5_dev *dev)
{
	struct mlx5_vfio_cmd *cmd = &dev->cmd;
	struct mlx5_bar *bar0 = dev->heap->bar_map;
	uint32_t cmd_h, cmd_l;
	uint16_t cmdif_rev;
	void *cmdif_vaddr;
	int ret;

	cmdif_rev = be32toh(mmio_read32_be(&bar0->cmdif_rev_fw_sub)) >> 16;

	if (cmdif_rev != 5) {
		dev_err(dev,
			"Failed due to unsupported command interface revision %u",
			cmdif_rev);
		return -EINVAL;
	}

	/* low byte: log_sz in bits 7:4, log_stride in bits 3:0 */
	cmd_l = be32toh(mmio_read32_be(&bar0->cmdq_addr_l_sz)) & 0xff;
	cmd->log_sz = (cmd_l >> 4) & 0xf;
	cmd->log_stride = cmd_l & 0xf;

	/* the queue must provide an entry for every slot this driver uses */
	if ((1 << cmd->log_sz) < CMD_SLOT_MAX) {
		dev_err(dev, "Failed due to command queue size too small");
		return -EINVAL;
	}

	/* the whole queue has to fit in the single page allocated below */
	if (cmd->log_sz + cmd->log_stride > MLX5_ADAPTER_PAGE_SHIFT) {
		dev_err(dev,
			"Failed due to command parameters exceed adapter page size");
		return -EINVAL;
	}

	ret = mlx5_vfio_page_alloc(dev->page_alloc, &cmd->iova);
	if (ret) {
		dev_err(dev,
			"Failed to allocate page for command interface, err(%d)",
			ret);
		return ret;
	}

	cmdif_vaddr = iova2vaddr(dev, cmd->iova);
	memset(cmdif_vaddr, 0, MLX5_ADAPTER_PAGE_SIZE);

	cmd_h = (uint32_t)((uint64_t)(cmd->iova) >> 32);
	cmd_l = (uint32_t)(uint64_t)(cmd->iova);

	mmio_write32_be(&bar0->cmdq_addr_h, htobe32(cmd_h));
	mmio_write32_be(&bar0->cmdq_addr_l_sz, htobe32(cmd_l));

	/* Make sure firmware sees the complete address before we proceed */
	udma_to_device_barrier();

	ret = mlx5_vfio_cmd_slot_setup(dev, CMD_SLOT_GENERIC);
	if (ret) {
		dev_err(dev, "Failed to setup command slot %d, err(%d)",
			CMD_SLOT_GENERIC, ret);
		goto err_slot_0;
	}

	ret = mlx5_vfio_cmd_slot_setup(dev, CMD_SLOT_PAGE_REQ);
	if (ret) {
		dev_err(dev, "Failed to setup page request slot %d, err(%d)",
			CMD_SLOT_PAGE_REQ, ret);
		goto err_slot_1;
	}

	return 0;

err_slot_1:
	mlx5_vfio_cmd_slot_free(dev, CMD_SLOT_GENERIC);
err_slot_0:
	mlx5_vfio_page_free(dev->page_alloc, cmd->iova);
	return ret;
}

/*
 * Tear down the command interface: free both command slots (page-request
 * first, then generic) and finally the command queue page.
 */
void mlx5_vfio_cmd_interface_uninit(struct vfio_mlx5_dev *dev)
{
	mlx5_vfio_cmd_slot_free(dev, CMD_SLOT_PAGE_REQ);
	mlx5_vfio_cmd_slot_free(dev, CMD_SLOT_GENERIC);

	mlx5_vfio_page_free(dev->page_alloc, dev->cmd.iova);
}

/**
 * Core of the command interface: stage a command in @slot and ring the
 * doorbell, without waiting for completion.
 *
 * Internal to this file; external users go through the wrappers below.
 *
 * @param dev: Pointer to the vfio mlx5 device
 * @param in: Pointer to the input mailbox
 * @param ilen: Length of the input mailbox
 * @param olen: Length of the expected output
 * @param slot: Command slot to post on
 *
 * @return 0 on success, -EIO when the device is flagged with a PCI error,
 *         -EBUSY when the slot is already in use, or the error returned
 *         while preparing the input/output of the slot
 */
static int mlx5_vfio_cmd_slot_post(struct vfio_mlx5_dev *dev, void *in,
				   size_t ilen, size_t olen, unsigned int slot)
{
	struct mlx5_vfio_cmd_slot *cmd_slot = &dev->cmd.cmds[slot];
	struct mlx5_cmd_layout *cmd_lay;
	char cmd_buf[CMD_STR_SZ];
	int err;

	if (dev->pci_err) {
		cmd_event_err(
			dev, POST_ERROR,
			"PCI in error, can't post command, health sensor: %d\n",
			dev->health_rec.sensor);
		return -EIO;
	}

	cmd_lay = mlx5_cmd_slot_layout(dev, slot);

	/* shouldn't happen */
	if (cmd_slot->in_use) {
		err = -EBUSY;
		cmd_event_err(dev, BUSY_ERROR,
			      "Command slot %d is already in use, err(%d)",
			      slot, err);
		return err;
	}

	err = mlx5_vfio_cmd_prep_in(dev, cmd_slot, in, ilen);
	if (!err)
		err = mlx5_vfio_cmd_prep_out(dev, cmd_slot, olen);
	if (err)
		return err;

	cmd_event_dbg(dev, POST,
		      "Posting command to slot %d: %s inlen(%zu) outlen(%zu)",
		      slot, cmd_str(in, cmd_buf), ilen, olen);

	/* mark the slot busy and set the ownership bit before the doorbell */
	cmd_slot->in_use = true;
	cmd_lay->status_own = 0x1;

	udma_to_device_barrier();
	mmio_write32_be(&dev->heap->bar_map->cmd_dbell, htobe32(0x1 << slot));

	return 0;
}

/* Command delivery status codes, taken from status_own >> 1 of a slot */
enum {
	MLX5_CMD_DELIVERY_STAT_OK = 0x0,
	MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR = 0x1,
	MLX5_CMD_DELIVERY_STAT_TOK_ERR = 0x2,
	MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR = 0x3,
	MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR = 0x4,
	MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR = 0x5,
	MLX5_CMD_DELIVERY_STAT_FW_ERR = 0x6,
	MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR = 0x7,
	MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR = 0x8,
	MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR = 0x9,
	MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10,
};

/*
 * Human readable name of a delivery status code, for log messages.
 * Returns "unknown status code" for values that are not listed.
 */
static const char *deliv_status_to_str(uint8_t status)
{
	/* sparse table indexed by status; unlisted entries stay NULL */
	static const char *const names[] = {
		[MLX5_CMD_DELIVERY_STAT_OK] = "no errors",
		[MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR] = "signature error",
		[MLX5_CMD_DELIVERY_STAT_TOK_ERR] = "token error",
		[MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR] = "bad block number",
		[MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR] =
			"output pointer not aligned to block size",
		[MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR] =
			"input pointer not aligned to block size",
		[MLX5_CMD_DELIVERY_STAT_FW_ERR] = "firmware internal error",
		[MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR] =
			"command input length error",
		[MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR] =
			"command output length error",
		[MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR] =
			"reserved fields not cleared",
		[MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR] =
			"bad command descriptor type",
	};
	const char *name = NULL;

	if (status < sizeof(names) / sizeof(names[0]))
		name = names[status];

	return name ? name : "unknown status code";
}

/*
 * Translate a delivery status code into 0 or a negative errno value.
 * Values that are not listed map to -EINVAL.
 */
static int deliv_status_to_err(uint8_t status)
{
	int err;

	switch (status) {
	case MLX5_CMD_DELIVERY_STAT_OK:
		err = 0;
		break;
	case MLX5_CMD_DELIVERY_STAT_FW_ERR:
		err = -EIO;
		break;
	case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR:
	case MLX5_CMD_DELIVERY_STAT_TOK_ERR:
		err = -EBADR;
		break;
	case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR:
	case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR:
	case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR:
		err = -EFAULT; /* Bad address */
		break;
	case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR:
	case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR:
	case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR:
	case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR:
		err = -ENOMSG;
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}

/*
 * Mark command slot @slot as free again and evaluate how the command was
 * delivered. The delivery status is taken from status_own >> 1 of the
 * slot's command layout.
 *
 * Returns 0 when delivery succeeded, otherwise logs the failure and returns
 * the errno produced by deliv_status_to_err().
 */
static int mlx5_vfio_cmd_release(struct vfio_mlx5_dev *dev, unsigned int slot)
{
	struct mlx5_cmd_layout *cmd_lay = mlx5_cmd_slot_layout(dev, slot);
	struct mlx5_vfio_cmd_slot *cmd_slot = &dev->cmd.cmds[slot];
	char cmd_buf[CMD_STR_SZ];
	uint8_t deliv;
	int err;

	WARN_ON(!cmd_slot->in_use); // should never happen
	cmd_slot->in_use = false;

	udma_from_device_barrier();
	deliv = mmio_read8(&cmd_lay->status_own) >> 1;

	if (deliv != MLX5_CMD_DELIVERY_STAT_OK) {
		err = deliv_status_to_err(deliv);
		cmd_event_err(
			dev, COMP_ERROR,
			"Command slot %d failed, delivery status %s(0x%x), err(%d), for cmd %s",
			slot, deliv_status_to_str(deliv), deliv, err,
			cmd_str(cmd_lay->in, cmd_buf));
		return err;
	}

	cmd_event_dbg(
		dev, COMP,
		"Command slot %d completed successfully, delivery status %s(0x%x)",
		slot, deliv_status_to_str(deliv), deliv);

	return 0;
}

#define MLX5_CMD_TIMEOUT_MSEC (60 * 1000)

/* Busy Poll for command completion, only used before EQ is created */
static int mlx5_cmd_poll_timeout(struct vfio_mlx5_dev *dev, uint32_t slot)
{
	struct mlx5_cmd_layout *cmd_lay = mlx5_cmd_slot_layout(dev, slot);
	static struct timeval start, curr;
	uint64_t ms_start, ms_curr;

	gettimeofday(&start, NULL);
	ms_start = (uint64_t)start.tv_sec * 1000 + start.tv_usec / 1000;
	do {
		if (!(mmio_read8(&cmd_lay->status_own) & 0x1))
			return 0;
		sched_yield();
		gettimeofday(&curr, NULL);
		ms_curr = (uint64_t)curr.tv_sec * 1000 + curr.tv_usec / 1000;
	} while (ms_curr - ms_start < MLX5_CMD_TIMEOUT_MSEC);

	int ret = -ETIMEDOUT;
	cmd_event_err(
		dev, TIMEOUT_ERROR,
		"Command slot %d timed out after %d ms, status_own (0x%x), err(%d)\n",
		slot, MLX5_CMD_TIMEOUT_MSEC, mmio_read8(&cmd_lay->status_own),
		ret);

	return ret;
}

/*
 * Handle a command completion event.
 * @dev:    device
 * @vector: bitmask of completed command slots, one bit per slot number
 *
 * A page-request completion is released and forwarded to
 * mlx5_vfio_page_request_cmd_comp(). A generic-slot completion is only
 * acknowledged here. Any other bit is reported as an error.
 */
void mlx5_vfio_cmd_eqe_comp(struct vfio_mlx5_dev *dev, unsigned long vector)
{
	const unsigned long page_req_bit = 1 << CMD_SLOT_PAGE_REQ;
	const unsigned long generic_bit = 1 << CMD_SLOT_GENERIC;

	if (vector & page_req_bit) {
		int err = mlx5_vfio_cmd_release(dev, CMD_SLOT_PAGE_REQ);

		err = mlx5_vfio_page_request_cmd_comp(dev, err);
		if (err)
			dev_err(dev,
				"Failed to process page cmd completion, err(%d)",
				err);
		vector &= ~page_req_bit;
	}

	/* nothing to do for the generic slot, user will call mlx5_vfio_cmd_poll() */
	if (vector & generic_bit)
		vector &= ~generic_bit;

	if (vector != 0)
		dev_err(dev, "Unknown command event, vector(%lx)", vector);
}

/* Command IF API wrappers */

/*
 * Input length, in host byte order, currently recorded in the command
 * layout entry of @slot.
 */
size_t mlx5_vfio_cmd_in_size(struct vfio_mlx5_dev *dev, uint16_t slot)
{
	const struct mlx5_cmd_layout *entry = mlx5_cmd_slot_layout(dev, slot);
	size_t len = be32toh(entry->ilen);

	return len;
}

/*
 * Output length, in host byte order, currently recorded in the command
 * layout entry of @slot.
 */
size_t mlx5_vfio_cmd_out_size(struct vfio_mlx5_dev *dev, uint16_t slot)
{
	const struct mlx5_cmd_layout *entry = mlx5_cmd_slot_layout(dev, slot);
	size_t len = be32toh(entry->olen);

	return len;
}

/*
 * Copy up to @size bytes of a slot's output into the caller buffer @to:
 * first the inline 'out' area of the command layout, then the data of the
 * chained output blocks, MLX5_CMD_DATA_BLOCK_SIZE bytes each. Copying stops
 * when @size bytes were copied or the chain ends, whichever comes first.
 */
void mlx5_vfio_cmd_copy_out(struct vfio_mlx5_dev *dev, void *to, size_t size,
			    uint16_t slot)
{
	const struct mlx5_cmd_layout *cmd_lay = mlx5_cmd_slot_layout(dev, slot);
	const struct mlx5_cmd_blocks *chain = &dev->cmd.cmds[slot].out;
	struct mlx5_cmd_block *blk;
	char *dst = to;
	size_t chunk;

	/* inline part */
	chunk = min_t(size_t, size, sizeof(cmd_lay->out));
	memcpy(dst, cmd_lay->out, chunk);
	dst += chunk;
	size -= chunk;

	/* chained part */
	cmd_blk_foreach(blk, chain, dev)
	{
		if (size == 0)
			break;

		chunk = min_t(size_t, size, MLX5_CMD_DATA_BLOCK_SIZE);
		memcpy(dst, blk->data, chunk);
		dst += chunk;
		size -= chunk;
	}
}

/*
 * Copy up to @size bytes of a slot's staged input back into the caller
 * buffer @to: first the inline 'in' area of the command layout, then the
 * data of the chained input blocks, MLX5_CMD_DATA_BLOCK_SIZE bytes each.
 * Copying stops when @size bytes were copied or the chain ends.
 */
void mlx5_vfio_cmd_copy_in(struct vfio_mlx5_dev *dev, void *to, size_t size,
			   uint16_t slot)
{
	const struct mlx5_cmd_layout *cmd_lay = mlx5_cmd_slot_layout(dev, slot);
	const struct mlx5_cmd_blocks *chain = &dev->cmd.cmds[slot].in;
	struct mlx5_cmd_block *blk;
	char *dst = to;
	size_t chunk;

	/* inline part */
	chunk = min_t(size_t, size, sizeof(cmd_lay->in));
	memcpy(dst, cmd_lay->in, chunk);
	dst += chunk;
	size -= chunk;

	/* chained part */
	cmd_blk_foreach(blk, chain, dev)
	{
		if (size == 0)
			break;

		chunk = min_t(size_t, size, MLX5_CMD_DATA_BLOCK_SIZE);
		memcpy(dst, blk->data, chunk);
		dst += chunk;
		size -= chunk;
	}
}

/**
 * @brief Run a command on the generic slot and busy wait until it is done.
 *  - Only valid while no EQ exists (dev->have_eq is false); used only in
 *     vfio_mlx5_device_add/del()
 *
 * The command is posted, polled for completion, released, its output is
 * copied into @out and the command status found there is checked.
 *
 * @param dev: Pointer to the vfio mlx5 device
 * @param in: Pointer to the input buffer
 * @param ilen: Length of the input buffer
 * @param out: Pointer to the output buffer
 * @param olen: Length of the output buffer
 *
 * @return 0 on success, -EINVAL when an EQ already exists, otherwise the
 *         error of the first step that failed
 */
int mlx5_vfio_cmd_exec(struct vfio_mlx5_dev *dev, void *in, int ilen, void *out,
		       int olen)
{
	char cmd_buf[CMD_STR_SZ];
	int err;

	// should never happen, but just in case
	if (WARN_ON(dev->have_eq /* Can't use cmd_exec with eq */))
		return -EINVAL;

	cmd_event_dbg(dev, EXEC, "EXEC CMD GENERIC: %s inlen %d outlen %d",
		      cmd_str(in, cmd_buf), ilen, olen);

	/* each step runs only when all previous ones succeeded */
	err = mlx5_vfio_cmd_slot_post(dev, in, ilen, olen, CMD_SLOT_GENERIC);
	if (!err)
		err = mlx5_cmd_poll_timeout(dev, CMD_SLOT_GENERIC);
	if (!err)
		err = mlx5_vfio_cmd_release(dev, CMD_SLOT_GENERIC);
	if (err)
		return err;

	mlx5_vfio_cmd_copy_out(dev, out, olen, CMD_SLOT_GENERIC);

	return mlx5_vfio_cmd_check(dev, in, out);
}

/*
 * Post a command on the page-request slot and return immediately.
 * Returns whatever mlx5_vfio_cmd_slot_post() returns.
 */
int mlx5_vfio_cmd_page_req_post(struct vfio_mlx5_dev *dev, void *in,
				size_t ilen, size_t olen)
{
	int err = mlx5_vfio_cmd_slot_post(dev, in, ilen, olen,
					  CMD_SLOT_PAGE_REQ);

	return err;
}

/* CMDIF API For testing only */

/**
 * mlx5_vfio_cmd_post - Post a command on the generic slot and return immediately
 *
 * - Must be called after creating an Event Queue (EQ) after vfio_mlx5_device_add()
 * - Completion is collected with mlx5_vfio_cmd_poll()
 *
 * @dev: Pointer to the vfio mlx5 device
 * @in: Pointer to the input buffer
 * @ilen: Length of the input buffer
 * @olen: Length of the expected output
 *
 * @return 0 on success, error code from mlx5_vfio_cmd_slot_post() on failure
 */
int mlx5_vfio_cmd_post(struct vfio_mlx5_dev *dev, void *in, int ilen, int olen)
{
	int err = mlx5_vfio_cmd_slot_post(dev, in, ilen, olen,
					  CMD_SLOT_GENERIC);

	return err;
}

/**
 * mlx5_vfio_cmd_poll - Check once whether the generic slot command completed
 *
 * When bit 0 of the slot's status_own is still set the command is not done
 * yet. Otherwise the slot is released, the output is copied to @out and the
 * command status is checked.
 *
 * @dev: Pointer to the vfio mlx5 device
 * @out: Pointer to the output buffer
 * @olen: Length of the output buffer
 *
 * @return 0 on success, -EAGAIN while the command is still pending,
 *         other error code on failure
 */
int mlx5_vfio_cmd_poll(struct vfio_mlx5_dev *dev, void *out, int olen)
{
	const struct mlx5_cmd_layout *cmd_lay =
		mlx5_cmd_slot_layout(dev, CMD_SLOT_GENERIC);
	const bool pending = mmio_read8(&cmd_lay->status_own) & 0x1;
	int err;

	if (pending)
		return -EAGAIN;

	err = mlx5_vfio_cmd_release(dev, CMD_SLOT_GENERIC);
	if (err)
		return err;

	mlx5_vfio_cmd_copy_out(dev, out, olen, CMD_SLOT_GENERIC);

	return mlx5_vfio_cmd_check(dev, cmd_lay->in, cmd_lay->out);
}

void mlx5_cmd_page_req_try_grow(struct vfio_mlx5_dev *dev, size_t *ilen,
				size_t *olen)
{
	struct mlx5_cmd_blocks *out_blks;
	struct mlx5_cmd_blocks *in_blks;

	in_blks = &dev->cmd.cmds[CMD_SLOT_PAGE_REQ].in;
	if (*ilen > cmd_msg_size(in_blks)) {
		/* only when we fail to grow, we return the current size */
		if (mlx5_vfio_cmd_grow(dev, in_blks, *ilen))
			*ilen = cmd_msg_size(in_blks);
	}

	out_blks = &dev->cmd.cmds[CMD_SLOT_PAGE_REQ].out;
	if (*olen > cmd_msg_size(out_blks)) {
		/* only when we fail to grow, we return the current size */
		if (mlx5_vfio_cmd_grow(dev, out_blks, *olen))
			*olen = cmd_msg_size(out_blks);
	}
}
