// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: BSD-3-Clause

#include "eq.h"

#include <errno.h>
#include <linux/types.h>
#include <stdint.h>
#include <unistd.h>

#include "dev.h"
#include "fwpages.h"
#include "util/mmio.h"
#include "util/util.h"
#include "vfio_mlx5.h"

/* Entry count requested for the async EQ (used as mlx5_eq_param.nent). */
#define MLX5_NUM_CMD_EQE (32)
/*
 * Extra entries added to the requested count when an EQ is sized. The same
 * value is the number of consumed entries after which the consumer index is
 * written back to the device while the queue is being drained.
 */
#define MLX5_NUM_SPARE_EQE (0x80)

/*
 * Event type codes handled in this file. A code is compared against
 * mlx5_eqe.type when the EQ is drained, and is also used as the bit position
 * when the CREATE_EQ event bitmask is built.
 */
enum mlx5_event {
	/* Dispatched to mlx5_vfio_cmd_eqe_comp(). */
	MLX5_EVENT_TYPE_CMD = 0x0a,
	/* Dispatched to mlx5_vfio_handle_page_req_event(). */
	MLX5_EVENT_TYPE_PAGE_REQUEST = 0xb,
};

/* Creation parameters consumed by create_map_eq(). */
struct mlx5_eq_param {
	/* Written to the EQ context 'intr' field. */
	uint8_t irq_index;
	/* Requested entries, before spare entries and power-of-two rounding. */
	int nent;
	/* Copied element by element into the command's event_bitmask array. */
	uint64_t mask[4];
};

/* Payload view of an EQE whose type is MLX5_EVENT_TYPE_CMD. */
struct mlx5_eqe_cmd {
	/*
	 * Big-endian; converted with be32toh() and handed to
	 * mlx5_vfio_cmd_eqe_comp(). Presumably a bitmask of completed command
	 * slots - TODO confirm against the command interface code.
	 */
	__be32 vector;
	__be32 rsvd[6];
};

/*
 * Payload view of an EQE whose type is MLX5_EVENT_TYPE_PAGE_REQUEST.
 * Fields are big-endian.
 */
struct mlx5_eqe_page_req {
	/* Not read anywhere in this file. */
	__be16 ec_function;
	/* Converted with be16toh() and passed to the page-request handler. */
	__be16 func_id;
	/* Converted with be32toh() and passed to the page-request handler. */
	__be32 num_pages;
	__be32 rsvd1[5];
};

/*
 * Event payload area of an EQE: seven 32-bit words that can be viewed raw or
 * through the per-event-type layouts above.
 */
union ev_data {
	__be32 raw[7];
	struct mlx5_eqe_cmd cmd;
	struct mlx5_eqe_page_req req_pages;
};

/*
 * One event queue entry as it is laid out in the EQ buffer. The buffer is
 * indexed as an array of this structure (see get_eqe()), so the layout must
 * not change.
 */
struct mlx5_eqe {
	uint8_t rsvd0;
	/* Event type; selects the handler in mlx5_vfio_async_events_process(). */
	uint8_t type;
	uint8_t rsvd1;
	/* Not read anywhere in this file. */
	uint8_t sub_type;
	__be32 rsvd2[7];
	/* Event-specific payload, interpreted according to 'type'. */
	union ev_data data;
	__be16 rsvd3;
	/* Not read anywhere in this file. */
	uint8_t signature;
	/*
	 * Bit 0 is compared with the wrap parity of the consumer index to
	 * decide whether the entry is ready for software. Initialised to
	 * MLX5_EQE_OWNER_INIT_VAL by init_eq_buf().
	 */
	uint8_t owner;
};

static struct mlx5_eqe *get_eqe(const struct mlx5_eq *eq, uint32_t entry)
{
	return (struct mlx5_eqe *)eq->vaddr + entry;
}

/*
 * Look at the entry @cc positions past the current consumer index.
 *
 * The slot is selected by masking the index with (nent - 1), so nent must be
 * a power of two (create_map_eq() rounds it up). The entry is considered
 * ready when bit 0 of its owner byte equals the wrap parity of the index,
 * i.e. whether the 'nent' bit of the index is set.
 *
 * Returns the entry when it is ready, NULL otherwise. A from-device barrier
 * is issued before a ready entry is returned.
 */
static struct mlx5_eqe *mlx5_eq_get_eqe(const struct mlx5_eq *eq, uint32_t cc)
{
	uint32_t ci = eq->cons_index + cc;
	struct mlx5_eqe *eqe = get_eqe(eq, ci & (eq->nent - 1));
	unsigned int owner_bit = eqe->owner & 1;
	unsigned int wrap_parity = !!(ci & eq->nent);

	if (owner_bit != wrap_parity)
		return NULL;

	udma_from_device_barrier();
	return eqe;
}

/*
 * Advance the consumer index by @cc and publish it through the EQ doorbell.
 *
 * @eq:  event queue whose cons_index is advanced
 * @cc:  number of entries consumed since the last update
 * @arm: non-zero writes to the doorbell base; zero writes two elements past
 *       it (the caller in the drain loop uses 0, the final update uses 1)
 *
 * The doorbell word carries the low 24 bits of the consumer index with the
 * EQ number in the top byte.
 */
static void eq_update_ci(struct mlx5_eq *eq, uint32_t cc, int arm)
{
	__be32 *addr = eq->doorbell + (arm ? 0 : 2);
	uint32_t val;

	eq->cons_index += cc;
	/*
	 * Widen eqn before shifting: if it is narrower than int it would be
	 * promoted to signed int, and shifting a value >= 0x80 left by 24
	 * overflows a signed int.
	 */
	val = (eq->cons_index & 0xffffff) | ((uint32_t)eq->eqn << 24);

	mmio_write32_be(addr, htobe32(val));
	udma_to_device_barrier();
}

#define MLX5_EQE_OWNER_INIT_VAL 0x1

static void init_eq_buf(const struct mlx5_eq *eq)
{
	for (unsigned int i = 0; i < eq->nent; i++) {
		struct mlx5_eqe *eqe = get_eqe(eq, i);
		eqe->owner = MLX5_EQE_OWNER_INIT_VAL;
	}
}

/*
 * Issue DESTROY_EQ for @eq.
 *
 * Teardown is best effort: the function returns nothing, so a failed command
 * is reported in the log rather than being dropped silently.
 *
 * NOTE(review): the buffer obtained with mlx5_alloc_contig_pages() in
 * create_map_eq() is not released here. Confirm it is reclaimed elsewhere
 * (for example when the page allocator is torn down); otherwise it leaks on
 * every create/destroy cycle.
 */
static void mlx5_vfio_destroy_eq(struct vfio_mlx5_dev *dev,
				 const struct mlx5_eq *eq)
{
	uint32_t out[DEVX_ST_SZ_DW(destroy_eq_out)] = {};
	uint32_t in[DEVX_ST_SZ_DW(destroy_eq_in)] = {};
	int ret;

	DEVX_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ);
	DEVX_SET(destroy_eq_in, in, eq_number, eq->eqn);

	ret = mlx5_vfio_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
	if (ret)
		log_error("Failed to exec MLX5_CMD_OP_DESTROY_EQ cmd, ret(%d)",
			  ret);
}

/*
 * Return the address of UAR page @index inside dev->heap->bar_map.
 *
 * Each UAR occupies MLX5_ADAPTER_PAGE_SIZE bytes when the uar_4k capability
 * is set, and one system page otherwise. Despite the name, the result is an
 * offset from bar_map, not a value taken from the IOVA allocator.
 *
 * The offset is computed at pointer width in both cases so a large index
 * cannot wrap a 32-bit multiplication.
 */
static uintptr_t uar2iova(const struct vfio_mlx5_dev *dev, uint32_t index)
{
	uintptr_t base = (uintptr_t)dev->heap->bar_map;
	uintptr_t uar_size;

	if (MLX5_CAP_GEN(dev, uar_4k)) /* only enabled for page size > 4K */
		uar_size = (uintptr_t)MLX5_ADAPTER_PAGE_SIZE;
	else
		uar_size = (uintptr_t)sysconf(_SC_PAGESIZE);

	return base + (uintptr_t)index * uar_size;
}

/*
 * Return the address of the EQ doorbell: MLX5_EQ_DOORBEL_OFFSET bytes into
 * the UAR page whose index is stored in dev->async_eq.uarn.
 */
void *mlx5_dev_eq_doorbell_addr(const struct vfio_mlx5_dev *dev)
{
	char *uar_base = (char *)uar2iova(dev, dev->async_eq.uarn);

	return (void *)(uar_base + MLX5_EQ_DOORBEL_OFFSET);
}

static int create_map_eq(struct vfio_mlx5_dev *dev, struct mlx5_eq *eq,
			 const struct mlx5_eq_param *param)
{
	uint8_t in[DEVX_ST_SZ_BYTES(create_eq_in) +
		   DEVX_FLD_SZ_BYTES(create_eq_in, pas[0])] = {};
	uint32_t out[DEVX_ST_SZ_DW(create_eq_out)] = {};
	uint8_t vecidx = param->irq_index;
	size_t npages;
	int alloc_size;
	__be64 *pas;
	void *eqc;
	int ret;
	int i;

	eq->nent = roundup_pow_of_two(param->nent + MLX5_NUM_SPARE_EQE);
	eq->cons_index = 0;
	alloc_size = eq->nent * sizeof(struct mlx5_eqe);
	eq->iova_size = roundup_pow_of_two(alloc_size);

	npages = DIV_ROUND_UP(eq->iova_size, MLX5_ADAPTER_PAGE_SIZE);

	pas = (__be64 *)DEVX_ADDR_OF(create_eq_in, in, pas[0]);

	log_debug("EQ with %d nents need %zu pages, iova size %zu", eq->nent,
		  npages, eq->iova_size);
	/* must be contiguous since get_eqe() assumes contiguous */
	ret = mlx5_alloc_contig_pages(dev->page_alloc, &eq->iova, npages);
	if (ret) {
		log_error("Failed to allocate contiguous pages for EQ, ret(%d)",
			  ret);
		return ret;
	}
	eq->vaddr = iova2vaddr(dev, eq->iova);

	pas[0] = htobe64(eq->iova);
	init_eq_buf(eq);
	DEVX_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);

	for (i = 0; i < 4; i++)
		DEVX_ARRAY_SET64(create_eq_in, in, event_bitmask, i,
				 param->mask[i]);

	eqc = DEVX_ADDR_OF(create_eq_in, in, eq_context_entry);
	DEVX_SET(eqc, eqc, log_eq_size, ilog32(eq->nent - 1));
	DEVX_SET(eqc, eqc, uar_page, dev->async_eq.uarn);
	DEVX_SET(eqc, eqc, intr, vecidx);
	DEVX_SET(eqc, eqc, log_page_size,
		 ilog32(eq->iova_size - 1) - MLX5_ADAPTER_PAGE_SHIFT);

	log_debug("EQ  log_eq_size %d, uar_page %d, intr %d, log_page_size %d",
		  DEVX_GET(eqc, eqc, log_eq_size), DEVX_GET(eqc, eqc, uar_page),
		  DEVX_GET(eqc, eqc, intr), DEVX_GET(eqc, eqc, log_page_size));
	ret = mlx5_vfio_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
	if (ret) {
		log_error("Failed to exec MLX5_CMD_OP_CREATE_EQ cmd, ret(%d)",
			  ret);
		goto err_cmd;
	}

	eq->vecidx = vecidx;
	eq->eqn = DEVX_GET(create_eq_out, out, eq_number);
	eq->doorbell = mlx5_dev_eq_doorbell_addr(dev);

	return 0;

err_cmd:
	mlx5_free_contig_pages(dev->page_alloc, eq->iova, npages);
	return ret;
}

/*
 * Create the EQ described by @param and, once that succeeds, publish a
 * consumer index of zero through the arming doorbell.
 *
 * Returns 0 on success or the error reported by create_map_eq().
 */
static int setup_async_eq(struct vfio_mlx5_dev *dev,
			  const struct mlx5_eq_param *param, struct mlx5_eq *eq)
{
	int err = create_map_eq(dev, eq, param);

	if (!err) {
		eq_update_ci(eq, 0, 1);
		return 0;
	}

	log_error("Failed to create and map EQ, err(%d)", err);
	return err;
}

/*
 * Execute ALLOC_UAR and store the returned UAR index in *@uarn.
 *
 * Returns 0 on success. On failure the command's error is returned and
 * *@uarn is not written.
 */
static int mlx5_vfio_alloc_uar(struct vfio_mlx5_dev *dev, uint32_t *uarn)
{
	uint32_t in[DEVX_ST_SZ_DW(alloc_uar_in)] = {};
	uint32_t out[DEVX_ST_SZ_DW(alloc_uar_out)] = {};
	int err;

	DEVX_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR);

	err = mlx5_vfio_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	*uarn = DEVX_GET(alloc_uar_out, out, uar);
	return 0;
}

/*
 * Execute DEALLOC_UAR for UAR index @uarn.
 *
 * Release is best effort: the function returns nothing, so a failed command
 * is reported in the log rather than being dropped silently.
 */
static void mlx5_vfio_dealloc_uar(struct vfio_mlx5_dev *dev, uint32_t uarn)
{
	uint32_t out[DEVX_ST_SZ_DW(dealloc_uar_out)] = {};
	uint32_t in[DEVX_ST_SZ_DW(dealloc_uar_in)] = {};
	int ret;

	DEVX_SET(dealloc_uar_in, in, opcode, MLX5_CMD_OP_DEALLOC_UAR);
	DEVX_SET(dealloc_uar_in, in, uar, uarn);

	ret = mlx5_vfio_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
	if (ret)
		log_error("Failed to exec MLX5_CMD_OP_DEALLOC_UAR cmd, ret(%d)",
			  ret);
}

int mlx5_vfio_create_async_eq(struct vfio_mlx5_dev *dev)
{
	struct mlx5_eq_param param;
	int err;

	err = mlx5_vfio_alloc_uar(dev, &dev->async_eq.uarn);
	if (err) {
		log_error("Failed to allocate UAR for EQs, err(%d)", err);
		return err;
	}

	param = (struct mlx5_eq_param){
		.irq_index = VFIO_MLX5_IRQ_VEC_IDX,
		.nent = MLX5_NUM_CMD_EQE,
		.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD |
			   1ull << MLX5_EVENT_TYPE_PAGE_REQUEST,
	};

	err = setup_async_eq(dev, &param, &dev->async_eq);
	if (err) {
		log_error("Failed to setup async EQ, err(%d)", err);
		goto err;
	}

	dev->have_eq = true;
	return 0;

err:
	mlx5_vfio_dealloc_uar(dev, dev->async_eq.uarn);
	return err;
}

void mlx5_vfio_destroy_async_eqs(struct vfio_mlx5_dev *dev)
{
	dev->have_eq = false;
	mlx5_vfio_destroy_eq(dev, &dev->async_eq);
	mlx5_vfio_dealloc_uar(dev, dev->async_eq.uarn);
}

/*
 * Unless software reports progress, the HCA will conclude that the queue has
 * overflowed. EQs are created with MLX5_NUM_SPARE_EQE entries of headroom, so
 * the consumer index has to be written back at least once per that many
 * consumed entries.
 *
 * Returns the count to keep accumulating: 0 when the index was just written
 * back (without arming), otherwise @cc unchanged.
 */
static inline uint32_t mlx5_eq_update_cc(struct mlx5_eq *eq, uint32_t cc)
{
	if (unlikely(cc >= MLX5_NUM_SPARE_EQE)) {
		eq_update_ci(eq, cc, 0);
		return 0;
	}

	return cc;
}

/*
 * Drain every ready entry from the async EQ and dispatch it by event type.
 *
 * Command-completion entries are passed to mlx5_vfio_cmd_eqe_comp() and
 * page-request entries to mlx5_vfio_handle_page_req_event(); any other type
 * is logged and skipped. The consumer index is flushed periodically while
 * draining and written once more, through the arming doorbell, at the end.
 *
 * Returns the number of entries that were dispatched to a handler; entries
 * of unknown type are consumed but not counted.
 */
int mlx5_vfio_async_events_process(struct vfio_mlx5_dev *dev)
{
	struct mlx5_eq *eq = &dev->async_eq;
	struct mlx5_eqe *eqe;
	int handled = 0;
	int cc = 0;

	for (;;) {
		eqe = mlx5_eq_get_eqe(eq, cc);
		if (!eqe)
			break;

		switch (eqe->type) {
		case MLX5_EVENT_TYPE_CMD: {
			unsigned long vector = be32toh(eqe->data.cmd.vector);

			mlx5_vfio_cmd_eqe_comp(dev, vector);
			handled++;
			break;
		}
		case MLX5_EVENT_TYPE_PAGE_REQUEST: {
			struct mlx5_eqe_page_req *req = &eqe->data.req_pages;

			mlx5_vfio_handle_page_req_event(
				dev, be16toh(req->func_id),
				be32toh(req->num_pages));
			handled++;
			break;
		}
		default:
			log_warn("Unknown event type %d", eqe->type);
			break;
		}

		/* Count this entry, then flush the index if enough piled up. */
		cc++;
		cc = mlx5_eq_update_cc(eq, cc);
	}

	eq_update_ci(eq, cc, 1);
	return handled;
}
