/*
 * SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
 * Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
 *
 * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 * property and proprietary rights in and to this material, related
 * documentation and any modifications thereto. Any use, reproduction,
 * disclosure or distribution of this material and related documentation
 * without an express license agreement from NVIDIA CORPORATION or
 * its affiliates is strictly prohibited.
 */

#include <config.h>

#include <sys/socket.h>
#include <sys/uio.h>
#include <signal.h>
#include <unistd.h>

#include <doca_flow.h>

#include "bridge.h"
#include "lib/conntrack.h"
#include "lib/conntrack-private.h"
#include "lib/dirs.h"
#include "lib/dpif.h"
#include "lib/dpif-doca.h"
#include "lib/dpif-provider.h"
#include "lib/fatal-signal.h"
#include "lib/netdev-linux.h"
#include "lib/netdev-offload.h"
#include "lib/netdev-provider.h"
#include "lib/ofp-version-opt.h"
#include "lib/ovsdb-idl.h"
#include "lib/ovs-doca.h"
#include "lib/rtnetlink.h"
#include "lib/socket-util.h"
#include "lib/daemon.h"
#include "lib/unixctl.h"
#include "lib/jsonrpc.h"
#include "lib/ovs-router.h"
#include "lib/stream.h"
#include "lib/stream-fd.h"
#include "lib/stream-provider.h"
#include "lib/tnl-neigh-cache.h"
#include "lib/vswitch-idl.h"
#include "lib/mac-learning.h"
#include "ndu.h"
#include "ofproto/ofproto.h"
#include "ofproto/ofproto-dpif.h"
#include "ofproto/ofproto-dpif-upcall.h"
#include "ofproto/ofproto-dpif-xlate.h"
#include "openvswitch/hmap.h"
#include "openvswitch/ofpbuf.h"
#include "openvswitch/ofp-match.h"
#include "openvswitch/ofp-msgs.h"
#include "openvswitch/poll-loop.h"
#include "openvswitch/vconn.h"
#include "openvswitch/vlog.h"

VLOG_DEFINE_THIS_MODULE(ndu);

#define NDU_CT_GRACE_PERIOD_MS 30000
#define NDU_SOCK_NAME "ndu-sock"
#define NDU_MAX_TAP_PORTS 1000

#define NDU_DPIF_NAME "ovs-doca"
#define NDU_DPIF_TYPE "doca"

enum ndu_state {
    NDU_STATE_UNINIT,
    NDU_STATE_IDLE,
    NDU_STATE_REVALIDATOR_PAUSE,
    NDU_STATE_OVSDB_UNLOCK,
    NDU_STATE_BR_RM_MGMT_SNOOP,
    NDU_STATE_PID_FILE,
    NDU_STATE_PID_FILE_SYNC,
    NDU_STATE_SYNC_OF_PORTS,
    NDU_STATE_SYNC_PORTS,
    NDU_STATE_SYNC_TNL_NEIGH,
    NDU_STATE_OPENFLOW_SYNC_WAIT,
    NDU_STATE_OPENFLOW_SYNC_DONE,
    NDU_STATE_SYNC_CT,
    NDU_STATE_DATAPATH_RELEASE,
    NDU_STATE_DONE,
    NDU_STATE_CLIENT,
};

/* Forward transition table of the server state machine: ndu_fsm[S] is the
 * state entered after S completes successfully.  ndu_run() also searches this
 * table for the entry whose value is the current state in order to step
 * backwards during a rollback.
 *
 * States without an explicit initializer (NDU_STATE_UNINIT, NDU_STATE_DONE)
 * implicitly map to 0, i.e. NDU_STATE_UNINIT. */
static enum ndu_state ndu_fsm[] = {
    [NDU_STATE_IDLE]              = NDU_STATE_REVALIDATOR_PAUSE,
    [NDU_STATE_REVALIDATOR_PAUSE] = NDU_STATE_OVSDB_UNLOCK,
    [NDU_STATE_OVSDB_UNLOCK]      = NDU_STATE_BR_RM_MGMT_SNOOP,
    [NDU_STATE_BR_RM_MGMT_SNOOP]  = NDU_STATE_PID_FILE,
    [NDU_STATE_PID_FILE]          = NDU_STATE_PID_FILE_SYNC,
    [NDU_STATE_PID_FILE_SYNC]     = NDU_STATE_SYNC_OF_PORTS,
    [NDU_STATE_SYNC_OF_PORTS]     = NDU_STATE_SYNC_PORTS,
    [NDU_STATE_SYNC_PORTS]        = NDU_STATE_SYNC_TNL_NEIGH,
    [NDU_STATE_SYNC_TNL_NEIGH]    = NDU_STATE_OPENFLOW_SYNC_WAIT,
    [NDU_STATE_OPENFLOW_SYNC_WAIT] = NDU_STATE_OPENFLOW_SYNC_DONE,
    [NDU_STATE_OPENFLOW_SYNC_DONE] = NDU_STATE_SYNC_CT,
    [NDU_STATE_SYNC_CT]           = NDU_STATE_DATAPATH_RELEASE,
    [NDU_STATE_DATAPATH_RELEASE]  = NDU_STATE_DONE,
    /* The client state is a fixed point: it never advances via this table. */
    [NDU_STATE_CLIENT]            = NDU_STATE_CLIENT,
};

/* Returns the short name of 'state' used in log messages.
 *
 * NDU_STATE_UNINIT is not expected to ever be named: asking for it, or for
 * a value outside the enum, trips OVS_NOT_REACHED(). */
static const char *
ndu_state_name(enum ndu_state state)
{
    const char *name = NULL;

    switch (state) {
    case NDU_STATE_UNINIT:
        OVS_NOT_REACHED();
        name = "UNINIT";
        break;
    case NDU_STATE_IDLE:
        name = "idle";
        break;
    case NDU_STATE_REVALIDATOR_PAUSE:
        name = "rv-pause";
        break;
    case NDU_STATE_OVSDB_UNLOCK:
        name = "db-unlock";
        break;
    case NDU_STATE_BR_RM_MGMT_SNOOP:
        name = "br-rm-mgmt-snoop";
        break;
    case NDU_STATE_PID_FILE:
        name = "pid-file";
        break;
    case NDU_STATE_PID_FILE_SYNC:
        name = "pid-file-sync";
        break;
    case NDU_STATE_SYNC_OF_PORTS:
        name = "sync-of-ports";
        break;
    case NDU_STATE_SYNC_PORTS:
        name = "sync-ports";
        break;
    case NDU_STATE_SYNC_TNL_NEIGH:
        name = "sync-tnl-neigh";
        break;
    case NDU_STATE_OPENFLOW_SYNC_WAIT:
        name = "sync-openflow-wait";
        break;
    case NDU_STATE_OPENFLOW_SYNC_DONE:
        name = "sync-openflow-done";
        break;
    case NDU_STATE_SYNC_CT:
        name = "sync-ct";
        break;
    case NDU_STATE_DATAPATH_RELEASE:
        name = "release";
        break;
    case NDU_STATE_DONE:
        name = "done";
        break;
    case NDU_STATE_CLIENT:
        name = "client";
        break;
    }

    if (!name) {
        /* 'state' is not a member of enum ndu_state. */
        OVS_NOT_REACHED();
        name = "ERR";
    }
    return name;
}

/* Steps of the client-side sequence; the value is stored in
 * ndu_ctx.client_state and named by ndu_client_state_name().  The code that
 * advances through these steps is not part of this section of the file. */
enum ndu_client_state {
    NDU_CLIENT_STATE_SYNC_DB,
    NDU_CLIENT_STATE_PROBE_PORTS,
    NDU_CLIENT_STATE_SYNC_TNL_NEIGH,
    NDU_CLIENT_STATE_SYNC_OPENFLOW,
    NDU_CLIENT_STATE_SYNC_CT,
    NDU_CLIENT_STATE_DONE,
};

static const char *
ndu_client_state_name(enum ndu_client_state client_state)
{
    switch (client_state) {
    case NDU_CLIENT_STATE_SYNC_DB:
        return "client-sync-db";
    case NDU_CLIENT_STATE_PROBE_PORTS:
        return "client-probe-ports";
    case NDU_CLIENT_STATE_SYNC_TNL_NEIGH:
        return "client-sync-tnl-neigh";
    case NDU_CLIENT_STATE_SYNC_OPENFLOW:
        return "client-sync-openflow";
    case NDU_CLIENT_STATE_SYNC_CT:
        return "client-sync-ct";
    case NDU_CLIENT_STATE_DONE:
        return "client-done";
    }
    OVS_NOT_REACHED();
    return "ERR";
}

/* Listening endpoint on which the NDU server accepts its peer. */
struct ndu_conn_server {
    struct pstream *listener;   /* Passive stream opened by pstream_open(). */
    char *unix_path;            /* Heap-allocated pstream name,
                                 * "punix:<rundir>/ndu-sock.<pid>". */
};

/* Process-wide NDU state; a single instance, 'ndu_ctx', is defined below. */
struct ndu_ctx {
    enum ndu_state state;           /* Current server state machine state. */
    bool rollback;                  /* True while ndu_run() is unwinding. */
    unsigned int idl_seqno;         /* IDL seqno sampled in ndu_init(). */
    enum ndu_client_state client_state; /* Client-side step (used by client
                                         * code outside this section). */
    char *remote;                   /* Copy of the OVSDB remote string. */
    struct ovsdb_idl *idl;          /* IDL passed to ndu_init(); not owned
                                     * (ndu_destroy() does not destroy it). */
    char *pidfile;                  /* Copy of the pidfile path, or NULL. */
    struct hmap portmap;            /* struct ndu_port_item, hashed on
                                     * 'server_port_no'. */
    struct hmap of_portmap;         /* struct ndu_port_item; emptied by
                                     * ndu_of_portmap_flush(). */
    struct ndu_conn_server server;  /* Listening socket. */
    struct stream *stream;          /* Connection to the peer. */
    int fd;                         /* File descriptor of 'stream'; -1 once
                                     * ndu_handle_done() has run. */
    pid_t server_pid;               /* If nonzero, ndu_accept_client() waits
                                     * until this process no longer exists. */
};

/* Description of one port exchanged between the NDU server and client.
 *
 * The whole structure is copied as-is into the sync buffers
 * (nl_msg_put_unspec(..., &p, sizeof p)), so its layout is part of the wire
 * format between the two processes; 'node' and 'tap_fd' only carry meaning
 * on the receiving side. */
struct ndu_port_item {
    char name[IF_NAMESIZE];         /* Port (netdev) name. */
    char type[IF_NAMESIZE];         /* Port type, e.g. "tap". */
    odp_port_t server_port_no;      /* Datapath port number on the server. */
    odp_port_t client_port_no;      /* Datapath port number on the client. */
    ofp_port_t ofp_port;            /* OpenFlow port number. */
    int tap_fd;                     /* Tap fd received via SCM_RIGHTS. */
    struct hmap_node node;          /* In ndu_ctx.portmap or .of_portmap. */
};

/* One MAC-learning (FDB) entry.
 *
 * NOTE(review): not referenced in this section of the file; presumably used
 * by FDB synchronization code further down -- confirm. */
struct ndu_fdb_entry {
    struct eth_addr src;    /* Learned Ethernet address. */
    ofp_port_t port;        /* OpenFlow port associated with the entry. */
    uint16_t vlan;          /* VLAN of the entry. */
    bool is_static;         /* Whether the entry is static. */
};

/* Callback type taking a JSON value, an ofproto_dpif and an error counter.
 *
 * NOTE(review): no user of this type is visible in this section; the name
 * suggests it processes the JSON reply of an appctl command -- confirm
 * against the callers. */
typedef int (*appctl_cmd_response_handler)(const struct json *json, struct ofproto_dpif *dpif,
                                           int *err_cnt);

/* The single NDU context of this process.  It starts out uninitialized;
 * ndu_init() moves it to NDU_STATE_IDLE when a pidfile is in use. */
static struct ndu_ctx ndu_ctx = { .state = NDU_STATE_UNINIT, };

/* Opens the NDU listening socket "<rundir>/ndu-sock.<pid>" and records both
 * the listener and its pstream name in 'server'.
 *
 * Failure to open the socket is fatal (VLOG_FATAL). */
static void
ndu_conn_server_create(struct ndu_conn_server *server)
{
    int err;

    server->unix_path = xasprintf("punix:%s/%s.%ld", ovs_rundir(), NDU_SOCK_NAME,
                                  (long int) getpid());

    err = pstream_open(server->unix_path, &server->listener, 0);
    if (err) {
        VLOG_FATAL("%d|fail to create ndu_conn server: %s", getpid(), ovs_strerror(err));
    }
}

/* Closes the NDU listening socket and releases the resources of 'server'.
 *
 * 'server->unix_path' holds the pstream name, i.e. the filesystem path
 * prefixed with "punix:".  The prefix is skipped before removing the socket
 * file; passing the full name to remove() would never match the real file.
 * Removal is best-effort: the file may already be gone. */
static void
ndu_conn_server_destroy(struct ndu_conn_server *server)
{
    static const char prefix[] = "punix:";
    const char *path = server->unix_path;

    pstream_close(server->listener);
    server->listener = NULL;

    if (path) {
        if (!strncmp(path, prefix, strlen(prefix))) {
            path += strlen(prefix);
        }
        ignore(remove(path));
    }

    free(server->unix_path);
    server->unix_path = NULL;
}

/* Initializes the NDU context.
 *
 * 'remote' is the OVSDB remote string (copied), 'idl' the IDL used by the
 * caller (borrowed), and 'pidfile' the pidfile path (copied, may be NULL).
 *
 * NDU requires a pidfile: without one a warning is logged and the state
 * machine stays in NDU_STATE_UNINIT.  The listening socket is created in
 * either case. */
void
ndu_init(const char *remote,
         struct ovsdb_idl *idl,
         const char *pidfile)
{
    if (!pidfile) {
        VLOG_WARN("%d|ndu is not supported.", getpid());
    } else if (ndu_ctx.state == NDU_STATE_UNINIT) {
        /* Running in the foreground is not supported, so a pidfile must
         * exist before the state machine is armed. */
        ndu_ctx.state = NDU_STATE_IDLE;
        hmap_init(&ndu_ctx.of_portmap);
    }

    ndu_ctx.idl = idl;
    ndu_ctx.idl_seqno = ovsdb_idl_get_seqno(idl);
    ndu_ctx.remote = xstrdup(remote);
    ndu_ctx.pidfile = nullable_xstrdup(pidfile);
    hmap_init(&ndu_ctx.portmap);

    ndu_conn_server_create(&ndu_ctx.server);
}

/* Tears down what ndu_init() set up: the listening socket, the copied
 * strings and both port maps.  The IDL is not touched. */
void
ndu_destroy(void)
{
    ndu_conn_server_destroy(&ndu_ctx.server);

    hmap_destroy(&ndu_ctx.portmap);
    hmap_destroy(&ndu_ctx.of_portmap);

    free(ndu_ctx.pidfile);
    ndu_ctx.pidfile = NULL;

    free(ndu_ctx.remote);
    ndu_ctx.remote = NULL;
}

static struct ofproto_dpif *
ndu_get_dpif(void)
{
    const struct ovsrec_open_vswitch *cfg;
    struct ofproto_dpif *dpif = NULL;
    struct ovsrec_bridge **br;
    int i;

    /* All same dpif type bridges share the same backer. */
    cfg = ovsrec_open_vswitch_first(ndu_ctx.idl);
    br = cfg->bridges;
    for (i = 0; i < cfg->n_bridges; i++) {
        if (!strcmp(br[i]->datapath_type, NDU_DPIF_TYPE)) {
            dpif = ofproto_dpif_lookup_by_name(br[i]->name);
            break;
        }
    }

    return dpif;
}

/* Pauses ('pause' true) or resumes ('pause' false) the revalidator threads
 * of the NDU datapath.  Silently does nothing when the datapath, its backer
 * or its udpif cannot be found. */
static void
ndu_revalidators_pause_set(bool pause)
{
    struct ofproto_dpif *ofproto = ndu_get_dpif();
    struct udpif *udpif;

    if (!ofproto || !ofproto->backer || !ofproto->backer->udpif) {
        return;
    }
    udpif = ofproto->backer->udpif;

    /* udpif_stop_threads() cannot be used here because it purges all the
     * flows.  Upcalls cannot be disabled either: some packets have no
     * megaflow and must still reach the upcall path, e.g. ARP requests
     * resolving the gateway's MAC.  Only the revalidators are paused. */
    if (!pause) {
        udpif_resume_revalidators(udpif);
        return;
    }
    udpif_pause_revalidators(udpif);
}

/* Releases ('unlock' true) or re-acquires ('unlock' false) the
 * "ovs_vswitchd" OVSDB lock.
 *
 * Returns 0 once the IDL's lock ownership matches the request (or when
 * there is no IDL), and EAGAIN while the change has not taken effect yet so
 * that the caller retries. */
static int
ndu_ovsdb_unlock_set(bool unlock)
{
    struct ovsdb_idl *idl = ndu_ctx.idl;
    bool locked;

    if (!idl) {
        return 0;
    }

    locked = ovsdb_idl_has_lock(idl);
    if (locked == unlock) {
        /* Ownership is the opposite of what is wanted: drop any pending
         * transactions and request the change. */
        ovsdb_idl_txn_abort_all(idl);
        ovsdb_idl_set_lock(idl, unlock ? NULL : "ovs_vswitchd");
        locked = ovsdb_idl_has_lock(idl);
    }

    return locked == unlock ? EAGAIN : 0;
}

/* Renames the pidfile for an upgrade or back again.
 *
 * With 'upgrade' true, "<pidfile>" becomes "<pidfile>.<suffix>"; with
 * 'upgrade' false the rename goes the other way.  On success the
 * fatal-signal unlink list is updated to track the new name.
 *
 * Returns the result of rename(): 0 on success, nonzero on failure. */
static int
ndu_pid_file_rename(bool upgrade)
{
    char *upgrading_file = xasprintf("%s."NDU_PIDFILE_SUFFIX, ndu_ctx.pidfile);
    const char *from = upgrade ? ndu_ctx.pidfile : upgrading_file;
    const char *to = upgrade ? upgrading_file : ndu_ctx.pidfile;
    int err = rename(from, to);

    if (!err) {
        fatal_signal_add_file_to_unlink(to);
        fatal_signal_remove_file_to_unlink(from);
    }

    free(upgrading_file);
    return err;
}

/* Reads exactly 'size' bytes from 'fd' into 'p_', retrying for as long as
 * the descriptor reports EAGAIN.
 *
 * read_fully() may have consumed part of the data before it returns EAGAIN;
 * the number of bytes it stored is reported through its last argument.  Each
 * retry therefore continues after those bytes instead of starting over,
 * which would lose them and desynchronize the stream.
 *
 * Returns 0 on success, otherwise the error from read_fully(). */
static int
read_sync(int fd, void *p_, size_t size)
{
    uint8_t *p = p_;
    size_t bytes;
    int rv;

    do {
        rv = read_fully(fd, p, size, &bytes);
        p += bytes;
        size -= bytes;
    } while (rv == EAGAIN);

    return rv;
}

/* Writes exactly 'size' bytes from 'p_' to 'fd', retrying for as long as the
 * descriptor reports EAGAIN.
 *
 * write_fully() may have sent part of the data before it returns EAGAIN; the
 * number of bytes it sent is reported through its last argument.  Each retry
 * therefore resumes after those bytes so that nothing is sent twice.
 *
 * Returns 0 on success, otherwise the error from write_fully(). */
static int
write_sync(int fd, const void *p_, size_t size)
{
    const uint8_t *p = p_;
    size_t bytes;
    int rv;

    do {
        rv = write_fully(fd, p, size, &bytes);
        p += bytes;
        size -= bytes;
    } while (rv == EAGAIN);

    return rv;
}

/* Sends the datapath port list, and the file descriptors of all tap ports,
 * to the peer on 'fd'.
 *
 * Wire format: a size_t holding the payload length, followed by one
 * sendmsg() whose data is a sequence of netlink attributes, each wrapping a
 * struct ndu_port_item, and whose SCM_RIGHTS control message carries the tap
 * fds in port order.  When the datapath does not exist (ENODEV) only a zero
 * length is sent.
 *
 * Returns 0 on success, otherwise a positive errno value. */
static int
ndu_server_sync_ports(int fd)
{
    uint8_t msgctrl[CMSG_SPACE(sizeof(int) * NDU_MAX_TAP_PORTS)];
    struct dpif_port_dump dump;
    struct dpif_port dpif_port;
    struct cmsghdr *cmsg;
    struct ofpbuf *buf;
    struct msghdr msg;
    struct dpif *dpif;
    struct iovec iov;
    size_t sz = 0;
    int retval;
    int error;
    int *ptr;
    int cnt;

    error = dpif_open(NDU_DPIF_NAME, NDU_DPIF_TYPE, &dpif);
    if (error == ENODEV) {
        return write_sync(fd, &sz, sizeof sz);
    } else if (error) {
        return error;
    }

    buf = ofpbuf_new(1024);
    memset(&msg, 0, sizeof msg);
    memset(&iov, 0, sizeof iov);
    /* Avoid handing uninitialized stack bytes (cmsg padding) to the kernel. */
    memset(msgctrl, 0, sizeof msgctrl);

    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    msg.msg_control = msgctrl;
    msg.msg_controllen = sizeof msgctrl;

    cmsg = CMSG_FIRSTHDR(&msg);
    cmsg->cmsg_level = SOL_SOCKET;
    cmsg->cmsg_type = SCM_RIGHTS;
    ptr = ALIGNED_CAST(int *, CMSG_DATA(cmsg));
    cnt = 0;

    DPIF_PORT_FOR_EACH (&dpif_port, &dump, dpif) {
        struct ndu_port_item p;
        struct netdev *netdev;

        memset(&p, 0, sizeof p);
        ovs_strzcpy(p.name, dpif_port.name, sizeof p.name);
        ovs_strzcpy(p.type, dpif_port.type, sizeof p.type);
        p.server_port_no = dpif_port.port_no;

        nl_msg_put_unspec(buf, 0, &p, sizeof p);

        VLOG_INFO("%d|Server port: %s,%s,%d", getpid(), p.name, p.type, p.server_port_no);
        if (strcmp(dpif_port.type, "tap")) {
            continue;
        }

        if (cnt >= NDU_MAX_TAP_PORTS) {
            VLOG_ERR("%d|Cannot support more than %d tap ports.", getpid(), NDU_MAX_TAP_PORTS);
            error = ENOSPC;
            /* Leaving the iteration early: the dump must be ended by hand. */
            dpif_port_dump_done(&dump);
            goto out;
        }

        error = netdev_open(dpif_port.name, dpif_port.type, &netdev);
        if (error) {
            dpif_port_dump_done(&dump);
            goto out;
        }
        ptr[cnt] = netdev_linux_get_tap_fd(netdev);
        VLOG_INFO("%d|Server tap-fd: %d", getpid(), ptr[cnt]);
        cnt++;
        netdev_close(netdev);
    }

    iov.iov_len = buf->size;
    iov.iov_base = buf->data;

    if (cnt) {
        msg.msg_controllen = CMSG_SPACE(sizeof(int) * cnt);
        cmsg->cmsg_len = CMSG_LEN(sizeof(int) * cnt);
    } else {
        msg.msg_controllen = 0;
        msg.msg_control = NULL;
    }

    /* Send the expected length. */
    sz = buf->size;
    error = write_sync(fd, &sz, sizeof sz);
    if (error) {
        goto out;
    }

    /* 'error' is reset on every attempt so that a successful retry after
     * EINTR ends the loop instead of sending the message again. */
    do {
        error = 0;
        retval = sendmsg(fd, &msg, 0);
        if (retval < 0) {
            error = errno;
        }
    } while (error == EINTR);

out:
    ofpbuf_delete(buf);
    dpif_close(dpif);
    return error;
}

/* Removes and frees every entry of ndu_ctx.of_portmap, leaving it empty. */
static void
ndu_of_portmap_flush(void)
{
    struct ndu_port_item *item;

    HMAP_FOR_EACH_POP (item, node, &ndu_ctx.of_portmap) {
        free(item);
    }
}

/* bridge_traverse_ports() callback: appends one port, identified by 'name'
 * and 'ofp_port', to the ofpbuf passed as 'aux'.
 *
 * The item is zeroed first because the whole structure, padding included,
 * ends up in the buffer. */
static void
ndu_sync_of_ports_cb(const char *name, ofp_port_t ofp_port, void *aux)
{
    struct ndu_port_item item;
    struct ofpbuf *out = aux;

    memset(&item, 0, sizeof item);
    item.ofp_port = ofp_port;
    ovs_strzcpy(item.name, name, sizeof item.name);

    nl_msg_put_unspec(out, 0, &item, sizeof item);
    VLOG_INFO("%d|Server of_port: %s,%d", getpid(), item.name, item.ofp_port);
}

/* Sends the list of OpenFlow ports to the peer on 'fd': first a size_t with
 * the payload length, then, if non-empty, the payload collected by
 * ndu_sync_of_ports_cb().
 *
 * On failure the local of_portmap is flushed.  Returns 0 on success,
 * otherwise the error from write_sync(). */
static int
ndu_server_sync_of_ports(int fd)
{
    struct ofpbuf *payload = ofpbuf_new(1024);
    size_t len;
    int err;

    bridge_traverse_ports(ndu_sync_of_ports_cb, payload);

    /* The length always goes first so the peer knows what to expect. */
    len = payload->size;
    err = write_sync(fd, &len, sizeof len);
    if (!err && len) {
        err = write_sync(fd, payload->data, len);
    }

    if (err) {
        ndu_of_portmap_flush();
    }
    ofpbuf_delete(payload);
    return err;
}

/* Sends the tunnel neighbor cache to the peer on 'fd': a size_t with the
 * payload length followed, if non-empty, by the entries encoded by
 * tnl_neigh_ofpbuf_encode().
 *
 * Returns 0 on success, otherwise the error from write_sync(). */
static int
ndu_server_sync_tnl_neigh(int fd)
{
    struct ofpbuf *payload = ofpbuf_new(1024);
    uint32_t n_neigh;
    size_t len;
    int err;

    tnl_neigh_ofpbuf_encode(payload, &n_neigh);
    VLOG_INFO("%d|Server %d tnl-neigh entries", getpid(), n_neigh);

    /* The length always goes first so the peer knows what to expect. */
    len = payload->size;
    err = write_sync(fd, &len, sizeof len);
    if (!err && len) {
        err = write_sync(fd, payload->data, len);
    }

    ofpbuf_delete(payload);
    return err;
}

/* Removes the "pmd-pause" key from Open_vSwitch:other_config, if present.
 *
 * Uses a private, short-lived IDL connected to ndu_ctx.remote that only
 * monitors the other_config column.  The loop spins until the IDL hands out
 * a transaction; the deletion (plus a next_cfg increment) is then committed
 * once and the IDL is destroyed.
 *
 * The Open_vSwitch table may be empty, in which case there is no row and
 * nothing to delete. */
static void
ndu_pmd_pause_del_flag(void)
{
    struct ovsdb_idl_txn *txn = NULL;
    struct ovsdb_idl_loop loop;
    struct ovsdb_idl *idl;

    idl = ovsdb_idl_create(ndu_ctx.remote, &ovsrec_idl_class, false, true);
    loop = (struct ovsdb_idl_loop) OVSDB_IDL_LOOP_INITIALIZER(idl);
    ovsdb_idl_add_table(idl, &ovsrec_table_open_vswitch);
    ovsdb_idl_add_column(idl, &ovsrec_open_vswitch_col_other_config);

    loop.next_cfg = 1;
    while (!txn) {
        txn = ovsdb_idl_loop_run(&loop);
        if (txn) {
            const struct ovsrec_open_vswitch *cfg =
                ovsrec_open_vswitch_first(idl);

            if (cfg && smap_get(&cfg->other_config, "pmd-pause")) {
                ovsrec_open_vswitch_update_other_config_delkey(cfg, "pmd-pause");
                ovsdb_idl_txn_increment(txn, &cfg->header_,
                                        &ovsrec_open_vswitch_col_next_cfg, false);
            }
        }
        ovsdb_idl_loop_commit_and_wait(&loop);
    }
    ovsdb_idl_loop_destroy(&loop);
}

/* Closes the connection to the peer and marks it as gone.
 *
 * The stream pointer is cleared together with the fd so that no stale
 * pointer to the closed stream stays in the context; stream_close() accepts
 * NULL, so calling this again is harmless. */
static void
ndu_handle_done(void)
{
    stream_close(ndu_ctx.stream);
    ndu_ctx.stream = NULL;
    ndu_ctx.fd = -1;
}

/* State shared between ndu_server_sync_ct() and ndu_sync_ct_cb(). */
struct sync_ct_aux {
    struct conntrack *ct;   /* Conntrack instance being dumped. */
    int fd;                 /* Socket the connections are written to. */
    int rv;                 /* Result of the last write_sync() in the callback. */
    uint32_t n_conns;       /* Number of connections sent so far. */
};

/* Sends one connection to the peer: the 4-byte tag "conn" followed by the
 * flattened connection.
 *
 * 'aux_' is a struct sync_ct_aux; its 'rv' receives the write result and
 * 'n_conns' is bumped for every connection sent.  Connections that cannot be
 * flattened are skipped.
 *
 * Returns true only when a write failed, false otherwise. */
static bool
ndu_sync_ct_cb(struct conn *conn,
               struct ct_dpif_entry *entry OVS_UNUSED,
               long long now OVS_UNUSED,
               void *aux_)
{
    struct sync_ct_aux *ctx = aux_;
    struct flat_conn flat;

    if (!conntrack_conn2flat(conn, &flat)) {
        return false;
    }

    ctx->rv = write_sync(ctx->fd, "conn", 4);
    if (!ctx->rv) {
        ctx->rv = write_sync(ctx->fd, &flat, sizeof flat);
    }
    if (ctx->rv) {
        return true;
    }

    conn->ndu_sync_done = true;
    ctx->n_conns++;
    return false;
}

/* Sends the connections queued on dp->conntrack->ndu_conns (the "dirty
 * list") to the peer, in two passes:
 *
 *   1. With the PMDs still running, walk the list and send entries that are
 *      not yet marked 'ndu_sync_done'.  The conntrack lock is dropped around
 *      each send.  The pass stops once more than half of
 *      conntrack_offload_size() entries have been examined.
 *
 *   2. Pause the PMDs, then walk the whole list under the conntrack lock,
 *      send whatever is still unsynced and drop a reference on every entry.
 *
 * Returns 0 on success, otherwise the result of the last failed write.  The
 * PMDs are resumed here only on failure; on success they are left paused.
 *
 * NOTE(review): pass 1 iterates with the non-SAFE list macro while
 * releasing the lock inside the loop body; presumably each listed entry is
 * kept alive by the reference dropped in pass 2 -- confirm. */
static int
ndu_server_sync_ct_dirty_list(struct dp_doca *dp, struct sync_ct_aux *aux)
{
    uint32_t c_idx = 0, sync_conns = 0, sync_errors = 0;
    unsigned int max_dirty_conns;
    struct conn *c;
    int err = 0;

    max_dirty_conns = conntrack_offload_size() / 2;

    conntrack_lock(dp->conntrack);
    LIST_FOR_EACH (c, ndu_list_node, &dp->conntrack->ndu_conns) {
        if (c_idx++ > max_dirty_conns) {
            break;
        }
        if (c->ndu_sync_done) {
            continue;
        }
        /* Do the socket write without holding the conntrack lock. */
        conntrack_unlock(dp->conntrack);
        if (ndu_sync_ct_cb(c, NULL, 0, aux)) {
            err = aux->rv;
            sync_errors++;
        }
        conntrack_lock(dp->conntrack);
        if (c->ndu_sync_done) {
            sync_conns++;
        }
    }
    conntrack_unlock(dp->conntrack);

    dp_doca_pmd_pause(dp);
    conntrack_lock(dp->conntrack);
    LIST_FOR_EACH_SAFE (c, ndu_list_node, &dp->conntrack->ndu_conns) {
        if (c->ndu_sync_done) {
            /* Already sent, either by the dump or by the first pass. */
            conntrack_conn_unref(c);
            continue;
        }
        if (ndu_sync_ct_cb(c, NULL, 0, aux)) {
            err = aux->rv;
            sync_errors++;
        }
        if (c->ndu_sync_done) {
            sync_conns++;
        }
        conntrack_conn_unref(c);
    }
    conntrack_unlock(dp->conntrack);

    VLOG_INFO("%d|Sync %u new conns (%u errors)", getpid(), sync_conns, sync_errors);

    if (err) {
        /* The hand-over failed: let the PMDs run again. */
        dp_doca_pmd_resume(dp);
    }

    return err;
}

/* Sends the conntrack table of the NDU datapath to the peer on 'fd'.
 *
 * Every connection is sent by ndu_sync_ct_cb() while a conntrack dump runs,
 * then the dirty list is sent by ndu_server_sync_ct_dirty_list(), and
 * finally the 4-byte tag "done" terminates the stream.  When the datapath
 * does not exist (ENODEV) only "done" is sent.
 *
 * 'ndu_sync_in_progress' is set on the conntrack instance for the duration
 * of the transfer.
 *
 * Returns 0 on success, otherwise the first error encountered. */
static int
ndu_server_sync_ct(int fd)
{
    struct conntrack_dump ct_dump;
    struct sync_ct_aux aux = {
        .fd = fd,
        .rv = 0,
        .n_conns = 0,
    };
    struct dp_doca *dp;
    struct dpif *dpif;
    int ptot_bkts;
    int error, rv;

    error = dpif_open(NDU_DPIF_NAME, NDU_DPIF_TYPE, &dpif);
    if (error == ENODEV) {
        return write_sync(fd, "done", 4);
    } else if (error) {
        return error;
    }

    dp = get_dp_doca(dpif);
    aux.ct = dp->conntrack;
    atomic_store_explicit(&dp->conntrack->ndu_sync_in_progress, true, memory_order_release);

    error = conntrack_dump_start_for_cb(dp->conntrack, &ct_dump, NULL, &ptot_bkts,
                                        ndu_sync_ct_cb, &aux);
    if (error) {
        /* The dump never started: skip both dump_done and the "done" tag. */
        goto err_start;
    }
    /* The callback does the sending; just drive the dump to its end. */
    while (!(error = conntrack_dump_next(&ct_dump, NULL))) {
    }
    if (error == EOF) {
        /* Normal end of dump: report the callback's write status instead. */
        error = aux.rv;
    }
    VLOG_INFO("%d|Sync %u conns", getpid(), aux.n_conns);
    if (error) {
        goto err_next;
    }
    error = ndu_server_sync_ct_dirty_list(dp, &aux);
    if (error) {
        VLOG_INFO("%d|Failed to sync CT dirty list err=%d", getpid(), error);
        goto err_next;
    }

err_next:
    conntrack_dump_done(&ct_dump);
    /* "done" is sent even after a failure; an earlier error takes precedence
     * over the result of this write. */
    rv = write_sync(fd, "done", 4);
    if (!error) {
        error = rv;
    }
err_start:
    atomic_store_explicit(&dp->conntrack->ndu_sync_in_progress, false, memory_order_release);
    dpif_close(dpif);
    return error;
}

/* Calls dpif_ndu_exit() on the dpif backing the NDU datapath, if there is
 * one; otherwise does nothing. */
static void
ndu_dpif_release(void)
{
    struct ofproto_dpif *ofproto = ndu_get_dpif();

    if (ofproto && ofproto->backer && ofproto->backer->dpif) {
        dpif_ndu_exit(ofproto->backer->dpif);
    }
}

static int
ndu_accept_client(void)
{
    struct ndu_conn_server *server = &ndu_ctx.server;
    int error;

    /* Wait till the old NDU server terminates if there was one. */
    if (ndu_ctx.server_pid && !kill(ndu_ctx.server_pid, 0)) {
        return EAGAIN;
    }

    error = pstream_accept(server->listener, &ndu_ctx.stream);
    if (error == EAGAIN) {
        return error;
    } else if (error) {
        VLOG_WARN("%d|%s: accept failed: %s", getpid(), pstream_get_name(server->listener),
                  ovs_strerror(error));
        return error;
    }
    ndu_ctx.fd = stream_fd_get(ndu_ctx.stream);

    ovs_doca_set_op_state(DOCA_FLOW_PORT_OPERATION_STATE_ACTIVE_READY_TO_SWAP);
    return 0;
}

void
ndu_run(void)
{
    enum ndu_state cur_state = ndu_ctx.state;
    char str[100];
    int err = 0;

    switch (ndu_ctx.state) {
    case NDU_STATE_UNINIT:
        /* Do nothing. */
        err = EAGAIN;
        break;
    case NDU_STATE_IDLE:
        err = ndu_accept_client();
        ndu_ctx.rollback = false;
        break;
    case NDU_STATE_REVALIDATOR_PAUSE:
        ndu_revalidators_pause_set(!ndu_ctx.rollback);
        err = 0;
        break;
    case NDU_STATE_OVSDB_UNLOCK:
        err = ndu_ovsdb_unlock_set(!ndu_ctx.rollback);
        break;
    case NDU_STATE_BR_RM_MGMT_SNOOP:
        bridge_remotes_disable(!ndu_ctx.rollback);
        bridge_remove_ctrl_mgmt_snoop(false);
        bridge_mgmt_rename(!ndu_ctx.rollback);
        err = 0;
        break;
    case NDU_STATE_PID_FILE:
        err = ndu_pid_file_rename(!ndu_ctx.rollback);
        break;
    case NDU_STATE_PID_FILE_SYNC:
        if (ndu_ctx.rollback) {
            err = 0;
            break;
        }
        err = write_sync(ndu_ctx.fd, "sync", 4);
        break;
    case NDU_STATE_SYNC_OF_PORTS:
        if (!ndu_ctx.rollback) {
            err = ndu_server_sync_of_ports(ndu_ctx.fd);
        }
        break;
    case NDU_STATE_SYNC_PORTS:
        if (ndu_ctx.rollback) {
            err = 0;
            break;
        }
        err = ndu_server_sync_ports(ndu_ctx.fd);
        break;
    case NDU_STATE_SYNC_TNL_NEIGH:
        if (!ndu_ctx.rollback) {
            err = ndu_server_sync_tnl_neigh(ndu_ctx.fd);
        }
        break;
    case NDU_STATE_OPENFLOW_SYNC_WAIT: {
        size_t bytes;
        char c;

        err = read_fully(ndu_ctx.fd, &c, 1, &bytes);
        break;
    }
    case NDU_STATE_OPENFLOW_SYNC_DONE:
        bridge_remove_ctrl_mgmt_snoop(true);
        err = 0;
        break;
    case NDU_STATE_SYNC_CT:
        if (ndu_ctx.rollback) {
            err = 0;
            break;
        }
        err = ndu_server_sync_ct(ndu_ctx.fd);
        break;
    case NDU_STATE_DATAPATH_RELEASE:
        err = read_sync(ndu_ctx.fd, str, 4);
        if (err) {
            break;
        }
        if (strncmp(str, "sync", 4)) {
            err = -1;
            break;
        }
        ndu_revalidators_pause_set(false);
        ndu_dpif_release();
        err = 0;
        break;
    case NDU_STATE_DONE:
        ndu_handle_done();
        err = 0;
        break;
    case NDU_STATE_CLIENT:
        /* Client state machine is performed by ndu_client_run(). */
        break;
    }

    if (err == EAGAIN) {
        if (cur_state != NDU_STATE_IDLE) {
            poll_immediate_wake();
        }
        return;
    }

    if (err || ndu_ctx.rollback) {
        for (int i = 0; ; i++) {
            if (ndu_fsm[i] == cur_state) {
                ndu_ctx.state = i;
                break;
            }
        }
        if (!ndu_ctx.rollback) {
            VLOG_ERR("%d|Start a rollback. state=%s err=%d", getpid(), ndu_state_name(cur_state),
                     err);
            ndu_handle_done();
        }
        ndu_ctx.rollback = true;
    } else {
        ndu_ctx.state = ndu_fsm[cur_state];
        poll_immediate_wake();
    }

    if (ndu_ctx.state != cur_state) {
        VLOG_INFO("%d|State changed %s->%s", getpid(),
                  ndu_state_name(cur_state), ndu_state_name(ndu_ctx.state));
    }
}

/* Arranges for the poll loop to wake up when a connection is pending on the
 * NDU listening socket. */
void
ndu_wait(void)
{
    pstream_wait(ndu_ctx.server.listener);
}

/* Returns true once the server state machine has reached NDU_STATE_DONE. */
bool
ndu_done(void)
{
    bool finished = ndu_ctx.state == NDU_STATE_DONE;

    return finished;
}

/* ------------------- Client code. ------------------- */
/* netdev_ports_traverse() callback: when 'netdev' has the name stored in the
 * struct ndu_port_item passed as 'aux', records 'odp_port' as the item's
 * client port number and returns true.  Returns false for any other
 * netdev. */
static bool
ndu_find_clinet_port_no_cb(struct netdev *netdev,
                           odp_port_t odp_port,
                           void *aux)
{
    struct ndu_port_item *item = aux;

    if (!strcmp(netdev_get_name(netdev), item->name)) {
        item->client_port_no = odp_port;
        return true;
    }
    return false;
}

/* Parses the port list received from the server and adopts its tap fds.
 *
 * 'buf' holds a sequence of netlink attributes, each wrapping a struct
 * ndu_port_item; 'fd_arr' holds the received file descriptors, one per "tap"
 * port in the same order.  Every item is inserted into ndu_ctx.portmap,
 * hashed on its server port number.
 *
 * Then, for every item in the map, the matching local port number is looked
 * up, and for tap ports the received fd is installed on the local netdev,
 * which must report -EBUSY from netdev_linux_get_tap_fd().
 *
 * All ports are processed even if some fail.  Returns 0 if every tap port
 * was updated, otherwise the first error encountered. */
static int
ndu_ports_parse(int *fd_arr, struct ofpbuf *buf)
{
    struct ndu_port_item *p;
    struct nlattr *nla;
    size_t left;
    int rv = 0;

    NL_ATTR_FOR_EACH (nla, left, ofpbuf_at(buf, 0, 0), buf->size) {
        const struct ndu_port_item *nlp = nl_attr_get(nla);
        uint32_t hash;

        p = xmalloc(sizeof *p);
        memcpy(p, nlp, sizeof *p);
        if (!strcmp(p->type, "tap")) {
            p->tap_fd = *fd_arr++;
        }
        VLOG_INFO("%d|Client port: %s,%s,server_port_no=%d, tap_fd=%d", getpid(), p->name, p->type,
                  p->server_port_no, p->tap_fd);
        hash = hash_bytes(&p->server_port_no, sizeof p->server_port_no, 0);
        hmap_insert(&ndu_ctx.portmap, &p->node, hash);
    }

    HMAP_FOR_EACH (p, node, &ndu_ctx.portmap) {
        struct netdev *netdev;
        int err;

        netdev_ports_traverse(NDU_DPIF_TYPE, ndu_find_clinet_port_no_cb, p);
        VLOG_INFO("%d|Client port: %s,%s,client_port_no=%d", getpid(), p->name, p->type,
                  p->client_port_no);
        if (strcmp(p->type, "tap")) {
            continue;
        }

        err = netdev_open(p->name, "tap", &netdev);
        if (err) {
            VLOG_ERR("%d|Fail to open tap dev %s", getpid(), p->name);
            if (!rv) {
                rv = err;
            }
            continue;
        }

        if (netdev_linux_get_tap_fd(netdev) != -EBUSY) {
            VLOG_ERR("%d|Tap %s is not ready to be updated", getpid(), p->name);
            netdev_close(netdev);
            if (!rv) {
                rv = -1;
            }
            continue;
        }

        netdev_linux_set_tap_fd(netdev, p->tap_fd);
        netdev_request_reconfigure(netdev);
        rtnetlink_report_link();

        VLOG_INFO("%d|Set netdev:%s with fd %d", getpid(), netdev_get_name(netdev), p->tap_fd);

        /* Drop the reference only after the last use of 'netdev'. */
        netdev_close(netdev);
    }

    return rv;
}

/* Receives the datapath port list and the tap file descriptors from the
 * server on 'fd' and hands them to ndu_ports_parse().
 *
 * The server first sends a size_t with the payload length; zero means there
 * is nothing to receive.  The payload then arrives in one message whose
 * SCM_RIGHTS control data carries the tap fds.
 *
 * Returns 0 on success, EAGAIN if the message is not available yet, EINVAL
 * if the message carries no SCM_RIGHTS control data, or another error.
 *
 * NOTE(review): when EAGAIN is returned the length header has already been
 * consumed; verify that the caller's retry path accounts for that. */
static int
ndu_client_sync_ports(int fd)
{
    uint8_t msgctrl[CMSG_SPACE(sizeof(int) * NDU_MAX_TAP_PORTS)];
    struct cmsghdr *cmsg;
    struct ofpbuf *buf;
    struct msghdr msg;
    struct iovec iov;
    int error = 0;
    int *fd_arr;
    int retval;
    size_t sz;

    /* Get the expected size. */
    error = read_sync(fd, &sz, sizeof sz);
    if (error) {
        return error;
    }
    if (sz == 0) {
        return 0;
    }
    buf = ofpbuf_new(sz);

    memset(&msg, 0, sizeof msg);
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    msg.msg_control = msgctrl;
    msg.msg_controllen = sizeof msgctrl;

    memset(&iov, 0, sizeof iov);
    iov.iov_base = buf->base;
    iov.iov_len = buf->allocated;

    /* 'error' is reset on every attempt so that a successful retry after
     * EINTR ends the loop instead of receiving again. */
    do {
        error = 0;
        retval = recvmsg(fd, &msg, 0);
        if (retval < 0) {
            error = errno;
        }
    } while (error == EINTR);
    if (error == EWOULDBLOCK) {
        error = EAGAIN;
    }
    if (error) {
        goto free_ofbuf;
    }

    cmsg = CMSG_FIRSTHDR(&msg);
    if (!cmsg || cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS) {
        error = EINVAL;
        goto free_ofbuf;
    }

    fd_arr = ALIGNED_CAST(int *, CMSG_DATA(cmsg));
    buf->size += retval;
    error = ndu_ports_parse(fd_arr, buf);

free_ofbuf:
    ofpbuf_delete(buf);
    return error;
}

/* Rewrites '*server_port_no' in place from the server's datapath port number
 * to the client's, using ndu_ctx.portmap.  The value is left untouched when
 * the port is not in the map. */
static void
ndu_translate_client_port_no(odp_port_t *server_port_no)
{
    uint32_t hash = hash_bytes(server_port_no, sizeof *server_port_no, 0);
    struct ndu_port_item *item;

    HMAP_FOR_EACH_WITH_HASH (item, node, hash, &ndu_ctx.portmap) {
        if (item->server_port_no != *server_port_no) {
            continue;
        }
        *server_port_no = item->client_port_no;
        return;
    }
}

/* Translates the datapath port stored for each direction of 'fconn' from
 * the server's numbering to the client's. */
static void
ndu_update_fconn_ports(struct flat_conn *fconn)
{
    int dir = 0;

    while (dir < CT_DIR_NUM) {
        ndu_translate_client_port_no(&fconn->dir_info[dir].port);
        dir++;
    }
}

/* Finishes the client side of an upgrade.
 *
 * Sends the final "sync" to the server (which waits for it in
 * NDU_STATE_DATAPATH_RELEASE), closes the connection, drops both port maps,
 * switches the DOCA operation state to ACTIVE, re-enables the bridge
 * remotes and returns the state machine to NDU_STATE_IDLE so that this
 * process can act as the server of a later upgrade. */
static void
ndu_handle_client_done(void)
{
    struct ndu_port_item *p;

    /* The result is not checked: the sequence continues either way. */
    write_sync(ndu_ctx.fd, "sync", 4);
    ndu_handle_done();
    HMAP_FOR_EACH_SAFE (p, node, &ndu_ctx.portmap) {
        hmap_remove(&ndu_ctx.portmap, &p->node);
        free(p);
    }
    ndu_of_portmap_flush();
    ovs_doca_set_op_state(DOCA_FLOW_PORT_OPERATION_STATE_ACTIVE);
    bridge_remotes_disable(false);
    ndu_ctx.state = NDU_STATE_IDLE;
    /* Remove the "pmd-pause" key from the database, if it is set. */
    ndu_pmd_pause_del_flag();
    rtnetlink_report_link();
}

static int
ndu_client_sync_ct(int fd)
{
    long long exp = time_msec() + NDU_CT_GRACE_PERIOD_MS;
    struct dp_doca *dp;
    struct dpif *dpif;
    char str[100];
    int error;

    error = read_sync(fd, str, 4);
    if (error) {
        return error;
    }

    if (!strncmp(str, "done", 4)) {
        return 0;
    }

    error = dpif_open(NDU_DPIF_NAME, NDU_DPIF_TYPE, &dpif);
    if (error) {
        return error;
    }
    dp = get_dp_doca(dpif);
    if (!dp) {
        error = -1;
        goto err;
    }

    while (!strncmp(str, "conn", 4)) {
        struct flat_conn fconn;

        if (read_sync(fd, &fconn, sizeof fconn)) {
            error = -1;
            goto err;
        }
        ndu_update_fconn_ports(&fconn);
        conntrack_insert_flat_conn(dp->conntrack, &fconn, exp);
        error = read_sync(fd, str, 4);
        if (error) {
            goto err;
        }
    }
    if (strncmp(str, "done", 4)) {
        error = -1;
        goto err;
    }

err:
    dpif_close(dpif);
    return error;
}

/* Receives the tunnel neighbor cache from the server on 'fd': a size_t with
 * the payload length followed, if non-zero, by the payload, which is decoded
 * by tnl_neigh_ofpbuf_decode().
 *
 * Returns 0 on success, otherwise the error from read_sync(). */
static int
ndu_client_sync_tnl_neigh(int fd)
{
    struct ofpbuf *payload = NULL;
    uint32_t n_neigh = 0;
    size_t len;
    int rv;

    /* Get the expected size. */
    rv = read_sync(fd, &len, sizeof len);
    if (!rv && len) {
        payload = ofpbuf_new(len);
        rv = read_sync(fd, payload->data, len);
        if (!rv) {
            payload->size = len;
            tnl_neigh_ofpbuf_decode(payload, &n_neigh);
        }
    }

    VLOG_INFO("%d|Sync %d tnl-neigh entries", getpid(), n_neigh);
    ofpbuf_delete(payload);
    return rv;
}

/* Copies the TLV table mappings of the bridge reachable through 'vconn' into
 * 'ofproto'.
 *
 * Sends an NXT_TLV_TABLE_REQUEST, decodes the reply and inserts the returned
 * mappings with ofproto_tlv_table_insert().
 *
 * Returns 0 on success.  On failure returns the transport error, the
 * OpenFlow error cast to int, or EPROTO when there is no reply or the reply
 * is not an NXT_TLV_TABLE_REPLY (neither case yields an error code of its
 * own, so they must not be reported as success). */
static int
ndu_client_sync_openflow_tlv_map(struct vconn *vconn, struct ofproto *ofproto)
{
    struct ofputil_tlv_table_reply ttr;
    struct ofputil_tlv_table_mod ttm;
    struct ofpbuf *reply = NULL;
    const struct ofp_header *oh;
    struct ofpbuf *request;
    enum ofperr error;
    enum ofpraw raw;
    int rv;

    request = ofpraw_alloc(OFPRAW_NXT_TLV_TABLE_REQUEST, OFP15_VERSION, 0);
    rv = vconn_transact(vconn, request, &reply);
    if (rv || !reply) {
        VLOG_ERR("%d|tlv-map: vconn_transact error %d", getpid(), rv);
        if (!rv) {
            rv = EPROTO;
        }
        goto out;
    }
    oh = reply->data;
    error = ofpraw_decode(&raw, oh);
    if (error) {
        VLOG_ERR("%d|tlv-map: ofpraw_decode error %d", getpid(), error);
        rv = (int) error;
        goto out;
    }
    if (raw != OFPRAW_NXT_TLV_TABLE_REPLY) {
        VLOG_ERR("%d|tlv-map: unexpected reply type. Expected %d, received %d", getpid(),
                 OFPRAW_NXT_TLV_TABLE_REPLY, raw);
        rv = EPROTO;
        goto out;
    }
    error = ofputil_decode_tlv_table_reply(oh, &ttr);
    if (error) {
        VLOG_ERR("%d|tlv-map: error parsing reply %d", getpid(), error);
        rv = (int) error;
        goto out;
    }
    VLOG_INFO("%d|%s: Sync %lu tlv-map mappings", getpid(), ofproto->name,
              ovs_list_size(&ttr.mappings));

    /* Lend the decoded mappings to the table-mod for the insertion... */
    ttm.command = NXTTMC_ADD;
    ovs_list_replace(&ttm.mappings, &ttr.mappings);

    error = ofproto_tlv_table_insert(ofproto, &ttm);
    if (error) {
        VLOG_ERR("%d|tlv-map: insert error %d", getpid(), error);
        rv = (int) error;
    }

    /* ...and hand them back to the reply so that they are freed with it. */
    ovs_list_replace(&ttr.mappings, &ttm.mappings);
    ofputil_uninit_tlv_table(&ttr.mappings);

out:
    ofpbuf_delete(reply);
    return rv;
}

/* Copies the meters configured on the peer reachable through 'vconn' into
 * 'ofproto'.
 *
 * A meter-config request for OFPM13_ALL is sent, and every meter decoded from
 * the reply message is added with ofproto_handle_add_meter().
 *
 * Returns 0 on success.  Otherwise returns a nonzero value: the vconn error,
 * the decode/add error, or EPROTO when the reply is not a meter-config
 * reply.
 *
 * NOTE(review): only one reply message is read; whether the peer may split
 * the answer over several multipart messages is not visible from here. */
static int
ndu_client_sync_openflow_meters(struct vconn *vconn, struct ofproto *ofproto)
{
    struct ofputil_meter_mod mm;
    struct ofpbuf *reply = NULL;
    struct ofpbuf *request;
    struct ofpbuf bands;
    uint32_t n_meters;
    struct ofpbuf b;
    enum ofpraw raw;
    int rv;

    request = ofputil_encode_meter_request(OFP15_VERSION, OFPUTIL_METER_CONFIG, OFPM13_ALL);
    rv = vconn_send_block(vconn, request);
    if (rv) {
        VLOG_ERR("%d|%s:meters: send dump request error %d", getpid(), ofproto->name, rv);
        /* The vconn takes ownership of the message only when the send
         * succeeds, so it has to be released here. */
        ofpbuf_delete(request);
        return rv;
    }

    rv = vconn_recv_block(vconn, &reply);
    if (rv) {
        VLOG_ERR("%d|%s:meters: recieve reply error %d", getpid(), ofproto->name, rv);
        return rv;
    }

    rv = ofpraw_decode(&raw, reply->data);
    if (rv) {
        /* 'raw' is not meaningful when the decode failed, so it is not
         * logged. */
        VLOG_ERR("%d|%s:meters: decode error %d", getpid(), ofproto->name, rv);
        goto out;
    }
    if (raw != OFPRAW_OFPST13_METER_CONFIG_REPLY) {
        VLOG_ERR("%d|%s:meters: unexpected reply type. Expected %d, received %d", getpid(),
                 ofproto->name, OFPRAW_OFPST13_METER_CONFIG_REPLY, raw);
        /* The decode succeeded, so 'rv' is 0; report a real error instead. */
        rv = EPROTO;
        goto out;
    }

    b = ofpbuf_const_initializer(reply->data, reply->size);
    ofpbuf_init(&bands, 64);
    mm.command = OFPMC13_ADD;
    for (n_meters = 0; ; n_meters++) {
        rv = ofputil_decode_meter_config(&b, &mm.meter, &bands);
        if (rv) {
            break;
        }
        rv = ofproto_handle_add_meter(ofproto, &mm);
        if (rv) {
            VLOG_ERR("%d|%s:meters: add meter error %d", getpid(), ofproto->name, rv);
            goto out_bands;
        }
    }
    VLOG_INFO("%d|%s: Sync %u meters", getpid(), ofproto->name, n_meters);

    /* EOF is the regular way for the decode loop to end. */
    if (rv != EOF) {
        VLOG_ERR("%d|%s:meters: decode error %d", getpid(), ofproto->name, rv);
    } else {
        rv = 0;
    }

out_bands:
    ofpbuf_uninit(&bands);
out:
    ofpbuf_delete(reply);
    return rv;
}

/* Fills 'gm' so that it describes an OFPGC11_ADD of the group described by
 * 'gd'.
 *
 * The bucket list is not copied: the list head in 'gm' takes the place of the
 * one in 'gd', so the caller has to move the buckets back before releasing
 * 'gd'. */
static void
ndu_decode_group_desc_mod(struct ofputil_group_desc *gd, struct ofputil_group_mod *gm)
{
    /* Values taken over from the group description. */
    gm->group_id = gd->group_id;
    gm->type = gd->type;
    gm->props = gd->props;

    /* Values that are the same for every group being added. */
    gm->command_bucket_id = OFPG15_BUCKET_ALL;
    gm->command = OFPGC11_ADD;

    ovs_list_replace(&gm->buckets, &gd->buckets);
}

/* Copies the groups configured on the peer reachable through 'vconn' into
 * 'ofproto'.
 *
 * A group-desc request for OFPG_ALL is sent, and each group description
 * decoded from the reply is turned into a group-mod and passed to
 * ofproto_group_insert().
 *
 * Returns 0 on success, the vconn or decode error on a communication or
 * decoding failure, and -1 when the reply has an unexpected type or a group
 * could not be inserted. */
static int
ndu_client_sync_openflow_groups(struct vconn *vconn, struct ofproto *ofproto)
{
    const struct ofp_header *oh;
    struct ofpbuf *reply = NULL;
    struct ofpbuf *request;
    uint32_t n_groups;
    enum ofpraw raw;
    struct ofpbuf b;
    int rv;

    request = ofputil_encode_group_desc_request(OFP15_VERSION, OFPG_ALL);
    if (!request) {
        return -1;
    }

    rv = vconn_transact(vconn, request, &reply);
    if (rv) {
        goto out;
    }

    oh = reply->data;
    rv = ofpraw_decode(&raw, oh);
    if (rv) {
        goto out;
    }
    if (raw != OFPRAW_OFPST11_GROUP_DESC_REPLY) {
        rv = -1;
        goto out;
    }

    b = ofpbuf_const_initializer(oh, ntohs(oh->length));
    for (n_groups = 0; ; n_groups++) {
        struct ofputil_group_desc gd;
        struct ofproto_group_mod ogm;

        rv = ofputil_decode_group_desc_reply(&gd, &b, oh->version);
        if (rv) {
            /* EOF is the regular end of the reply. */
            if (rv == EOF) {
                rv = 0;
            }
            break;
        }

        ndu_decode_group_desc_mod(&gd, &ogm.gm);

        if (ofproto_group_insert(ofproto, &ogm, NULL)) {
            rv = -1;
        }

        /* Move the buckets back and release the description even when the
         * insert failed; leaving the loop without doing so would leak
         * them. */
        ovs_list_replace(&gd.buckets, &ogm.gm.buckets);
        ofputil_uninit_group_desc(&gd);

        if (rv) {
            break;
        }
    }
    VLOG_INFO("%d|%s: Sync %u groups", getpid(), ofproto->name, n_groups);

out:
    ofpbuf_delete(reply);

    return rv;
}

static void
ndu_decode_flow_stats_mod(struct ofputil_flow_stats *fs,
                          struct ofputil_flow_mod *fm,
                          struct ofproto *ofproto,
                          struct ds *s,
                          struct ofpbuf *ofpacts)
{
    enum ofputil_protocol usable_protocols;
    struct ofputil_flow_mod fm_tmp;
    struct ofpbuf *buf;
    enum ofperr err;
    char *error;

    ds_clear(s);

    ofputil_flow_stats_format(s, fs, NULL, NULL, false);
    error = parse_ofp_flow_mod_str(&fm_tmp, ds_cstr(s), NULL, NULL, OFPFC_ADD, &usable_protocols);
    if (error) {
        VLOG_FATAL("%d|%s: Failed to parse flow-str %s (%s)", getpid(), ofproto->name, ds_cstr(s),
                   error);
    }

    buf = ofputil_encode_flow_mod(&fm_tmp, OFPUTIL_P_OF15_OXM);
    free(CONST_CAST(struct ofpact *, fm_tmp.ofpacts));
    minimatch_destroy(&fm_tmp.match);

    err = ofputil_decode_flow_mod(fm, buf->data, OFPUTIL_P_OF15_OXM,
                                  ofproto_get_tun_tab(ofproto),
                                  &ofproto->vl_mff_map, ofpacts,
                                  u16_to_ofp(ofproto->max_ports),
                                  ofproto->n_tables);
    if (err) {
        VLOG_FATAL("%d|%s: Failed to decode flow-str %s (%d)", getpid(), ofproto->name, ds_cstr(s),
                   err);
    }
    ofpbuf_delete(buf);
}

/* Copies the OpenFlow rules of the peer reachable through 'vconn' into
 * 'ofproto'.
 *
 * All flows are dumped with vconn_dump_flows(); each of them is converted to
 * a flow-mod by ndu_decode_flow_stats_mod() and applied with
 * ofproto_flow_mod().  Failing to build the request or to dump the flows is
 * fatal.
 *
 * Returns 0 on success, otherwise the error of the first flow-mod that
 * failed; the remaining flows are then not applied. */
static int
ndu_client_sync_of_rules(struct vconn *vconn, struct ofproto *ofproto)
{
    enum ofputil_protocol usable_protocols;
    struct ofputil_flow_stats_request fsr;
    struct ofputil_flow_stats *fses;
    uint64_t ofpacts_stub[1024 / 8];
    struct ofputil_flow_mod fm;
    size_t n_fses;
    char *error;
    struct ds s;
    int rv;

    /* An empty match string requests every flow. */
    error = parse_ofp_flow_stats_request_str(&fsr, false, "", NULL, NULL, &usable_protocols);
    if (error) {
        VLOG_FATAL("%d|%s: Failed to parse flow-stats request (%s)", getpid(), ofproto->name,
                   error);
    }

    rv = vconn_dump_flows(vconn, &fsr, OFPUTIL_P_OF15_OXM, &fses, &n_fses);
    if (rv) {
        VLOG_FATAL("%d|%s: Failed to dump-flows (%d)", getpid(), ofproto->name, rv);
    }
    VLOG_INFO("%d|%s: Sync %lu openflow rules", getpid(), ofproto->name, n_fses);
    ds_init(&s);
    for (size_t i = 0; i < n_fses; i++) {
        struct ofpbuf ofpacts;

        ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
        ndu_decode_flow_stats_mod(&fses[i], &fm, ofproto, &s, &ofpacts);
        rv = ofproto_flow_mod(ofproto, &fm);
        minimatch_destroy(&fm.match);
        /* The buffer moves to the heap once the actions outgrow the stub;
         * release it on every iteration so that it is not leaked. */
        ofpbuf_uninit(&ofpacts);
        if (rv) {
            break;
        }
    }
    ds_destroy(&s);
    for (size_t i = 0; i < n_fses; i++) {
        free(CONST_CAST(struct ofpact *, fses[i].ofpacts));
    }
    free(fses);

    return rv;
}

/* Synchronizes the OpenFlow state of every bridge listed in the database.
 *
 * For each bridge a connection is opened to the
 * "<rundir>/<bridge>.<NDU_PIDFILE_SUFFIX>.mgmt" unix socket, and the TLV map,
 * meters, groups and rules are copied, in that order, into the local ofproto
 * of the same name.  Every failure is fatal. */
static void
ndu_client_sync_openflow(void)
{
    const struct ovsrec_open_vswitch *cfg;
    struct ovsrec_bridge **br;
    struct vconn *vconn;
    char *vconn_name;
    int error;
    size_t i;

    cfg = ovsrec_open_vswitch_first(ndu_ctx.idl);
    if (!cfg) {
        /* Fail with a message instead of dereferencing a null row. */
        VLOG_FATAL("%d|No Open_vSwitch configuration available to sync openflow", getpid());
    }
    br = cfg->bridges;
    for (i = 0; i < cfg->n_bridges; i++) {
        struct ofproto *ofproto;

        vconn_name = xasprintf("unix:%s/%s."NDU_PIDFILE_SUFFIX".mgmt", ovs_rundir(), br[i]->name);
        error = vconn_open(vconn_name, 0, DSCP_DEFAULT, &vconn);
        if (error) {
            VLOG_FATAL("%d|%s: Failed to open socket (%s)", getpid(), br[i]->name,
                       ovs_strerror(error));
        }
        free(vconn_name);
        error = vconn_connect_block(vconn, -1);
        if (error) {
            VLOG_FATAL("%d|%s: failed to connect to socket (%s)", getpid(), br[i]->name,
                       ovs_strerror(error));
        }

        ofproto = bridge_get_ofproto_by_name(br[i]->name);
        if (!ofproto) {
            /* The sync helpers below dereference 'ofproto'. */
            VLOG_FATAL("%d|%s: no local ofproto to sync openflow into", getpid(), br[i]->name);
        }

        /* NOTE(review): the helpers return a mix of errno values, ofperr
         * codes and -1, so the text produced by ovs_strerror() below is only
         * meaningful for the errno case. */
        error = ndu_client_sync_openflow_tlv_map(vconn, ofproto);
        if (error) {
            VLOG_FATAL("%d|%s: failed to sync openflow tlv-map (%s)", getpid(), br[i]->name,
                       ovs_strerror(error));
        }
        error = ndu_client_sync_openflow_meters(vconn, ofproto);
        if (error) {
            VLOG_FATAL("%d|%s: failed to sync openflow meters (%s)", getpid(), br[i]->name,
                       ovs_strerror(error));
        }
        error = ndu_client_sync_openflow_groups(vconn, ofproto);
        if (error) {
            VLOG_FATAL("%d|%s: failed to sync openflow groups (%s)", getpid(), br[i]->name,
                       ovs_strerror(error));
        }
        error = ndu_client_sync_of_rules(vconn, ofproto);
        if (error) {
            VLOG_FATAL("%d|%s: failed to sync openflow rules (%s)", getpid(), br[i]->name,
                       ovs_strerror(error));
        }

        vconn_close(vconn);
    }
}

/* Appends the textual form of 'reply' to 'ds'.
 *
 * With UNIXCTL_OUTPUT_FMT_TEXT the reply has to be a JSON string, whose
 * content is appended as is; any other reply type is fatal.  With every other
 * format the reply is serialized as JSON with sorted keys.
 *
 * A newline is appended when 'ds' is not empty and does not already end with
 * one. */
static void
appctl_reply_to_string(struct json *reply, enum unixctl_output_fmt fmt, struct ds *ds)
{
    int tail;

    ovs_assert(reply);

    if (fmt != UNIXCTL_OUTPUT_FMT_TEXT) {
        json_to_ds(reply, JSSF_SORT, ds);
    } else if (reply->type == JSON_STRING) {
        ds_put_cstr(ds, json_string(reply));
    } else {
        VLOG_FATAL("%d|appctl-state: unexpected reply type in JSON rpc reply: %s", getpid(),
                   json_type_to_string(reply->type));
    }

    tail = ds_last(ds);
    if (tail != EOF && tail != '\n') {
        ds_put_char(ds, '\n');
    }
}

/* Fills 'e' from the JSON object 'json'.
 *
 * The members "port" and "vlan" (integers), "mac" (string holding an
 * Ethernet address) and "static" (boolean) are recognized; other members are
 * ignored.  Fields of 'e' whose member is absent are left untouched, so the
 * caller is expected to initialize 'e' beforehand.
 *
 * Returns 0 on success, -EINVAL when 'json' is not an object or a recognized
 * member has an unexpected type or content. */
static int
fdb_entry_from_json(struct ndu_fdb_entry *e,  const struct json *json)
{
    const struct shash_node *node;

    if (json->type != JSON_OBJECT) {
        return -EINVAL;
    }

    SHASH_FOR_EACH (node, json_object(json)) {
        const struct json *value = node->data;

        if (!strcmp(node->name, "port")) {
            if (value->type != JSON_INTEGER) {
                return -EINVAL;
            }
            e->port = (OVS_FORCE ofp_port_t) value->integer;
        } else if (!strcmp(node->name, "vlan")) {
            if (value->type != JSON_INTEGER) {
                return -EINVAL;
            }
            e->vlan = value->integer;
        } else if (!strcmp(node->name, "mac")) {
            if (value->type != JSON_STRING ||
                !ovs_scan(json_string(value), ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(e->src))) {
                return -EINVAL;
            }
        } else if (!strcmp(node->name, "static")) {
            /* Check the type like for the other members: json_boolean()
             * asserts on anything that is not a JSON boolean. */
            if (value->type != JSON_TRUE && value->type != JSON_FALSE) {
                return -EINVAL;
            }
            e->is_static = json_boolean(value);
        }
    }

    return 0;
}

static int
ndu_client_sync_fdb_entries(const struct json *fdb_entries, struct ofproto_dpif *dpif, int *err_cnt)
{
    struct ndu_fdb_entry mac;
    int err;
    int i;

    if (fdb_entries->type != JSON_ARRAY) {
        return -EINVAL;
    }

    *err_cnt = 0;
    for (i = 0; i < fdb_entries->array.n; i++) {
        memset(&mac, 0, sizeof mac);
        err = fdb_entry_from_json(&mac, fdb_entries->array.elems[i]);
        if (err) {
            return err;
        }

        if (mac.is_static) {
            if (!xlate_add_static_mac_entry(dpif, mac.port, mac.src, mac.vlan)) {
                (*err_cnt)++;
            }
        } else {
            xlate_mac_learning_update(dpif, mac.port, mac.src, mac.vlan, false);
        }
    }

    return 0;
}

/* Runs the appctl command 'cmd' on the server once per bridge, with the
 * bridge name as its only argument, and passes each result to 'cb' together
 * with the local ofproto-dpif of that bridge.
 *
 * Failing to communicate with the server is fatal.  A server-side error or a
 * bridge without a local ofproto-dpif stops the iteration and returns -1.  A
 * failure reported by 'cb' is logged and the remaining bridges are still
 * processed; the first such failure is returned once all bridges are done.
 *
 * Returns 0 when every bridge was synchronized, otherwise a nonzero value as
 * described above. */
static int
ndu_client_sync_appctl_cmd_over_bridges(struct jsonrpc *conn, const char *cmd,
                                        appctl_cmd_response_handler cb)
{
    const struct ovsrec_open_vswitch *cfg = ovsrec_open_vswitch_first(ndu_ctx.idl);
    struct json *cmd_result = NULL, *cmd_error = NULL;
    struct ofproto_dpif *dpif = NULL;
    struct ovsrec_bridge **br;
    struct ds reply_str;
    int first_cb_error = 0;
    int err_cnt;
    int error;
    size_t i;

    if (!cfg) {
        VLOG_ERR("%d|appctl-state: %s: no Open_vSwitch configuration available", getpid(), cmd);
        return -1;
    }
    br = cfg->bridges;

    for (i = 0; i < cfg->n_bridges; i++) {
        error = unixctl_client_transact(conn, cmd, 1, (char *[]){br[i]->name},
                                        &cmd_result, &cmd_error);
        if (error) {
            VLOG_FATAL("%d|appctl-state: Failed to communicate with the server", getpid());
        }
        if (cmd_error) {
            ds_init(&reply_str);
            appctl_reply_to_string(cmd_error, UNIXCTL_OUTPUT_FMT_TEXT, &reply_str);
            VLOG_ERR("%d|appctl-state: %s %s, server error: %s", getpid(), cmd, br[i]->name,
                     ds_cstr_ro(&reply_str));
            ds_destroy(&reply_str);
            error = -1;
            goto err;
        }

        ds_init(&reply_str);
        appctl_reply_to_string(cmd_result, UNIXCTL_OUTPUT_FMT_JSON, &reply_str);
        VLOG_DBG("%d|appctl-state: %s %s, server reply: %s", getpid(), cmd, br[i]->name,
                 ds_cstr_ro(&reply_str));
        ds_destroy(&reply_str);

        dpif = ofproto_dpif_lookup_by_name(br[i]->name);
        if (!dpif) {
            VLOG_ERR("%d|%s: Bridge %s not found", getpid(), cmd, br[i]->name);
            /* 'error' is still 0 from the successful transaction; without
             * this the failure would be returned as success. */
            error = -1;
            goto err;
        }

        error = cb(cmd_result, dpif, &err_cnt);
        if (error) {
            VLOG_ERR("%d|%s:%s: Failed to sync %d", getpid(), br[i]->name, cmd, error);
            if (!first_cb_error) {
                first_cb_error = error;
            }
        } else {
            VLOG_INFO("%d|%s:%s: Sync %lu entries (errors %u)", getpid(), br[i]->name, cmd,
                      cmd_result->array.n, err_cnt);
        }

        /* Do not keep pointers to destroyed objects across iterations. */
        json_destroy(cmd_result);
        json_destroy(cmd_error);
        cmd_result = NULL;
        cmd_error = NULL;
    }
    return first_cb_error;
err:
    json_destroy(cmd_result);
    json_destroy(cmd_error);
    return error;
}

static int
router_entry_from_json(const struct json *json, struct ovs_router_entry *re)
{
    const char *src_addr_str = NULL;
    const char *nw_addr_str = NULL;
    const struct shash_node *node;
    const char *gw_str = NULL;
    ovs_be32 src = 0;
    ovs_be32 gw = 0;
    ovs_be32 ip;

    if (json->type != JSON_OBJECT) {
        return -EINVAL;
    }

    memset(&re->cr, 0, sizeof re->cr);

    /* Initialize optional fields */
    re->mark = 0;
    re->gw = in6addr_any;
    memset(re->output_netdev, 0, sizeof(re->output_netdev));

    SHASH_FOR_EACH (node, json_object(json)) {
        const struct json *value = node->data;

        if (!strcmp(node->name, "mark")) {
            re->mark = json_integer(value);
        } else if (!strcmp(node->name, "table")) {
            re->table = json_integer(value);
        } else if (!strcmp(node->name, "priority")) {
            re->priority = json_integer(value);
        } else if (!strcmp(node->name, "prefix")) {
            re->plen = json_integer(value);
        } else if (!strcmp(node->name, "nexthops")) {
            for (size_t i = 0; i < json_array(value)->n; i++) {
                struct json *nh_json = json_array(value)->elems[i];
                const struct shash_node *nh_node;

                SHASH_FOR_EACH (nh_node, json_object(nh_json)) {
                    if (!strcmp(nh_node->name, "dev")) {
                        ovs_strlcpy(re->output_netdev, json_string(nh_node->data),
                                    sizeof re->output_netdev / sizeof re->output_netdev[0]);
                    } else if (!strcmp(nh_node->name, "gateway")) {
                        gw_str = json_string(nh_node->data);
                    }
                }
            }
        } else if (!strcmp(node->name, "dst")) {
            nw_addr_str = json_string(value);
        } else if (!strcmp(node->name, "prefsrc")) {
            src_addr_str = json_string(value);
        }
    }

    if (!re->output_netdev[0]) {
        VLOG_ERR("Failed to construct router entry from json, "
                 "dev field is missing.");
        return -EINVAL;
    } else if (!nw_addr_str) {
        VLOG_ERR("Failed to construct router entry from json, "
                 "dst address field is missing.");
        return -EINVAL;
    } else if (!src_addr_str) {
        VLOG_ERR("Failed to construct router entry from json, "
                 "src address field is missing.");
        return -EINVAL;
    }

    if (ip_parse(nw_addr_str, &ip)) {
        re->plen += 96;
        in6_addr_set_mapped_ipv4(&re->nw_addr, ip);

        if (gw_str) {
            if (!ip_parse(gw_str, &gw)) {
                goto err_gw;
            }
            in6_addr_set_mapped_ipv4(&re->gw, gw);
        }

        if (!ip_parse(src_addr_str, &src)) {
            goto err_src_addr;
        }
        in6_addr_set_mapped_ipv4(&re->src_addr, src);
    } else if (ipv6_parse(nw_addr_str, &re->nw_addr)) {
        if (gw_str && !ipv6_parse(gw_str, &re->gw)) {
            goto err_gw;
        }
        if (!ipv6_parse(src_addr_str, &re->src_addr)) {
            goto err_src_addr;
        }
    } else {
        VLOG_ERR("Failed to construct router entry from json, invalid nw_addr field.");
        return -EINVAL;
    }

    return 0;
err_gw:
    VLOG_ERR("Failed to construct router entry from json, invalid gw field.");
    goto err;
err_src_addr:
    VLOG_ERR("Failed to construct router entry from json, invalid src_addr field.");
    goto err;
err:
    return -EINVAL;
}

static int
ndu_client_router_insert_json(const struct json *json)
{
    struct ovs_router_entry re;
    int err;

    err = router_entry_from_json(json, &re);
    if (err) {
        return err;
    }

    return ovs_router_insert__(re.table, re.mark, re.priority, &re.nw_addr, re.plen,
                               re.output_netdev, &re.gw, &re.src_addr);
}

/* Inserts every route listed in the JSON array 'routes'.
 *
 * 'dpif' is unused; the parameter exists so that the function can be used as
 * an appctl_cmd_response_handler.  The number of routes that could not be
 * inserted is stored in '*err_cnt'.
 *
 * Returns 0 when the array was processed, -EINVAL (leaving '*err_cnt'
 * untouched) when 'routes' is not an array. */
static int
ndu_client_sync_routes(const struct json *routes, struct ofproto_dpif *dpif OVS_UNUSED,
                       int *err_cnt)
{
    int n_failed = 0;

    if (routes->type != JSON_ARRAY) {
        return -EINVAL;
    }

    for (size_t idx = 0; idx < routes->array.n; idx++) {
        if (ndu_client_router_insert_json(routes->array.elems[idx])) {
            n_failed++;
        }
    }
    *err_cnt = n_failed;

    return 0;
}

/* Runs the argument-less appctl command 'cmd' on the server and passes its
 * result to 'cb'.  No ofproto-dpif is associated with the command, so 'cb'
 * receives a null one.
 *
 * Failing to communicate with the server is fatal.
 *
 * Returns 0 on success, -1 when the server answered with an error, otherwise
 * the error returned by 'cb'. */
static int
ndu_client_sync_appctl_cmd(struct jsonrpc *conn, const char *cmd, appctl_cmd_response_handler cb)
{
    struct ds text = DS_EMPTY_INITIALIZER;
    struct json *srv_result = NULL;
    struct json *srv_error = NULL;
    int n_failed;
    int rc;

    rc = unixctl_client_transact(conn, cmd, 0, NULL, &srv_result, &srv_error);
    if (rc) {
        VLOG_FATAL("%d|appctl-state: Failed to communicate with the server", getpid());
    }

    if (srv_error) {
        /* The server refused the command: report its message and give up. */
        appctl_reply_to_string(srv_error, UNIXCTL_OUTPUT_FMT_TEXT, &text);
        VLOG_ERR("%d|appctl-state: %s, server error: %s", getpid(), cmd, ds_cstr_ro(&text));
        ds_destroy(&text);
        rc = -1;
    } else {
        appctl_reply_to_string(srv_result, UNIXCTL_OUTPUT_FMT_JSON, &text);
        VLOG_DBG("%d|appctl-state: %s, server reply: %s", getpid(), cmd, ds_cstr_ro(&text));
        ds_destroy(&text);

        rc = cb(srv_result, NULL, &n_failed);
        if (rc) {
            VLOG_ERR("%d|%s: Failed to sync %d", getpid(), cmd, rc);
        } else {
            VLOG_INFO("%d|%s: Sync %lu entries (errors %u)", getpid(), cmd, srv_result->array.n,
                      n_failed);
        }
    }

    json_destroy(srv_result);
    json_destroy(srv_error);
    return rc;
}

/* Opens a unixctl client connection to the control socket of the server.
 *
 * The pid of the server is read from the pidfile
 * "<ndu_ctx.pidfile>.<NDU_PIDFILE_SUFFIX>" and used to build the socket path
 * "<rundir>/<program>.<pid>.ctl".
 *
 * Returns the connected client.  Failing to read the pidfile or to connect
 * is fatal, so the function does not return in those cases. */
static struct jsonrpc *
ndu_client_appctl_connect(void)
{
    struct jsonrpc *client;
    char *pidfile_name;
    char *socket_name;
    pid_t pid;
    int error;

    pidfile_name = xasprintf("%s."NDU_PIDFILE_SUFFIX, ndu_ctx.pidfile);
    pid = read_pidfile(pidfile_name);
    if (pid < 0) {
        /* Name the file that was actually read; a negative return value is
         * treated as a negated errno. */
        VLOG_FATAL("%d|appctl-connect: Failed to read %s (%s)", getpid(), pidfile_name,
                   ovs_strerror(-pid));
    }
    free(pidfile_name);
    socket_name = xasprintf("%s/%s.%ld.ctl", ovs_rundir(), ovs_get_program_name(), (long int) pid);

    error = unixctl_client_create(socket_name, &client);
    if (error) {
        VLOG_FATAL("%d|appctl-state: Failed to connect to the server's control socket %s (%s)",
                   getpid(), socket_name, ovs_strerror(error));
    }
    free(socket_name);

    return client;
}

static void
ndu_client_sync_appctl_state(void)
{
    struct json *cmd_result = NULL, *cmd_error = NULL;
    struct ds reply_str = DS_EMPTY_INITIALIZER;
    struct jsonrpc *client;
    int error;

    client = ndu_client_appctl_connect();
    if (!client) {
        VLOG_FATAL("%d|appctl-state: failed to connec to the server's control socket", getpid());
    }

    error = unixctl_client_transact(client, "set-options", 2, (char *[]){"--format", "json"},
                                    &cmd_result, &cmd_error);
    if (error) {
        VLOG_FATAL("%d|appctl-state: failed to communicate with the server", getpid());
    }
    if (cmd_error) {
        appctl_reply_to_string(cmd_error, UNIXCTL_OUTPUT_FMT_TEXT, &reply_str);
        VLOG_ERR("%d|appctl-state: server error: %s", getpid(), ds_cstr_ro(&reply_str));
        ds_destroy(&reply_str);
        goto out;
    }

    error = ndu_client_sync_appctl_cmd_over_bridges(client, "fdb/show",
                                                    ndu_client_sync_fdb_entries);
    if (error) {
        VLOG_ERR("%d|appctl-state: fdb sync failed or might be incomplete %d", getpid(), error);
    }

    error = ndu_client_sync_appctl_cmd(client, "ovs/route/show", ndu_client_sync_routes);
    if (error) {
        VLOG_ERR("%d|appctl-state: route sync failed or might be incomplete %d", getpid(), error);
    }
out:
    json_destroy(cmd_result);
    json_destroy(cmd_error);
    jsonrpc_close(client);
}

/* Advances the client side state machine by at most one step.
 *
 * Does nothing and returns 0 unless the NDU context is in the client state.
 * Otherwise the action of the current client state is executed:
 *
 *   SYNC_DB         waits until the IDL sequence number changes.
 *   PROBE_PORTS     syncs the ports.
 *   SYNC_TNL_NEIGH  syncs the tunnel neighbor entries.
 *   SYNC_OPENFLOW   syncs the OpenFlow and appctl state, then writes one
 *                   byte to the server socket.
 *   SYNC_CT         syncs the conntrack state.
 *   DONE            calls ndu_handle_client_done().
 *
 * A failing port, tunnel neighbor or conntrack sync is fatal.  An immediate
 * wake-up of the poll loop is requested on every call.
 *
 * Returns EAGAIN while the database has not been updated yet, otherwise
 * 0. */
int
ndu_client_run(void)
{
    enum ndu_client_state cur_state = ndu_ctx.client_state;
    int rv = 0;

    if (ndu_ctx.state != NDU_STATE_CLIENT) {
        return 0;
    }

    switch (ndu_ctx.client_state) {
    case NDU_CLIENT_STATE_SYNC_DB:
        if (ndu_ctx.idl_seqno == ovsdb_idl_get_seqno(ndu_ctx.idl)) {
            /* syncing with db, let main loop run */
            rv = EAGAIN;
            break;
        }
        ndu_ctx.client_state = NDU_CLIENT_STATE_PROBE_PORTS;
        break;
    case NDU_CLIENT_STATE_PROBE_PORTS:
        rv = ndu_client_sync_ports(ndu_ctx.fd);
        if (rv) {
            VLOG_FATAL("%d|Could not sync ports. rv=%d", getpid(), rv);
        }
        ndu_ctx.client_state = NDU_CLIENT_STATE_SYNC_TNL_NEIGH;
        break;
    case NDU_CLIENT_STATE_SYNC_TNL_NEIGH:
        rv = ndu_client_sync_tnl_neigh(ndu_ctx.fd);
        if (rv) {
            /* Name the step that actually failed. */
            VLOG_FATAL("%d|Could not sync tnl-neigh. rv=%d", getpid(), rv);
        }
        ndu_ctx.client_state = NDU_CLIENT_STATE_SYNC_OPENFLOW;
        break;
    case NDU_CLIENT_STATE_SYNC_OPENFLOW:
        bridge_ofproto_deny_flush(true);
        ndu_client_sync_openflow();
        ndu_client_sync_appctl_state();
        /* NOTE(review): presumably tells the server that this phase is
         * complete; the result of the write is not checked - confirm that
         * this is intended. */
        write_sync(ndu_ctx.fd, "x", 1);
        ndu_ctx.client_state = NDU_CLIENT_STATE_SYNC_CT;
        break;
    case NDU_CLIENT_STATE_SYNC_CT:
        rv = ndu_client_sync_ct(ndu_ctx.fd);
        if (rv) {
            VLOG_FATAL("%d|Could not sync ct. rv=%d", getpid(), rv);
        }
        ndu_ctx.client_state = NDU_CLIENT_STATE_DONE;
        break;
    case NDU_CLIENT_STATE_DONE:
        ndu_handle_client_done();
        break;
    }

    if (ndu_ctx.client_state != cur_state) {
        VLOG_INFO("%d|Client state changed %s->%s", getpid(),
                  ndu_client_state_name(cur_state), ndu_client_state_name(ndu_ctx.client_state));
    }

    poll_immediate_wake();
    return rv;
}

static int
ndu_of_ports_parse(int fd, size_t sz)
{
    struct ndu_port_item *p;
    struct nlattr *nla;
    struct ofpbuf *buf;
    size_t left;
    int rv;

    hmap_init(&ndu_ctx.of_portmap);

    if (sz == 0) {
        return 0;
    }

    buf = ofpbuf_new(sz);
    rv = read_sync(fd, buf->data, sz);
    if (rv) {
        ofpbuf_delete(buf);
        return rv;
    }
    buf->size = sz;
    NL_ATTR_FOR_EACH (nla, left, ofpbuf_at(buf, 0, 0), buf->size) {
        const struct ndu_port_item *nlp = nl_attr_get(nla);

        p = xmalloc(sizeof *p);
        memcpy(p, nlp, sizeof *p);
        hmap_insert(&ndu_ctx.of_portmap, &p->node, hash_string(p->name, 0));
        VLOG_INFO("%d|Client of_port: %s,%d", getpid(), p->name, p->ofp_port);
    }
    ofpbuf_delete(buf);

    return 0;
}

/* Connects to the NDU socket of the server process 'pid' and switches this
 * process to the NDU client role.
 *
 * After connecting to "<rundir>/<NDU_SOCK_NAME>.<pid>", the 4-byte greeting
 * "sync" is expected, followed by the size of the port item data and the
 * port items themselves.  On success the DOCA operation state is set to
 * standby, the bridge remotes are disabled and the client state machine
 * starts at SYNC_DB.
 *
 * Returns 0 on success.  Returns the stream_open() error when the connection
 * cannot be opened.  Once connected, every failure calls ndu_handle_done();
 * -1 is returned for a missing or wrong greeting and the underlying error
 * otherwise. */
int
ndu_connect_and_sync(long int pid)
{
    struct stream *conn;
    size_t ports_len;
    char greeting[100];
    char *path;
    int rc;

    path = xasprintf("unix:%s/%s.%ld", ovs_rundir(), NDU_SOCK_NAME, pid);
    rc = stream_open(path, &conn, DSCP_DEFAULT);
    free(path);
    if (rc) {
        return rc;
    }

    ndu_ctx.server_pid = pid;
    ndu_ctx.stream = conn;
    ndu_ctx.fd = stream_fd_get(conn);

    rc = read_sync(ndu_ctx.fd, greeting, 4);
    if (rc || strncmp(greeting, "sync", 4)) {
        rc = -1;
        goto fail;
    }

    /* Get the expected size. */
    rc = read_sync(ndu_ctx.fd, &ports_len, sizeof ports_len);
    if (rc) {
        goto fail;
    }

    rc = ndu_of_ports_parse(ndu_ctx.fd, ports_len);
    if (rc) {
        goto fail;
    }

    ovs_doca_set_op_state(DOCA_FLOW_PORT_OPERATION_STATE_STANDBY);
    ndu_ctx.state = NDU_STATE_CLIENT;
    bridge_remotes_disable(true);
    ndu_ctx.client_state = NDU_CLIENT_STATE_SYNC_DB;
    return 0;

fail:
    ndu_handle_done();
    return rc;
}

ofp_port_t
ndu_ofport_number_get(const char *name)
{
    struct ndu_port_item *p;

    HMAP_FOR_EACH_WITH_HASH (p, node, hash_string(name, 0), &ndu_ctx.of_portmap) {
        if (!strcmp(name, p->name)) {
            VLOG_INFO("%d|%s: found ofp_port=%d", getpid(), name, p->ofp_port);
            return p->ofp_port;
        }
    }

    return OFPP_NONE;
}
