/*
 * Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>

#include "openvswitch/thread.h"
#include "openvswitch/util.h"
#include "ovs-atomic.h"
#include "ovs-rcu.h"
#include "ovs-thread.h"

#include "concurrent-array.h"

/*
 * Concurrent array
 * ================
 *
 * This is an implementation of a dynamic concurrent array.
 * This means that the exposed API is thread-safe, and also
 * that get / set operations on the array should execute
 * concurrently at least most of the time.
 *
 * To simplify the design, this array does not support shrinking.
 *
 * Implementation
 * --------------
 *
 * The array itself is a set of buckets pointing to slabs of values.
 * As the array grows, the array of buckets is expanded.
 *
 * Creating the new array of buckets is done under lock, ensuring
 * synchronization in a simple way. As the array grows, it is expected
 * that there should be fewer and fewer collisions between threads
 * each trying to grow the array at the same time.
 *
 * +----------------+
 * |concurrent_array|
 * +----------------+
 * | lock {         |   +---------+
 * |    <impl> o------> | ca_impl |   +--------------------------+
 * | }              |   +---------+   | ca_slab                  |
 * +----------------+   | [       |   +--------------------------+
 *                      |   [0] o---> [ <v00>, <v01>, <v02>, ... ]
 *                      |   [1] o---> [ <v10>, <v11>, <v12>, ... ]
 *                      |   ...   |
 *                      |   [N]   |
 *                      | ]       |
 *                      +---------+
 *
 * The bucket array is stored in the 'ca_impl' type, which is heap-allocated
 * and atomically linked within the concurrent array structure.
 *
 * A reader can thus dereference this sub-structure safely at any time, as long
 * as each instance is removed using the RCU when replaced by a new one.
 *
 * When allocating a new instance of 'ca_impl', the existing slab pointers are
 * kept and remain valid. As the array does not shrink, they will remain valid
 * the whole lifetime of the structure.
 *
 * To allow concurrent loads and stores to each slot in a slab, each value
 * is itself an atomic type.
 *
 */

/* A slab of CONCURRENT_ARRAY_SLAB_SIZE value slots.
 *
 * Each slot is an atomic word holding a user pointer converted to
 * 'uintptr_t', so that individual slots can be loaded and stored
 * atomically. */
struct ca_slab {
    atomic_uintptr_t entries[CONCURRENT_ARRAY_SLAB_SIZE];
};

/* The bucket array: one snapshot of the set of slabs backing the array.
 *
 * A new instance is allocated each time the array grows; slab pointers
 * from the previous instance are copied into the new one. */
struct ca_impl {
    struct ca_slab **slabs; /* Heap array of 'n_slabs' slab pointers. */
    size_t n_slabs;         /* Number of elements in 'slabs'. */
};

/* Handle of a concurrent array. */
struct concurrent_array {
    struct ovs_mutex lock;         /* Held while creating or growing 'impl'. */
    ATOMIC(struct ca_impl *) impl; /* Current bucket array, NULL until the
                                    * first set; read atomically. */
};

/* Allocates and returns a new concurrent array.
 *
 * The returned array has no bucket array yet ('impl' is NULL); it is
 * created on the first call to concurrent_array_set().  The array should
 * be released with concurrent_array_destroy(). */
struct concurrent_array *
concurrent_array_create(void)
{
    struct concurrent_array *array = xmalloc(sizeof *array);

    ovs_mutex_init(&array->lock);
    atomic_init(&array->impl, NULL);

    return array;
}

/* Frees 'impl' together with everything it references: every slab, the
 * array of slab pointers and the 'impl' structure itself.
 *
 * Used when the whole array is destroyed, as opposed to
 * concurrent_array_impl_free_shallow() which leaves the slabs alone. */
static void
concurrent_array_impl_free_deep(struct ca_impl *impl)
{
    struct ca_slab **slabs = impl->slabs;
    struct ca_slab **end = slabs + impl->n_slabs;

    for (struct ca_slab **slab = slabs; slab != end; slab++) {
        free(*slab);
    }
    free(slabs);
    free(impl);
}

/* Destroys 'c'.  Does nothing if 'c' is NULL.
 *
 * The current bucket array and its slabs, if any, are handed to
 * ovsrcu_postpone() for a deferred deep free, while the mutex and 'c'
 * itself are released immediately.  The caller must therefore make sure
 * that no other thread still uses 'c' when this function is called. */
void
concurrent_array_destroy(struct concurrent_array *c)
{
    if (c != NULL) {
        struct ca_impl *last;

        /* Unlink the bucket array before scheduling its release. */
        atomic_read(&c->impl, &last);
        atomic_store(&c->impl, NULL);
        if (last != NULL) {
            ovsrcu_postpone(concurrent_array_impl_free_deep, last);
        }

        ovs_mutex_destroy(&c->lock);

        free(c);
    }
}

/* Frees 'impl' and its array of slab pointers, but not the slabs.
 *
 * Used when 'impl' has been replaced by a larger instance that took over
 * the same slab pointers, which must therefore stay allocated. */
static void
concurrent_array_impl_free_shallow(struct ca_impl *impl)
{
    struct ca_slab **slabs = impl->slabs;

    free(slabs);
    free(impl);
}

/* Returns a bucket array of 'c' that holds at least 'n_slabs' slabs,
 * creating or growing it if the current one is missing or too small.
 *
 * If the currently published bucket array is large enough, it is returned
 * without taking the lock.  Otherwise a new bucket array of exactly
 * 'n_slabs' slabs is built under 'c->lock': existing slab pointers are
 * carried over, the remaining ones are newly allocated with xzalloc().
 * The new instance is published atomically and the one it replaces is
 * handed to ovsrcu_postpone() for a shallow free. */
static struct ca_impl *
concurrent_array_get_impl(struct concurrent_array *c, size_t n_slabs)
{
    struct ca_impl *cur;

    /* Fast path: no lock needed when the array is already big enough. */
    atomic_read(&c->impl, &cur);
    if (cur != NULL && n_slabs <= cur->n_slabs) {
        return cur;
    }

    ovs_mutex_lock(&c->lock);

    /* Another thread may have created or grown the bucket array before
     * the lock was acquired: look at the published pointer again. */
    atomic_read(&c->impl, &cur);
    if (cur == NULL || cur->n_slabs < n_slabs) {
        size_t n_kept = cur != NULL ? cur->n_slabs : 0;
        struct ca_impl *grown = xmalloc(sizeof *grown);

        grown->slabs = xcalloc(n_slabs, sizeof grown->slabs[0]);
        grown->n_slabs = n_slabs;

        /* The first 'n_kept' slabs are shared with the previous instance,
         * the following ones are new. */
        for (size_t i = 0; i < n_slabs; i++) {
            grown->slabs[i] = i < n_kept
                              ? cur->slabs[i]
                              : xzalloc(sizeof(struct ca_slab));
        }

        /* Publish the fully built instance first, then schedule the
         * release of the one it replaces, if any. */
        atomic_store(&c->impl, grown);
        if (cur != NULL) {
            ovsrcu_postpone(concurrent_array_impl_free_shallow, cur);
        }
        cur = grown;
    }

    ovs_mutex_unlock(&c->lock);

    return cur;
}

void
concurrent_array_set(struct concurrent_array *c, uint32_t id, void *data)
{
    size_t idx = id / CONCURRENT_ARRAY_SLAB_SIZE;
    size_t n_slabs = idx + 1;
    struct ca_impl *impl;

    impl = concurrent_array_get_impl(c, n_slabs);
    atomic_store(&impl->slabs[idx]->entries[id % CONCURRENT_ARRAY_SLAB_SIZE], (uintptr_t) data);
}

/* Returns the value stored at index 'id' of 'c'.
 *
 * Returns NULL when 'c' has no bucket array yet or when 'id' lies beyond
 * the slabs currently allocated.  This function never takes the lock and
 * never grows the array.
 *
 * Slabs are allocated with xzalloc(), so a slot that was never set is
 * expected to read back as NULL as well. */
void *
concurrent_array_get(struct concurrent_array *c, uint32_t id)
{
    size_t slab_idx = id / CONCURRENT_ARRAY_SLAB_SIZE;
    size_t slot = id % CONCURRENT_ARRAY_SLAB_SIZE;
    struct ca_impl *impl;
    uintptr_t value;

    atomic_read(&c->impl, &impl);
    if (!impl || slab_idx >= impl->n_slabs) {
        return NULL;
    }

    atomic_read(&impl->slabs[slab_idx]->entries[slot], &value);
    return (void *) value;
}
