/*
 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>
#include "coverage.h"
#include "coverage-private.h"
#include <inttypes.h>
#include <stdlib.h>
#include "openvswitch/dynamic-string.h"
#include "hash.h"
#include "svec.h"
#include "timeval.h"
#include "unixctl.h"
#include "util.h"
#include "openvswitch/vlog.h"

VLOG_DEFINE_THIS_MODULE(coverage);

/* The coverage counters.
 *
 * For each level, 'coverage_counters[level]' is an array of pointers to the
 * registered counters, 'n_coverage_counters[level]' is the number of entries
 * in use and 'allocated_coverage_counters[level]' is the array's capacity.
 * coverage_counter_register() appends to and grows these arrays. */
struct coverage_counter **coverage_counters[COVERAGE_LEVELS];
size_t n_coverage_counters[COVERAGE_LEVELS];
size_t allocated_coverage_counters[COVERAGE_LEVELS];

/* One mutex per level.  The functions in this file take the mutex of a level
 * around updates of that level's counter totals and of its 'min' and 'hr'
 * averaging arrays. */
struct ovs_mutex coverage_mutex[COVERAGE_LEVELS] = {
    [COVERAGE_LEVEL_INFO] = OVS_MUTEX_INITIALIZER,
    [COVERAGE_LEVEL_WARN] = OVS_MUTEX_INITIALIZER,
    [COVERAGE_LEVEL_ERR] = OVS_MUTEX_INITIALIZER,
};

/* Per-thread time, on the time_msec() clock, at which the thread should next
 * fold its counts into the totals.  LLONG_MIN means "not yet initialized". */
DEFINE_STATIC_PER_THREAD_DATA(long long int, coverage_clear_time, LLONG_MIN);
/* Time, on the time_msec() clock, at which coverage_run() should next update
 * the averaging arrays.  LLONG_MIN means "not yet initialized".  This single
 * value is shared by all threads and all levels. */
static long long int coverage_run_time = LLONG_MIN;

/* Index counter used to compute the moving average array's index.  It is
 * advanced by coverage_run() modulo MIN_AVG_LEN * HR_AVG_LEN and is shared by
 * all levels. */
static unsigned int idx_count = 0;

/* Forward declarations of helpers that are defined further down but used
 * before their definition. */
static void coverage_read(struct svec *, enum coverage_level);
static unsigned int coverage_array_sum(const unsigned int *arr,
                                       const unsigned int len,
                                       enum coverage_level level);

/* Appends 'counter' to the set of coverage counters tracked at 'level'.
 *
 * The per-level pointer array is enlarged with x2nrealloc() whenever every
 * allocated slot is already in use. */
void
coverage_counter_register(struct coverage_counter *counter, enum coverage_level level)
{
    size_t slot = n_coverage_counters[level];

    if (slot >= allocated_coverage_counters[level]) {
        coverage_counters[level] = x2nrealloc(coverage_counters[level],
                                              &allocated_coverage_counters[level],
                                              sizeof *coverage_counters[level]);
    }
    coverage_counters[level][slot] = counter;
    n_coverage_counters[level] = slot + 1;
}

static enum coverage_level
coverage_level_from_name(const char *name)
{
    for (enum coverage_level l = 0; l < COVERAGE_LEVELS; l++) {
        if (nullable_string_is_equal(name, coverage_level_name(l))) {
            return l;
        }
    }
    return COVERAGE_LEVELS;
}

/* unixctl handler for "coverage/show [level]".
 *
 * With no argument, reports the counters of every level; with one argument,
 * reports only the level named by 'argv[1]' and replies with the error
 * "Invalid level" if the name does not match any level.
 *
 * coverage_read() produces pairs of lines: a one-character level tag followed
 * by the text.  Only the text lines (odd indexes) are sent in the reply, each
 * terminated by a newline. */
static void
coverage_unixctl_show(struct unixctl_conn *conn, int argc,
                      const char *argv[], void *aux OVS_UNUSED)
{
    enum coverage_level first = 0;
    enum coverage_level end = COVERAGE_LEVELS;
    enum coverage_level level;
    struct svec lines;
    const char *line;
    struct ds reply;
    size_t i;

    /* Validate the optional level before allocating anything, so that the
     * error path has nothing to clean up. */
    if (argc > 1) {
        first = coverage_level_from_name(argv[1]);
        if (first == COVERAGE_LEVELS) {
            unixctl_command_reply_error(conn, "Invalid level");
            return;
        }
        end = first + 1;
    }

    svec_init(&lines);
    for (level = first; level < end; level++) {
        coverage_read(&lines, level);
    }

    ds_init(&reply);
    SVEC_FOR_EACH (i, line, &lines) {
        if ((i & 1) == 0) {
            /* Level tag line, not meant for display. */
            continue;
        }
        ds_put_cstr(&reply, line);
        ds_put_cstr(&reply, "\n");
    }
    unixctl_command_reply(conn, ds_cstr(&reply));
    ds_destroy(&reply);
    svec_destroy(&lines);
}

/* unixctl handler for "coverage/read-counter COUNTER".
 *
 * Replies with the value that coverage_read_counter() reports for the counter
 * named 'argv[1]', printed as a decimal number followed by a newline, or with
 * the error "No such counter" if the lookup fails. */
static void
coverage_unixctl_read_counter(struct unixctl_conn *conn, int argc OVS_UNUSED,
                              const char *argv[], void *aux OVS_UNUSED)
{
    unsigned long long total;

    if (coverage_read_counter(argv[1], &total)) {
        char *text = xasprintf("%llu\n", total);

        unixctl_command_reply(conn, text);
        free(text);
    } else {
        unixctl_command_reply_error(conn, "No such counter");
    }
}

/* Registers the "coverage/show" and "coverage/read-counter" unixctl commands
 * and then calls coverage_metrics_init().
 *
 * "coverage/show" is registered with the usage string "[level]" and the
 * numeric arguments 0 and 1; "coverage/read-counter" with "COUNTER" and 1
 * and 1 (presumably the minimum and maximum argument counts -- confirm
 * against unixctl_command_register()). */
void
coverage_init(void)
{
    unixctl_command_register("coverage/show", "[level]", 0, 1,
                             coverage_unixctl_show, NULL);
    unixctl_command_register("coverage/read-counter", "COUNTER", 1, 1,
                             coverage_unixctl_read_counter, NULL);
    coverage_metrics_init();
}

/* qsort() comparison function for an array of 'struct coverage_counter *'.
 *
 * Orders counters with larger totals first; counters whose totals are equal
 * are ordered by strcmp() of their names. */
static int
compare_coverage_counters(const void *a_, const void *b_)
{
    const struct coverage_counter *lhs = *(const struct coverage_counter *const *) a_;
    const struct coverage_counter *rhs = *(const struct coverage_counter *const *) b_;

    if (lhs->total > rhs->total) {
        return -1;
    }
    if (lhs->total < rhs->total) {
        return 1;
    }
    return strcmp(lhs->name, rhs->name);
}

static uint32_t
coverage_hash(enum coverage_level level)
{
    struct coverage_counter **c;
    uint32_t hash = 0;
    int n_groups, i;

    /* Sort coverage counters into groups with equal totals. */
    c = xmalloc(n_coverage_counters[level] * sizeof *c);
    ovs_mutex_lock(&coverage_mutex[level]);
    for (i = 0; i < n_coverage_counters[level]; i++) {
        c[i] = coverage_counters[level][i];
    }
    ovs_mutex_unlock(&coverage_mutex[level]);
    qsort(c, n_coverage_counters[level], sizeof *c, compare_coverage_counters);

    /* Hash the names in each group along with the rank. */
    n_groups = 0;
    i = 0;
    while (i < n_coverage_counters[level]) {
        int j;

        if (!c[i]->total) {
            break;
        }
        n_groups++;
        hash = hash_int(i, hash);
        for (j = i; j < n_coverage_counters[level]; j++) {
            if (c[j]->total != c[i]->total) {
                break;
            }
            hash = hash_string(c[j]->name, hash);
        }
        i = j;
    }

    free(c);

    return hash_int(n_groups, hash);
}

/* Records 'hash' in a fixed-size bitmap and reports whether its bit was
 * already set.
 *
 * Only the low bits of 'hash' select the bit, so distinct hashes can share a
 * bit.  The bitmap is wiped whenever time_msec() has reached 'next_clear',
 * which is then moved one day into the future.
 *
 * Returns true if the bit for 'hash' was set before this call, false if this
 * call set it. */
static bool
coverage_hit(uint32_t hash)
{
    enum { HIT_BITS = 1024, BITS_PER_WORD = 32 };
    static uint32_t hit[HIT_BITS / BITS_PER_WORD];
    static long long int next_clear = LLONG_MIN;
    BUILD_ASSERT_DECL(IS_POW2(HIT_BITS));

    const unsigned int bit = hash & (HIT_BITS - 1);
    uint32_t *word = &hit[bit / BITS_PER_WORD];
    const uint32_t mask = 1u << (bit % BITS_PER_WORD);
    bool seen;

    /* Expire coverage hash suppression once a day. */
    if (time_msec() >= next_clear) {
        memset(hit, 0, sizeof hit);
        next_clear = time_msec() + 60 * 60 * 24 * 1000LL;
    }

    seen = (*word & mask) != 0;
    *word |= mask;
    return seen;
}

/* Translates the level tag stored in the first character of 'line' into the
 * vlog level used to print the text line that follows the tag.
 *
 * The tag is compared against the numeric values of the coverage levels; any
 * other value reaches OVS_NOT_REACHED(). */
static enum vlog_level
coverage_vlog_level(const char *line)
{
    switch (line[0]) {
    case COVERAGE_LEVEL_INFO:
        return VLL_INFO;
    case COVERAGE_LEVEL_WARN:
        return VLL_WARN;
    case COVERAGE_LEVEL_ERR:
        return VLL_ERR;
    default:
        break;
    }

    OVS_NOT_REACHED();
    return VLL_EMER;
}

/* Logs the coverage counters of 'level', unless a similar set of events has
 * already been logged.
 *
 * Each level has its own rate limiter, set up on first use; when the limiter
 * drops the message nothing is logged.  Otherwise, if coverage_hit() reports
 * that the current coverage_hash() was seen before, a single INFO message
 * noting the duplicate is logged.  Failing that, every text line produced by
 * coverage_read() is logged at the vlog level given by the tag line that
 * precedes it.
 *
 * Use care before raising these log levels, because depending on its
 * configuration, syslogd can write changes synchronously, which can cause the
 * coverage messages to take several seconds to write. */
static void
coverage_log__(enum coverage_level level)
{
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
    static struct vlog_rate_limit rl[COVERAGE_LEVELS];
    enum vlog_level vl_level = 0;
    struct svec lines;
    const char *line;
    uint32_t hash;
    size_t i;

    if (ovsthread_once_start(&once)) {
        enum coverage_level l;

        for (l = 0; l < COVERAGE_LEVELS; l++) {
            rl[l] = (struct vlog_rate_limit) VLOG_RATE_LIMIT_INIT(1, 3);
        }
        ovsthread_once_done(&once);
    }

    if (VLOG_DROP_INFO(&rl[level])) {
        return;
    }

    hash = coverage_hash(level);
    if (coverage_hit(hash)) {
        VLOG_INFO("Skipping details of duplicate event coverage for "
                  "hash=%08"PRIx32, hash);
        return;
    }

    svec_init(&lines);
    coverage_read(&lines, level);
    SVEC_FOR_EACH (i, line, &lines) {
        if (i & 1) {
            /* Text line: print it at the level of the preceding tag. */
            VLOG(vl_level, "%s", line);
        } else {
            /* Tag line: remember the level for the next text line. */
            vl_level = coverage_vlog_level(line);
        }
    }
    svec_destroy(&lines);
}

void
coverage_log(void)
{
    enum coverage_level level;

    for (level = 0; level < COVERAGE_LEVELS; level++) {
        coverage_log__(level);
    }
}

/* Adds coverage counter information for 'level' to 'lines'.
 *
 * Lines are appended in pairs: first a one-character string holding the
 * numeric coverage level at which the following text should be reported, then
 * the text itself.  The output consists of a header (tagged
 * COVERAGE_LEVEL_INFO), one entry tagged 'level' for every counter whose total
 * is nonzero, and a trailer (tagged COVERAGE_LEVEL_INFO) giving the number of
 * counters whose total is zero.
 *
 * All strings appended to 'lines' are allocated here and handed over to
 * 'lines' with svec_add_nocopy(). */
static void
coverage_read(struct svec *lines, enum coverage_level level)
{
    struct coverage_counter **c = coverage_counters[level];
    /* Read the count once so that the 'totals' allocation and both loops
     * agree on the number of elements. */
    const size_t n_counters = n_coverage_counters[level];
    /* Index in 'min' of the most recently written slot, that is, the one just
     * before 'idx_count'.  MIN_AVG_LEN is added before subtracting so that the
     * unsigned arithmetic cannot wrap around when 'idx_count' is 0; a wrapped
     * value would select an unrelated slot unless MIN_AVG_LEN happened to be a
     * power of two. */
    const unsigned int last_slot = (idx_count + MIN_AVG_LEN - 1) % MIN_AVG_LEN;
    unsigned long long int *totals;
    size_t n_never_hit = 0;
    uint32_t hash;
    size_t i;

    hash = coverage_hash(level);

    svec_add_nocopy(lines, xasprintf("%c", COVERAGE_LEVEL_INFO));
    svec_add_nocopy(lines,
                    xasprintf("Event coverage level %s, avg rate over last: %d "
                              "seconds, last minute, last hour,  "
                              "hash=%08"PRIx32":", coverage_level_name(level),
                              COVERAGE_RUN_INTERVAL/1000, hash));

    /* Copy the totals while holding the mutex so that the formatting below
     * does not have to hold it. */
    totals = xmalloc(n_counters * sizeof *totals);
    ovs_mutex_lock(&coverage_mutex[level]);
    for (i = 0; i < n_counters; i++) {
        totals[i] = c[i]->total;
    }
    ovs_mutex_unlock(&coverage_mutex[level]);

    for (i = 0; i < n_counters; i++) {
        if (!totals[i]) {
            n_never_hit++;
            continue;
        }

        /* Shows the averaged per-second rates for the last
         * COVERAGE_RUN_INTERVAL interval, the last minute and
         * the last hour. */
        svec_add_nocopy(lines, xasprintf("%c", level));
        svec_add_nocopy(lines,
            xasprintf("%-24s %5.1f/sec %9.3f/sec "
                      "%13.4f/sec   total: %llu",
                      c[i]->name,
                      (c[i]->min[last_slot]
                       * 1000.0 / COVERAGE_RUN_INTERVAL),
                      coverage_array_sum(c[i]->min, MIN_AVG_LEN, level) / 60.0,
                      coverage_array_sum(c[i]->hr,  HR_AVG_LEN, level) / 3600.0,
                      totals[i]));
    }

    svec_add_nocopy(lines, xasprintf("%c", COVERAGE_LEVEL_INFO));
    svec_add_nocopy(lines, xasprintf("%"PRIuSIZE" events never hit", n_never_hit));
    free(totals);
}

/* Adds the value returned by each counter's count() callback to that
 * counter's total, for every counter registered at 'level', while holding
 * 'coverage_mutex[level]'.
 *
 * If 'trylock' is true the mutex is acquired with ovs_mutex_trylock(), to
 * keep performance-critical threads from contending over it; when the mutex
 * cannot be acquired nothing is updated.
 *
 * Returns true if the totals were updated, false if the mutex was busy. */
static bool
coverage_clear_level__(bool trylock, enum coverage_level level)
{
    size_t i;

    if (trylock) {
        /* A nonzero result means the mutex could not be acquired. */
        if (ovs_mutex_trylock(&coverage_mutex[level])) {
            return false;
        }
    } else {
        ovs_mutex_lock(&coverage_mutex[level]);
    }

    for (i = 0; i < n_coverage_counters[level]; i++) {
        struct coverage_counter *c = coverage_counters[level][i];
        c->total += c->count();
    }
    ovs_mutex_unlock(&coverage_mutex[level]);

    return true;
}

/* Runs approximately every COVERAGE_CLEAR_INTERVAL amount of time to
 * synchronize per-thread counters with global counters.  Every thread
 * maintains a separate timer to ensure all counters are periodically
 * aggregated.
 *
 * The first call on a thread only arms that thread's timer.  Once the timer
 * has expired, every level is processed with coverage_clear_level__().  The
 * timer is re-armed only if every level was processed: when 'trylock' is true
 * and some mutex was busy, the timer stays expired so that the next call
 * tries again instead of waiting another full interval. */
static void
coverage_clear__(bool trylock)
{
    long long int *thread_time = coverage_clear_time_get();
    long long int now = time_msec();
    enum coverage_level level;
    bool all_cleared = true;

    /* Initialize the coverage_clear_time. */
    if (*thread_time == LLONG_MIN) {
        *thread_time = now + COVERAGE_CLEAR_INTERVAL;
    }

    if (now < *thread_time) {
        return;
    }

    for (level = 0; level < COVERAGE_LEVELS; level++) {
        if (!coverage_clear_level__(trylock, level)) {
            all_cleared = false;
        }
    }

    if (all_cleared) {
        *thread_time = now + COVERAGE_CLEAR_INTERVAL;
    }
}

/* Folds the calling thread's counts into the totals of every level if the
 * thread's COVERAGE_CLEAR_INTERVAL timer has expired, blocking on each
 * level's mutex as needed. */
void
coverage_clear(void)
{
    coverage_clear__(false);
}

/* Like coverage_clear(), but never blocks: a level whose mutex is busy is
 * skipped and the whole operation is retried on the next call. */
void
coverage_try_clear(void)
{
    coverage_clear__(true);
}

/* Runs approximately every COVERAGE_RUN_INTERVAL amount of time to update the
 * coverage counters' 'min' and 'hr' array.  'min' array is for cumulating
 * per second counts into per minute count.  'hr' array is for cumulating per
 * minute counts into per hour count.  Every thread may call this function. */
static void
coverage_run__(enum coverage_level level)
{
    struct coverage_counter **c = coverage_counters[level];
    long long int now;

    ovs_mutex_lock(&coverage_mutex[level]);
    now = time_msec();
    /* Initialize the coverage_run_time. */
    if (coverage_run_time == LLONG_MIN) {
        coverage_run_time = now + COVERAGE_RUN_INTERVAL;
    }

    if (now >= coverage_run_time) {
        size_t i, j;
        /* Computes the number of COVERAGE_RUN_INTERVAL slots, since
         * it is possible that the actual run interval is multiple of
         * COVERAGE_RUN_INTERVAL. */
        int slots = (now - coverage_run_time) / COVERAGE_RUN_INTERVAL + 1;

        for (i = 0; i < n_coverage_counters[level]; i++) {
            unsigned int count, portion;
            unsigned int idx = idx_count;

            /* Computes the differences between the current total and the one
             * recorded in last invocation of coverage_run(). */
            count = c[i]->total - c[i]->last_total;
            c[i]->last_total = c[i]->total;
            /* The count over the time interval is evenly distributed
             * among slots by calculating the portion. */
            portion = count / slots;

            for (j = 0; j < slots; j++) {
                /* Updates the index variables. */
                /* The m_idx is increased from 0 to MIN_AVG_LEN - 1. Every
                 * time the m_idx finishes a cycle (a cycle is one minute),
                 * the h_idx is incremented by 1. */
                unsigned int m_idx = idx % MIN_AVG_LEN;
                unsigned int h_idx = idx / MIN_AVG_LEN;

                c[i]->min[m_idx] = portion + (j == (slots - 1)
                                              ? count % slots : 0);
                c[i]->hr[h_idx] = m_idx == 0
                                  ? c[i]->min[m_idx]
                                  : (c[i]->hr[h_idx] + c[i]->min[m_idx]);
                /* This is to guarantee that h_idx ranges from 0 to 59. */
                idx = (idx + 1) % (MIN_AVG_LEN * HR_AVG_LEN);
            }
        }

        /* Updates the global index variables. */
        idx_count = (idx_count + slots) % (MIN_AVG_LEN * HR_AVG_LEN);
        /* Updates the run time. */
        coverage_run_time = now + COVERAGE_RUN_INTERVAL;
    }
    ovs_mutex_unlock(&coverage_mutex[level]);
}

void
coverage_run(void)
{
    enum coverage_level level;

    for (level = 0; level < COVERAGE_LEVELS; level++) {
        coverage_run__(level);
    }
}

/* Returns the sum of the 'len' elements of 'arr', computed in unsigned int
 * arithmetic while holding 'coverage_mutex[level]'. */
static unsigned int
coverage_array_sum(const unsigned int *arr, const unsigned int len, enum coverage_level level)
{
    const unsigned int *const end = arr + len;
    const unsigned int *p;
    unsigned int total = 0;

    ovs_mutex_lock(&coverage_mutex[level]);
    for (p = arr; p != end; p++) {
        total += *p;
    }
    ovs_mutex_unlock(&coverage_mutex[level]);

    return total;
}

/* Looks up the coverage counter called 'name', searching the levels in
 * ascending order and the counters of each level in registration order.
 *
 * On a match, adds the value returned by the counter's count() callback to
 * its total while holding the level's mutex, stores the resulting total in
 * '*count' and returns true.  Returns false, leaving '*count' untouched, if
 * no counter has that name. */
bool
coverage_read_counter(const char *name, unsigned long long int *count)
{
    enum coverage_level l;
    size_t i;

    for (l = 0; l < COVERAGE_LEVELS; l++) {
        for (i = 0; i < n_coverage_counters[l]; i++) {
            struct coverage_counter *c = coverage_counters[l][i];

            if (strcmp(c->name, name)) {
                continue;
            }

            ovs_mutex_lock(&coverage_mutex[l]);
            c->total += c->count();
            *count = c->total;
            ovs_mutex_unlock(&coverage_mutex[l]);
            return true;
        }
    }

    return false;
}

/* Returns the human-readable name of 'level': "info", "warning" or "error".
 *
 * Any other value, including COVERAGE_LEVELS itself, reaches
 * OVS_NOT_REACHED(). */
const char *
coverage_level_name(enum coverage_level level)
{
    if (level == COVERAGE_LEVEL_INFO) {
        return "info";
    }
    if (level == COVERAGE_LEVEL_WARN) {
        return "warning";
    }
    if (level == COVERAGE_LEVEL_ERR) {
        return "error";
    }

    OVS_NOT_REACHED();
    return "ERR";
}
