Merging upstream version 2.11.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-02-09 09:06:16 +01:00 · 2025-02-09 09:06:16 +01:00 · 510d3566ee
commit 510d3566ee
parent 934790e30b
50 changed files with 895 additions and 132 deletions
--- a/src/dnsperf.c
+++ b/src/dnsperf.c
@ -1,5 +1,5 @@
 /*
- * Copyright 2019-2022 OARC, Inc.
+ * Copyright 2019-2023 OARC, Inc.
 * Copyright 2017-2018 Akamai Technologies
 * Copyright 2006-2016 Nominum, Inc.
 * All rights reserved.
@ -32,6 +32,10 @@
 #include "util.h"
 #include "list.h"
 #include "buffer.h"
+#if HAVE_STDATOMIC_H
+#include "ext/hg64.h"
+#define USE_HISTOGRAMS
+#endif

 #include <inttypes.h>
 #include <errno.h>
@ -49,6 +53,8 @@
 #include <openssl/conf.h>
 #include <openssl/err.h>

+#define HISTOGRAM_SIGBITS 5 /* about 3 % latency precision */
+
 #define DEFAULT_SERVER_NAME "127.0.0.1"
 #define DEFAULT_SERVER_PORT 53
 #define DEFAULT_SERVER_DOT_PORT 853
@ -85,6 +91,7 @@ typedef struct {
    uint32_t            max_outstanding;
    uint32_t            max_qps;
    uint64_t            stats_interval;
+    bool                verbose_interval_stats;
    bool                updates;
    bool                binary_input;
    perf_input_format_t input_format;
@ -92,6 +99,9 @@ typedef struct {
    enum perf_net_mode  mode;
    perf_suppress_t     suppress;
    size_t              num_queries_per_conn;
+#ifdef USE_HISTOGRAMS
+    bool latency_histogram;
+#endif
 } config_t;

 typedef struct {
@ -101,8 +111,9 @@ typedef struct {
    struct timespec stop_time_ns;
 } times_t;

+#define DNSPERF_STATS_RCODECOUNTS 16
 typedef struct {
-    uint64_t rcodecounts[16];
+    uint64_t rcodecounts[DNSPERF_STATS_RCODECOUNTS];

    uint64_t num_sent;
    uint64_t num_interrupted;
@ -124,6 +135,11 @@ typedef struct {
    uint64_t conn_latency_sum_squares;
    uint64_t conn_latency_min;
    uint64_t conn_latency_max;
+
+#ifdef USE_HISTOGRAMS
+    hg64* latency;
+    hg64* conn_latency;
+#endif
 } stats_t;

 typedef perf_list(struct query_info) query_list;
@ -257,7 +273,86 @@ stddev(uint64_t sum_of_squares, uint64_t sum, uint64_t total)
 }

 static void
-print_statistics(const config_t* config, const times_t* times, stats_t* stats)
+diff_stats(const config_t* config, stats_t* last, stats_t* now, stats_t* diff)
+{
+    int i = 0;
+    for (; i < DNSPERF_STATS_RCODECOUNTS; i++) {
+        diff->rcodecounts[i] = now->rcodecounts[i] - last->rcodecounts[i];
+    }
+
+    diff->num_sent        = now->num_sent - last->num_sent;
+    diff->num_interrupted = now->num_interrupted - last->num_interrupted;
+    diff->num_timedout    = now->num_timedout - last->num_timedout;
+    diff->num_completed   = now->num_completed - last->num_completed;
+
+    diff->total_request_size  = now->total_request_size - last->total_request_size;
+    diff->total_response_size = now->total_response_size - last->total_response_size;
+
+    diff->latency_sum         = now->latency_sum - last->latency_sum;
+    diff->latency_sum_squares = now->latency_sum_squares - last->latency_sum_squares;
+    diff->latency_min         = 0; /* not enough data */
+    diff->latency_max         = 0;
+
+    diff->num_conn_reconnect = now->num_conn_reconnect - last->num_conn_reconnect;
+    diff->num_conn_completed = now->num_conn_completed - last->num_conn_completed;
+
+    diff->conn_latency_sum         = now->conn_latency_sum - last->conn_latency_sum;
+    diff->conn_latency_sum_squares = now->conn_latency_sum_squares - last->conn_latency_sum_squares;
+    diff->conn_latency_min         = 0;
+    diff->conn_latency_max         = 0;
+
+#ifdef USE_HISTOGRAMS
+    if (config->latency_histogram) {
+        free(diff->latency);
+        diff->latency = hg64_create(HISTOGRAM_SIGBITS);
+        if (last->latency) {
+            hg64_diff(now->latency, last->latency, diff->latency);
+        } else { /* first sample */
+            hg64_merge(diff->latency, now->latency);
+        }
+        hg64_get(diff->latency, hg64_min_key(diff->latency), &diff->latency_min, NULL, NULL);
+        hg64_get(diff->latency, hg64_max_key(diff->latency), NULL, &diff->latency_max, NULL);
+
+        free(diff->conn_latency);
+        diff->conn_latency = hg64_create(HISTOGRAM_SIGBITS);
+        if (last->conn_latency) {
+            hg64_diff(now->conn_latency, last->conn_latency, diff->conn_latency);
+        } else { /* first sample */
+            hg64_merge(diff->conn_latency, now->conn_latency);
+        }
+        hg64_get(diff->conn_latency, hg64_min_key(diff->conn_latency), &diff->conn_latency_min, NULL, NULL);
+        hg64_get(diff->conn_latency, hg64_max_key(diff->conn_latency), NULL, &diff->conn_latency_max, NULL);
+    }
+#endif
+}
+
+#ifdef USE_HISTOGRAMS
+static void
+print_histogram(hg64* histogram, const char* const desc)
+{
+    printf("  Latency bucket (s):   %s\n", desc);
+    uint64_t pmin, pmax, pcount;
+    for (unsigned key = 0;
+         hg64_get(histogram, key, &pmin, &pmax, &pcount) == true;
+         key = hg64_next(histogram, key)) {
+        if (pcount == 0)
+            continue;
+        printf("  %" PRIu64 ".%06" PRIu64 " - %" PRIu64 ".%06" PRIu64 ":  %" PRIu64 "\n",
+            pmin / MILLION,
+            pmin % MILLION,
+            pmax / MILLION,
+            pmax % MILLION,
+            pcount);
+    };
+}
+#endif
+
+/*
+ * now != 0 is call to print stats in the middle of test run.
+ * min-max values are not available on per-interval basis, so skip them.
+ */
+static void
+print_statistics(const config_t* config, const times_t* times, stats_t* stats, uint64_t now, uint64_t interval_time)
 {
    const char*  units;
    uint64_t     run_time;
@ -267,9 +362,12 @@ print_statistics(const config_t* config, const times_t* times, stats_t* stats)

    units = config->updates ? "Updates" : "Queries";

-    run_time = times->end_time - times->start_time;
+    if (now)
+        run_time = now - times->start_time;
+    else
+        run_time = times->end_time - times->start_time;

-    printf("Statistics:\n\n");
+    printf("%sStatistics:\n\n", now ? "Interval " : "");

    printf("  %s sent:         %" PRIu64 "\n",
        units, stats->num_sent);
@ -287,7 +385,7 @@ print_statistics(const config_t* config, const times_t* times, stats_t* stats)

    printf("  Response codes:       ");
    first_rcode = true;
-    for (i = 0; i < 16; i++) {
+    for (i = 0; i < DNSPERF_STATS_RCODECOUNTS; i++) {
        if (stats->rcodecounts[i] == 0)
            continue;
        if (first_rcode)
@ -308,23 +406,33 @@ print_statistics(const config_t* config, const times_t* times, stats_t* stats)
        (unsigned int)(run_time / MILLION),
        (unsigned int)(run_time % MILLION));
    printf("  %s per second:   %.6lf\n", units,
-        PERF_SAFE_DIV(stats->num_completed, (((double)run_time) / MILLION)));
+        PERF_SAFE_DIV(stats->num_completed, (((double)(now ? interval_time : run_time) / MILLION))));

    printf("\n");

    latency_avg = PERF_SAFE_DIV(stats->latency_sum, stats->num_completed);
-    printf("  Average Latency (s):  %u.%06u (min %u.%06u, max %u.%06u)\n",
+    printf("  Average Latency (s):  %u.%06u",
        (unsigned int)(latency_avg / MILLION),
-        (unsigned int)(latency_avg % MILLION),
-        (unsigned int)(stats->latency_min / MILLION),
-        (unsigned int)(stats->latency_min % MILLION),
-        (unsigned int)(stats->latency_max / MILLION),
-        (unsigned int)(stats->latency_max % MILLION));
+        (unsigned int)(latency_avg % MILLION));
+    if (!now) {
+        printf(" (min %u.%06u, max %u.%06u)\n",
+            (unsigned int)(stats->latency_min / MILLION),
+            (unsigned int)(stats->latency_min % MILLION),
+            (unsigned int)(stats->latency_max / MILLION),
+            (unsigned int)(stats->latency_max % MILLION));
+    } else {
+        printf("\n");
+    }
+
    if (stats->num_completed > 1) {
        printf("  Latency StdDev (s):   %f\n",
            stddev(stats->latency_sum_squares, stats->latency_sum,
                stats->num_completed)
                / MILLION);
+#ifdef USE_HISTOGRAMS
+        if (config->latency_histogram)
+            print_histogram(stats->latency, "answer count");
+#endif
    }

    printf("\n");
@ -336,32 +444,56 @@ print_statistics(const config_t* config, const times_t* times, stats_t* stats)
    printf("Connection Statistics:\n\n");
    printf("  Reconnections:        %" PRIu64 "\n\n", stats->num_conn_reconnect);
    latency_avg = PERF_SAFE_DIV(stats->conn_latency_sum, stats->num_conn_completed);
-    printf("  Average Latency (s):  %u.%06u (min %u.%06u, max %u.%06u)\n",
+    printf("  Average Latency (s):  %u.%06u",
        (unsigned int)(latency_avg / MILLION),
-        (unsigned int)(latency_avg % MILLION),
-        (unsigned int)(stats->conn_latency_min / MILLION),
-        (unsigned int)(stats->conn_latency_min % MILLION),
-        (unsigned int)(stats->conn_latency_max / MILLION),
-        (unsigned int)(stats->conn_latency_max % MILLION));
+        (unsigned int)(latency_avg % MILLION));
+    if (!now) {
+        printf(" (min %u.%06u, max %u.%06u)\n",
+            (unsigned int)(stats->conn_latency_min / MILLION),
+            (unsigned int)(stats->conn_latency_min % MILLION),
+            (unsigned int)(stats->conn_latency_max / MILLION),
+            (unsigned int)(stats->conn_latency_max % MILLION));
+    } else {
+        printf("\n");
+    }
    if (stats->num_conn_completed > 1) {
        printf("  Latency StdDev (s):   %f\n",
            stddev(stats->conn_latency_sum_squares, stats->conn_latency_sum, stats->num_conn_completed) / MILLION);
+#ifdef USE_HISTOGRAMS
+        if (config->latency_histogram)
+            print_histogram(stats->latency, "connection count");
+#endif
    }

    printf("\n");
 }

+/*
+ * Caller must free() stats->latency and stats->conn_latency.
+ */
 static void
 sum_stats(const config_t* config, stats_t* total)
 {
    unsigned int i, j;

    memset(total, 0, sizeof(*total));
+#ifdef USE_HISTOGRAMS
+    if (config->latency_histogram) {
+        total->latency      = hg64_create(HISTOGRAM_SIGBITS);
+        total->conn_latency = hg64_create(HISTOGRAM_SIGBITS);
+    }
+#endif

    for (i = 0; i < config->threads; i++) {
        stats_t* stats = &threads[i].stats;
+#ifdef USE_HISTOGRAMS
+        if (config->latency_histogram) {
+            hg64_merge(total->latency, stats->latency);
+            hg64_merge(total->conn_latency, stats->conn_latency);
+        }
+#endif

-        for (j = 0; j < 16; j++)
+        for (j = 0; j < DNSPERF_STATS_RCODECOUNTS; j++)
            total->rcodecounts[j] += stats->rcodecounts[j];

        total->num_sent += stats->num_sent;
@ -499,6 +631,12 @@ setup(int argc, char** argv, config_t* config)
        "suppress messages/warnings, see man-page for list of message types", NULL, &local_suppress);
    perf_long_opt_add("num-queries-per-conn", perf_opt_uint, "queries",
        "Number of queries to send per connection", NULL, &config->num_queries_per_conn);
+    perf_long_opt_add("verbose-interval-stats", perf_opt_boolean, NULL,
+        "print detailed statistics for each stats_interval", NULL, &config->verbose_interval_stats);
+#ifdef USE_HISTOGRAMS
+    perf_long_opt_add("latency-histogram", perf_opt_boolean, NULL,
+        "collect and print detailed latency histograms", NULL, &config->latency_histogram);
+#endif

    bool log_stdout = false;
    perf_opt_add('W', perf_opt_boolean, NULL, "log warnings and errors to stdout instead of stderr", NULL, &log_stdout);
@ -574,14 +712,18 @@ setup(int argc, char** argv, config_t* config)
     * If we run more threads than max-qps, some threads will have
     * ->max_qps set to 0, and be unlimited.
     */
-    if (config->max_qps > 0 && config->threads > config->max_qps)
+    if (config->max_qps > 0 && config->threads > config->max_qps) {
+        perf_log_warning("requested max QPS limit (-Q %u) is lower than number of threads (-T %u), lowering number of threads", config->max_qps, config->threads);
        config->threads = config->max_qps;
+    }

    /*
     * We also can't run more threads than clients.
     */
-    if (config->threads > config->clients)
+    if (config->threads > config->clients) {
+        perf_log_warning("requested number of threads (-T %u) exceeds number of clients (-c %u), lowering number of threads\n", config->threads, config->clients);
        config->threads = config->clients;
+    }

 #ifndef HAVE_LDNS
    if (config->updates) {
@ -636,6 +778,10 @@ query_move(threadinfo_t* tinfo, query_info* q, query_move_op op)
 static inline uint64_t
 num_outstanding(const stats_t* stats)
 {
+    /* make sure negative values aren't returned */
+    if (stats->num_completed + stats->num_timedout > stats->num_sent) {
+        return 0;
+    }
    return stats->num_sent - stats->num_completed - stats->num_timedout;
 }

@ -920,7 +1066,10 @@ recv_one(threadinfo_t* tinfo, int which_sock,
    }
    if (!n) {
        // Treat connection closed like try again until reconnection features are in
-        *saved_errnop = EAGAIN;
+        if (!*saved_errnop) {
+            // only set this if there was no error before to allow above error check to overwrite EAGAIN
+            *saved_errnop = EAGAIN;
+        }
        return false;
    }
    recvd->sock           = tinfo->socks[which_sock];
@ -1006,8 +1155,6 @@ do_recv(void* arg)
                    break;
                }
                bit_set(socketbits, current_socket);
-                if (saved_errno != EAGAIN)
-                    break;
            }
            if (j == tinfo->nsocks)
                break;
@ -1039,11 +1186,13 @@ do_recv(void* arg)
                perf_log_warning("received short response");
                continue;
            }
-            if (recvd[i].unexpected && !tinfo->config->suppress.unexpected) {
-                perf_log_warning("received a response with an "
-                                 "unexpected (maybe timed out) "
-                                 "id: %u",
-                    recvd[i].qid);
+            if (recvd[i].unexpected) {
+                if (!tinfo->config->suppress.unexpected) {
+                    perf_log_warning("received a response with an "
+                                     "unexpected (maybe timed out) "
+                                     "id: %u",
+                        recvd[i].qid);
+                }
                continue;
            }
            latency = recvd[i].when - recvd[i].sent;
@ -1061,6 +1210,11 @@ do_recv(void* arg)
            stats->total_response_size += recvd[i].size;
            stats->rcodecounts[recvd[i].rcode]++;
            stats->latency_sum += latency;
+#ifdef USE_HISTOGRAMS
+            if (stats->latency) {
+                hg64_inc(stats->latency, latency);
+            }
+#endif
            stats->latency_sum_squares += (latency * latency);
            if (latency < stats->latency_min || stats->num_completed == 1)
                stats->latency_min = latency;
@ -1097,18 +1251,17 @@ static void*
 do_interval_stats(void* arg)
 {
    threadinfo_t*          tinfo;
-    stats_t                total;
+    stats_t                total = {};
+    stats_t                last  = {};
+    stats_t                diff  = {};
    uint64_t               now;
    uint64_t               last_interval_time;
-    uint64_t               last_completed;
    uint64_t               interval_time;
-    uint64_t               num_completed;
    double                 qps;
    struct perf_net_socket sock = { .mode = sock_pipe, .fd = threadpipe[0] };

    tinfo              = arg;
    last_interval_time = tinfo->times->start_time;
-    last_completed     = 0;

    wait_for_start();
    while (perf_os_waituntilreadable(&sock, threadpipe[0],
@ -1117,13 +1270,23 @@ do_interval_stats(void* arg)
        now = perf_get_time();
        sum_stats(tinfo->config, &total);
        interval_time = now - last_interval_time;
-        num_completed = total.num_completed - last_completed;
-        qps           = num_completed / (((double)interval_time) / MILLION);
-        perf_log_printf("%u.%06u: %.6lf",
-            (unsigned int)(now / MILLION),
-            (unsigned int)(now % MILLION), qps);
+
+        if (tinfo->config->verbose_interval_stats) {
+            diff_stats(tinfo->config, &last, &total, &diff);
+            print_statistics(tinfo->config, tinfo->times, &diff, now, interval_time);
+        } else {
+            qps = (total.num_completed - last.num_completed) / (((double)interval_time) / MILLION);
+            perf_log_printf("%u.%06u: %.6lf",
+                (unsigned int)(now / MILLION),
+                (unsigned int)(now % MILLION), qps);
+        }
+
        last_interval_time = now;
-        last_completed     = total.num_completed;
+#ifdef USE_HISTOGRAMS
+        free(last.latency);
+        free(last.conn_latency);
+#endif
+        last = total;
    }

    return NULL;
@ -1189,6 +1352,11 @@ static void perf__net_event(struct perf_net_socket* sock, perf_socket_event_t ev
    case perf_socket_event_connected:
        stats->num_conn_completed++;

+#ifdef USE_HISTOGRAMS
+        if (stats->conn_latency) {
+            hg64_inc(stats->conn_latency, elapsed_time);
+        }
+#endif
        stats->conn_latency_sum += elapsed_time;
        stats->conn_latency_sum_squares += (elapsed_time * elapsed_time);
        if (elapsed_time < stats->conn_latency_min || stats->num_conn_completed == 1)
@ -1218,6 +1386,12 @@ threadinfo_init(threadinfo_t* tinfo, const config_t* config,

    perf_list_init(tinfo->outstanding_queries);
    perf_list_init(tinfo->unused_queries);
+#ifdef USE_HISTOGRAMS
+    if (config->latency_histogram) {
+        tinfo->stats.latency      = hg64_create(HISTOGRAM_SIGBITS);
+        tinfo->stats.conn_latency = hg64_create(HISTOGRAM_SIGBITS);
+    }
+#endif
    for (i = 0; i < NQIDS; i++) {
        perf_link_init(&tinfo->queries[i]);
        perf_list_append(tinfo->unused_queries, &tinfo->queries[i]);
@ -1240,11 +1414,15 @@ threadinfo_init(threadinfo_t* tinfo, const config_t* config,
    /*
     * We can't have more than 64k outstanding queries per thread.
     */
-    if (tinfo->max_outstanding > NQIDS)
+    if (tinfo->max_outstanding > NQIDS) {
+        perf_log_warning("requested number of outstanding queries (-q %u) per single thread (-T) exceeds built-in maximum %u, adjusting\n", tinfo->max_outstanding, NQIDS);
        tinfo->max_outstanding = NQIDS;
+    }

-    if (tinfo->nsocks > MAX_SOCKETS)
+    if (tinfo->nsocks > MAX_SOCKETS) {
+        perf_log_warning("requested number of clients (-c %u) per thread (-T) exceeds built-in maximum %u, adjusting\n", tinfo->nsocks, MAX_SOCKETS);
        tinfo->nsocks = MAX_SOCKETS;
+    }

    if (!(tinfo->socks = calloc(tinfo->nsocks, sizeof(*tinfo->socks)))) {
        perf_log_fatal("out of memory");
@ -1383,7 +1561,7 @@ int main(int argc, char** argv)
    print_final_status(&config);

    sum_stats(&config, &total_stats);
-    print_statistics(&config, &times, &total_stats);
+    print_statistics(&config, &times, &total_stats, 0, 0);
    perf_net_stats_print(config.mode);

    cleanup(&config);