Merging upstream version 0.7.1 (Closes: #991419).

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann 2025-02-09 07:39:31 +01:00
parent 05c588e9d7
commit 9e09e0ef69
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
99 changed files with 6727 additions and 943 deletions


@ -11,6 +11,7 @@ OBJECTS=ck_barrier_centralized.o \
ck_barrier_dissemination.o \
ck_barrier_tournament.o \
ck_barrier_mcs.o \
ck_ec.o \
ck_epoch.o \
ck_ht.o \
ck_hp.o \
@ -24,11 +25,14 @@ libck.so: $(OBJECTS)
$(LD) $(LDFLAGS) -o $(TARGET_DIR)/libck.so $(OBJECTS)
libck.a: $(OBJECTS)
ar rcs $(TARGET_DIR)/libck.a $(OBJECTS)
$(AR) rcs $(TARGET_DIR)/libck.a $(OBJECTS)
ck_array.o: $(INCLUDE_DIR)/ck_array.h $(SDIR)/ck_array.c
$(CC) $(CFLAGS) -c -o $(TARGET_DIR)/ck_array.o $(SDIR)/ck_array.c
ck_ec.o: $(INCLUDE_DIR)/ck_ec.h $(SDIR)/ck_ec.c $(SDIR)/ck_ec_timeutil.h
$(CC) $(CFLAGS) -c -o $(TARGET_DIR)/ck_ec.o $(SDIR)/ck_ec.c
ck_epoch.o: $(INCLUDE_DIR)/ck_epoch.h $(SDIR)/ck_epoch.c $(INCLUDE_DIR)/ck_stack.h
$(CC) $(CFLAGS) -c -o $(TARGET_DIR)/ck_epoch.o $(SDIR)/ck_epoch.c


@ -35,7 +35,7 @@ struct ck_barrier_combining_queue {
struct ck_barrier_combining_group *tail;
};
CK_CC_INLINE static struct ck_barrier_combining_group *
static struct ck_barrier_combining_group *
ck_barrier_combining_queue_dequeue(struct ck_barrier_combining_queue *queue)
{
struct ck_barrier_combining_group *front = NULL;
@ -48,7 +48,7 @@ ck_barrier_combining_queue_dequeue(struct ck_barrier_combining_queue *queue)
return front;
}
CK_CC_INLINE static void
static void
ck_barrier_combining_insert(struct ck_barrier_combining_group *parent,
struct ck_barrier_combining_group *tnode,
struct ck_barrier_combining_group **child)
@ -72,7 +72,7 @@ ck_barrier_combining_insert(struct ck_barrier_combining_group *parent,
* into the barrier's tree. We use a queue to implement this
* traversal.
*/
CK_CC_INLINE static void
static void
ck_barrier_combining_queue_enqueue(struct ck_barrier_combining_queue *queue,
struct ck_barrier_combining_group *node_value)
{
@ -185,10 +185,10 @@ ck_barrier_combining_aux(struct ck_barrier_combining *barrier,
ck_pr_fence_store();
ck_pr_store_uint(&tnode->sense, ~tnode->sense);
} else {
ck_pr_fence_memory();
while (sense != ck_pr_load_uint(&tnode->sense))
ck_pr_stall();
}
ck_pr_fence_memory();
return;
}
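
The hunk above moves ck_pr_fence_memory() out of the waiter-only branch so that both the releaser (which flips the sense word) and the spinning waiters order the barrier exit against their subsequent accesses. A minimal sketch of the sense-reversal wait this protects, using only ck_pr primitives that already appear in the diff; the function and variable names are illustrative, not upstream code:

#include <stdbool.h>

#include <ck_pr.h>

/*
 * Sense-reversal exit: the last arriver publishes this round's sense,
 * everyone else spins until the shared word matches it.
 */
static void
sense_barrier_exit(unsigned int *shared_sense, unsigned int my_sense,
    bool last_arriver)
{

    if (last_arriver == true) {
        /* Publish prior writes, then release the waiters. */
        ck_pr_fence_store();
        ck_pr_store_uint(shared_sense, my_sense);
    } else {
        while (my_sense != ck_pr_load_uint(shared_sense))
            ck_pr_stall();
    }

    /*
     * As in the change above, the fence sits after the branch so the
     * releaser and the waiters alike order the barrier exit against
     * whatever they touch next.
     */
    ck_pr_fence_memory();
    return;
}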

src/ck_ec.c (new file, 425 lines)

@ -0,0 +1,425 @@
#include <ck_ec.h>
#include <ck_limits.h>
#include "ck_ec_timeutil.h"
#define DEFAULT_BUSY_LOOP_ITER 100U
/*
* The 2 ms, 8x/iter default parameters hit 1.024 seconds
* (2 ms * 8^3) after 3 iterations.
*/
#define DEFAULT_INITIAL_WAIT_NS 2000000L /* Start at 2 ms */
/* Grow the wait time 8x/iteration. */
#define DEFAULT_WAIT_SCALE_FACTOR 8
#define DEFAULT_WAIT_SHIFT_COUNT 0
struct ck_ec32_slow_path_state {
struct ck_ec32 *ec;
uint32_t flagged_word;
};
#ifdef CK_F_EC64
struct ck_ec64_slow_path_state {
struct ck_ec64 *ec;
uint64_t flagged_word;
};
#endif
/* Once we've waited for >= 1 sec, go for the full deadline. */
static const struct timespec final_wait_time = {
.tv_sec = 1
};
void
ck_ec32_wake(struct ck_ec32 *ec, const struct ck_ec_ops *ops)
{
/* Spurious wake-ups are OK. Clear the flag before futexing. */
ck_pr_and_32(&ec->counter, (1U << 31) - 1);
ops->wake32(ops, &ec->counter);
return;
}
int
ck_ec32_wait_slow(struct ck_ec32 *ec,
const struct ck_ec_ops *ops,
uint32_t old_value,
const struct timespec *deadline)
{
return ck_ec32_wait_pred_slow(ec, ops, old_value,
NULL, NULL, deadline);
}
#ifdef CK_F_EC64
void
ck_ec64_wake(struct ck_ec64 *ec, const struct ck_ec_ops *ops)
{
ck_pr_and_64(&ec->counter, ~1);
ops->wake64(ops, &ec->counter);
return;
}
int
ck_ec64_wait_slow(struct ck_ec64 *ec,
const struct ck_ec_ops *ops,
uint64_t old_value,
const struct timespec *deadline)
{
return ck_ec64_wait_pred_slow(ec, ops, old_value,
NULL, NULL, deadline);
}
#endif
int
ck_ec_deadline_impl(struct timespec *new_deadline,
const struct ck_ec_ops *ops,
const struct timespec *timeout)
{
struct timespec now;
int r;
if (timeout == NULL) {
new_deadline->tv_sec = TIME_MAX;
new_deadline->tv_nsec = NSEC_MAX;
return 0;
}
r = ops->gettime(ops, &now);
if (r != 0) {
return -1;
}
*new_deadline = timespec_add(now, *timeout);
return 0;
}
/* The rest of the file implements wait_pred_slow. */
/*
* Returns a timespec value for deadline_ptr. If deadline_ptr is NULL,
* returns a timespec far in the future.
*/
static struct timespec
canonical_deadline(const struct timespec *deadline_ptr)
{
if (deadline_ptr == NULL) {
return (struct timespec) { .tv_sec = TIME_MAX };
}
return *deadline_ptr;
}
/*
* Really slow (sleeping) path for ck_ec_wait. Drives the exponential
* backoff scheme to sleep for longer and longer periods of time,
* until either the sleep function returns true (the eventcount's
* value has changed), or the predicate returns non-0 (something else
* has changed).
*
* If deadline is ever reached, returns -1 (timeout).
*
* TODO: add some form of randomisation to the intermediate timeout
* values.
*/
static int
exponential_backoff(struct ck_ec_wait_state *wait_state,
bool (*sleep)(const void *sleep_state,
const struct ck_ec_wait_state *wait_state,
const struct timespec *partial_deadline),
const void *sleep_state,
int (*pred)(const struct ck_ec_wait_state *state,
struct timespec *deadline),
const struct timespec *deadline)
{
struct timespec begin;
struct timespec stop_backoff;
const struct ck_ec_ops *ops = wait_state->ops;
const uint32_t scale_factor = (ops->wait_scale_factor != 0)
? ops->wait_scale_factor
: DEFAULT_WAIT_SCALE_FACTOR;
const uint32_t shift_count = (ops->wait_shift_count != 0)
? ops->wait_shift_count
: DEFAULT_WAIT_SHIFT_COUNT;
uint32_t wait_ns = (ops->initial_wait_ns != 0)
? ops->initial_wait_ns
: DEFAULT_INITIAL_WAIT_NS;
bool first = true;
for (;;) {
struct timespec now;
struct timespec partial_deadline;
if (check_deadline(&now, ops, *deadline) == true) {
/* Timeout. Bail out. */
return -1;
}
if (first) {
begin = now;
wait_state->start = begin;
stop_backoff = timespec_add(begin, final_wait_time);
first = false;
}
wait_state->now = now;
if (timespec_cmp(now, stop_backoff) >= 0) {
partial_deadline = *deadline;
} else {
do {
partial_deadline =
timespec_add_ns(begin, wait_ns);
wait_ns =
wait_time_scale(wait_ns,
scale_factor,
shift_count);
} while (timespec_cmp(partial_deadline, now) <= 0);
}
if (pred != NULL) {
int r = pred(wait_state, &partial_deadline);
if (r != 0) {
return r;
}
}
/* Canonicalize deadlines in the far future to NULL. */
if (sleep(sleep_state, wait_state,
((partial_deadline.tv_sec == TIME_MAX)
? NULL : &partial_deadline)) == true) {
return 0;
}
}
}
/*
* Loops up to BUSY_LOOP_ITER times, or until ec's counter value
* (including the flag) differs from old_value.
*
* Returns the new value in ec.
*/
#define DEF_WAIT_EASY(W) \
static uint##W##_t ck_ec##W##_wait_easy(struct ck_ec##W* ec, \
const struct ck_ec_ops *ops, \
uint##W##_t expected) \
{ \
uint##W##_t current = ck_pr_load_##W(&ec->counter); \
size_t n = (ops->busy_loop_iter != 0) \
? ops->busy_loop_iter \
: DEFAULT_BUSY_LOOP_ITER; \
\
for (size_t i = 0; \
i < n && current == expected; \
i++) { \
ck_pr_stall(); \
current = ck_pr_load_##W(&ec->counter); \
} \
\
return current; \
}
DEF_WAIT_EASY(32)
#ifdef CK_F_EC64
DEF_WAIT_EASY(64)
#endif
#undef DEF_WAIT_EASY
/*
* Attempts to upgrade ec->counter from unflagged to flagged.
*
* Returns true if the event count has changed. Otherwise, ec's
* counter word is equal to flagged on return, or has been at some
* time before the return.
*/
#define DEF_UPGRADE(W) \
static bool ck_ec##W##_upgrade(struct ck_ec##W* ec, \
uint##W##_t current, \
uint##W##_t unflagged, \
uint##W##_t flagged) \
{ \
uint##W##_t old_word; \
\
if (current == flagged) { \
/* Nothing to do, no change. */ \
return false; \
} \
\
if (current != unflagged) { \
/* We have a different counter value! */ \
return true; \
} \
\
/* \
* Flag the counter value. The CAS only fails if the \
* counter is already flagged, or has a new value. \
*/ \
return (ck_pr_cas_##W##_value(&ec->counter, \
unflagged, flagged, \
&old_word) == false && \
old_word != flagged); \
}
DEF_UPGRADE(32)
#ifdef CK_F_EC64
DEF_UPGRADE(64)
#endif
#undef DEF_UPGRADE
/*
* Blocks until partial_deadline on the ck_ec. Returns true if the
* eventcount's value has changed. If partial_deadline is NULL, wait
* forever.
*/
static bool
ck_ec32_wait_slow_once(const void *vstate,
const struct ck_ec_wait_state *wait_state,
const struct timespec *partial_deadline)
{
const struct ck_ec32_slow_path_state *state = vstate;
const struct ck_ec32 *ec = state->ec;
const uint32_t flagged_word = state->flagged_word;
wait_state->ops->wait32(wait_state, &ec->counter,
flagged_word, partial_deadline);
return ck_pr_load_32(&ec->counter) != flagged_word;
}
#ifdef CK_F_EC64
static bool
ck_ec64_wait_slow_once(const void *vstate,
const struct ck_ec_wait_state *wait_state,
const struct timespec *partial_deadline)
{
const struct ck_ec64_slow_path_state *state = vstate;
const struct ck_ec64 *ec = state->ec;
const uint64_t flagged_word = state->flagged_word;
/* futex_wait will only compare the low 32 bits. Perform a
* full comparison here to maximise the chances of catching an
* ABA in the low 32 bits.
*/
if (ck_pr_load_64(&ec->counter) != flagged_word) {
return true;
}
wait_state->ops->wait64(wait_state, &ec->counter,
flagged_word, partial_deadline);
return ck_pr_load_64(&ec->counter) != flagged_word;
}
#endif
/*
* The full wait logic is a lot of code (> 1KB). Encourage the
* compiler to lay this all out linearly with LIKELY annotations on
* every early exit.
*/
#define WAIT_SLOW_BODY(W, ec, ops, pred, data, deadline_ptr, \
old_value, unflagged, flagged) \
do { \
struct ck_ec_wait_state wait_state = { \
.ops = ops, \
.data = data \
}; \
const struct ck_ec##W##_slow_path_state state = { \
.ec = ec, \
.flagged_word = flagged \
}; \
const struct timespec deadline = \
canonical_deadline(deadline_ptr); \
\
/* Detect infinite past deadlines. */ \
if (CK_CC_LIKELY(deadline.tv_sec <= 0)) { \
return -1; \
} \
\
for (;;) { \
uint##W##_t current; \
int r; \
\
current = ck_ec##W##_wait_easy(ec, ops, unflagged); \
\
/* \
* We're about to wait harder (i.e., \
* potentially with futex). Make sure the \
* counter word is flagged. \
*/ \
if (CK_CC_LIKELY( \
ck_ec##W##_upgrade(ec, current, \
unflagged, flagged) == true)) { \
ck_pr_fence_acquire(); \
return 0; \
} \
\
/* \
* By now, ec->counter == flagged_word (at \
* some point in the past). Spin some more to \
* heuristically let any in-flight SP inc/add \
* to retire. This does not affect \
* correctness, but practically eliminates \
* lost wake-ups. \
*/ \
current = ck_ec##W##_wait_easy(ec, ops, flagged); \
if (CK_CC_LIKELY(current != flagged_word)) { \
ck_pr_fence_acquire(); \
return 0; \
} \
\
r = exponential_backoff(&wait_state, \
ck_ec##W##_wait_slow_once, \
&state, \
pred, &deadline); \
if (r != 0) { \
return r; \
} \
\
if (ck_ec##W##_value(ec) != old_value) { \
ck_pr_fence_acquire(); \
return 0; \
} \
\
/* Spurious wake-up. Redo the slow path. */ \
} \
} while (0)
int
ck_ec32_wait_pred_slow(struct ck_ec32 *ec,
const struct ck_ec_ops *ops,
uint32_t old_value,
int (*pred)(const struct ck_ec_wait_state *state,
struct timespec *deadline),
void *data,
const struct timespec *deadline_ptr)
{
const uint32_t unflagged_word = old_value;
const uint32_t flagged_word = old_value | (1UL << 31);
if (CK_CC_UNLIKELY(ck_ec32_value(ec) != old_value)) {
return 0;
}
WAIT_SLOW_BODY(32, ec, ops, pred, data, deadline_ptr,
old_value, unflagged_word, flagged_word);
}
#ifdef CK_F_EC64
int
ck_ec64_wait_pred_slow(struct ck_ec64 *ec,
const struct ck_ec_ops *ops,
uint64_t old_value,
int (*pred)(const struct ck_ec_wait_state *state,
struct timespec *deadline),
void *data,
const struct timespec *deadline_ptr)
{
const uint64_t unflagged_word = old_value << 1;
const uint64_t flagged_word = unflagged_word | 1;
if (CK_CC_UNLIKELY(ck_ec64_value(ec) != old_value)) {
return 0;
}
WAIT_SLOW_BODY(64, ec, ops, pred, data, deadline_ptr,
old_value, unflagged_word, flagged_word);
}
#endif
#undef WAIT_SLOW_BODY
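
The slow-path entry points above (ck_ec32_wait_slow(), ck_ec32_wake()) leave the actual sleeping and waking to a caller-supplied struct ck_ec_ops; this file only uses ops->gettime, ops->wait32/wait64, ops->wake32/wake64 and the four tuning fields. Below is a minimal, hypothetical Linux sketch of such an ops table backed by clock_gettime() and the futex(2) system call, wired directly to the slow-path functions in this file. The exact callback prototypes beyond what the calls above imply, the futex flags, and the fast-path initialisation/increment side (which lives in ck_ec.h, not in this diff) are assumptions, not something this diff specifies.

#include <ck_ec.h>

#include <limits.h>
#include <linux/futex.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

/* Deadline source: monotonic clock, as the backoff code above expects. */
static int
my_gettime(const struct ck_ec_ops *ops, struct timespec *out)
{
    (void)ops;
    return clock_gettime(CLOCK_MONOTONIC, out);
}

/* Wake every sleeper parked on the counter word. */
static void
my_wake32(const struct ck_ec_ops *ops, const uint32_t *address)
{
    (void)ops;
    syscall(SYS_futex, address, FUTEX_WAKE_PRIVATE, INT_MAX,
        NULL, NULL, 0);
    return;
}

/*
 * Sleep until the counter word changes from `expected` or the deadline
 * passes. FUTEX_WAIT_BITSET takes an absolute CLOCK_MONOTONIC timeout,
 * which matches the absolute partial deadlines computed above; a NULL
 * deadline means wait forever.
 */
static void
my_wait32(const struct ck_ec_wait_state *state, const uint32_t *address,
    uint32_t expected, const struct timespec *deadline)
{
    (void)state;
    syscall(SYS_futex, address, FUTEX_WAIT_BITSET_PRIVATE, expected,
        deadline, NULL, FUTEX_BITSET_MATCH_ANY);
    return;
}

static const struct ck_ec_ops my_ops = {
    .gettime = my_gettime,
    .wait32 = my_wait32,
    .wake32 = my_wake32
    /* busy_loop_iter, initial_wait_ns, wait_scale_factor and
     * wait_shift_count stay 0 to select the defaults above. */
};

/* Waiter: block until the counter has moved past `old`, or time out. */
static int
wait_for_update(struct ck_ec32 *ec, uint32_t old,
    const struct timespec *deadline)
{
    return ck_ec32_wait_slow(ec, &my_ops, old, deadline);
}

/* Producer: after publishing an update, wake any sleepers. */
static void
notify(struct ck_ec32 *ec)
{
    ck_ec32_wake(ec, &my_ops);
    return;
}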

src/ck_ec_timeutil.h (new file, 150 lines)

@ -0,0 +1,150 @@
#ifndef CK_EC_TIMEUTIL_H
#define CK_EC_TIMEUTIL_H
#include <ck_cc.h>
#include <ck_ec.h>
#include <ck_limits.h>
#include <ck_stdint.h>
#include <sys/time.h>
#define TIME_MAX ((time_t)((1ULL << ((sizeof(time_t) * CHAR_BIT) - 1)) - 1))
#define NSEC_MAX ((1000L * 1000 * 1000) - 1)
/*
* Approximates (nsec * multiplier) >> shift. Clamps to UINT32_MAX on
* overflow.
*/
CK_CC_UNUSED static uint32_t
wait_time_scale(uint32_t nsec,
uint32_t multiplier,
unsigned int shift)
{
uint64_t temp = (uint64_t)nsec * multiplier;
uint64_t max = (uint64_t)UINT32_MAX << shift;
if (temp >= max) {
return UINT32_MAX;
}
return temp >> shift;
}
/*
* Returns ts + ns. ns is clamped to at most 1 second. Clamps the
* return value to TIME_MAX, NSEC_MAX on overflow.
*
*/
CK_CC_UNUSED static struct timespec timespec_add_ns(const struct timespec ts,
uint32_t ns)
{
struct timespec ret = {
.tv_sec = TIME_MAX,
.tv_nsec = NSEC_MAX
};
time_t sec;
uint32_t sum_ns;
if (ns > (uint32_t)NSEC_MAX) {
if (ts.tv_sec >= TIME_MAX) {
return ret;
}
ret.tv_sec = ts.tv_sec + 1;
ret.tv_nsec = ts.tv_nsec;
return ret;
}
sec = ts.tv_sec;
sum_ns = ns + ts.tv_nsec;
if (sum_ns > NSEC_MAX) {
if (sec >= TIME_MAX) {
return ret;
}
sec++;
sum_ns -= (NSEC_MAX + 1);
}
ret.tv_sec = sec;
ret.tv_nsec = sum_ns;
return ret;
}
/*
* Returns ts + inc. If inc is negative, it is normalized to 0.
* Clamps the return value to TIME_MAX, NSEC_MAX on overflow.
*/
CK_CC_UNUSED static struct timespec timespec_add(const struct timespec ts,
const struct timespec inc)
{
/* Initial return value is clamped to infinite future. */
struct timespec ret = {
.tv_sec = TIME_MAX,
.tv_nsec = NSEC_MAX
};
time_t sec;
unsigned long nsec;
/* Non-positive delta is a no-op. Invalid nsec is another no-op. */
if (inc.tv_sec < 0 || inc.tv_nsec < 0 || inc.tv_nsec > NSEC_MAX) {
return ts;
}
/* Detect overflow early. */
if (inc.tv_sec > TIME_MAX - ts.tv_sec) {
return ret;
}
sec = ts.tv_sec + inc.tv_sec;
/* This sum can't overflow if the inputs are valid. */
nsec = (unsigned long)ts.tv_nsec + inc.tv_nsec;
if (nsec > NSEC_MAX) {
if (sec >= TIME_MAX) {
return ret;
}
sec++;
nsec -= (NSEC_MAX + 1);
}
ret.tv_sec = sec;
ret.tv_nsec = nsec;
return ret;
}
/* Compares two timespecs. Returns -1 if x < y, 0 if x == y, and 1 if x > y. */
CK_CC_UNUSED static int timespec_cmp(const struct timespec x,
const struct timespec y)
{
if (x.tv_sec != y.tv_sec) {
return (x.tv_sec < y.tv_sec) ? -1 : 1;
}
if (x.tv_nsec != y.tv_nsec) {
return (x.tv_nsec < y.tv_nsec) ? -1 : 1;
}
return 0;
}
/*
* Overwrites now with the current CLOCK_MONOTONIC time, and returns
* true if the current time is greater than or equal to the deadline,
* or the clock is somehow broken.
*/
CK_CC_UNUSED static bool check_deadline(struct timespec *now,
const struct ck_ec_ops *ops,
const struct timespec deadline)
{
int r;
r = ops->gettime(ops, now);
if (r != 0) {
return true;
}
return timespec_cmp(*now, deadline) >= 0;
}
#endif /* !CK_EC_TIMEUTIL_H */
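
The helpers in this header are pure functions, so their saturation behaviour is easy to check in isolation. A small hypothetical test (assuming this internal header and ck's include directory are both on the include path) that walks the default 2 ms / 8x backoff schedule from ck_ec.c and exercises the nanosecond carry in timespec_add_ns():

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#include "ck_ec_timeutil.h"

int
main(void)
{
    /* 2 ms scaled by 8 per step: 2 ms, 16 ms, 128 ms, 1.024 s. */
    uint32_t wait_ns = 2000000;
    for (int i = 0; i < 3; i++)
        wait_ns = wait_time_scale(wait_ns, 8, 0);
    assert(wait_ns == 1024000000U);

    /* Scaling saturates instead of overflowing the 32-bit range. */
    assert(wait_time_scale(UINT32_MAX, 8, 0) == UINT32_MAX);

    /* Nanosecond carry: 0.9 s + 200 ms lands at 1.1 s. */
    struct timespec ts = { .tv_sec = 0, .tv_nsec = 900 * 1000 * 1000 };
    struct timespec sum = timespec_add_ns(ts, 200 * 1000 * 1000);
    assert(sum.tv_sec == 1 && sum.tv_nsec == 100 * 1000 * 1000);

    puts("ok");
    return 0;
}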


@ -127,6 +127,14 @@
*/
#define CK_EPOCH_GRACE 3U
/*
* CK_EPOCH_LENGTH must be a power of 2 (because (CK_EPOCH_LENGTH - 1) is used
* as a mask), and it must be at least 3 (see comments above).
*/
#if (CK_EPOCH_LENGTH < 3 || (CK_EPOCH_LENGTH & (CK_EPOCH_LENGTH - 1)) != 0)
#error "CK_EPOCH_LENGTH must be a power of 2 and >= 3"
#endif
enum {
CK_EPOCH_STATE_USED = 0,
CK_EPOCH_STATE_FREE = 1
@ -139,7 +147,7 @@ CK_STACK_CONTAINER(struct ck_epoch_entry, stack_entry,
#define CK_EPOCH_SENSE_MASK (CK_EPOCH_SENSE - 1)
void
bool
_ck_epoch_delref(struct ck_epoch_record *record,
struct ck_epoch_section *section)
{
@ -150,7 +158,7 @@ _ck_epoch_delref(struct ck_epoch_record *record,
current->count--;
if (current->count > 0)
return;
return false;
/*
* If the current bucket no longer has any references, then
@ -161,8 +169,7 @@ _ck_epoch_delref(struct ck_epoch_record *record,
* If no other active bucket exists, then the record will go
* inactive in order to allow for forward progress.
*/
other = &record->local.bucket[(i + 1) &
CK_EPOCH_SENSE_MASK];
other = &record->local.bucket[(i + 1) & CK_EPOCH_SENSE_MASK];
if (other->count > 0 &&
((int)(current->epoch - other->epoch) < 0)) {
/*
@ -172,7 +179,7 @@ _ck_epoch_delref(struct ck_epoch_record *record,
ck_pr_store_uint(&record->epoch, other->epoch);
}
return;
return true;
}
void
@ -230,7 +237,7 @@ ck_epoch_init(struct ck_epoch *global)
}
struct ck_epoch_record *
ck_epoch_recycle(struct ck_epoch *global)
ck_epoch_recycle(struct ck_epoch *global, void *ct)
{
struct ck_epoch_record *record;
ck_stack_entry_t *cursor;
@ -249,6 +256,12 @@ ck_epoch_recycle(struct ck_epoch *global)
CK_EPOCH_STATE_USED);
if (state == CK_EPOCH_STATE_FREE) {
ck_pr_dec_uint(&global->n_free);
ck_pr_store_ptr(&record->ct, ct);
/*
* The context pointer is ordered by a
* subsequent protected section.
*/
return record;
}
}
@ -258,7 +271,8 @@ ck_epoch_recycle(struct ck_epoch *global)
}
void
ck_epoch_register(struct ck_epoch *global, struct ck_epoch_record *record)
ck_epoch_register(struct ck_epoch *global, struct ck_epoch_record *record,
void *ct)
{
size_t i;
@ -269,6 +283,7 @@ ck_epoch_register(struct ck_epoch *global, struct ck_epoch_record *record)
record->n_dispatch = 0;
record->n_peak = 0;
record->n_pending = 0;
record->ct = ct;
memset(&record->local, 0, sizeof record->local);
for (i = 0; i < CK_EPOCH_LENGTH; i++)
@ -295,6 +310,7 @@ ck_epoch_unregister(struct ck_epoch_record *record)
for (i = 0; i < CK_EPOCH_LENGTH; i++)
ck_stack_init(&record->pending[i]);
ck_pr_store_ptr(&record->ct, NULL);
ck_pr_fence_store();
ck_pr_store_uint(&record->state, CK_EPOCH_STATE_FREE);
ck_pr_inc_uint(&global->n_free);
@ -340,31 +356,41 @@ ck_epoch_scan(struct ck_epoch *global,
return NULL;
}
static void
ck_epoch_dispatch(struct ck_epoch_record *record, unsigned int e)
static unsigned int
ck_epoch_dispatch(struct ck_epoch_record *record, unsigned int e, ck_stack_t *deferred)
{
unsigned int epoch = e & (CK_EPOCH_LENGTH - 1);
ck_stack_entry_t *head, *next, *cursor;
unsigned int n_pending, n_peak;
unsigned int i = 0;
head = CK_STACK_FIRST(&record->pending[epoch]);
ck_stack_init(&record->pending[epoch]);
head = ck_stack_batch_pop_upmc(&record->pending[epoch]);
for (cursor = head; cursor != NULL; cursor = next) {
struct ck_epoch_entry *entry =
ck_epoch_entry_container(cursor);
next = CK_STACK_NEXT(cursor);
entry->function(entry);
if (deferred != NULL)
ck_stack_push_spnc(deferred, &entry->stack_entry);
else
entry->function(entry);
i++;
}
if (record->n_pending > record->n_peak)
record->n_peak = record->n_pending;
n_peak = ck_pr_load_uint(&record->n_peak);
n_pending = ck_pr_load_uint(&record->n_pending);
record->n_dispatch += i;
record->n_pending -= i;
return;
/* We don't require accuracy around peak calculation. */
if (n_pending > n_peak)
ck_pr_store_uint(&record->n_peak, n_peak);
if (i > 0) {
ck_pr_add_uint(&record->n_dispatch, i);
ck_pr_sub_uint(&record->n_pending, i);
}
return i;
}
/*
@ -376,7 +402,18 @@ ck_epoch_reclaim(struct ck_epoch_record *record)
unsigned int epoch;
for (epoch = 0; epoch < CK_EPOCH_LENGTH; epoch++)
ck_epoch_dispatch(record, epoch);
ck_epoch_dispatch(record, epoch, NULL);
return;
}
CK_CC_FORCE_INLINE static void
epoch_block(struct ck_epoch *global, struct ck_epoch_record *cr,
ck_epoch_wait_cb_t *cb, void *ct)
{
if (cb != NULL)
cb(global, cr, ct);
return;
}
@ -385,9 +422,9 @@ ck_epoch_reclaim(struct ck_epoch_record *record)
* This function must not be called within a read section.
*/
void
ck_epoch_synchronize(struct ck_epoch_record *record)
ck_epoch_synchronize_wait(struct ck_epoch *global,
ck_epoch_wait_cb_t *cb, void *ct)
{
struct ck_epoch *global = record->global;
struct ck_epoch_record *cr;
unsigned int delta, epoch, goal, i;
bool active;
@ -424,10 +461,27 @@ ck_epoch_synchronize(struct ck_epoch_record *record)
* period.
*/
e_d = ck_pr_load_uint(&global->epoch);
if (e_d != delta) {
delta = e_d;
goto reload;
if (e_d == delta) {
epoch_block(global, cr, cb, ct);
continue;
}
/*
* If the epoch has been updated, we may have already
* met our goal.
*/
delta = e_d;
if ((goal > epoch) & (delta >= goal))
goto leave;
epoch_block(global, cr, cb, ct);
/*
* If the epoch has been updated, then a grace period
* requires that all threads are observed idle at the
* same epoch.
*/
cr = NULL;
}
/*
@ -459,20 +513,6 @@ ck_epoch_synchronize(struct ck_epoch_record *record)
* Otherwise, we have just acquired latest snapshot.
*/
delta = delta + r;
continue;
reload:
if ((goal > epoch) & (delta >= goal)) {
/*
* Right now, epoch overflow is handled as an edge
* case. If we have already observed an epoch
* generation, then we can be sure no hazardous
* references exist to objects from this generation. We
* can actually avoid an additional scan step at this
* point.
*/
break;
}
}
/*
@ -480,8 +520,16 @@ reload:
* However, if non-temporal instructions are used, full barrier
* semantics are necessary.
*/
leave:
ck_pr_fence_memory();
record->epoch = delta;
return;
}
void
ck_epoch_synchronize(struct ck_epoch_record *record)
{
ck_epoch_synchronize_wait(record->global, NULL, NULL);
return;
}
@ -494,6 +542,16 @@ ck_epoch_barrier(struct ck_epoch_record *record)
return;
}
void
ck_epoch_barrier_wait(struct ck_epoch_record *record, ck_epoch_wait_cb_t *cb,
void *ct)
{
ck_epoch_synchronize_wait(record->global, cb, ct);
ck_epoch_reclaim(record);
return;
}
/*
* It may be worth it to actually apply these deferral semantics to an epoch
* that was observed at ck_epoch_call time. The problem is that the latter
@ -505,41 +563,61 @@ ck_epoch_barrier(struct ck_epoch_record *record)
* is far from ideal too.
*/
bool
ck_epoch_poll(struct ck_epoch_record *record)
ck_epoch_poll_deferred(struct ck_epoch_record *record, ck_stack_t *deferred)
{
bool active;
unsigned int epoch;
unsigned int snapshot;
struct ck_epoch_record *cr = NULL;
struct ck_epoch *global = record->global;
unsigned int n_dispatch;
epoch = ck_pr_load_uint(&global->epoch);
/* Serialize epoch snapshots with respect to global epoch. */
ck_pr_fence_memory();
/*
* At this point, epoch is the current global epoch value.
* There may or may not be active threads which observed epoch - 1.
* (ck_epoch_scan() will tell us that). However, there should be
* no active threads which observed epoch - 2.
*
* Note that checking epoch - 2 is necessary, as race conditions can
* allow another thread to increment the global epoch before this
* thread runs.
*/
n_dispatch = ck_epoch_dispatch(record, epoch - 2, deferred);
cr = ck_epoch_scan(global, cr, epoch, &active);
if (cr != NULL) {
record->epoch = epoch;
return false;
}
if (cr != NULL)
return (n_dispatch > 0);
/* We are at a grace period if all threads are inactive. */
if (active == false) {
record->epoch = epoch;
for (epoch = 0; epoch < CK_EPOCH_LENGTH; epoch++)
ck_epoch_dispatch(record, epoch);
ck_epoch_dispatch(record, epoch, deferred);
return true;
}
/* If an active thread exists, rely on epoch observation. */
if (ck_pr_cas_uint_value(&global->epoch, epoch, epoch + 1,
&snapshot) == false) {
record->epoch = snapshot;
} else {
record->epoch = epoch + 1;
}
/*
* If an active thread exists, rely on epoch observation.
*
* All the active threads entered the epoch section during
* the current epoch. Therefore, we can now run the handlers
* for the immediately preceding epoch and attempt to
* advance the epoch if it hasn't been already.
*/
(void)ck_pr_cas_uint(&global->epoch, epoch, epoch + 1);
ck_epoch_dispatch(record, epoch + 1);
ck_epoch_dispatch(record, epoch - 1, deferred);
return true;
}
bool
ck_epoch_poll(struct ck_epoch_record *record)
{
return ck_epoch_poll_deferred(record, NULL);
}
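
ck_epoch_poll_deferred() above pushes ready entries onto a caller-owned ck_stack_t instead of running them inline, and ck_epoch_synchronize_wait()/ck_epoch_barrier_wait() accept a callback that is invoked while the grace period is blocked on another record. A minimal sketch of both patterns, assuming a record registered with the three-argument ck_epoch_register() shown in this diff and the matching declarations in this release's ck_epoch.h; the container accessor is regenerated locally with the same CK_STACK_CONTAINER macro the implementation uses, and the ck_epoch_wait_cb_t return type is assumed void, matching how epoch_block() ignores it:

#include <sched.h>

#include <ck_epoch.h>
#include <ck_stack.h>

/* Regenerate the entry <-> stack_entry accessor for use outside ck_epoch.c. */
CK_STACK_CONTAINER(struct ck_epoch_entry, stack_entry, epoch_entry_container)

/* Run the callbacks ck_epoch_poll_deferred() handed back, off the
 * poller's critical path. */
static void
drain_deferred(ck_stack_t *deferred)
{
    ck_stack_entry_t *cursor, *next;

    for (cursor = CK_STACK_FIRST(deferred); cursor != NULL; cursor = next) {
        struct ck_epoch_entry *entry = epoch_entry_container(cursor);

        next = CK_STACK_NEXT(cursor);
        entry->function(entry);
    }

    return;
}

static void
reclaim_pass(ck_epoch_record_t *record)
{
    ck_stack_t deferred;

    ck_stack_init(&deferred);
    if (ck_epoch_poll_deferred(record, &deferred) == true)
        drain_deferred(&deferred);

    return;
}

/* Wait callback matching the cb(global, cr, ct) call in epoch_block():
 * yield instead of spinning while another record holds up the grace period. */
static void
yield_cb(ck_epoch_t *global, ck_epoch_record_t *cr, void *ct)
{
    (void)global;
    (void)cr;
    (void)ct;
    sched_yield();
    return;
}

static void
blocking_reclaim(ck_epoch_record_t *record)
{
    ck_epoch_barrier_wait(record, yield_cb, NULL);
    return;
}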


@ -105,19 +105,10 @@ ck_hs_map_signal(struct ck_hs_map *map, unsigned long h)
return;
}
void
ck_hs_iterator_init(struct ck_hs_iterator *iterator)
static bool
_ck_hs_next(struct ck_hs *hs, struct ck_hs_map *map,
struct ck_hs_iterator *i, void **key)
{
iterator->cursor = NULL;
iterator->offset = 0;
return;
}
bool
ck_hs_next(struct ck_hs *hs, struct ck_hs_iterator *i, void **key)
{
struct ck_hs_map *map = hs->map;
void *value;
if (i->offset >= map->capacity)
@ -129,6 +120,8 @@ ck_hs_next(struct ck_hs *hs, struct ck_hs_iterator *i, void **key)
#ifdef CK_HS_PP
if (hs->mode & CK_HS_MODE_OBJECT)
value = CK_HS_VMA(value);
#else
(void)hs; /* Avoid unused parameter warning. */
#endif
i->offset++;
*key = value;
@ -139,6 +132,35 @@ ck_hs_next(struct ck_hs *hs, struct ck_hs_iterator *i, void **key)
return false;
}
void
ck_hs_iterator_init(struct ck_hs_iterator *iterator)
{
iterator->cursor = NULL;
iterator->offset = 0;
iterator->map = NULL;
return;
}
bool
ck_hs_next(struct ck_hs *hs, struct ck_hs_iterator *i, void **key)
{
return _ck_hs_next(hs, hs->map, i, key);
}
bool
ck_hs_next_spmc(struct ck_hs *hs, struct ck_hs_iterator *i, void **key)
{
struct ck_hs_map *m = i->map;
if (m == NULL) {
m = i->map = ck_pr_load_ptr(&hs->map);
}
return _ck_hs_next(hs, m, i, key);
}
void
ck_hs_stat(struct ck_hs *hs, struct ck_hs_stat *st)
{
@ -206,7 +228,7 @@ ck_hs_map_create(struct ck_hs *hs, unsigned long entries)
map->probe_limit = (unsigned int)limit;
map->probe_maximum = 0;
map->capacity = n_entries;
map->step = ck_internal_bsf(n_entries);
map->step = ck_cc_ffsl(n_entries);
map->mask = n_entries - 1;
map->n_entries = 0;


@ -30,9 +30,6 @@
/*
* This implementation borrows several techniques from Josh Dybnis's
* nbds library which can be found at http://code.google.com/p/nbds
*
* This release currently only includes support for 64-bit platforms.
* We can address 32-bit platforms in a future release.
*/
#include <ck_cc.h>
#include <ck_md.h>
@ -171,7 +168,7 @@ ck_ht_map_create(struct ck_ht *table, CK_HT_TYPE entries)
map->deletions = 0;
map->probe_maximum = 0;
map->capacity = n_entries;
map->step = ck_internal_bsf_64(map->capacity);
map->step = ck_cc_ffsll(map->capacity);
map->mask = map->capacity - 1;
map->n_entries = 0;
map->entries = (struct ck_ht_entry *)(((uintptr_t)&map[1] + prefix +


@ -88,7 +88,15 @@ static inline uint64_t rotl64 ( uint64_t x, int8_t r )
FORCE_INLINE static uint32_t getblock ( const uint32_t * p, int i )
{
#ifdef __s390x__
uint32_t res;
__asm__ (" lrv %0,%1\n"
: "=r" (res) : "Q" (p[i]) : "cc", "mem");
return res;
#else
return p[i];
#endif /* !__s390x__ */
}
//-----------------------------------------------------------------------------
@ -147,7 +155,9 @@ static inline void MurmurHash3_x86_32 ( const void * key, int len,
switch(len & 3)
{
case 3: k1 ^= tail[2] << 16;
/* fall through */
case 2: k1 ^= tail[1] << 8;
/* fall through */
case 1: k1 ^= tail[0];
k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
};
@ -196,11 +206,17 @@ static inline uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed
switch(len & 7)
{
case 7: h ^= (uint64_t)(data2[6]) << 48;
/* fall through */
case 6: h ^= (uint64_t)(data2[5]) << 40;
/* fall through */
case 5: h ^= (uint64_t)(data2[4]) << 32;
/* fall through */
case 4: h ^= (uint64_t)(data2[3]) << 24;
/* fall through */
case 3: h ^= (uint64_t)(data2[2]) << 16;
/* fall through */
case 2: h ^= (uint64_t)(data2[1]) << 8;
/* fall through */
case 1: h ^= (uint64_t)(data2[0]);
h *= m;
};
@ -249,7 +265,9 @@ static inline uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed
switch(len)
{
case 3: h2 ^= ((const unsigned char*)data)[2] << 16;
/* fall through */
case 2: h2 ^= ((const unsigned char*)data)[1] << 8;
/* fall through */
case 1: h2 ^= ((const unsigned char*)data)[0];
h2 *= m;
};
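
The s390x branch added to getblock() above exists because MurmurHash consumes its input as little-endian 32-bit words; lrv (load reversed) byte-swaps each word on that big-endian target. A hedged, portable formulation of the same idea (not what this diff does, just what the asm achieves) keys off the compiler's endianness macro instead:

#include <stdint.h>

/* Fetch the i-th 32-bit block and present it little-endian regardless
 * of host byte order (portable equivalent of the s390x asm above). */
static inline uint32_t
getblock_le(const uint32_t *p, int i)
{
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    return __builtin_bswap32(p[i]);
#else
    return p[i];
#endif
}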


@ -80,40 +80,3 @@ ck_internal_max_32(uint32_t x, uint32_t y)
return x ^ ((x ^ y) & -(x < y));
}
CK_CC_INLINE static unsigned long
ck_internal_bsf(unsigned long v)
{
#if defined(__GNUC__)
return __builtin_ffs(v);
#else
unsigned int i;
const unsigned int s = sizeof(unsigned long) * 8 - 1;
for (i = 0; i < s; i++) {
if (v & (1UL << (s - i)))
return sizeof(unsigned long) * 8 - i;
}
return 1;
#endif /* !__GNUC__ */
}
CK_CC_INLINE static uint64_t
ck_internal_bsf_64(uint64_t v)
{
#if defined(__GNUC__)
return __builtin_ffs(v);
#else
unsigned int i;
const unsigned int s = sizeof(unsigned long) * 8 - 1;
for (i = 0; i < s; i++) {
if (v & (1ULL << (63U - i)))
return i;
}
#endif /* !__GNUC__ */
return 1;
}
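
The removed ck_internal_bsf()/ck_internal_bsf_64() helpers are replaced at their call sites (ck_hs, ck_rhs, ck_ht above) by ck_cc_ffsl()/ck_cc_ffsll() from ck_cc.h, which follow the usual ffs() convention: 0 for a zero argument, otherwise the 1-based index of the least-significant set bit. For the power-of-two capacities those call sites pass, that is log2(capacity) + 1, which is what map->step stores. A small hypothetical check of that convention, assuming ck's include directory is on the include path:

#include <assert.h>

#include <ck_cc.h>

int
main(void)
{
    /* ffs semantics: 1-based index of the lowest set bit, 0 for 0. */
    assert(ck_cc_ffsl(0) == 0);
    assert(ck_cc_ffsl(1) == 1);
    assert(ck_cc_ffsl(1024) == 11);        /* log2(1024) + 1 */
    assert(ck_cc_ffsll(1ULL << 40) == 41);
    return 0;
}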


@ -366,7 +366,7 @@ ck_rhs_map_create(struct ck_rhs *hs, unsigned long entries)
map->probe_limit = (unsigned int)limit;
map->probe_maximum = 0;
map->capacity = n_entries;
map->step = ck_internal_bsf(n_entries);
map->step = ck_cc_ffsl(n_entries);
map->mask = n_entries - 1;
map->n_entries = 0;