Adding upstream version 0.6.0.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
c49a9029dc
commit
7f70a05c55
465 changed files with 60158 additions and 0 deletions
545
src/ck_epoch.c
Normal file
545
src/ck_epoch.c
Normal file
|
@ -0,0 +1,545 @@
|
|||
/*
|
||||
* Copyright 2011-2015 Samy Al Bahra.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* The implementation here is inspired from the work described in:
|
||||
* Fraser, K. 2004. Practical Lock-Freedom. PhD Thesis, University
|
||||
* of Cambridge Computing Laboratory.
|
||||
*/
|
||||
|
||||
#include <ck_backoff.h>
|
||||
#include <ck_cc.h>
|
||||
#include <ck_epoch.h>
|
||||
#include <ck_pr.h>
|
||||
#include <ck_stack.h>
|
||||
#include <ck_stdbool.h>
|
||||
#include <ck_string.h>
|
||||
|
||||
/*
|
||||
* Only three distinct values are used for reclamation, but reclamation occurs
|
||||
* at e+2 rather than e+1. Any thread in a "critical section" would have
|
||||
* acquired some snapshot (e) of the global epoch value (e_g) and set an active
|
||||
* flag. Any hazardous references will only occur after a full memory barrier.
|
||||
* For example, assume an initial e_g value of 1, e value of 0 and active value
|
||||
* of 0.
|
||||
*
|
||||
* ck_epoch_begin(...)
|
||||
* e = e_g
|
||||
* active = 1
|
||||
* memory_barrier();
|
||||
*
|
||||
* Any serialized reads may observe e = 0 or e = 1 with active = 0, or e = 0 or
|
||||
* e = 1 with active = 1. The e_g value can only go from 1 to 2 if every thread
|
||||
* has already observed the value of "1" (or the value we are incrementing
|
||||
* from). This guarantees us that for any given value e_g, any threads with-in
|
||||
* critical sections (referred to as "active" threads from here on) would have
|
||||
* an e value of e_g-1 or e_g. This also means that hazardous references may be
|
||||
* shared in both e_g-1 and e_g even if they are logically deleted in e_g.
|
||||
*
|
||||
* For example, assume all threads have an e value of e_g. Another thread may
|
||||
* increment e_g to e_g+1. Older threads may have a reference to an object
|
||||
* which is only deleted in e_g+1. It could be that reader threads are
|
||||
* executing some hash table look-ups, while some other writer thread (which
|
||||
* causes epoch counter tick) actually deletes the same items that reader
|
||||
* threads are looking up (this writer thread having an e value of e_g+1).
|
||||
* This is possible if the writer thread re-observes the epoch after the
|
||||
* counter tick.
|
||||
*
|
||||
* Pseudo-code for writer:
|
||||
* ck_epoch_begin()
|
||||
* ht_delete(x)
|
||||
* ck_epoch_end()
|
||||
* ck_epoch_begin()
|
||||
* ht_delete(x)
|
||||
* ck_epoch_end()
|
||||
*
|
||||
* Pseudo-code for reader:
|
||||
* for (;;) {
|
||||
* x = ht_lookup(x)
|
||||
* ck_pr_inc(&x->value);
|
||||
* }
|
||||
*
|
||||
* Of course, it is also possible for references logically deleted at e_g-1 to
|
||||
* still be accessed at e_g as threads are "active" at the same time
|
||||
* (real-world time) mutating shared objects.
|
||||
*
|
||||
* Now, if the epoch counter is ticked to e_g+1, then no new hazardous
|
||||
* references could exist to objects logically deleted at e_g-1. The reason for
|
||||
* this is that at e_g+1, all epoch read-side critical sections started at
|
||||
* e_g-1 must have been completed. If any epoch read-side critical sections at
|
||||
* e_g-1 were still active, then we would never increment to e_g+1 (active != 0
|
||||
* ^ e != e_g). Additionally, e_g may still have hazardous references to
|
||||
* objects logically deleted at e_g-1 which means objects logically deleted at
|
||||
* e_g-1 cannot be deleted at e_g+1 unless all threads have observed e_g+1
|
||||
* (since it is valid for active threads to be at e_g and threads at e_g still
|
||||
* require safe memory accesses).
|
||||
*
|
||||
* However, at e_g+2, all active threads must be either at e_g+1 or e_g+2.
|
||||
* Though e_g+2 may share hazardous references with e_g+1, and e_g+1 shares
|
||||
* hazardous references to e_g, no active threads are at e_g or e_g-1. This
|
||||
* means no hazardous references could exist to objects deleted at e_g-1 (at
|
||||
* e_g+2).
|
||||
*
|
||||
* To summarize these important points,
|
||||
* 1) Active threads will always have a value of e_g or e_g-1.
|
||||
* 2) Items that are logically deleted e_g or e_g-1 cannot be physically
|
||||
* deleted.
|
||||
* 3) Objects logically deleted at e_g-1 can be physically destroyed at e_g+2
|
||||
* or at e_g+1 if no threads are at e_g.
|
||||
*
|
||||
* Last but not least, if we are at e_g+2, then no active thread is at e_g
|
||||
* which means it is safe to apply modulo-3 arithmetic to e_g value in order to
|
||||
* re-use e_g to represent the e_g+3 state. This means it is sufficient to
|
||||
* represent e_g using only the values 0, 1 or 2. Every time a thread re-visits
|
||||
* a e_g (which can be determined with a non-empty deferral list) it can assume
|
||||
* objects in the e_g deferral list involved at least three e_g transitions and
|
||||
* are thus, safe, for physical deletion.
|
||||
*
|
||||
* Blocking semantics for epoch reclamation have additional restrictions.
|
||||
* Though we only require three deferral lists, reasonable blocking semantics
|
||||
* must be able to more gracefully handle bursty write work-loads which could
|
||||
* easily cause e_g wrap-around if modulo-3 arithmetic is used. This allows for
|
||||
* easy-to-trigger live-lock situations. The work-around to this is to not
|
||||
* apply modulo arithmetic to e_g but only to deferral list indexing.
|
||||
*/
|
||||
#define CK_EPOCH_GRACE 3U
|
||||
|
||||
enum {
|
||||
CK_EPOCH_STATE_USED = 0,
|
||||
CK_EPOCH_STATE_FREE = 1
|
||||
};
|
||||
|
||||
CK_STACK_CONTAINER(struct ck_epoch_record, record_next,
|
||||
ck_epoch_record_container)
|
||||
CK_STACK_CONTAINER(struct ck_epoch_entry, stack_entry,
|
||||
ck_epoch_entry_container)
|
||||
|
||||
#define CK_EPOCH_SENSE_MASK (CK_EPOCH_SENSE - 1)
|
||||
|
||||
void
|
||||
_ck_epoch_delref(struct ck_epoch_record *record,
|
||||
struct ck_epoch_section *section)
|
||||
{
|
||||
struct ck_epoch_ref *current, *other;
|
||||
unsigned int i = section->bucket;
|
||||
|
||||
current = &record->local.bucket[i];
|
||||
current->count--;
|
||||
|
||||
if (current->count > 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If the current bucket no longer has any references, then
|
||||
* determine whether we have already transitioned into a newer
|
||||
* epoch. If so, then make sure to update our shared snapshot
|
||||
* to allow for forward progress.
|
||||
*
|
||||
* If no other active bucket exists, then the record will go
|
||||
* inactive in order to allow for forward progress.
|
||||
*/
|
||||
other = &record->local.bucket[(i + 1) &
|
||||
CK_EPOCH_SENSE_MASK];
|
||||
if (other->count > 0 &&
|
||||
((int)(current->epoch - other->epoch) < 0)) {
|
||||
/*
|
||||
* The other epoch value is actually the newest,
|
||||
* transition to it.
|
||||
*/
|
||||
ck_pr_store_uint(&record->epoch, other->epoch);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void
|
||||
_ck_epoch_addref(struct ck_epoch_record *record,
|
||||
struct ck_epoch_section *section)
|
||||
{
|
||||
struct ck_epoch *global = record->global;
|
||||
struct ck_epoch_ref *ref;
|
||||
unsigned int epoch, i;
|
||||
|
||||
epoch = ck_pr_load_uint(&global->epoch);
|
||||
i = epoch & CK_EPOCH_SENSE_MASK;
|
||||
ref = &record->local.bucket[i];
|
||||
|
||||
if (ref->count++ == 0) {
|
||||
#ifndef CK_MD_TSO
|
||||
struct ck_epoch_ref *previous;
|
||||
|
||||
/*
|
||||
* The system has already ticked. If another non-zero bucket
|
||||
* exists, make sure to order our observations with respect
|
||||
* to it. Otherwise, it is possible to acquire a reference
|
||||
* from the previous epoch generation.
|
||||
*
|
||||
* On TSO architectures, the monoticity of the global counter
|
||||
* and load-{store, load} ordering are sufficient to guarantee
|
||||
* this ordering.
|
||||
*/
|
||||
previous = &record->local.bucket[(i + 1) &
|
||||
CK_EPOCH_SENSE_MASK];
|
||||
if (previous->count > 0)
|
||||
ck_pr_fence_acqrel();
|
||||
#endif /* !CK_MD_TSO */
|
||||
|
||||
/*
|
||||
* If this is this is a new reference into the current
|
||||
* bucket then cache the associated epoch value.
|
||||
*/
|
||||
ref->epoch = epoch;
|
||||
}
|
||||
|
||||
section->bucket = i;
|
||||
return;
|
||||
}
|
||||
|
||||
void
|
||||
ck_epoch_init(struct ck_epoch *global)
|
||||
{
|
||||
|
||||
ck_stack_init(&global->records);
|
||||
global->epoch = 1;
|
||||
global->n_free = 0;
|
||||
ck_pr_fence_store();
|
||||
return;
|
||||
}
|
||||
|
||||
struct ck_epoch_record *
|
||||
ck_epoch_recycle(struct ck_epoch *global)
|
||||
{
|
||||
struct ck_epoch_record *record;
|
||||
ck_stack_entry_t *cursor;
|
||||
unsigned int state;
|
||||
|
||||
if (ck_pr_load_uint(&global->n_free) == 0)
|
||||
return NULL;
|
||||
|
||||
CK_STACK_FOREACH(&global->records, cursor) {
|
||||
record = ck_epoch_record_container(cursor);
|
||||
|
||||
if (ck_pr_load_uint(&record->state) == CK_EPOCH_STATE_FREE) {
|
||||
/* Serialize with respect to deferral list clean-up. */
|
||||
ck_pr_fence_load();
|
||||
state = ck_pr_fas_uint(&record->state,
|
||||
CK_EPOCH_STATE_USED);
|
||||
if (state == CK_EPOCH_STATE_FREE) {
|
||||
ck_pr_dec_uint(&global->n_free);
|
||||
return record;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void
|
||||
ck_epoch_register(struct ck_epoch *global, struct ck_epoch_record *record)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
record->global = global;
|
||||
record->state = CK_EPOCH_STATE_USED;
|
||||
record->active = 0;
|
||||
record->epoch = 0;
|
||||
record->n_dispatch = 0;
|
||||
record->n_peak = 0;
|
||||
record->n_pending = 0;
|
||||
memset(&record->local, 0, sizeof record->local);
|
||||
|
||||
for (i = 0; i < CK_EPOCH_LENGTH; i++)
|
||||
ck_stack_init(&record->pending[i]);
|
||||
|
||||
ck_pr_fence_store();
|
||||
ck_stack_push_upmc(&global->records, &record->record_next);
|
||||
return;
|
||||
}
|
||||
|
||||
void
|
||||
ck_epoch_unregister(struct ck_epoch_record *record)
|
||||
{
|
||||
struct ck_epoch *global = record->global;
|
||||
size_t i;
|
||||
|
||||
record->active = 0;
|
||||
record->epoch = 0;
|
||||
record->n_dispatch = 0;
|
||||
record->n_peak = 0;
|
||||
record->n_pending = 0;
|
||||
memset(&record->local, 0, sizeof record->local);
|
||||
|
||||
for (i = 0; i < CK_EPOCH_LENGTH; i++)
|
||||
ck_stack_init(&record->pending[i]);
|
||||
|
||||
ck_pr_fence_store();
|
||||
ck_pr_store_uint(&record->state, CK_EPOCH_STATE_FREE);
|
||||
ck_pr_inc_uint(&global->n_free);
|
||||
return;
|
||||
}
|
||||
|
||||
static struct ck_epoch_record *
|
||||
ck_epoch_scan(struct ck_epoch *global,
|
||||
struct ck_epoch_record *cr,
|
||||
unsigned int epoch,
|
||||
bool *af)
|
||||
{
|
||||
ck_stack_entry_t *cursor;
|
||||
|
||||
if (cr == NULL) {
|
||||
cursor = CK_STACK_FIRST(&global->records);
|
||||
*af = false;
|
||||
} else {
|
||||
cursor = &cr->record_next;
|
||||
*af = true;
|
||||
}
|
||||
|
||||
while (cursor != NULL) {
|
||||
unsigned int state, active;
|
||||
|
||||
cr = ck_epoch_record_container(cursor);
|
||||
|
||||
state = ck_pr_load_uint(&cr->state);
|
||||
if (state & CK_EPOCH_STATE_FREE) {
|
||||
cursor = CK_STACK_NEXT(cursor);
|
||||
continue;
|
||||
}
|
||||
|
||||
active = ck_pr_load_uint(&cr->active);
|
||||
*af |= active;
|
||||
|
||||
if (active != 0 && ck_pr_load_uint(&cr->epoch) != epoch)
|
||||
return cr;
|
||||
|
||||
cursor = CK_STACK_NEXT(cursor);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
ck_epoch_dispatch(struct ck_epoch_record *record, unsigned int e)
|
||||
{
|
||||
unsigned int epoch = e & (CK_EPOCH_LENGTH - 1);
|
||||
ck_stack_entry_t *head, *next, *cursor;
|
||||
unsigned int i = 0;
|
||||
|
||||
head = CK_STACK_FIRST(&record->pending[epoch]);
|
||||
ck_stack_init(&record->pending[epoch]);
|
||||
|
||||
for (cursor = head; cursor != NULL; cursor = next) {
|
||||
struct ck_epoch_entry *entry =
|
||||
ck_epoch_entry_container(cursor);
|
||||
|
||||
next = CK_STACK_NEXT(cursor);
|
||||
entry->function(entry);
|
||||
i++;
|
||||
}
|
||||
|
||||
if (record->n_pending > record->n_peak)
|
||||
record->n_peak = record->n_pending;
|
||||
|
||||
record->n_dispatch += i;
|
||||
record->n_pending -= i;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reclaim all objects associated with a record.
|
||||
*/
|
||||
void
|
||||
ck_epoch_reclaim(struct ck_epoch_record *record)
|
||||
{
|
||||
unsigned int epoch;
|
||||
|
||||
for (epoch = 0; epoch < CK_EPOCH_LENGTH; epoch++)
|
||||
ck_epoch_dispatch(record, epoch);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function must not be called with-in read section.
|
||||
*/
|
||||
void
|
||||
ck_epoch_synchronize(struct ck_epoch_record *record)
|
||||
{
|
||||
struct ck_epoch *global = record->global;
|
||||
struct ck_epoch_record *cr;
|
||||
unsigned int delta, epoch, goal, i;
|
||||
bool active;
|
||||
|
||||
ck_pr_fence_memory();
|
||||
|
||||
/*
|
||||
* The observation of the global epoch must be ordered with respect to
|
||||
* all prior operations. The re-ordering of loads is permitted given
|
||||
* monoticity of global epoch counter.
|
||||
*
|
||||
* If UINT_MAX concurrent mutations were to occur then it is possible
|
||||
* to encounter an ABA-issue. If this is a concern, consider tuning
|
||||
* write-side concurrency.
|
||||
*/
|
||||
delta = epoch = ck_pr_load_uint(&global->epoch);
|
||||
goal = epoch + CK_EPOCH_GRACE;
|
||||
|
||||
for (i = 0, cr = NULL; i < CK_EPOCH_GRACE - 1; cr = NULL, i++) {
|
||||
bool r;
|
||||
|
||||
/*
|
||||
* Determine whether all threads have observed the current
|
||||
* epoch with respect to the updates on invocation.
|
||||
*/
|
||||
while (cr = ck_epoch_scan(global, cr, delta, &active),
|
||||
cr != NULL) {
|
||||
unsigned int e_d;
|
||||
|
||||
ck_pr_stall();
|
||||
|
||||
/*
|
||||
* Another writer may have already observed a grace
|
||||
* period.
|
||||
*/
|
||||
e_d = ck_pr_load_uint(&global->epoch);
|
||||
if (e_d != delta) {
|
||||
delta = e_d;
|
||||
goto reload;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we have observed all threads as inactive, then we assume
|
||||
* we are at a grace period.
|
||||
*/
|
||||
if (active == false)
|
||||
break;
|
||||
|
||||
/*
|
||||
* Increment current epoch. CAS semantics are used to eliminate
|
||||
* increment operations for synchronization that occurs for the
|
||||
* same global epoch value snapshot.
|
||||
*
|
||||
* If we can guarantee there will only be one active barrier or
|
||||
* epoch tick at a given time, then it is sufficient to use an
|
||||
* increment operation. In a multi-barrier workload, however,
|
||||
* it is possible to overflow the epoch value if we apply
|
||||
* modulo-3 arithmetic.
|
||||
*/
|
||||
r = ck_pr_cas_uint_value(&global->epoch, delta, delta + 1,
|
||||
&delta);
|
||||
|
||||
/* Order subsequent thread active checks. */
|
||||
ck_pr_fence_atomic_load();
|
||||
|
||||
/*
|
||||
* If CAS has succeeded, then set delta to latest snapshot.
|
||||
* Otherwise, we have just acquired latest snapshot.
|
||||
*/
|
||||
delta = delta + r;
|
||||
continue;
|
||||
|
||||
reload:
|
||||
if ((goal > epoch) & (delta >= goal)) {
|
||||
/*
|
||||
* Right now, epoch overflow is handled as an edge
|
||||
* case. If we have already observed an epoch
|
||||
* generation, then we can be sure no hazardous
|
||||
* references exist to objects from this generation. We
|
||||
* can actually avoid an addtional scan step at this
|
||||
* point.
|
||||
*/
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* A majority of use-cases will not require full barrier semantics.
|
||||
* However, if non-temporal instructions are used, full barrier
|
||||
* semantics are necessary.
|
||||
*/
|
||||
ck_pr_fence_memory();
|
||||
record->epoch = delta;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
 * Synchronize on the record (ck_epoch_synchronize) and then dispatch all
 * of its pending stacks (ck_epoch_reclaim).
 */
void
ck_epoch_barrier(struct ck_epoch_record *record)
{

	ck_epoch_synchronize(record);
	ck_epoch_reclaim(record);
}
|
||||
|
||||
/*
|
||||
* It may be worth it to actually apply these deferral semantics to an epoch
|
||||
* that was observed at ck_epoch_call time. The problem is that the latter
|
||||
* would require a full fence.
|
||||
*
|
||||
* ck_epoch_call will dispatch to the latest epoch snapshot that was observed.
|
||||
* There are cases where it will fail to reclaim as early as it could. If this
|
||||
* becomes a problem, we could actually use a heap for epoch buckets but that
|
||||
* is far from ideal too.
|
||||
*/
|
||||
bool
|
||||
ck_epoch_poll(struct ck_epoch_record *record)
|
||||
{
|
||||
bool active;
|
||||
unsigned int epoch;
|
||||
unsigned int snapshot;
|
||||
struct ck_epoch_record *cr = NULL;
|
||||
struct ck_epoch *global = record->global;
|
||||
|
||||
epoch = ck_pr_load_uint(&global->epoch);
|
||||
|
||||
/* Serialize epoch snapshots with respect to global epoch. */
|
||||
ck_pr_fence_memory();
|
||||
cr = ck_epoch_scan(global, cr, epoch, &active);
|
||||
if (cr != NULL) {
|
||||
record->epoch = epoch;
|
||||
return false;
|
||||
}
|
||||
|
||||
/* We are at a grace period if all threads are inactive. */
|
||||
if (active == false) {
|
||||
record->epoch = epoch;
|
||||
for (epoch = 0; epoch < CK_EPOCH_LENGTH; epoch++)
|
||||
ck_epoch_dispatch(record, epoch);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* If an active thread exists, rely on epoch observation. */
|
||||
if (ck_pr_cas_uint_value(&global->epoch, epoch, epoch + 1,
|
||||
&snapshot) == false) {
|
||||
record->epoch = snapshot;
|
||||
} else {
|
||||
record->epoch = epoch + 1;
|
||||
}
|
||||
|
||||
ck_epoch_dispatch(record, epoch + 1);
|
||||
return true;
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue