Merging upstream version 0.7.1 (Closes: #991419).
Signed-off-by: Daniel Baumann <daniel@debian.org>
parent 05c588e9d7, commit 9e09e0ef69
99 changed files with 6727 additions and 943 deletions
@@ -50,7 +50,7 @@ ck_backoff_eb(unsigned int *c)
 	for (i = 0; i < ceiling; i++)
 		ck_pr_barrier();
 
-	*c = ceiling <<= ceiling < CK_BACKOFF_CEILING;
+	*c = ceiling << (ceiling < CK_BACKOFF_CEILING);
 	return;
 }
 
@@ -50,6 +50,7 @@
  * Container function.
+ * This relies on (compiler) implementation-defined behavior.
  */
 #ifndef CK_CC_CONTAINER
 #define CK_CC_CONTAINER(F, T, M, N)				\
 	CK_CC_INLINE static T *					\
 	N(F *p)							\
@@ -57,6 +58,7 @@
 		F *n = p;					\
 		return (T *)(void *)(((char *)n) - ((size_t)&((T *)0)->M)); \
 	}
 #endif
 
 #define CK_CC_PAD(x) union { char pad[x]; }
 
@@ -104,41 +106,35 @@
 #define CK_CC_TYPEOF(X, DEFAULT) (DEFAULT)
 #endif
 
+#define CK_F_CC_FFS_G(L, T)				\
+CK_CC_INLINE static int					\
+ck_cc_##L(T v)						\
+{							\
+	unsigned int i;					\
+							\
+	if (v == 0)					\
+		return 0;				\
+							\
+	for (i = 1; (v & 1) == 0; i++, v >>= 1);	\
+	return i;					\
+}
+
 #ifndef CK_F_CC_FFS
 #define CK_F_CC_FFS
-CK_CC_INLINE static int
-ck_cc_ffs(unsigned int x)
-{
-	unsigned int i;
-
-	if (x == 0)
-		return 0;
-
-	for (i = 1; (x & 1) == 0; i++, x >>= 1);
-
-	return i;
-}
-#endif
+CK_F_CC_FFS_G(ffs, unsigned int)
+#endif /* CK_F_CC_FFS */
 
-#ifndef CK_F_CC_CLZ
-#define CK_F_CC_CLZ
-#include <ck_limits.h>
+#ifndef CK_F_CC_FFSL
+#define CK_F_CC_FFSL
+CK_F_CC_FFS_G(ffsl, unsigned long)
+#endif /* CK_F_CC_FFSL */
 
-CK_CC_INLINE static int
-ck_cc_clz(unsigned int x)
-{
-	unsigned int count, i;
-
-	for (count = 0, i = sizeof(unsigned int) * CHAR_BIT; i > 0; count++) {
-		unsigned int bit = 1U << --i;
-
-		if (x & bit)
-			break;
-	}
-
-	return count;
-}
-#endif
+#ifndef CK_F_CC_FFSLL
+#define CK_F_CC_FFSLL
+CK_F_CC_FFS_G(ffsll, unsigned long long)
+#endif /* CK_F_CC_FFSLL */
+
+#undef CK_F_CC_FFS_G
 
 #ifndef CK_F_CC_CTZ
 #define CK_F_CC_CTZ
@@ -151,7 +147,6 @@ ck_cc_ctz(unsigned int x)
 		return 0;
 
	for (i = 0; (x & 1) == 0; i++, x >>= 1);
-
 	return i;
 }
 #endif
include/ck_ec.h (new file, 945 lines)
@@ -0,0 +1,945 @@
/*
 * Copyright 2018 Paul Khuong, Google LLC.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Overview
 * ========
 *
 * ck_ec implements 32- and 64-bit event counts. Event counts let us
 * easily integrate OS-level blocking (e.g., futexes) in lock-free
 * protocols. Waiters block conditionally, if the event count's value
 * is still equal to some old value.
 *
 * Event counts come in four variants: 32 and 64 bit (with one bit
 * stolen for internal signaling, so 31 and 63 bit counters), and
 * single or multiple producers (wakers). Waiters are always multiple
 * consumers. The 32 bit variants are smaller, and more efficient,
 * especially in single producer mode. The 64 bit variants are larger,
 * but practically invulnerable to ABA.
 *
 * The 32 bit variant is always available. The 64 bit variant is only
 * available if CK supports 64-bit atomic operations. Currently,
 * specialization for single producer is only implemented for x86 and
 * x86-64, on compilers that support GCC extended inline assembly;
 * other platforms fall back to the multiple producer code path.
 *
 * A typical usage pattern is:
 *
 * 1. On the producer side:
 *
 *    - Make changes to some shared data structure, without involving
 *      the event count at all.
 *    - After each change, call ck_ec_inc on the event count. The call
 *      acts as a write-write barrier, and wakes up any consumer blocked
 *      on the event count (waiting for new changes).
 *
 * 2. On the consumer side:
 *
 *    - Snapshot ck_ec_value of the event count. The call acts as a
 *      read barrier.
 *    - Read and process the shared data structure.
 *    - Wait for new changes by calling ck_ec_wait with the snapshot value.
 *
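 * For example, in code (an illustrative sketch, not from the original
 * header; ec is an assumed shared struct ck_ec32, and mode a struct
 * ck_ec_mode configured as described below):
 *
 *     Producer:
 *         update_shared_data();
 *         ck_ec_inc(&ec, &mode);
 *
 *     Consumer:
 *         for (;;) {
 *                 uint32_t snap = ck_ec_value(&ec);
 *
 *                 process_shared_data();
 *                 ck_ec_wait(&ec, &mode, snap, NULL);
 *         }
 *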
 * Some data structures may opt for tighter integration with their
 * event count. For example, an SPMC ring buffer or disruptor might
 * use the event count's value as the write pointer. If the buffer is
 * regularly full, it might also make sense to store the read pointer
 * in an MP event count.
 *
 * This event count implementation supports tighter integration in two
 * ways.
 *
 * Producers may opt to increment by an arbitrary value (less than
 * INT32_MAX / INT64_MAX), in order to encode, e.g., byte
 * offsets. Larger increment values make wraparound more likely, so
 * the increments should still be relatively small.
 *
 * Consumers may pass a predicate to ck_ec_wait_pred. This predicate
 * can make `ck_ec_wait_pred` return early, before the event count's
 * value changes, and can override the deadline passed to futex_wait.
 * This lets a consumer block on one event count, while optimistically
 * looking at other waking conditions.
 *
 * API Reference
 * =============
 *
 * When compiled as C11 or later, this header defines type-generic
 * macros for ck_ec32 and ck_ec64; the reference describes this
 * type-generic API.
 *
 * ck_ec needs additional OS primitives to determine the current time,
 * to wait on an address, and to wake all threads waiting on a given
 * address. These are defined with fields in a struct ck_ec_ops. Each
 * ck_ec_ops may additionally define the number of spin loop
 * iterations in the slow path, as well as the initial wait time in
 * the internal exponential backoff, the exponential scale factor, and
 * the right shift count (< 32).
 *
 * The ops, in addition to the single/multiple producer flag, are
 * encapsulated in a struct ck_ec_mode, passed to most ck_ec
 * operations.
 *
 * ec is a struct ck_ec32 *, or a struct ck_ec64 *.
 *
 * value is a uint32_t for ck_ec32, and a uint64_t for ck_ec64. It
 * never exceeds INT32_MAX and INT64_MAX respectively.
 *
 * mode is a struct ck_ec_mode *.
 *
 * deadline is either NULL, or a `const struct timespec *` that will
 * be treated as an absolute deadline.
 *
 * `void ck_ec_init(ec, value)`: initializes the event count to value.
 *
 * `value ck_ec_value(ec)`: returns the current value of the event
 * counter. This read acts as a read (acquire) barrier.
 *
 * `bool ck_ec_has_waiters(ec)`: returns whether some thread has
 * marked the event count as requiring an OS wakeup.
 *
 * `void ck_ec_inc(ec, mode)`: increments the value of the event
 * counter by one. This write acts as a write barrier. Wakes up
 * any waiting thread.
 *
 * `value ck_ec_add(ec, mode, value)`: increments the event counter by
 * `value`, and returns the event counter's previous value. This
 * write acts as a write barrier. Wakes up any waiting thread.
 *
 * `int ck_ec_deadline(struct timespec *new_deadline,
 *                     mode,
 *                     const struct timespec *timeout)`:
 * computes a deadline `timeout` away from the current time. If
 * timeout is NULL, computes a deadline in the infinite future. The
 * resulting deadline is written to `new_deadline`. Returns 0 on
 * success, and -1 if ops->gettime failed (without touching errno).
 *
 * `int ck_ec_wait(ec, mode, value, deadline)`: waits until the event
 * counter's value differs from `value`, or, if `deadline` is
 * provided and non-NULL, until the current time is after that
 * deadline. Use a deadline with tv_sec = 0 for a non-blocking
 * execution. Returns 0 if the event counter has changed, and -1 on
 * timeout. This function acts as a read (acquire) barrier.
 *
 * `int ck_ec_wait_pred(ec, mode, value, pred, data, deadline)`: waits
 * until the event counter's value differs from `value`, or until
 * `pred` returns non-zero, or, if `deadline` is provided and
 * non-NULL, until the current time is after that deadline. Use a
 * deadline with tv_sec = 0 for a non-blocking execution. Returns 0 if
 * the event counter has changed, `pred`'s return value if non-zero,
 * and -1 on timeout. This function acts as a read (acquire) barrier.
 *
 * `pred` is always called as `pred(data, iteration_deadline, now)`,
 * where `iteration_deadline` is a timespec of the deadline for this
 * exponential backoff iteration, and `now` is the current time. If
 * `pred` returns a non-zero value, that value is immediately returned
 * to the waiter. Otherwise, `pred` is free to modify
 * `iteration_deadline` (moving it further in the future is a bad
 * idea).
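 *
 * For example, a predicate that makes the waiter give up once a
 * cancellation flag is raised might look like this sketch (the flag
 * protocol and the -2 result are illustrative, not part of the API):
 *
 *     static int
 *     cancelled_pred(const struct ck_ec_wait_state *state,
 *         struct timespec *deadline)
 *     {
 *             const int *cancelled = state->data;
 *
 *             (void)deadline;
 *             return (*cancelled != 0) ? -2 : 0;
 *     }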
 *
 * Implementation notes
 * ====================
 *
 * The multiple producer implementation is a regular locked event
 * count, with a single flag bit to denote the need to wake up waiting
 * threads.
 *
 * The single producer specialization is heavily tied to
 * [x86-TSO](https://www.cl.cam.ac.uk/~pes20/weakmemory/cacm.pdf), and
 * to non-atomic read-modify-write instructions (e.g., `inc mem`);
 * these non-atomic RMW let us write to the same memory locations with
 * atomic and non-atomic instructions, without suffering from process
 * scheduling stalls.
 *
 * The reason we can mix atomic and non-atomic writes to the `counter`
 * word is that every non-atomic write obviates the need for the
 * atomically flipped flag bit: we only use non-atomic writes to
 * update the event count, and the atomic flag only informs the
 * producer that we would like a futex_wake, because of the update.
 * We only require the non-atomic RMW counter update to prevent
 * preemption from introducing arbitrarily long worst case delays.
 *
 * Correctness does not rely on the usual ordering argument: in the
 * absence of fences, there is no strict ordering between atomic and
 * non-atomic writes. The key is instead x86-TSO's guarantee that a
 * read is satisfied from the most recent buffered write in the local
 * store queue if there is one, or from memory if there is no write to
 * that address in the store queue.
 *
 * x86-TSO's constraint on reads suffices to guarantee that the
 * producer will never forget about a counter update. If the last
 * update is still queued, the new update will be based on the queued
 * value. Otherwise, the new update will be based on the value in
 * memory, which may or may not have had its flag flipped. In either
 * case, the value of the counter (modulo flag) is correct.
 *
 * When the producer forwards the counter's value from its store
 * queue, the new update might not preserve a flag flip. Any waiter
 * thus has to check from time to time to determine if it wasn't
 * woken up because the flag bit was silently cleared.
 *
 * In reality, the store queue in x86-TSO stands for in-flight
 * instructions in the chip's out-of-order backend. In the vast
 * majority of cases, instructions will only remain in flight for a
 * few hundred or a few thousand cycles. That's why ck_ec_wait spins on
 * the `counter` word for ~100 iterations after flipping its flag bit:
 * if the counter hasn't changed after that many iterations, it is
 * very likely that the producer's next counter update will observe
 * the flag flip.
 *
 * That's still not a hard guarantee of correctness. Conservatively,
 * we can expect that no instruction will remain in flight for more
 * than 1 second... if only because some interrupt will have forced
 * the chip to store its architectural state in memory, at which point
 * an instruction is either fully retired or rolled back. Interrupts,
 * particularly the pre-emption timer, are why single-producer updates
 * must happen in a single non-atomic read-modify-write instruction.
 * Having a single instruction as the critical section means we only
 * have to consider the worst-case execution time for that
 * instruction. That's easier than doing the same for a pair of
 * instructions, which an unlucky pre-emption could delay for
 * arbitrarily long.
 *
 * Thus, after a short spin loop, ck_ec_wait enters an exponential
 * backoff loop, where each "sleep" is instead a futex_wait. The
 * backoff is only necessary to handle rare cases where the flag flip
 * was overwritten after the spin loop. Eventually, more than one
 * second will have elapsed since the flag flip, and the sleep timeout
 * becomes infinite: since the flag bit has been set for much longer
 * than the time for which an instruction may remain in flight, the
 * flag will definitely be observed at the next counter update.
 *
 * The 64 bit ck_ec_wait pulls another trick: futexes only handle 32
 * bit ints, so we must treat the 64 bit counter's low 32 bits as an
 * int in futex_wait. That's a bit dodgy, but fine in practice, given
 * that the OS's futex code will always read whatever value is
 * currently in memory: even if the producer thread were to wait on
 * its own event count, the syscall and ring transition would empty
 * the store queue (the out-of-order execution backend).
 *
 * Finally, what happens when the producer is migrated to another core
 * or otherwise pre-empted? Migration must already incur a barrier, so
 * that thread always sees its own writes, so that's safe. As for
 * pre-emption, that requires storing the architectural state, which
 * means every instruction must either be executed fully or not at
 * all when pre-emption happens.
 */

#ifndef CK_EC_H
#define CK_EC_H
#include <ck_cc.h>
#include <ck_pr.h>
#include <ck_stdbool.h>
#include <ck_stdint.h>
#include <ck_stddef.h>
#include <sys/time.h>

/*
 * If we have ck_pr_faa_64 (and, presumably, ck_pr_load_64), we
 * support 63 bit counters.
 */
#ifdef CK_F_PR_FAA_64
#define CK_F_EC64
#endif /* CK_F_PR_FAA_64 */

/*
 * GCC inline assembly lets us exploit non-atomic read-modify-write
 * instructions on x86/x86_64 for a fast single-producer mode.
 *
 * If CK_F_EC_SP is not defined, CK_EC always uses the slower
 * multiple producer code.
 */
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
#define CK_F_EC_SP
#endif /* GNUC && (__i386__ || __x86_64__) */

struct ck_ec_ops;

struct ck_ec_wait_state {
	struct timespec start;	/* Time when we entered ck_ec_wait. */
	struct timespec now;	/* Time now. */
	const struct ck_ec_ops *ops;
	void *data;	/* Opaque pointer for the predicate's internal state. */
};

/*
 * ck_ec_ops defines system-specific functions to get the current time,
 * atomically wait on an address if it still has some expected value,
 * and to wake all threads waiting on an address.
 *
 * Each platform is expected to have a few (usually one) pointers to a
 * const ops struct, and to reuse it for all ck_ec_mode structs.
 */
struct ck_ec_ops {
	/* Populates out with the current time. Returns non-zero on failure. */
	int (*gettime)(const struct ck_ec_ops *, struct timespec *out);

	/*
	 * Waits on address if its value is still `expected`. If
	 * deadline is non-NULL, stops waiting once that deadline is
	 * reached. May return early for any reason.
	 */
	void (*wait32)(const struct ck_ec_wait_state *, const uint32_t *,
		       uint32_t expected, const struct timespec *deadline);

	/*
	 * Same as wait32, but for a 64 bit counter. Only used if
	 * CK_F_EC64 is defined.
	 *
	 * If the underlying blocking primitive only supports 32 bit
	 * control words, it should be safe to block on the least
	 * significant half of the 64 bit address.
	 */
	void (*wait64)(const struct ck_ec_wait_state *, const uint64_t *,
		       uint64_t expected, const struct timespec *deadline);

	/* Wakes up all threads waiting on address. */
	void (*wake32)(const struct ck_ec_ops *, const uint32_t *address);

	/*
	 * Same as wake32, but for a 64 bit counter. Only used if
	 * CK_F_EC64 is defined.
	 *
	 * When wait64 truncates the control word at address to only
	 * consider its least significant half, wake64 should perform
	 * any necessary fixup (e.g., on big endian platforms).
	 */
	void (*wake64)(const struct ck_ec_ops *, const uint64_t *address);

	/*
	 * Number of iterations for the initial busy wait. 0 defaults
	 * to 100 (not ABI stable).
	 */
	uint32_t busy_loop_iter;

	/*
	 * Delay in nanoseconds for the first iteration of the
	 * exponential backoff. 0 defaults to 2 ms (not ABI stable).
	 */
	uint32_t initial_wait_ns;

	/*
	 * Scale factor for the exponential backoff. 0 defaults to 8x
	 * (not ABI stable).
	 */
	uint32_t wait_scale_factor;

	/*
	 * Right shift count for the exponential backoff. The update
	 * after each iteration is
	 *     wait_ns = (wait_ns * wait_scale_factor) >> wait_shift_count,
	 * until one second has elapsed. After that, the deadline goes
	 * to infinity.
	 */
	uint32_t wait_shift_count;
};
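
/*
 * For example, a Linux futex-backed ops table might look like the
 * sketch below. This is illustrative only, and not part of this
 * header: the helper names are made up, error handling is elided, and
 * the 64 bit and tuning fields are left at their defaults. Note that
 * FUTEX_WAIT_BITSET interprets its timeout as an absolute
 * CLOCK_MONOTONIC deadline, which matches the contract above.
 *
 *	static int my_gettime(const struct ck_ec_ops *ops, struct timespec *out)
 *	{
 *		(void)ops;
 *		return clock_gettime(CLOCK_MONOTONIC, out);
 *	}
 *
 *	static void my_wait32(const struct ck_ec_wait_state *state,
 *	    const uint32_t *address, uint32_t expected,
 *	    const struct timespec *deadline)
 *	{
 *		(void)state;
 *		syscall(SYS_futex, address, FUTEX_WAIT_BITSET_PRIVATE,
 *		    expected, deadline, NULL, FUTEX_BITSET_MATCH_ANY);
 *	}
 *
 *	static void my_wake32(const struct ck_ec_ops *ops, const uint32_t *address)
 *	{
 *		(void)ops;
 *		syscall(SYS_futex, address, FUTEX_WAKE_PRIVATE, INT_MAX,
 *		    NULL, NULL, 0);
 *	}
 *
 *	static const struct ck_ec_ops my_ops = {
 *		.gettime = my_gettime,
 *		.wait32 = my_wait32,
 *		.wake32 = my_wake32
 *	};
 */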

/*
 * ck_ec_mode wraps the ops table, and informs the fast path whether
 * it should attempt to specialize for single producer mode.
 *
 * mode structs are expected to be exposed by value, e.g.,
 *
 *	extern const struct ck_ec_ops system_ec_ops;
 *
 *	static const struct ck_ec_mode ec_sp = {
 *		.ops = &system_ec_ops,
 *		.single_producer = true
 *	};
 *
 *	static const struct ck_ec_mode ec_mp = {
 *		.ops = &system_ec_ops,
 *		.single_producer = false
 *	};
 *
 * ck_ec_mode structs are only passed to inline functions defined in
 * this header, and never escape to their slow paths, so they should
 * not result in any object file size increase.
 */
struct ck_ec_mode {
	const struct ck_ec_ops *ops;
	/*
	 * If single_producer is true, the event count has a unique
	 * incrementer. The implementation will specialize ck_ec_inc
	 * and ck_ec_add if possible (if CK_F_EC_SP is defined).
	 */
	bool single_producer;
};

struct ck_ec32 {
	/* Flag is "sign" bit, value in bits 0:30. */
	uint32_t counter;
};

typedef struct ck_ec32 ck_ec32_t;

#ifdef CK_F_EC64
struct ck_ec64 {
	/*
	 * Flag is bottom bit, value in bits 1:63. Eventcount only
	 * works on x86-64 (i.e., little endian), so the futex int
	 * lies in the first 4 (bottom) bytes.
	 */
	uint64_t counter;
};

typedef struct ck_ec64 ck_ec64_t;
#endif /* CK_F_EC64 */

#define CK_EC_INITIALIZER { .counter = 0 }

/*
 * Initializes the event count to `value`. The value must not
 * exceed INT32_MAX.
 */
static void ck_ec32_init(struct ck_ec32 *ec, uint32_t value);

#ifndef CK_F_EC64
#define ck_ec_init ck_ec32_init
#else
/*
 * Initializes the event count to `value`. The value must not
 * exceed INT64_MAX.
 */
static void ck_ec64_init(struct ck_ec64 *ec, uint64_t value);

#if __STDC_VERSION__ >= 201112L
#define ck_ec_init(EC, VALUE)				\
	(_Generic(*(EC),				\
		struct ck_ec32 : ck_ec32_init,		\
		struct ck_ec64 : ck_ec64_init)((EC), (VALUE)))
#endif /* __STDC_VERSION__ */
#endif /* CK_F_EC64 */
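
/*
 * For instance (an illustrative note, not part of the header): when
 * CK_F_EC64 is available and C11 is in effect, the _Generic macro
 * above dispatches on the counter type, so
 *
 *	struct ck_ec32 ec;
 *
 *	ck_ec_init(&ec, 0);
 *
 * expands to ck_ec32_init(&ec, 0), and the same call with a
 * struct ck_ec64 * would select ck_ec64_init.
 */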

/*
 * Returns the counter value in the event count. The value is at most
 * INT32_MAX.
 */
static uint32_t ck_ec32_value(const struct ck_ec32* ec);

#ifndef CK_F_EC64
#define ck_ec_value ck_ec32_value
#else
/*
 * Returns the counter value in the event count. The value is at most
 * INT64_MAX.
 */
static uint64_t ck_ec64_value(const struct ck_ec64* ec);

#if __STDC_VERSION__ >= 201112L
#define ck_ec_value(EC)					\
	(_Generic(*(EC),				\
		struct ck_ec32 : ck_ec32_value,		\
		struct ck_ec64 : ck_ec64_value)((EC)))
#endif /* __STDC_VERSION__ */
#endif /* CK_F_EC64 */

/*
 * Returns whether there may be slow pathed waiters that need an
 * explicit OS wakeup for this event count.
 */
static bool ck_ec32_has_waiters(const struct ck_ec32 *ec);

#ifndef CK_F_EC64
#define ck_ec_has_waiters ck_ec32_has_waiters
#else
static bool ck_ec64_has_waiters(const struct ck_ec64 *ec);

#if __STDC_VERSION__ >= 201112L
#define ck_ec_has_waiters(EC)				\
	(_Generic(*(EC),				\
		struct ck_ec32 : ck_ec32_has_waiters,	\
		struct ck_ec64 : ck_ec64_has_waiters)((EC)))
#endif /* __STDC_VERSION__ */
#endif /* CK_F_EC64 */

/*
 * Increments the counter value in the event count by one, and wakes
 * up any waiter.
 */
static void ck_ec32_inc(struct ck_ec32 *ec, const struct ck_ec_mode *mode);

#ifndef CK_F_EC64
#define ck_ec_inc ck_ec32_inc
#else
static void ck_ec64_inc(struct ck_ec64 *ec, const struct ck_ec_mode *mode);

#if __STDC_VERSION__ >= 201112L
#define ck_ec_inc(EC, MODE)				\
	(_Generic(*(EC),				\
		struct ck_ec32 : ck_ec32_inc,		\
		struct ck_ec64 : ck_ec64_inc)((EC), (MODE)))
#endif /* __STDC_VERSION__ */
#endif /* CK_F_EC64 */

/*
 * Increments the counter value in the event count by delta, wakes
 * up any waiter, and returns the previous counter value.
 */
static uint32_t ck_ec32_add(struct ck_ec32 *ec,
			    const struct ck_ec_mode *mode,
			    uint32_t delta);

#ifndef CK_F_EC64
#define ck_ec_add ck_ec32_add
#else
static uint64_t ck_ec64_add(struct ck_ec64 *ec,
			    const struct ck_ec_mode *mode,
			    uint64_t delta);

#if __STDC_VERSION__ >= 201112L
#define ck_ec_add(EC, MODE, DELTA)			\
	(_Generic(*(EC),				\
		struct ck_ec32 : ck_ec32_add,		\
		struct ck_ec64 : ck_ec64_add)((EC), (MODE), (DELTA)))
#endif /* __STDC_VERSION__ */
#endif /* CK_F_EC64 */

/*
 * Populates `new_deadline` with a deadline `timeout` in the future.
 * Returns 0 on success, and -1 if clock_gettime failed, in which
 * case errno is left as is.
 */
static int ck_ec_deadline(struct timespec *new_deadline,
			  const struct ck_ec_mode *mode,
			  const struct timespec *timeout);
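
/*
 * For example (a sketch; `ec`, `mode` and `snap` are the assumed names
 * from the usage pattern above): wait for a change, but for at most
 * one millisecond.
 *
 *	struct timespec timeout = { .tv_sec = 0, .tv_nsec = 1000 * 1000 };
 *	struct timespec deadline;
 *
 *	if (ck_ec_deadline(&deadline, &mode, &timeout) == 0)
 *		(void)ck_ec_wait(&ec, &mode, snap, &deadline);
 */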

/*
 * Waits until the counter value in the event count differs from
 * old_value, or, if deadline is non-NULL, until CLOCK_MONOTONIC is
 * past the deadline.
 *
 * Returns 0 on success, and -1 on timeout.
 */
static int ck_ec32_wait(struct ck_ec32 *ec,
			const struct ck_ec_mode *mode,
			uint32_t old_value,
			const struct timespec *deadline);

#ifndef CK_F_EC64
#define ck_ec_wait ck_ec32_wait
#else
static int ck_ec64_wait(struct ck_ec64 *ec,
			const struct ck_ec_mode *mode,
			uint64_t old_value,
			const struct timespec *deadline);

#if __STDC_VERSION__ >= 201112L
#define ck_ec_wait(EC, MODE, OLD_VALUE, DEADLINE)	\
	(_Generic(*(EC),				\
		struct ck_ec32 : ck_ec32_wait,		\
		struct ck_ec64 : ck_ec64_wait)((EC), (MODE),	\
			(OLD_VALUE), (DEADLINE)))

#endif /* __STDC_VERSION__ */
#endif /* CK_F_EC64 */

/*
 * Waits until the counter value in the event count differs from
 * old_value, pred returns non-zero, or, if deadline is non-NULL,
 * until CLOCK_MONOTONIC is past the deadline.
 *
 * Returns 0 on success, -1 on timeout, and the return value of pred
 * if it returns non-zero.
 *
 * A NULL pred represents a function that always returns 0.
 */
static int ck_ec32_wait_pred(struct ck_ec32 *ec,
			     const struct ck_ec_mode *mode,
			     uint32_t old_value,
			     int (*pred)(const struct ck_ec_wait_state *,
					 struct timespec *deadline),
			     void *data,
			     const struct timespec *deadline);

#ifndef CK_F_EC64
#define ck_ec_wait_pred ck_ec32_wait_pred
#else
static int ck_ec64_wait_pred(struct ck_ec64 *ec,
			     const struct ck_ec_mode *mode,
			     uint64_t old_value,
			     int (*pred)(const struct ck_ec_wait_state *,
					 struct timespec *deadline),
			     void *data,
			     const struct timespec *deadline);

#if __STDC_VERSION__ >= 201112L
#define ck_ec_wait_pred(EC, MODE, OLD_VALUE, PRED, DATA, DEADLINE) \
	(_Generic(*(EC),				\
		struct ck_ec32 : ck_ec32_wait_pred,	\
		struct ck_ec64 : ck_ec64_wait_pred)	\
	((EC), (MODE), (OLD_VALUE), (PRED), (DATA), (DEADLINE)))
#endif /* __STDC_VERSION__ */
#endif /* CK_F_EC64 */

/*
 * Inline implementation details. 32 bit first, then 64 bit
 * conditionally.
 */
CK_CC_FORCE_INLINE void ck_ec32_init(struct ck_ec32 *ec, uint32_t value)
{
	ec->counter = value & ~(1UL << 31);
	return;
}

CK_CC_FORCE_INLINE uint32_t ck_ec32_value(const struct ck_ec32 *ec)
{
	uint32_t ret = ck_pr_load_32(&ec->counter) & ~(1UL << 31);

	ck_pr_fence_acquire();
	return ret;
}

CK_CC_FORCE_INLINE bool ck_ec32_has_waiters(const struct ck_ec32 *ec)
{
	return ck_pr_load_32(&ec->counter) & (1UL << 31);
}

/* Slow path for ck_ec{32,64}_{inc,add} */
void ck_ec32_wake(struct ck_ec32 *ec, const struct ck_ec_ops *ops);

CK_CC_FORCE_INLINE void ck_ec32_inc(struct ck_ec32 *ec,
				    const struct ck_ec_mode *mode)
{
#if !defined(CK_F_EC_SP)
	/* Nothing to specialize if we don't have EC_SP. */
	ck_ec32_add(ec, mode, 1);
	return;
#else
	char flagged;

#if __GNUC__ >= 6
	/*
	 * We don't want to wake if the sign bit is 0. We do want to
	 * wake if the sign bit just flipped from 1 to 0. We don't
	 * care what happens when our increment caused the sign bit to
	 * flip from 0 to 1 (that happens once per 2^31 increments).
	 *
	 * This leaves us with four cases:
	 *
	 *   old sign bit | new sign bit | SF | OF | ZF
	 *   -------------------------------------------
	 *       0        |      0       |  0 |  0 |  ?
	 *       0        |      1       |  1 |  0 |  ?
	 *       1        |      1       |  1 |  0 |  ?
	 *       1        |      0       |  0 |  0 |  1
	 *
	 * In the first case, we don't want to hit ck_ec32_wake. In
	 * the last two cases, we do want to call ck_ec32_wake. In the
	 * second case, we don't care, so we arbitrarily choose to
	 * call ck_ec32_wake.
	 *
	 * The "le" condition checks if SF != OF, or ZF == 1, which
	 * meets our requirements.
	 */
#define CK_EC32_INC_ASM(PREFIX)					\
	__asm__ volatile(PREFIX " incl %0"			\
			 : "+m"(ec->counter), "=@ccle"(flagged)	\
			 :: "cc", "memory")
#else
#define CK_EC32_INC_ASM(PREFIX)					\
	__asm__ volatile(PREFIX " incl %0; setle %1"		\
			 : "+m"(ec->counter), "=r"(flagged)	\
			 :: "cc", "memory")
#endif /* __GNUC__ */

	if (mode->single_producer == true) {
		ck_pr_fence_store();
		CK_EC32_INC_ASM("");
	} else {
		ck_pr_fence_store_atomic();
		CK_EC32_INC_ASM("lock");
	}
#undef CK_EC32_INC_ASM

	if (CK_CC_UNLIKELY(flagged)) {
		ck_ec32_wake(ec, mode->ops);
	}

	return;
#endif /* CK_F_EC_SP */
}

CK_CC_FORCE_INLINE uint32_t ck_ec32_add_epilogue(struct ck_ec32 *ec,
						 const struct ck_ec_mode *mode,
						 uint32_t old)
{
	const uint32_t flag_mask = 1U << 31;
	uint32_t ret;

	ret = old & ~flag_mask;
	/* These two only differ if the flag bit is set. */
	if (CK_CC_UNLIKELY(old != ret)) {
		ck_ec32_wake(ec, mode->ops);
	}

	return ret;
}

static CK_CC_INLINE uint32_t ck_ec32_add_mp(struct ck_ec32 *ec,
					    const struct ck_ec_mode *mode,
					    uint32_t delta)
{
	uint32_t old;

	ck_pr_fence_store_atomic();
	old = ck_pr_faa_32(&ec->counter, delta);
	return ck_ec32_add_epilogue(ec, mode, old);
}

#ifdef CK_F_EC_SP
static CK_CC_INLINE uint32_t ck_ec32_add_sp(struct ck_ec32 *ec,
					    const struct ck_ec_mode *mode,
					    uint32_t delta)
{
	uint32_t old;

	/*
	 * Correctness of this racy write depends on actually
	 * having an update to write. Exit here if the update
	 * is a no-op.
	 */
	if (CK_CC_UNLIKELY(delta == 0)) {
		return ck_ec32_value(ec);
	}

	ck_pr_fence_store();
	old = delta;
	__asm__ volatile("xaddl %1, %0"
			 : "+m"(ec->counter), "+r"(old)
			 :: "cc", "memory");
	return ck_ec32_add_epilogue(ec, mode, old);
}
#endif /* CK_F_EC_SP */

CK_CC_FORCE_INLINE uint32_t ck_ec32_add(struct ck_ec32 *ec,
					const struct ck_ec_mode *mode,
					uint32_t delta)
{
#ifdef CK_F_EC_SP
	if (mode->single_producer == true) {
		return ck_ec32_add_sp(ec, mode, delta);
	}
#endif

	return ck_ec32_add_mp(ec, mode, delta);
}

int ck_ec_deadline_impl(struct timespec *new_deadline,
			const struct ck_ec_ops *ops,
			const struct timespec *timeout);

CK_CC_FORCE_INLINE int ck_ec_deadline(struct timespec *new_deadline,
				      const struct ck_ec_mode *mode,
				      const struct timespec *timeout)
{
	return ck_ec_deadline_impl(new_deadline, mode->ops, timeout);
}

int ck_ec32_wait_slow(struct ck_ec32 *ec,
		      const struct ck_ec_ops *ops,
		      uint32_t old_value,
		      const struct timespec *deadline);

CK_CC_FORCE_INLINE int ck_ec32_wait(struct ck_ec32 *ec,
				    const struct ck_ec_mode *mode,
				    uint32_t old_value,
				    const struct timespec *deadline)
{
	if (ck_ec32_value(ec) != old_value) {
		return 0;
	}

	return ck_ec32_wait_slow(ec, mode->ops, old_value, deadline);
}

int ck_ec32_wait_pred_slow(struct ck_ec32 *ec,
			   const struct ck_ec_ops *ops,
			   uint32_t old_value,
			   int (*pred)(const struct ck_ec_wait_state *state,
				       struct timespec *deadline),
			   void *data,
			   const struct timespec *deadline);

CK_CC_FORCE_INLINE int
ck_ec32_wait_pred(struct ck_ec32 *ec,
		  const struct ck_ec_mode *mode,
		  uint32_t old_value,
		  int (*pred)(const struct ck_ec_wait_state *state,
			      struct timespec *deadline),
		  void *data,
		  const struct timespec *deadline)
{
	if (ck_ec32_value(ec) != old_value) {
		return 0;
	}

	return ck_ec32_wait_pred_slow(ec, mode->ops, old_value,
				      pred, data, deadline);
}

#ifdef CK_F_EC64
CK_CC_FORCE_INLINE void ck_ec64_init(struct ck_ec64 *ec, uint64_t value)
{
	ec->counter = value << 1;
	return;
}

CK_CC_FORCE_INLINE uint64_t ck_ec64_value(const struct ck_ec64 *ec)
{
	uint64_t ret = ck_pr_load_64(&ec->counter) >> 1;

	ck_pr_fence_acquire();
	return ret;
}

CK_CC_FORCE_INLINE bool ck_ec64_has_waiters(const struct ck_ec64 *ec)
{
	return ck_pr_load_64(&ec->counter) & 1;
}

void ck_ec64_wake(struct ck_ec64 *ec, const struct ck_ec_ops *ops);

CK_CC_FORCE_INLINE void ck_ec64_inc(struct ck_ec64 *ec,
				    const struct ck_ec_mode *mode)
{
	/* We always xadd, so there's no special optimization here. */
	(void)ck_ec64_add(ec, mode, 1);
	return;
}

CK_CC_FORCE_INLINE uint64_t ck_ec_add64_epilogue(struct ck_ec64 *ec,
						 const struct ck_ec_mode *mode,
						 uint64_t old)
{
	uint64_t ret = old >> 1;

	if (CK_CC_UNLIKELY(old & 1)) {
		ck_ec64_wake(ec, mode->ops);
	}

	return ret;
}

static CK_CC_INLINE uint64_t ck_ec64_add_mp(struct ck_ec64 *ec,
					    const struct ck_ec_mode *mode,
					    uint64_t delta)
{
	uint64_t inc = 2 * delta;	/* The low bit is the flag bit. */

	ck_pr_fence_store_atomic();
	return ck_ec_add64_epilogue(ec, mode, ck_pr_faa_64(&ec->counter, inc));
}

#ifdef CK_F_EC_SP
/* Single-producer specialisation. */
static CK_CC_INLINE uint64_t ck_ec64_add_sp(struct ck_ec64 *ec,
					    const struct ck_ec_mode *mode,
					    uint64_t delta)
{
	uint64_t old;

	/*
	 * Correctness of this racy write depends on actually
	 * having an update to write. Exit here if the update
	 * is a no-op.
	 */
	if (CK_CC_UNLIKELY(delta == 0)) {
		return ck_ec64_value(ec);
	}

	ck_pr_fence_store();
	old = 2 * delta;	/* The low bit is the flag bit. */
	__asm__ volatile("xaddq %1, %0"
			 : "+m"(ec->counter), "+r"(old)
			 :: "cc", "memory");
	return ck_ec_add64_epilogue(ec, mode, old);
}
#endif /* CK_F_EC_SP */

/*
 * Dispatch on mode->single_producer in this FORCE_INLINE function:
 * the end result is always small, but not all compilers have enough
 * foresight to inline and get the reduction.
 */
CK_CC_FORCE_INLINE uint64_t ck_ec64_add(struct ck_ec64 *ec,
					const struct ck_ec_mode *mode,
					uint64_t delta)
{
#ifdef CK_F_EC_SP
	if (mode->single_producer == true) {
		return ck_ec64_add_sp(ec, mode, delta);
	}
#endif

	return ck_ec64_add_mp(ec, mode, delta);
}

int ck_ec64_wait_slow(struct ck_ec64 *ec,
		      const struct ck_ec_ops *ops,
		      uint64_t old_value,
		      const struct timespec *deadline);

CK_CC_FORCE_INLINE int ck_ec64_wait(struct ck_ec64 *ec,
				    const struct ck_ec_mode *mode,
				    uint64_t old_value,
				    const struct timespec *deadline)
{
	if (ck_ec64_value(ec) != old_value) {
		return 0;
	}

	return ck_ec64_wait_slow(ec, mode->ops, old_value, deadline);
}

int ck_ec64_wait_pred_slow(struct ck_ec64 *ec,
			   const struct ck_ec_ops *ops,
			   uint64_t old_value,
			   int (*pred)(const struct ck_ec_wait_state *state,
				       struct timespec *deadline),
			   void *data,
			   const struct timespec *deadline);

CK_CC_FORCE_INLINE int
ck_ec64_wait_pred(struct ck_ec64 *ec,
		  const struct ck_ec_mode *mode,
		  uint64_t old_value,
		  int (*pred)(const struct ck_ec_wait_state *state,
			      struct timespec *deadline),
		  void *data,
		  const struct timespec *deadline)
{
	if (ck_ec64_value(ec) != old_value) {
		return 0;
	}

	return ck_ec64_wait_pred_slow(ec, mode->ops, old_value,
				      pred, data, deadline);
}
#endif /* CK_F_EC64 */
#endif /* !CK_EC_H */

@@ -83,6 +83,7 @@ struct ck_epoch_ref {
 };
 
 struct ck_epoch_record {
+	ck_stack_entry_t record_next;
 	struct ck_epoch *global;
 	unsigned int state;
 	unsigned int epoch;
@@ -92,17 +93,16 @@ struct ck_epoch_record {
 	} local CK_CC_CACHELINE;
 	unsigned int n_pending;
 	unsigned int n_peak;
-	unsigned long n_dispatch;
+	unsigned int n_dispatch;
+	void *ct;
 	ck_stack_t pending[CK_EPOCH_LENGTH];
-	ck_stack_entry_t record_next;
 } CK_CC_CACHELINE;
 typedef struct ck_epoch_record ck_epoch_record_t;
 
 struct ck_epoch {
 	unsigned int epoch;
 	char pad[CK_MD_CACHELINE - sizeof(unsigned int)];
-	ck_stack_t records;
 	unsigned int n_free;
+	ck_stack_t records;
 };
 typedef struct ck_epoch ck_epoch_t;
 
@@ -110,7 +110,14 @@ typedef struct ck_epoch ck_epoch_t;
  * Internal functions.
  */
 void _ck_epoch_addref(ck_epoch_record_t *, ck_epoch_section_t *);
-void _ck_epoch_delref(ck_epoch_record_t *, ck_epoch_section_t *);
+bool _ck_epoch_delref(ck_epoch_record_t *, ck_epoch_section_t *);
+
+CK_CC_FORCE_INLINE static void *
+ck_epoch_record_ct(const ck_epoch_record_t *record)
+{
+
+	return ck_pr_load_ptr(&record->ct);
+}
 
 /*
  * Marks the beginning of an epoch-protected section.
@@ -160,9 +167,10 @@ ck_epoch_begin(ck_epoch_record_t *record, ck_epoch_section_t *section)
 }
 
 /*
- * Marks the end of an epoch-protected section.
+ * Marks the end of an epoch-protected section. Returns true if no more
+ * sections exist for the caller.
  */
-CK_CC_FORCE_INLINE static void
+CK_CC_FORCE_INLINE static bool
 ck_epoch_end(ck_epoch_record_t *record, ck_epoch_section_t *section)
 {
 
@@ -170,15 +178,19 @@ ck_epoch_end(ck_epoch_record_t *record, ck_epoch_section_t *section)
 	ck_pr_store_uint(&record->active, record->active - 1);
 
 	if (section != NULL)
-		_ck_epoch_delref(record, section);
+		return _ck_epoch_delref(record, section);
 
-	return;
+	return record->active == 0;
 }
 
 /*
  * Defers the execution of the function pointed to by the "cb"
  * argument until an epoch counter loop. This allows for a
  * non-blocking deferral.
+ *
+ * We can get away without a fence here due to the monotonic nature
+ * of the epoch counter. Worst case, this will result in some delays
+ * before object destruction.
  */
 CK_CC_FORCE_INLINE static void
 ck_epoch_call(ck_epoch_record_t *record,
@@ -195,13 +207,75 @@ ck_epoch_call(ck_epoch_record_t *record,
 	return;
 }
 
+/*
+ * Same as ck_epoch_call, but allows for records to be shared and is reentrant.
+ */
+CK_CC_FORCE_INLINE static void
+ck_epoch_call_strict(ck_epoch_record_t *record,
+		     ck_epoch_entry_t *entry,
+		     ck_epoch_cb_t *function)
+{
+	struct ck_epoch *epoch = record->global;
+	unsigned int e = ck_pr_load_uint(&epoch->epoch);
+	unsigned int offset = e & (CK_EPOCH_LENGTH - 1);
+
+	ck_pr_inc_uint(&record->n_pending);
+	entry->function = function;
+
+	/* Store fence is implied by push operation. */
+	ck_stack_push_upmc(&record->pending[offset], &entry->stack_entry);
+	return;
+}
+
+/*
+ * This callback is used for synchronize_wait to allow for custom blocking
+ * behavior.
+ */
+typedef void ck_epoch_wait_cb_t(ck_epoch_t *, ck_epoch_record_t *,
+    void *);
+
+/*
+ * Return latest epoch value. This operation provides load ordering.
+ */
+CK_CC_FORCE_INLINE static unsigned int
+ck_epoch_value(const ck_epoch_t *ep)
+{
+
+	ck_pr_fence_load();
+	return ck_pr_load_uint(&ep->epoch);
+}
+
 void ck_epoch_init(ck_epoch_t *);
-ck_epoch_record_t *ck_epoch_recycle(ck_epoch_t *);
-void ck_epoch_register(ck_epoch_t *, ck_epoch_record_t *);
+
+/*
+ * Attempts to recycle an unused epoch record. If one is successfully
+ * allocated, the record context pointer is also updated.
+ */
+ck_epoch_record_t *ck_epoch_recycle(ck_epoch_t *, void *);
+
+/*
+ * Registers an epoch record. An optional context pointer may be passed that
+ * is retrievable with ck_epoch_record_ct.
+ */
+void ck_epoch_register(ck_epoch_t *, ck_epoch_record_t *, void *);
+
+/*
+ * Marks a record as available for re-use by a subsequent recycle operation.
+ * Note that the record cannot be physically destroyed.
+ */
 void ck_epoch_unregister(ck_epoch_record_t *);
 
 bool ck_epoch_poll(ck_epoch_record_t *);
+bool ck_epoch_poll_deferred(struct ck_epoch_record *record, ck_stack_t *deferred);
 void ck_epoch_synchronize(ck_epoch_record_t *);
+void ck_epoch_synchronize_wait(ck_epoch_t *, ck_epoch_wait_cb_t *, void *);
 void ck_epoch_barrier(ck_epoch_record_t *);
+void ck_epoch_barrier_wait(ck_epoch_record_t *, ck_epoch_wait_cb_t *, void *);
 
+/*
+ * Reclaim entries associated with a record. This is safe to call only on
+ * the caller's record or records that are using call_strict.
+ */
 void ck_epoch_reclaim(ck_epoch_record_t *);
 
 #endif /* CK_EPOCH_H */
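
For illustration, the revised registration API fits together roughly as
follows (a sketch, not part of the diff; it assumes a process-wide
ck_epoch_t and a malloc-backed record):

	ck_epoch_t global_epoch;
	ck_epoch_record_t *record;

	ck_epoch_init(&global_epoch);

	/* Prefer recycling an unregistered record over allocating one. */
	record = ck_epoch_recycle(&global_epoch, NULL);
	if (record == NULL) {
		record = malloc(sizeof *record);
		ck_epoch_register(&global_epoch, record, NULL);
	}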

@@ -115,7 +115,7 @@ CK_CC_INLINE static void
 ck_fifo_spsc_deinit(struct ck_fifo_spsc *fifo, struct ck_fifo_spsc_entry **garbage)
 {
 
-	*garbage = fifo->head;
+	*garbage = fifo->garbage;
 	fifo->head = fifo->tail = NULL;
 	return;
 }
 
@@ -100,18 +100,28 @@ struct ck_hs_stat {
 struct ck_hs_iterator {
 	void **cursor;
 	unsigned long offset;
+	struct ck_hs_map *map;
 };
 typedef struct ck_hs_iterator ck_hs_iterator_t;
 
-#define CK_HS_ITERATOR_INITIALIZER { NULL, 0 }
+#define CK_HS_ITERATOR_INITIALIZER { NULL, 0, NULL }
 
 /* Convenience wrapper to table hash function. */
 #define CK_HS_HASH(T, F, K) F((K), (T)->seed)
 
+/* Computes the hash of n bytes of k for the specified hash map. */
+static inline unsigned long
+ck_hs_hash(const struct ck_hs *hs, const void *k)
+{
+
+	return hs->hf(k, hs->seed);
+}
+
 typedef void *ck_hs_apply_fn_t(void *, void *);
 bool ck_hs_apply(ck_hs_t *, unsigned long, const void *, ck_hs_apply_fn_t *, void *);
 void ck_hs_iterator_init(ck_hs_iterator_t *);
 bool ck_hs_next(ck_hs_t *, ck_hs_iterator_t *, void **);
+bool ck_hs_next_spmc(ck_hs_t *, ck_hs_iterator_t *, void **);
 bool ck_hs_move(ck_hs_t *, ck_hs_t *, ck_hs_hash_cb_t *,
     ck_hs_compare_cb_t *, struct ck_malloc *);
 bool ck_hs_init(ck_hs_t *, unsigned int, ck_hs_hash_cb_t *,
 
@@ -47,7 +47,15 @@
 #define @POINTER_PACK_ENABLE@
 #endif /* @POINTER_PACK_ENABLE@ */
 
+#ifndef @SSE_DISABLE@
+#define @SSE_DISABLE@
+#endif /* @SSE_DISABLE@ */
+
+#ifndef @PPC32_LWSYNC_ENABLE@
+#define @PPC32_LWSYNC_ENABLE@
+#endif /* @PPC32_LWSYNC_ENABLE@ */
+
 #ifndef @VMA_BITS@
 #define @VMA_BITS@ @VMA_BITS_VALUE@
 #endif /* @VMA_BITS@ */
 
@@ -34,7 +34,20 @@
 #include <ck_stdint.h>
 #include <ck_stdbool.h>
 
-#ifndef CK_USE_CC_BUILTINS
+/*
+ * Default to using builtins for clang analyzer, coverity, and sparse:
+ * inline assembly is often too opaque for useful analysis. Override
+ * the defaults by defining CK_USE_CC_BUILTINS=0 or 1.
+ */
+#if !defined(CK_USE_CC_BUILTINS)
+#if defined(__clang_analyzer__) || defined(__COVERITY__) || defined(__CHECKER__)
+#define CK_USE_CC_BUILTINS 1
+#else
+#define CK_USE_CC_BUILTINS 0
+#endif
+#endif
+
+#if !CK_USE_CC_BUILTINS
 #if defined(__x86_64__)
 #include "gcc/x86_64/ck_pr.h"
 #elif defined(__x86__)
@@ -43,6 +56,8 @@
 #include "gcc/sparcv9/ck_pr.h"
 #elif defined(__ppc64__)
 #include "gcc/ppc64/ck_pr.h"
+#elif defined(__s390x__)
+#include "gcc/s390x/ck_pr.h"
 #elif defined(__ppc__)
 #include "gcc/ppc/ck_pr.h"
 #elif defined(__arm__)
@@ -613,8 +628,8 @@ CK_PR_BTX_S(bts, 16, uint16_t, |,)
 }
 
 #define CK_PR_UNARY_Z(K, S, M, T, P, C, Z)			\
-	CK_CC_INLINE static void				\
-	ck_pr_##K##_##S##_zero(M *target, bool *zero)		\
+	CK_CC_INLINE static bool				\
+	ck_pr_##K##_##S##_is_zero(M *target)			\
 	{							\
 		T previous;					\
 		C punt;						\
@@ -625,12 +640,21 @@ CK_PR_BTX_S(bts, 16, uint16_t, |,)
 				(C)(previous P 1),		\
 				&previous) == false)		\
 			ck_pr_stall();				\
-		*zero = previous == (T)Z;			\
+		return previous == (T)Z;			\
 	}
 
+#define CK_PR_UNARY_Z_STUB(K, S, M)				\
+	CK_CC_INLINE static void				\
+	ck_pr_##K##_##S##_zero(M *target, bool *zero)		\
+	{							\
+		*zero = ck_pr_##K##_##S##_is_zero(target);	\
+		return;						\
+	}
+
 #define CK_PR_UNARY_S(K, X, S, M) CK_PR_UNARY(K, X, S, M, M)
-#define CK_PR_UNARY_Z_S(K, S, M, P, Z) CK_PR_UNARY_Z(K, S, M, M, P, M, Z)
+#define CK_PR_UNARY_Z_S(K, S, M, P, Z)				\
+	CK_PR_UNARY_Z(K, S, M, M, P, M, Z)			\
+	CK_PR_UNARY_Z_STUB(K, S, M)
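
The net effect of these macros, for e.g. the uint instantiation, is
sketched below (illustrative only; `refs` and object_destroy are
assumed names, not part of ck_pr):

	unsigned int refs = 2;
	bool zero;

	/* New interface: the zero test is the return value. */
	if (ck_pr_dec_uint_is_zero(&refs) == true)
		object_destroy();	/* not reached: refs is now 1 */

	/* Legacy interface remains available as a stub. */
	ck_pr_dec_uint_zero(&refs, &zero);	/* refs == 0, zero == true */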

 #if defined(CK_F_PR_LOAD_CHAR) && defined(CK_F_PR_CAS_CHAR_VALUE)
 
@@ -642,6 +666,8 @@ CK_PR_UNARY_S(inc, add, char, char)
 #ifndef CK_F_PR_INC_CHAR_ZERO
 #define CK_F_PR_INC_CHAR_ZERO
 CK_PR_UNARY_Z_S(inc, char, char, +, -1)
+#else
+CK_PR_UNARY_Z_STUB(inc, char, char)
 #endif /* CK_F_PR_INC_CHAR_ZERO */
 
 #ifndef CK_F_PR_DEC_CHAR
@@ -652,6 +678,8 @@ CK_PR_UNARY_S(dec, sub, char, char)
 #ifndef CK_F_PR_DEC_CHAR_ZERO
 #define CK_F_PR_DEC_CHAR_ZERO
 CK_PR_UNARY_Z_S(dec, char, char, -, 1)
+#else
+CK_PR_UNARY_Z_STUB(dec, char, char)
 #endif /* CK_F_PR_DEC_CHAR_ZERO */
 
 #endif /* CK_F_PR_LOAD_CHAR && CK_F_PR_CAS_CHAR_VALUE */
 
@@ -666,6 +694,8 @@ CK_PR_UNARY_S(inc, add, int, int)
 #ifndef CK_F_PR_INC_INT_ZERO
 #define CK_F_PR_INC_INT_ZERO
 CK_PR_UNARY_Z_S(inc, int, int, +, -1)
+#else
+CK_PR_UNARY_Z_STUB(inc, int, int)
 #endif /* CK_F_PR_INC_INT_ZERO */
 
 #ifndef CK_F_PR_DEC_INT
@@ -676,6 +706,8 @@ CK_PR_UNARY_S(dec, sub, int, int)
 #ifndef CK_F_PR_DEC_INT_ZERO
 #define CK_F_PR_DEC_INT_ZERO
 CK_PR_UNARY_Z_S(dec, int, int, -, 1)
+#else
+CK_PR_UNARY_Z_STUB(dec, int, int)
 #endif /* CK_F_PR_DEC_INT_ZERO */
 
 #endif /* CK_F_PR_LOAD_INT && CK_F_PR_CAS_INT_VALUE */
 
@@ -705,6 +737,8 @@ CK_PR_UNARY_S(inc, add, uint, unsigned int)
 #ifndef CK_F_PR_INC_UINT_ZERO
 #define CK_F_PR_INC_UINT_ZERO
 CK_PR_UNARY_Z_S(inc, uint, unsigned int, +, UINT_MAX)
+#else
+CK_PR_UNARY_Z_STUB(inc, uint, unsigned int)
 #endif /* CK_F_PR_INC_UINT_ZERO */
 
 #ifndef CK_F_PR_DEC_UINT
@@ -715,6 +749,8 @@ CK_PR_UNARY_S(dec, sub, uint, unsigned int)
 #ifndef CK_F_PR_DEC_UINT_ZERO
 #define CK_F_PR_DEC_UINT_ZERO
 CK_PR_UNARY_Z_S(dec, uint, unsigned int, -, 1)
+#else
+CK_PR_UNARY_Z_STUB(dec, uint, unsigned int)
 #endif /* CK_F_PR_DEC_UINT_ZERO */
 
 #endif /* CK_F_PR_LOAD_UINT && CK_F_PR_CAS_UINT_VALUE */
 
@@ -729,6 +765,8 @@ CK_PR_UNARY(inc, add, ptr, void, uintptr_t)
 #ifndef CK_F_PR_INC_PTR_ZERO
 #define CK_F_PR_INC_PTR_ZERO
 CK_PR_UNARY_Z(inc, ptr, void, uintptr_t, +, void *, UINT_MAX)
+#else
+CK_PR_UNARY_Z_STUB(inc, ptr, void)
 #endif /* CK_F_PR_INC_PTR_ZERO */
 
 #ifndef CK_F_PR_DEC_PTR
@@ -739,6 +777,8 @@ CK_PR_UNARY(dec, sub, ptr, void, uintptr_t)
 #ifndef CK_F_PR_DEC_PTR_ZERO
 #define CK_F_PR_DEC_PTR_ZERO
 CK_PR_UNARY_Z(dec, ptr, void, uintptr_t, -, void *, 1)
+#else
+CK_PR_UNARY_Z_STUB(dec, ptr, void)
 #endif /* CK_F_PR_DEC_PTR_ZERO */
 
 #endif /* CK_F_PR_LOAD_PTR && CK_F_PR_CAS_PTR_VALUE */
 
@@ -753,6 +793,8 @@ CK_PR_UNARY_S(inc, add, 64, uint64_t)
 #ifndef CK_F_PR_INC_64_ZERO
 #define CK_F_PR_INC_64_ZERO
 CK_PR_UNARY_Z_S(inc, 64, uint64_t, +, UINT64_MAX)
+#else
+CK_PR_UNARY_Z_STUB(inc, 64, uint64_t)
 #endif /* CK_F_PR_INC_64_ZERO */
 
 #ifndef CK_F_PR_DEC_64
@@ -763,6 +805,8 @@ CK_PR_UNARY_S(dec, sub, 64, uint64_t)
 #ifndef CK_F_PR_DEC_64_ZERO
 #define CK_F_PR_DEC_64_ZERO
 CK_PR_UNARY_Z_S(dec, 64, uint64_t, -, 1)
+#else
+CK_PR_UNARY_Z_STUB(dec, 64, uint64_t)
 #endif /* CK_F_PR_DEC_64_ZERO */
 
 #endif /* CK_F_PR_LOAD_64 && CK_F_PR_CAS_64_VALUE */
 
@@ -777,6 +821,8 @@ CK_PR_UNARY_S(inc, add, 32, uint32_t)
 #ifndef CK_F_PR_INC_32_ZERO
 #define CK_F_PR_INC_32_ZERO
 CK_PR_UNARY_Z_S(inc, 32, uint32_t, +, UINT32_MAX)
+#else
+CK_PR_UNARY_Z_STUB(inc, 32, uint32_t)
 #endif /* CK_F_PR_INC_32_ZERO */
 
 #ifndef CK_F_PR_DEC_32
@@ -787,6 +833,8 @@ CK_PR_UNARY_S(dec, sub, 32, uint32_t)
 #ifndef CK_F_PR_DEC_32_ZERO
 #define CK_F_PR_DEC_32_ZERO
 CK_PR_UNARY_Z_S(dec, 32, uint32_t, -, 1)
+#else
+CK_PR_UNARY_Z_STUB(dec, 32, uint32_t)
 #endif /* CK_F_PR_DEC_32_ZERO */
 
 #endif /* CK_F_PR_LOAD_32 && CK_F_PR_CAS_32_VALUE */
 
@@ -801,6 +849,8 @@ CK_PR_UNARY_S(inc, add, 16, uint16_t)
 #ifndef CK_F_PR_INC_16_ZERO
 #define CK_F_PR_INC_16_ZERO
 CK_PR_UNARY_Z_S(inc, 16, uint16_t, +, UINT16_MAX)
+#else
+CK_PR_UNARY_Z_STUB(inc, 16, uint16_t)
 #endif /* CK_F_PR_INC_16_ZERO */
 
 #ifndef CK_F_PR_DEC_16
@@ -811,6 +861,8 @@ CK_PR_UNARY_S(dec, sub, 16, uint16_t)
 #ifndef CK_F_PR_DEC_16_ZERO
 #define CK_F_PR_DEC_16_ZERO
 CK_PR_UNARY_Z_S(dec, 16, uint16_t, -, 1)
+#else
+CK_PR_UNARY_Z_STUB(dec, 16, uint16_t)
 #endif /* CK_F_PR_DEC_16_ZERO */
 
 #endif /* CK_F_PR_LOAD_16 && CK_F_PR_CAS_16_VALUE */
 
@@ -825,6 +877,8 @@ CK_PR_UNARY_S(inc, add, 8, uint8_t)
 #ifndef CK_F_PR_INC_8_ZERO
 #define CK_F_PR_INC_8_ZERO
 CK_PR_UNARY_Z_S(inc, 8, uint8_t, +, UINT8_MAX)
+#else
+CK_PR_UNARY_Z_STUB(inc, 8, uint8_t)
 #endif /* CK_F_PR_INC_8_ZERO */
 
 #ifndef CK_F_PR_DEC_8
@@ -835,6 +889,8 @@ CK_PR_UNARY_S(dec, sub, 8, uint8_t)
 #ifndef CK_F_PR_DEC_8_ZERO
 #define CK_F_PR_DEC_8_ZERO
 CK_PR_UNARY_Z_S(dec, 8, uint8_t, -, 1)
+#else
+CK_PR_UNARY_Z_STUB(dec, 8, uint8_t)
 #endif /* CK_F_PR_DEC_8_ZERO */
 
 #endif /* CK_F_PR_LOAD_8 && CK_F_PR_CAS_8_VALUE */
 
include/ck_queue.h
@@ -125,7 +125,7 @@
  */
 #define CK_SLIST_HEAD(name, type) \
 struct name { \
-	struct type *slh_first; /* first element */ \
+	struct type *cslh_first; /* first element */ \
 }

 #define CK_SLIST_HEAD_INITIALIZER(head) \

@@ -133,85 +133,95 @@ struct name { \

 #define CK_SLIST_ENTRY(type) \
 struct { \
-	struct type *sle_next; /* next element */ \
+	struct type *csle_next; /* next element */ \
 }

 /*
  * Singly-linked List functions.
  */
 #define CK_SLIST_EMPTY(head) \
-	(ck_pr_load_ptr(&(head)->slh_first) == NULL)
+	(ck_pr_load_ptr(&(head)->cslh_first) == NULL)

 #define CK_SLIST_FIRST(head) \
-	(ck_pr_load_ptr(&(head)->slh_first))
+	(ck_pr_load_ptr(&(head)->cslh_first))

 #define CK_SLIST_NEXT(elm, field) \
-	ck_pr_load_ptr(&((elm)->field.sle_next))
+	ck_pr_load_ptr(&((elm)->field.csle_next))

 #define CK_SLIST_FOREACH(var, head, field) \
	for ((var) = CK_SLIST_FIRST((head)); \
-	    (var) && (ck_pr_fence_load(), 1); \
+	    (var); \
	    (var) = CK_SLIST_NEXT((var), field))

-#define CK_SLIST_FOREACH_SAFE(var, head, field, tvar) \
-	for ((var) = CK_SLIST_FIRST(head); \
-	    (var) && (ck_pr_fence_load(), (tvar) = CK_SLIST_NEXT(var, field), 1);\
+#define CK_SLIST_FOREACH_SAFE(var, head, field, tvar) \
+	for ((var) = CK_SLIST_FIRST(head); \
+	    (var) && ((tvar) = CK_SLIST_NEXT(var, field), 1); \
	    (var) = (tvar))

 #define CK_SLIST_FOREACH_PREVPTR(var, varp, head, field) \
-	for ((varp) = &(head)->slh_first; \
-	    ((var) = ck_pr_load_ptr(varp)) != NULL && (ck_pr_fence_load(), 1); \
-	    (varp) = &(var)->field.sle_next)
+	for ((varp) = &(head)->cslh_first; \
+	    ((var) = ck_pr_load_ptr(varp)) != NULL; \
+	    (varp) = &(var)->field.csle_next)

 #define CK_SLIST_INIT(head) do { \
-	ck_pr_store_ptr(&(head)->slh_first, NULL); \
+	ck_pr_store_ptr(&(head)->cslh_first, NULL); \
	ck_pr_fence_store(); \
 } while (0)

 #define CK_SLIST_INSERT_AFTER(a, b, field) do { \
-	(b)->field.sle_next = (a)->field.sle_next; \
+	(b)->field.csle_next = (a)->field.csle_next; \
	ck_pr_fence_store(); \
-	ck_pr_store_ptr(&(a)->field.sle_next, b); \
+	ck_pr_store_ptr(&(a)->field.csle_next, b); \
 } while (0)

 #define CK_SLIST_INSERT_HEAD(head, elm, field) do { \
-	(elm)->field.sle_next = (head)->slh_first; \
+	(elm)->field.csle_next = (head)->cslh_first; \
	ck_pr_fence_store(); \
-	ck_pr_store_ptr(&(head)->slh_first, elm); \
+	ck_pr_store_ptr(&(head)->cslh_first, elm); \
 } while (0)

+#define CK_SLIST_INSERT_PREVPTR(prevp, slistelm, elm, field) do { \
+	(elm)->field.csle_next = (slistelm); \
+	ck_pr_fence_store(); \
+	ck_pr_store_ptr(prevp, elm); \
+} while (0)
+
 #define CK_SLIST_REMOVE_AFTER(elm, field) do { \
-	ck_pr_store_ptr(&(elm)->field.sle_next, \
-	    (elm)->field.sle_next->field.sle_next); \
+	ck_pr_store_ptr(&(elm)->field.csle_next, \
+	    (elm)->field.csle_next->field.csle_next); \
 } while (0)

 #define CK_SLIST_REMOVE(head, elm, type, field) do { \
-	if ((head)->slh_first == (elm)) { \
+	if ((head)->cslh_first == (elm)) { \
		CK_SLIST_REMOVE_HEAD((head), field); \
	} else { \
-		struct type *curelm = (head)->slh_first; \
-		while (curelm->field.sle_next != (elm)) \
-			curelm = curelm->field.sle_next; \
+		struct type *curelm = (head)->cslh_first; \
+		while (curelm->field.csle_next != (elm)) \
+			curelm = curelm->field.csle_next; \
		CK_SLIST_REMOVE_AFTER(curelm, field); \
	} \
 } while (0)

 #define CK_SLIST_REMOVE_HEAD(head, field) do { \
-	ck_pr_store_ptr(&(head)->slh_first, \
-	    (head)->slh_first->field.sle_next); \
+	ck_pr_store_ptr(&(head)->cslh_first, \
+	    (head)->cslh_first->field.csle_next); \
 } while (0)

+#define CK_SLIST_REMOVE_PREVPTR(prevp, elm, field) do { \
+	ck_pr_store_ptr(prevptr, (elm)->field.csle_next); \
+} while (0)
+
 #define CK_SLIST_MOVE(head1, head2, field) do { \
-	ck_pr_store_ptr(&(head1)->slh_first, (head2)->slh_first); \
+	ck_pr_store_ptr(&(head1)->cslh_first, (head2)->cslh_first); \
 } while (0)

 /*
  * This operation is not applied atomically.
  */
 #define CK_SLIST_SWAP(a, b, type) do { \
-	struct type *swap_first = (a)->slh_first; \
-	(a)->slh_first = (b)->slh_first; \
-	(b)->slh_first = swap_first; \
+	struct type *swap_first = (a)->cslh_first; \
+	(a)->cslh_first = (b)->cslh_first; \
+	(b)->cslh_first = swap_first; \
 } while (0)

 /*
@@ -219,107 +229,107 @@ struct { \
  */
 #define CK_STAILQ_HEAD(name, type) \
 struct name { \
-	struct type *stqh_first;/* first element */ \
-	struct type **stqh_last;/* addr of last next element */ \
+	struct type *cstqh_first;/* first element */ \
+	struct type **cstqh_last;/* addr of last next element */ \
 }

 #define CK_STAILQ_HEAD_INITIALIZER(head) \
-	{ NULL, &(head).stqh_first }
+	{ NULL, &(head).cstqh_first }

 #define CK_STAILQ_ENTRY(type) \
 struct { \
-	struct type *stqe_next; /* next element */ \
+	struct type *cstqe_next; /* next element */ \
 }

 /*
  * Singly-linked Tail queue functions.
  */
 #define CK_STAILQ_CONCAT(head1, head2) do { \
-	if ((head2)->stqh_first == NULL) { \
-		ck_pr_store_ptr((head1)->stqh_last, (head2)->stqh_first); \
+	if ((head2)->cstqh_first != NULL) { \
+		ck_pr_store_ptr((head1)->cstqh_last, (head2)->cstqh_first); \
		ck_pr_fence_store(); \
-		(head1)->stqh_last = (head2)->stqh_last; \
+		(head1)->cstqh_last = (head2)->cstqh_last; \
		CK_STAILQ_INIT((head2)); \
	} \
 } while (0)

-#define CK_STAILQ_EMPTY(head) (ck_pr_load_ptr(&(head)->stqh_first) == NULL)
+#define CK_STAILQ_EMPTY(head) (ck_pr_load_ptr(&(head)->cstqh_first) == NULL)

-#define CK_STAILQ_FIRST(head) (ck_pr_load_ptr(&(head)->stqh_first))
+#define CK_STAILQ_FIRST(head) (ck_pr_load_ptr(&(head)->cstqh_first))

 #define CK_STAILQ_FOREACH(var, head, field) \
	for((var) = CK_STAILQ_FIRST((head)); \
-	    (var) && (ck_pr_fence_load(), 1); \
+	    (var); \
	    (var) = CK_STAILQ_NEXT((var), field))

 #define CK_STAILQ_FOREACH_SAFE(var, head, field, tvar) \
	for ((var) = CK_STAILQ_FIRST((head)); \
-	    (var) && (ck_pr_fence_load(), (tvar) = \
+	    (var) && ((tvar) = \
		CK_STAILQ_NEXT((var), field), 1); \
	    (var) = (tvar))

 #define CK_STAILQ_INIT(head) do { \
-	ck_pr_store_ptr(&(head)->stqh_first, NULL); \
+	ck_pr_store_ptr(&(head)->cstqh_first, NULL); \
	ck_pr_fence_store(); \
-	(head)->stqh_last = &(head)->stqh_first; \
+	(head)->cstqh_last = &(head)->cstqh_first; \
 } while (0)

 #define CK_STAILQ_INSERT_AFTER(head, tqelm, elm, field) do { \
-	(elm)->field.stqe_next = (tqelm)->field.stqe_next; \
+	(elm)->field.cstqe_next = (tqelm)->field.cstqe_next; \
	ck_pr_fence_store(); \
-	ck_pr_store_ptr(&(tqelm)->field.stqe_next, elm); \
-	if ((elm)->field.stqe_next == NULL) \
-		(head)->stqh_last = &(elm)->field.stqe_next; \
+	ck_pr_store_ptr(&(tqelm)->field.cstqe_next, elm); \
+	if ((elm)->field.cstqe_next == NULL) \
+		(head)->cstqh_last = &(elm)->field.cstqe_next; \
 } while (0)

 #define CK_STAILQ_INSERT_HEAD(head, elm, field) do { \
-	(elm)->field.stqe_next = (head)->stqh_first; \
+	(elm)->field.cstqe_next = (head)->cstqh_first; \
	ck_pr_fence_store(); \
-	ck_pr_store_ptr(&(head)->stqh_first, elm); \
-	if ((elm)->field.stqe_next == NULL) \
-		(head)->stqh_last = &(elm)->field.stqe_next; \
+	ck_pr_store_ptr(&(head)->cstqh_first, elm); \
+	if ((elm)->field.cstqe_next == NULL) \
+		(head)->cstqh_last = &(elm)->field.cstqe_next; \
 } while (0)

 #define CK_STAILQ_INSERT_TAIL(head, elm, field) do { \
-	(elm)->field.stqe_next = NULL; \
+	(elm)->field.cstqe_next = NULL; \
	ck_pr_fence_store(); \
-	ck_pr_store_ptr((head)->stqh_last, (elm)); \
-	(head)->stqh_last = &(elm)->field.stqe_next; \
+	ck_pr_store_ptr((head)->cstqh_last, (elm)); \
+	(head)->cstqh_last = &(elm)->field.cstqe_next; \
 } while (0)

 #define CK_STAILQ_NEXT(elm, field) \
-	(ck_pr_load_ptr(&(elm)->field.stqe_next))
+	(ck_pr_load_ptr(&(elm)->field.cstqe_next))

 #define CK_STAILQ_REMOVE(head, elm, type, field) do { \
-	if ((head)->stqh_first == (elm)) { \
+	if ((head)->cstqh_first == (elm)) { \
		CK_STAILQ_REMOVE_HEAD((head), field); \
	} else { \
-		struct type *curelm = (head)->stqh_first; \
-		while (curelm->field.stqe_next != (elm)) \
-			curelm = curelm->field.stqe_next; \
+		struct type *curelm = (head)->cstqh_first; \
+		while (curelm->field.cstqe_next != (elm)) \
+			curelm = curelm->field.cstqe_next; \
		CK_STAILQ_REMOVE_AFTER(head, curelm, field); \
	} \
 } while (0)

 #define CK_STAILQ_REMOVE_AFTER(head, elm, field) do { \
-	ck_pr_store_ptr(&(elm)->field.stqe_next, \
-	    (elm)->field.stqe_next->field.stqe_next); \
-	if ((elm)->field.stqe_next == NULL) \
-		(head)->stqh_last = &(elm)->field.stqe_next; \
+	ck_pr_store_ptr(&(elm)->field.cstqe_next, \
+	    (elm)->field.cstqe_next->field.cstqe_next); \
+	if ((elm)->field.cstqe_next == NULL) \
+		(head)->cstqh_last = &(elm)->field.cstqe_next; \
 } while (0)

 #define CK_STAILQ_REMOVE_HEAD(head, field) do { \
-	ck_pr_store_ptr(&(head)->stqh_first, \
-	    (head)->stqh_first->field.stqe_next); \
-	if ((head)->stqh_first == NULL) \
-		(head)->stqh_last = &(head)->stqh_first; \
+	ck_pr_store_ptr(&(head)->cstqh_first, \
+	    (head)->cstqh_first->field.cstqe_next); \
+	if ((head)->cstqh_first == NULL) \
+		(head)->cstqh_last = &(head)->cstqh_first; \
 } while (0)

 #define CK_STAILQ_MOVE(head1, head2, field) do { \
-	ck_pr_store_ptr(&(head1)->stqh_first, (head2)->stqh_first); \
-	(head1)->stqh_last = (head2)->stqh_last; \
-	if ((head2)->stqh_last == &(head2)->stqh_first) \
-		(head1)->stqh_last = &(head1)->stqh_first; \
+	ck_pr_store_ptr(&(head1)->cstqh_first, (head2)->cstqh_first); \
+	(head1)->cstqh_last = (head2)->cstqh_last; \
+	if ((head2)->cstqh_last == &(head2)->cstqh_first) \
+		(head1)->cstqh_last = &(head1)->cstqh_first; \
 } while (0)

 /*
@@ -327,15 +337,15 @@ struct { \
  */
 #define CK_STAILQ_SWAP(head1, head2, type) do { \
	struct type *swap_first = CK_STAILQ_FIRST(head1); \
-	struct type **swap_last = (head1)->stqh_last; \
+	struct type **swap_last = (head1)->cstqh_last; \
	CK_STAILQ_FIRST(head1) = CK_STAILQ_FIRST(head2); \
-	(head1)->stqh_last = (head2)->stqh_last; \
+	(head1)->cstqh_last = (head2)->cstqh_last; \
	CK_STAILQ_FIRST(head2) = swap_first; \
-	(head2)->stqh_last = swap_last; \
+	(head2)->cstqh_last = swap_last; \
	if (CK_STAILQ_EMPTY(head1)) \
-		(head1)->stqh_last = &(head1)->stqh_first; \
+		(head1)->cstqh_last = &(head1)->cstqh_first; \
	if (CK_STAILQ_EMPTY(head2)) \
-		(head2)->stqh_last = &(head2)->stqh_first; \
+		(head2)->cstqh_last = &(head2)->cstqh_first; \
 } while (0)

 /*
@@ -343,7 +353,7 @@ struct { \
  */
 #define CK_LIST_HEAD(name, type) \
 struct name { \
-	struct type *lh_first; /* first element */ \
+	struct type *clh_first; /* first element */ \
 }

 #define CK_LIST_HEAD_INITIALIZER(head) \

@@ -351,78 +361,78 @@ struct name { \

 #define CK_LIST_ENTRY(type) \
 struct { \
-	struct type *le_next;  /* next element */ \
-	struct type **le_prev; /* address of previous next element */ \
+	struct type *cle_next;  /* next element */ \
+	struct type **cle_prev; /* address of previous next element */ \
 }

-#define CK_LIST_FIRST(head)      ck_pr_load_ptr(&(head)->lh_first)
+#define CK_LIST_FIRST(head)      ck_pr_load_ptr(&(head)->clh_first)
 #define CK_LIST_EMPTY(head)      (CK_LIST_FIRST(head) == NULL)
-#define CK_LIST_NEXT(elm, field) ck_pr_load_ptr(&(elm)->field.le_next)
+#define CK_LIST_NEXT(elm, field) ck_pr_load_ptr(&(elm)->field.cle_next)

 #define CK_LIST_FOREACH(var, head, field) \
	for ((var) = CK_LIST_FIRST((head)); \
-	    (var) && (ck_pr_fence_load(), 1); \
+	    (var); \
	    (var) = CK_LIST_NEXT((var), field))

 #define CK_LIST_FOREACH_SAFE(var, head, field, tvar) \
	for ((var) = CK_LIST_FIRST((head)); \
-	    (var) && (ck_pr_fence_load(), (tvar) = CK_LIST_NEXT((var), field), 1);\
+	    (var) && ((tvar) = CK_LIST_NEXT((var), field), 1); \
	    (var) = (tvar))

 #define CK_LIST_INIT(head) do { \
-	ck_pr_store_ptr(&(head)->lh_first, NULL); \
+	ck_pr_store_ptr(&(head)->clh_first, NULL); \
	ck_pr_fence_store(); \
 } while (0)

 #define CK_LIST_INSERT_AFTER(listelm, elm, field) do { \
-	(elm)->field.le_next = (listelm)->field.le_next; \
-	(elm)->field.le_prev = &(listelm)->field.le_next; \
+	(elm)->field.cle_next = (listelm)->field.cle_next; \
+	(elm)->field.cle_prev = &(listelm)->field.cle_next; \
	ck_pr_fence_store(); \
-	if ((listelm)->field.le_next != NULL) \
-		(listelm)->field.le_next->field.le_prev = &(elm)->field.le_next;\
-	ck_pr_store_ptr(&(listelm)->field.le_next, elm); \
+	if ((listelm)->field.cle_next != NULL) \
+		(listelm)->field.cle_next->field.cle_prev = &(elm)->field.cle_next;\
+	ck_pr_store_ptr(&(listelm)->field.cle_next, elm); \
 } while (0)

 #define CK_LIST_INSERT_BEFORE(listelm, elm, field) do { \
-	(elm)->field.le_prev = (listelm)->field.le_prev; \
-	(elm)->field.le_next = (listelm); \
+	(elm)->field.cle_prev = (listelm)->field.cle_prev; \
+	(elm)->field.cle_next = (listelm); \
	ck_pr_fence_store(); \
-	ck_pr_store_ptr((listelm)->field.le_prev, (elm)); \
-	(listelm)->field.le_prev = &(elm)->field.le_next; \
+	ck_pr_store_ptr((listelm)->field.cle_prev, (elm)); \
+	(listelm)->field.cle_prev = &(elm)->field.cle_next; \
 } while (0)

 #define CK_LIST_INSERT_HEAD(head, elm, field) do { \
-	(elm)->field.le_next = (head)->lh_first; \
+	(elm)->field.cle_next = (head)->clh_first; \
	ck_pr_fence_store(); \
-	if ((elm)->field.le_next != NULL) \
-		(head)->lh_first->field.le_prev = &(elm)->field.le_next; \
-	ck_pr_store_ptr(&(head)->lh_first, elm); \
-	(elm)->field.le_prev = &(head)->lh_first; \
+	if ((elm)->field.cle_next != NULL) \
+		(head)->clh_first->field.cle_prev = &(elm)->field.cle_next; \
+	ck_pr_store_ptr(&(head)->clh_first, elm); \
+	(elm)->field.cle_prev = &(head)->clh_first; \
 } while (0)

 #define CK_LIST_REMOVE(elm, field) do { \
-	ck_pr_store_ptr((elm)->field.le_prev, (elm)->field.le_next); \
-	if ((elm)->field.le_next != NULL) \
-		(elm)->field.le_next->field.le_prev = (elm)->field.le_prev; \
+	ck_pr_store_ptr((elm)->field.cle_prev, (elm)->field.cle_next); \
+	if ((elm)->field.cle_next != NULL) \
+		(elm)->field.cle_next->field.cle_prev = (elm)->field.cle_prev; \
 } while (0)

 #define CK_LIST_MOVE(head1, head2, field) do { \
-	ck_pr_store_ptr(&(head1)->lh_first, (head2)->lh_first); \
-	if ((head1)->lh_first != NULL) \
-		(head1)->lh_first->field.le_prev = &(head1)->lh_first; \
+	ck_pr_store_ptr(&(head1)->clh_first, (head2)->clh_first); \
+	if ((head1)->clh_first != NULL) \
+		(head1)->clh_first->field.cle_prev = &(head1)->clh_first; \
 } while (0)

 /*
  * This operation is not applied atomically.
  */
 #define CK_LIST_SWAP(head1, head2, type, field) do { \
-	struct type *swap_tmp = (head1)->lh_first; \
-	(head1)->lh_first = (head2)->lh_first; \
-	(head2)->lh_first = swap_tmp; \
-	if ((swap_tmp = (head1)->lh_first) != NULL) \
-		swap_tmp->field.le_prev = &(head1)->lh_first; \
-	if ((swap_tmp = (head2)->lh_first) != NULL) \
-		swap_tmp->field.le_prev = &(head2)->lh_first; \
+	struct type *swap_tmp = (head1)->clh_first; \
+	(head1)->clh_first = (head2)->clh_first; \
+	(head2)->clh_first = swap_tmp; \
+	if ((swap_tmp = (head1)->clh_first) != NULL) \
+		swap_tmp->field.cle_prev = &(head1)->clh_first; \
+	if ((swap_tmp = (head2)->clh_first) != NULL) \
+		swap_tmp->field.cle_prev = &(head2)->clh_first; \
 } while (0)

 #endif /* CK_QUEUE_H */

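For orientation, a minimal sketch of how the renamed interface is used: one writer inserts while any number of readers traverse, and the field renames (slh_first to cslh_first, sle_next to csle_next) mean a ck_queue head is no longer accidentally interchangeable with a plain sys/queue.h head. The names below (struct entry, writer_add, reader_sum) are illustrative, not part of the change.

#include <ck_queue.h>
#include <stdlib.h>

struct entry {
	int value;
	CK_SLIST_ENTRY(entry) link;
};

static CK_SLIST_HEAD(entry_list, entry) head = CK_SLIST_HEAD_INITIALIZER(head);

/* Only one thread may mutate the list at a time. */
static void
writer_add(int v)
{
	struct entry *e = malloc(sizeof(*e));

	if (e == NULL)
		return;

	e->value = v;
	CK_SLIST_INSERT_HEAD(&head, e, link);	/* store fence, then publish */
}

/* Any number of threads may traverse concurrently with the writer. */
static int
reader_sum(void)
{
	struct entry *e;
	int sum = 0;

	CK_SLIST_FOREACH(e, &head, link)
		sum += e->value;

	return sum;
}
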
include/ck_ring.h
@@ -66,9 +66,56 @@ ck_ring_size(const struct ck_ring *ring)
 CK_CC_INLINE static unsigned int
 ck_ring_capacity(const struct ck_ring *ring)
 {

	return ring->size;
 }

+/*
+ * This function is only safe to call when there are no concurrent operations
+ * on the ring. This is primarily meant for persistent ck_ring use-cases. The
+ * function returns true if any mutations were performed on the ring.
+ */
+CK_CC_INLINE static bool
+ck_ring_repair(struct ck_ring *ring)
+{
+	bool r = false;
+
+	if (ring->p_tail != ring->p_head) {
+		ring->p_tail = ring->p_head;
+		r = true;
+	}
+
+	return r;
+}
+
+/*
+ * This can be called when no concurrent updates are occurring on the ring
+ * structure to check for consistency. This is primarily meant to be used for
+ * persistent storage of the ring. If this function returns false, the ring
+ * is in an inconsistent state.
+ */
+CK_CC_INLINE static bool
+ck_ring_valid(const struct ck_ring *ring)
+{
+	unsigned int size = ring->size;
+	unsigned int c_head = ring->c_head;
+	unsigned int p_head = ring->p_head;
+
+	/* The ring must be a power of 2. */
+	if (size & (size - 1))
+		return false;
+
+	/* The consumer counter must always be smaller than the producer. */
+	if (c_head > p_head)
+		return false;
+
+	/* The producer may only be up to size slots ahead of consumer. */
+	if (p_head - c_head >= size)
+		return false;
+
+	return true;
+}
+
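A recovery sketch under stated assumptions: "ring" is a ck_ring header recovered from persistent memory, no other thread touches it yet, and ring_recover is an illustrative name rather than part of the change.

#include <assert.h>
#include <ck_ring.h>

static void
ring_recover(struct ck_ring *ring)
{

	/* The counters must be self-consistent before the ring is trusted. */
	assert(ck_ring_valid(ring) == true);

	/*
	 * Reconcile p_tail with p_head in case an enqueue was interrupted
	 * between reservation and commit.
	 */
	(void)ck_ring_repair(ring);
	return;
}
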
 CK_CC_INLINE static void
 ck_ring_init(struct ck_ring *ring, unsigned int size)
 {

@@ -84,6 +131,45 @@ ck_ring_init(struct ck_ring *ring, unsigned int size)
 /*
  * The _ck_ring_* namespace is internal only and must not be used externally.
  */
+
+/*
+ * This function will return a region of memory to write for the next value
+ * for a single producer.
+ */
+CK_CC_FORCE_INLINE static void *
+_ck_ring_enqueue_reserve_sp(struct ck_ring *ring,
+    void *CK_CC_RESTRICT buffer,
+    unsigned int ts,
+    unsigned int *size)
+{
+	const unsigned int mask = ring->mask;
+	unsigned int consumer, producer, delta;
+
+	consumer = ck_pr_load_uint(&ring->c_head);
+	producer = ring->p_tail;
+	delta = producer + 1;
+	if (size != NULL)
+		*size = (producer - consumer) & mask;
+
+	if (CK_CC_UNLIKELY((delta & mask) == (consumer & mask)))
+		return NULL;
+
+	return (char *)buffer + ts * (producer & mask);
+}
+
+/*
+ * This is to be called to commit and make visible a region of memory
+ * previously reserved with reserve_sp.
+ */
+CK_CC_FORCE_INLINE static void
+_ck_ring_enqueue_commit_sp(struct ck_ring *ring)
+{
+
+	ck_pr_fence_store();
+	ck_pr_store_uint(&ring->p_tail, ring->p_tail + 1);
+	return;
+}
+
 CK_CC_FORCE_INLINE static bool
 _ck_ring_enqueue_sp(struct ck_ring *ring,
     void *CK_CC_RESTRICT buffer,
@@ -163,6 +249,65 @@ _ck_ring_dequeue_sc(struct ck_ring *ring,
	return true;
 }

+CK_CC_FORCE_INLINE static void *
+_ck_ring_enqueue_reserve_mp(struct ck_ring *ring,
+    void *buffer,
+    unsigned int ts,
+    unsigned int *ticket,
+    unsigned int *size)
+{
+	const unsigned int mask = ring->mask;
+	unsigned int producer, consumer, delta;
+
+	producer = ck_pr_load_uint(&ring->p_head);
+
+	for (;;) {
+		ck_pr_fence_load();
+		consumer = ck_pr_load_uint(&ring->c_head);
+
+		delta = producer + 1;
+
+		if (CK_CC_LIKELY((producer - consumer) < mask)) {
+			if (ck_pr_cas_uint_value(&ring->p_head,
+			    producer, delta, &producer) == true) {
+				break;
+			}
+		} else {
+			unsigned int new_producer;
+
+			ck_pr_fence_load();
+			new_producer = ck_pr_load_uint(&ring->p_head);
+
+			if (producer == new_producer) {
+				if (size != NULL)
+					*size = (producer - consumer) & mask;
+
+				return false;
+			}
+
+			producer = new_producer;
+		}
+	}
+
+	*ticket = producer;
+	if (size != NULL)
+		*size = (producer - consumer) & mask;
+
+	return (char *)buffer + ts * (producer & mask);
+}
+
+CK_CC_FORCE_INLINE static void
+_ck_ring_enqueue_commit_mp(struct ck_ring *ring, unsigned int producer)
+{
+
+	while (ck_pr_load_uint(&ring->p_tail) != producer)
+		ck_pr_stall();
+
+	ck_pr_fence_store();
+	ck_pr_store_uint(&ring->p_tail, producer + 1);
+	return;
+}
+
 CK_CC_FORCE_INLINE static bool
 _ck_ring_enqueue_mp(struct ck_ring *ring,
     void *buffer,
@@ -176,23 +321,54 @@ _ck_ring_enqueue_mp(struct ck_ring *ring,

	producer = ck_pr_load_uint(&ring->p_head);

-	do {
+	for (;;) {
		/*
-		 * The snapshot of producer must be up to date with
-		 * respect to consumer.
+		 * The snapshot of producer must be up to date with respect to
+		 * consumer.
		 */
		ck_pr_fence_load();
		consumer = ck_pr_load_uint(&ring->c_head);

		delta = producer + 1;
-		if (CK_CC_UNLIKELY((delta & mask) == (consumer & mask))) {
-			r = false;
-			goto leave;
+
+		/*
+		 * Only try to CAS if the producer is not clearly stale (not
+		 * less than consumer) and the buffer is definitely not full.
+		 */
+		if (CK_CC_LIKELY((producer - consumer) < mask)) {
+			if (ck_pr_cas_uint_value(&ring->p_head,
+			    producer, delta, &producer) == true) {
+				break;
+			}
+		} else {
+			unsigned int new_producer;
+
+			/*
+			 * Slow path. Either the buffer is full or we have a
+			 * stale snapshot of p_head. Execute a second read of
+			 * p_head that must be ordered wrt the snapshot of
+			 * c_head.
+			 */
+			ck_pr_fence_load();
+			new_producer = ck_pr_load_uint(&ring->p_head);
+
+			/*
+			 * Only fail if we haven't made forward progress in
+			 * production: the buffer must have been full when we
+			 * read new_producer (or we wrapped around UINT_MAX
+			 * during this iteration).
+			 */
+			if (producer == new_producer) {
+				r = false;
+				goto leave;
+			}
+
+			/*
+			 * p_head advanced during this iteration. Try again.
+			 */
+			producer = new_producer;
		}
-	} while (ck_pr_cas_uint_value(&ring->p_head,
-	    producer,
-	    delta,
-	    &producer) == false);
+	}

	buffer = (char *)buffer + ts * (producer & mask);
	memcpy(buffer, entry, ts);
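A worked instance of the occupancy test above may help: the counters only increase, so unsigned subtraction yields the number of occupied slots even after wrap-around. The values below assume a ring of size 8 (mask = 7) and are illustrative only.

#include <assert.h>

static void
occupancy_example(void)
{
	const unsigned int mask = 7;

	/* producer 9, consumer 3: 6 entries, a slot is free, take the CAS. */
	assert(((9u - 3u) < mask) == 1);

	/* producer 9, consumer 2: 7 entries, ring is full, take the slow path. */
	assert(((9u - 2u) < mask) == 0);
}
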
@@ -323,6 +499,33 @@ ck_ring_enqueue_spsc(struct ck_ring *ring,
	    &entry, sizeof(entry), NULL);
 }

+CK_CC_INLINE static void *
+ck_ring_enqueue_reserve_spsc_size(struct ck_ring *ring,
+    struct ck_ring_buffer *buffer,
+    unsigned int *size)
+{
+
+	return _ck_ring_enqueue_reserve_sp(ring, buffer, sizeof(void *),
+	    size);
+}
+
+CK_CC_INLINE static void *
+ck_ring_enqueue_reserve_spsc(struct ck_ring *ring,
+    struct ck_ring_buffer *buffer)
+{
+
+	return _ck_ring_enqueue_reserve_sp(ring, buffer, sizeof(void *),
+	    NULL);
+}
+
+CK_CC_INLINE static void
+ck_ring_enqueue_commit_spsc(struct ck_ring *ring)
+{
+
+	_ck_ring_enqueue_commit_sp(ring);
+	return;
+}
+
 CK_CC_INLINE static bool
 ck_ring_dequeue_spsc(struct ck_ring *ring,
     const struct ck_ring_buffer *buffer,
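A sketch of the new reserve/commit flow for a single producer, which splits an enqueue into obtaining a slot and publishing it so the entry can be constructed in place. "ring", "buf" and "my_object" are assumed names; the ring is assumed to have been set up with ck_ring_init over a power-of-2 array of struct ck_ring_buffer.

void *slot;

slot = ck_ring_enqueue_reserve_spsc(&ring, buf);
if (slot != NULL) {
	/* Build the entry in place before it becomes visible. */
	*(void **)slot = my_object;

	/* Publish: store fence, then p_tail advances by one. */
	ck_ring_enqueue_commit_spsc(&ring);
}
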
@@ -344,8 +547,7 @@ ck_ring_enqueue_mpmc(struct ck_ring *ring,
     const void *entry)
 {

-	return _ck_ring_enqueue_mp(ring, buffer, &entry,
-	    sizeof(entry), NULL);
+	return _ck_ring_enqueue_mp(ring, buffer, &entry, sizeof(entry), NULL);
 }

 CK_CC_INLINE static bool
@@ -355,8 +557,37 @@ ck_ring_enqueue_mpmc_size(struct ck_ring *ring,
     unsigned int *size)
 {

-	return _ck_ring_enqueue_mp_size(ring, buffer, &entry,
-	    sizeof(entry), size);
+	return _ck_ring_enqueue_mp_size(ring, buffer, &entry, sizeof(entry),
+	    size);
 }

+CK_CC_INLINE static void *
+ck_ring_enqueue_reserve_mpmc(struct ck_ring *ring,
+    struct ck_ring_buffer *buffer,
+    unsigned int *ticket)
+{
+
+	return _ck_ring_enqueue_reserve_mp(ring, buffer, sizeof(void *),
+	    ticket, NULL);
+}
+
+CK_CC_INLINE static void *
+ck_ring_enqueue_reserve_mpmc_size(struct ck_ring *ring,
+    struct ck_ring_buffer *buffer,
+    unsigned int *ticket,
+    unsigned int *size)
+{
+
+	return _ck_ring_enqueue_reserve_mp(ring, buffer, sizeof(void *),
+	    ticket, size);
+}
+
+CK_CC_INLINE static void
+ck_ring_enqueue_commit_mpmc(struct ck_ring *ring, unsigned int ticket)
+{
+
+	_ck_ring_enqueue_commit_mp(ring, ticket);
+	return;
+}
+
 CK_CC_INLINE static bool
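The multi-producer variant adds a ticket: each producer reserves a slot and receives its producer index, and commits are applied in ticket order, so a commit spins until all earlier reservations have been published. A sketch, with the same assumed "ring", "buf" and "my_object" names as above:

unsigned int ticket;
void *slot;

slot = ck_ring_enqueue_reserve_mpmc(&ring, buf, &ticket);
if (slot != NULL) {
	*(void **)slot = my_object;
	ck_ring_enqueue_commit_mpmc(&ring, ticket);
}
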
@@ -384,6 +615,31 @@ ck_ring_dequeue_mpmc(struct ck_ring *ring,
  * ring buffer containing pointers. Correctness is provided for any number of
  * consumers with up to one concurrent producer.
  */
+CK_CC_INLINE static void *
+ck_ring_enqueue_reserve_spmc_size(struct ck_ring *ring,
+    struct ck_ring_buffer *buffer,
+    unsigned int *size)
+{
+
+	return _ck_ring_enqueue_reserve_sp(ring, buffer, sizeof(void *), size);
+}
+
+CK_CC_INLINE static void *
+ck_ring_enqueue_reserve_spmc(struct ck_ring *ring,
+    struct ck_ring_buffer *buffer)
+{
+
+	return _ck_ring_enqueue_reserve_sp(ring, buffer, sizeof(void *), NULL);
+}
+
+CK_CC_INLINE static void
+ck_ring_enqueue_commit_spmc(struct ck_ring *ring)
+{
+
+	_ck_ring_enqueue_commit_sp(ring);
+	return;
+}
+
 CK_CC_INLINE static bool
 ck_ring_enqueue_spmc_size(struct ck_ring *ring,
     struct ck_ring_buffer *buffer,
@@ -428,6 +684,35 @@ ck_ring_dequeue_spmc(struct ck_ring *ring,
  * ring buffer containing pointers. Correctness is provided for any number of
  * producers with up to one concurrent consumer.
  */
+CK_CC_INLINE static void *
+ck_ring_enqueue_reserve_mpsc(struct ck_ring *ring,
+    struct ck_ring_buffer *buffer,
+    unsigned int *ticket)
+{
+
+	return _ck_ring_enqueue_reserve_mp(ring, buffer, sizeof(void *),
+	    ticket, NULL);
+}
+
+CK_CC_INLINE static void *
+ck_ring_enqueue_reserve_mpsc_size(struct ck_ring *ring,
+    struct ck_ring_buffer *buffer,
+    unsigned int *ticket,
+    unsigned int *size)
+{
+
+	return _ck_ring_enqueue_reserve_mp(ring, buffer, sizeof(void *),
+	    ticket, size);
+}
+
+CK_CC_INLINE static void
+ck_ring_enqueue_commit_mpsc(struct ck_ring *ring, unsigned int ticket)
+{
+
+	_ck_ring_enqueue_commit_mp(ring, ticket);
+	return;
+}
+
 CK_CC_INLINE static bool
 ck_ring_enqueue_mpsc(struct ck_ring *ring,
     struct ck_ring_buffer *buffer,
@@ -463,194 +748,290 @@ ck_ring_dequeue_mpsc(struct ck_ring *ring,
  * CK_RING_PROTOTYPE is used to define a type-safe interface for inlining
  * values of a particular type in the ring buffer.
  */
-#define CK_RING_PROTOTYPE(name, type) \
-CK_CC_INLINE static bool \
-ck_ring_enqueue_spsc_size_##name(struct ck_ring *a, \
-    struct type *b, \
-    struct type *c, \
-    unsigned int *d) \
-{ \
- \
-	return _ck_ring_enqueue_sp_size(a, b, c, \
-	    sizeof(struct type), d); \
-} \
- \
-CK_CC_INLINE static bool \
-ck_ring_enqueue_spsc_##name(struct ck_ring *a, \
-    struct type *b, \
-    struct type *c) \
-{ \
- \
-	return _ck_ring_enqueue_sp(a, b, c, \
-	    sizeof(struct type), NULL); \
-} \
- \
-CK_CC_INLINE static bool \
-ck_ring_dequeue_spsc_##name(struct ck_ring *a, \
-    struct type *b, \
-    struct type *c) \
-{ \
- \
-	return _ck_ring_dequeue_sc(a, b, c, \
-	    sizeof(struct type)); \
-} \
- \
-CK_CC_INLINE static bool \
-ck_ring_enqueue_spmc_size_##name(struct ck_ring *a, \
-    struct type *b, \
-    struct type *c, \
-    unsigned int *d) \
-{ \
- \
-	return _ck_ring_enqueue_sp_size(a, b, c, \
-	    sizeof(struct type), d); \
-} \
- \
-CK_CC_INLINE static bool \
-ck_ring_enqueue_spmc_##name(struct ck_ring *a, \
-    struct type *b, \
-    struct type *c) \
-{ \
- \
-	return _ck_ring_enqueue_sp(a, b, c, \
-	    sizeof(struct type), NULL); \
-} \
- \
-CK_CC_INLINE static bool \
-ck_ring_trydequeue_spmc_##name(struct ck_ring *a, \
-    struct type *b, \
-    struct type *c) \
-{ \
- \
-	return _ck_ring_trydequeue_mc(a, \
-	    b, c, sizeof(struct type)); \
-} \
- \
-CK_CC_INLINE static bool \
-ck_ring_dequeue_spmc_##name(struct ck_ring *a, \
-    struct type *b, \
-    struct type *c) \
-{ \
- \
-	return _ck_ring_dequeue_mc(a, b, c, \
-	    sizeof(struct type)); \
-} \
- \
-CK_CC_INLINE static bool \
-ck_ring_enqueue_mpsc_##name(struct ck_ring *a, \
-    struct type *b, \
-    struct type *c) \
-{ \
- \
-	return _ck_ring_enqueue_mp(a, b, c, \
-	    sizeof(struct type), NULL); \
-} \
- \
-CK_CC_INLINE static bool \
-ck_ring_enqueue_mpsc_size_##name(struct ck_ring *a, \
-    struct type *b, \
-    struct type *c, \
-    unsigned int *d) \
-{ \
- \
-	return _ck_ring_enqueue_mp_size(a, b, c, \
-	    sizeof(struct type), d); \
-} \
- \
-CK_CC_INLINE static bool \
-ck_ring_dequeue_mpsc_##name(struct ck_ring *a, \
-    struct type *b, \
-    struct type *c) \
-{ \
- \
-	return _ck_ring_dequeue_sc(a, b, c, \
-	    sizeof(struct type)); \
-} \
- \
-CK_CC_INLINE static bool \
-ck_ring_enqueue_mpmc_size_##name(struct ck_ring *a, \
-    struct type *b, \
-    struct type *c, \
-    unsigned int *d) \
-{ \
- \
-	return _ck_ring_enqueue_mp_size(a, b, c, \
-	    sizeof(struct type), d); \
-} \
- \
-CK_CC_INLINE static bool \
-ck_ring_enqueue_mpmc_##name(struct ck_ring *a, \
-    struct type *b, \
-    struct type *c) \
-{ \
- \
-	return _ck_ring_enqueue_mp(a, b, c, \
-	    sizeof(struct type), NULL); \
-} \
- \
-CK_CC_INLINE static bool \
-ck_ring_trydequeue_mpmc_##name(struct ck_ring *a, \
-    struct type *b, \
-    struct type *c) \
-{ \
- \
-	return _ck_ring_trydequeue_mc(a, \
-	    b, c, sizeof(struct type)); \
-} \
- \
-CK_CC_INLINE static bool \
-ck_ring_dequeue_mpmc_##name(struct ck_ring *a, \
-    struct type *b, \
-    struct type *c) \
-{ \
- \
-	return _ck_ring_dequeue_mc(a, b, c, \
-	    sizeof(struct type)); \
+#define CK_RING_PROTOTYPE(name, type) \
+CK_CC_INLINE static struct type * \
+ck_ring_enqueue_reserve_spsc_##name(struct ck_ring *a, \
+    struct type *b) \
+{ \
+ \
+	return _ck_ring_enqueue_reserve_sp(a, b, \
+	    sizeof(struct type), NULL); \
+} \
+ \
+CK_CC_INLINE static struct type * \
+ck_ring_enqueue_reserve_spsc_size_##name(struct ck_ring *a, \
+    struct type *b, \
+    unsigned int *c) \
+{ \
+ \
+	return _ck_ring_enqueue_reserve_sp(a, b, \
+	    sizeof(struct type), c); \
+} \
+ \
+CK_CC_INLINE static bool \
+ck_ring_enqueue_spsc_size_##name(struct ck_ring *a, \
+    struct type *b, \
+    struct type *c, \
+    unsigned int *d) \
+{ \
+ \
+	return _ck_ring_enqueue_sp_size(a, b, c, \
+	    sizeof(struct type), d); \
+} \
+ \
+CK_CC_INLINE static bool \
+ck_ring_enqueue_spsc_##name(struct ck_ring *a, \
+    struct type *b, \
+    struct type *c) \
+{ \
+ \
+	return _ck_ring_enqueue_sp(a, b, c, \
+	    sizeof(struct type), NULL); \
+} \
+ \
+CK_CC_INLINE static bool \
+ck_ring_dequeue_spsc_##name(struct ck_ring *a, \
+    struct type *b, \
+    struct type *c) \
+{ \
+ \
+	return _ck_ring_dequeue_sc(a, b, c, \
+	    sizeof(struct type)); \
+} \
+ \
+CK_CC_INLINE static struct type * \
+ck_ring_enqueue_reserve_spmc_##name(struct ck_ring *a, \
+    struct type *b) \
+{ \
+ \
+	return _ck_ring_enqueue_reserve_sp(a, b, \
+	    sizeof(struct type), NULL); \
+} \
+ \
+CK_CC_INLINE static struct type * \
+ck_ring_enqueue_reserve_spmc_size_##name(struct ck_ring *a, \
+    struct type *b, \
+    unsigned int *c) \
+{ \
+ \
+	return _ck_ring_enqueue_reserve_sp(a, b, \
+	    sizeof(struct type), c); \
+} \
+ \
+CK_CC_INLINE static bool \
+ck_ring_enqueue_spmc_size_##name(struct ck_ring *a, \
+    struct type *b, \
+    struct type *c, \
+    unsigned int *d) \
+{ \
+ \
+	return _ck_ring_enqueue_sp_size(a, b, c, \
+	    sizeof(struct type), d); \
+} \
+ \
+CK_CC_INLINE static bool \
+ck_ring_enqueue_spmc_##name(struct ck_ring *a, \
+    struct type *b, \
+    struct type *c) \
+{ \
+ \
+	return _ck_ring_enqueue_sp(a, b, c, \
+	    sizeof(struct type), NULL); \
+} \
+ \
+CK_CC_INLINE static bool \
+ck_ring_trydequeue_spmc_##name(struct ck_ring *a, \
+    struct type *b, \
+    struct type *c) \
+{ \
+ \
+	return _ck_ring_trydequeue_mc(a, \
+	    b, c, sizeof(struct type)); \
+} \
+ \
+CK_CC_INLINE static bool \
+ck_ring_dequeue_spmc_##name(struct ck_ring *a, \
+    struct type *b, \
+    struct type *c) \
+{ \
+ \
+	return _ck_ring_dequeue_mc(a, b, c, \
+	    sizeof(struct type)); \
+} \
+ \
+CK_CC_INLINE static struct type * \
+ck_ring_enqueue_reserve_mpsc_##name(struct ck_ring *a, \
+    struct type *b, \
+    unsigned int *c) \
+{ \
+ \
+	return _ck_ring_enqueue_reserve_mp(a, b, \
+	    sizeof(struct type), c, NULL); \
+} \
+ \
+CK_CC_INLINE static struct type * \
+ck_ring_enqueue_reserve_mpsc_size_##name(struct ck_ring *a, \
+    struct type *b, \
+    unsigned int *c, \
+    unsigned int *d) \
+{ \
+ \
+	return _ck_ring_enqueue_reserve_mp(a, b, \
+	    sizeof(struct type), c, d); \
+} \
+ \
+CK_CC_INLINE static bool \
+ck_ring_enqueue_mpsc_##name(struct ck_ring *a, \
+    struct type *b, \
+    struct type *c) \
+{ \
+ \
+	return _ck_ring_enqueue_mp(a, b, c, \
+	    sizeof(struct type), NULL); \
+} \
+ \
+CK_CC_INLINE static bool \
+ck_ring_enqueue_mpsc_size_##name(struct ck_ring *a, \
+    struct type *b, \
+    struct type *c, \
+    unsigned int *d) \
+{ \
+ \
+	return _ck_ring_enqueue_mp_size(a, b, c, \
+	    sizeof(struct type), d); \
+} \
+ \
+CK_CC_INLINE static bool \
+ck_ring_dequeue_mpsc_##name(struct ck_ring *a, \
+    struct type *b, \
+    struct type *c) \
+{ \
+ \
+	return _ck_ring_dequeue_sc(a, b, c, \
+	    sizeof(struct type)); \
+} \
+ \
+CK_CC_INLINE static struct type * \
+ck_ring_enqueue_reserve_mpmc_##name(struct ck_ring *a, \
+    struct type *b, \
+    unsigned int *c) \
+{ \
+ \
+	return _ck_ring_enqueue_reserve_mp(a, b, \
+	    sizeof(struct type), c, NULL); \
+} \
+ \
+CK_CC_INLINE static struct type * \
+ck_ring_enqueue_reserve_mpmc_size_##name(struct ck_ring *a, \
+    struct type *b, \
+    unsigned int *c, \
+    unsigned int *d) \
+{ \
+ \
+	return _ck_ring_enqueue_reserve_mp(a, b, \
+	    sizeof(struct type), c, d); \
+} \
+ \
+CK_CC_INLINE static bool \
+ck_ring_enqueue_mpmc_size_##name(struct ck_ring *a, \
+    struct type *b, \
+    struct type *c, \
+    unsigned int *d) \
+{ \
+ \
+	return _ck_ring_enqueue_mp_size(a, b, c, \
+	    sizeof(struct type), d); \
+} \
+ \
+CK_CC_INLINE static bool \
+ck_ring_enqueue_mpmc_##name(struct ck_ring *a, \
+    struct type *b, \
+    struct type *c) \
+{ \
+ \
+	return _ck_ring_enqueue_mp(a, b, c, \
+	    sizeof(struct type), NULL); \
+} \
+ \
+CK_CC_INLINE static bool \
+ck_ring_trydequeue_mpmc_##name(struct ck_ring *a, \
+    struct type *b, \
+    struct type *c) \
+{ \
+ \
+	return _ck_ring_trydequeue_mc(a, \
+	    b, c, sizeof(struct type)); \
+} \
+ \
+CK_CC_INLINE static bool \
+ck_ring_dequeue_mpmc_##name(struct ck_ring *a, \
+    struct type *b, \
+    struct type *c) \
+{ \
+ \
+	return _ck_ring_dequeue_mc(a, b, c, \
+	    sizeof(struct type)); \
 }

 /*
  * A single producer with one concurrent consumer.
  */
 #define CK_RING_ENQUEUE_SPSC(name, a, b, c) \
	ck_ring_enqueue_spsc_##name(a, b, c)
 #define CK_RING_ENQUEUE_SPSC_SIZE(name, a, b, c, d) \
	ck_ring_enqueue_spsc_size_##name(a, b, c, d)
+#define CK_RING_ENQUEUE_RESERVE_SPSC(name, a, b, c) \
+	ck_ring_enqueue_reserve_spsc_##name(a, b, c)
+#define CK_RING_ENQUEUE_RESERVE_SPSC_SIZE(name, a, b, c, d) \
+	ck_ring_enqueue_reserve_spsc_size_##name(a, b, c, d)
 #define CK_RING_DEQUEUE_SPSC(name, a, b, c) \
	ck_ring_dequeue_spsc_##name(a, b, c)

 /*
  * A single producer with any number of concurrent consumers.
  */
 #define CK_RING_ENQUEUE_SPMC(name, a, b, c) \
	ck_ring_enqueue_spmc_##name(a, b, c)
 #define CK_RING_ENQUEUE_SPMC_SIZE(name, a, b, c, d) \
	ck_ring_enqueue_spmc_size_##name(a, b, c, d)
+#define CK_RING_ENQUEUE_RESERVE_SPMC(name, a, b, c) \
+	ck_ring_enqueue_reserve_spmc_##name(a, b, c)
+#define CK_RING_ENQUEUE_RESERVE_SPMC_SIZE(name, a, b, c, d) \
+	ck_ring_enqueue_reserve_spmc_size_##name(a, b, c, d)
 #define CK_RING_TRYDEQUEUE_SPMC(name, a, b, c) \
	ck_ring_trydequeue_spmc_##name(a, b, c)
 #define CK_RING_DEQUEUE_SPMC(name, a, b, c) \
	ck_ring_dequeue_spmc_##name(a, b, c)

 /*
  * Any number of concurrent producers with up to one
  * concurrent consumer.
  */
 #define CK_RING_ENQUEUE_MPSC(name, a, b, c) \
	ck_ring_enqueue_mpsc_##name(a, b, c)
 #define CK_RING_ENQUEUE_MPSC_SIZE(name, a, b, c, d) \
	ck_ring_enqueue_mpsc_size_##name(a, b, c, d)
+#define CK_RING_ENQUEUE_RESERVE_MPSC(name, a, b, c) \
+	ck_ring_enqueue_reserve_mpsc_##name(a, b, c)
+#define CK_RING_ENQUEUE_RESERVE_MPSC_SIZE(name, a, b, c, d) \
+	ck_ring_enqueue_reserve_mpsc_size_##name(a, b, c, d)
 #define CK_RING_DEQUEUE_MPSC(name, a, b, c) \
	ck_ring_dequeue_mpsc_##name(a, b, c)

 /*
  * Any number of concurrent producers and consumers.
  */
 #define CK_RING_ENQUEUE_MPMC(name, a, b, c) \
	ck_ring_enqueue_mpmc_##name(a, b, c)
 #define CK_RING_ENQUEUE_MPMC_SIZE(name, a, b, c, d) \
	ck_ring_enqueue_mpmc_size_##name(a, b, c, d)
+#define CK_RING_ENQUEUE_RESERVE_MPMC(name, a, b, c) \
+	ck_ring_enqueue_reserve_mpmc_##name(a, b, c)
+#define CK_RING_ENQUEUE_RESERVE_MPMC_SIZE(name, a, b, c, d) \
+	ck_ring_enqueue_reserve_mpmc_size_##name(a, b, c, d)
 #define CK_RING_TRYDEQUEUE_MPMC(name, a, b, c) \
	ck_ring_trydequeue_mpmc_##name(a, b, c)
 #define CK_RING_DEQUEUE_MPMC(name, a, b, c) \
	ck_ring_dequeue_mpmc_##name(a, b, c)

 #endif /* CK_RING_H */

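A sketch of the type-safe interface after this change, including the new reserve wrappers. "struct message", "ring", "slots" and publish are illustrative names; the ring is assumed to be initialized elsewhere over a power-of-2 slot array.

#include <ck_ring.h>
#include <stdbool.h>
#include <stdint.h>

struct message {
	uint64_t id;
	void *payload;
};

CK_RING_PROTOTYPE(message, message)

static struct ck_ring ring;		/* ck_ring_init(&ring, 1024) elsewhere */
static struct message slots[1024];	/* size must be a power of 2 */

static bool
publish(struct message *m)
{

	/* Expands to ck_ring_enqueue_mpsc_message(&ring, slots, m). */
	return CK_RING_ENQUEUE_MPSC(message, &ring, slots, m);
}
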
include/freebsd/ck_md.h.in (new file, 133 lines)
@@ -0,0 +1,133 @@
/*
 * Copyright 2018 Samy Al Bahra.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * This header file is meant for use of Concurrency Kit in the FreeBSD kernel.
 */

#ifndef CK_MD_H
#define CK_MD_H

#include <sys/param.h>

#ifndef _KERNEL
#error This header file is meant for the FreeBSD kernel.
#endif /* _KERNEL */

#ifndef CK_MD_CACHELINE
/*
 * FreeBSD's CACHE_LINE macro is a compile-time maximum cache-line size for an
 * architecture, defined to be 128 bytes by default on x86*. Even in presence
 * of adjacent sector prefetch, this doesn't make sense from a modeling
 * perspective.
 */
#if defined(__amd64__) || defined(__i386__)
#define CK_MD_CACHELINE (64)
#else
#define CK_MD_CACHELINE (CACHE_LINE_SIZE)
#endif /* !__amd64__ && !__i386__ */
#endif /* CK_MD_CACHELINE */

#ifndef CK_MD_PAGESIZE
#define CK_MD_PAGESIZE (PAGE_SIZE)
#endif

/*
 * Once FreeBSD has a mechanism to detect RTM, this can be enabled and RTM
 * facilities can be called. These facilities refer to TSX.
 */
#ifndef CK_MD_RTM_DISABLE
#define CK_MD_RTM_DISABLE
#endif /* CK_MD_RTM_DISABLE */

/*
 * Do not enable pointer-packing-related (VMA) optimizations in kernel-space.
 */
#ifndef CK_MD_POINTER_PACK_DISABLE
#define CK_MD_POINTER_PACK_DISABLE
#endif /* CK_MD_POINTER_PACK_DISABLE */

/*
 * The following would be used for pointer-packing tricks, disabled for the
 * kernel.
 */
#ifndef CK_MD_VMA_BITS_UNKNOWN
#define CK_MD_VMA_BITS_UNKNOWN
#endif /* CK_MD_VMA_BITS_UNKNOWN */

/*
 * Do not enable double operations in kernel-space.
 */
#ifndef CK_PR_DISABLE_DOUBLE
#define CK_PR_DISABLE_DOUBLE
#endif /* CK_PR_DISABLE_DOUBLE */

/*
 * If building for a uni-processor target, then enable the uniprocessor
 * feature flag. This, among other things, will remove the lock prefix.
 */
#ifndef SMP
#define CK_MD_UMP
#endif /* SMP */

/*
 * Disable the use of compiler builtin functions.
 */
#define CK_MD_CC_BUILTIN_DISABLE 1

/*
 * CK expects those, which are normally defined by the build system.
 */
#if defined(__i386__) && !defined(__x86__)
#define __x86__
/*
 * If x86 becomes more relevant, we may want to consider importing in
 * __mbk() to avoid potential issues around false sharing.
 */
#define CK_MD_TSO
#define CK_MD_SSE_DISABLE 1
#elif defined(__amd64__)
#define CK_MD_TSO
#elif defined(__sparc64__) && !defined(__sparcv9__)
#define __sparcv9__
#define CK_MD_TSO
#elif defined(__powerpc64__) && !defined(__ppc64__)
#define __ppc64__
#elif defined(__powerpc__) && !defined(__ppc__)
#define __ppc__
#endif

/* If no memory model has been defined, assume RMO. */
#if !defined(CK_MD_RMO) && !defined(CK_MD_TSO) && !defined(CK_MD_PSO)
#define CK_MD_RMO
#endif

#define CK_VERSION "@VERSION@"
#define CK_GIT_SHA "@GIT_SHA@"

#endif /* CK_MD_H */

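An illustrative consumer of CK_MD_CACHELINE from this header: padding a hot per-CPU counter out to a full cache line so neighbouring counters do not false-share. "struct pcpu_ctr" is a hypothetical example, not part of the change.

struct pcpu_ctr {
	unsigned long value;
	char pad[CK_MD_CACHELINE - sizeof(unsigned long)];
};
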
include/gcc/aarch64/ck_pr.h
@@ -92,7 +92,7 @@ CK_PR_FENCE(unlock, CK_DMB_SY)
 ck_pr_md_load_##S(const M *target) \
 { \
	long r = 0; \
-	__asm__ __volatile__(I " %w0, [%1];" \
+	__asm__ __volatile__(I " %w0, [%1]\n" \
	    : "=r" (r) \
	    : "r" (target) \
	    : "memory"); \

@@ -103,7 +103,7 @@ CK_PR_FENCE(unlock, CK_DMB_SY)
 ck_pr_md_load_##S(const M *target) \
 { \
	long r = 0; \
-	__asm__ __volatile__(I " %0, [%1];" \
+	__asm__ __volatile__(I " %0, [%1]\n" \
	    : "=r" (r) \
	    : "r" (target) \
	    : "memory"); \

@@ -195,10 +195,10 @@ CK_PR_STORE_S_64(double, double, "str")
	T previous = 0; \
	T tmp = 0; \
	__asm__ __volatile__("1:" \
-	    "ldxr" W " %" R "0, [%2];" \
-	    "neg %" R "0, %" R "0;" \
-	    "stxr" W " %w1, %" R "0, [%2];" \
-	    "cbnz %w1, 1b;" \
+	    "ldxr" W " %" R "0, [%2]\n"\
+	    "neg %" R "0, %" R "0\n" \
+	    "stxr" W " %w1, %" R "0, [%2]\n" \
+	    "cbnz %w1, 1b\n" \
	    : "=&r" (previous), \
	      "=&r" (tmp) \
	    : "r" (target) \

include/gcc/aarch64/ck_pr_llsc.h
@@ -38,17 +38,17 @@ ck_pr_cas_64_2_value(uint64_t target[2], uint64_t compare[2], uint64_t set[2], u
	uint64_t tmp1, tmp2;

	__asm__ __volatile__("1:"
-	    "ldxp %0, %1, [%4];"
-	    "mov %2, %0;"
-	    "mov %3, %1;"
-	    "eor %0, %0, %5;"
-	    "eor %1, %1, %6;"
-	    "orr %1, %0, %1;"
-	    "mov %w0, #0;"
-	    "cbnz %1, 2f;"
-	    "stxp %w0, %7, %8, [%4];"
-	    "cbnz %w0, 1b;"
-	    "mov %w0, #1;"
+	    "ldxp %0, %1, [%4]\n"
+	    "mov %2, %0\n"
+	    "mov %3, %1\n"
+	    "eor %0, %0, %5\n"
+	    "eor %1, %1, %6\n"
+	    "orr %1, %0, %1\n"
+	    "mov %w0, #0\n"
+	    "cbnz %1, 2f\n"
+	    "stxp %w0, %7, %8, [%4]\n"
+	    "cbnz %w0, 1b\n"
+	    "mov %w0, #1\n"
	    "2:"
	    : "=&r" (tmp1), "=&r" (tmp2), "=&r" (value[0]), "=&r" (value[1])
	    : "r" (target), "r" (compare[0]), "r" (compare[1]), "r" (set[0]), "r" (set[1])

@@ -72,15 +72,15 @@ ck_pr_cas_64_2(uint64_t target[2], uint64_t compare[2], uint64_t set[2])
	uint64_t tmp1, tmp2;

	__asm__ __volatile__("1:"
-	    "ldxp %0, %1, [%2];"
-	    "eor %0, %0, %3;"
-	    "eor %1, %1, %4;"
-	    "orr %1, %0, %1;"
-	    "mov %w0, #0;"
-	    "cbnz %1, 2f;"
-	    "stxp %w0, %5, %6, [%2];"
-	    "cbnz %w0, 1b;"
-	    "mov %w0, #1;"
+	    "ldxp %0, %1, [%2]\n"
+	    "eor %0, %0, %3\n"
+	    "eor %1, %1, %4\n"
+	    "orr %1, %0, %1\n"
+	    "mov %w0, #0\n"
+	    "cbnz %1, 2f\n"
+	    "stxp %w0, %5, %6, [%2]\n"
+	    "cbnz %w0, 1b\n"
+	    "mov %w0, #1\n"
	    "2:"
	    : "=&r" (tmp1), "=&r" (tmp2)
	    : "r" (target), "r" (compare[0]), "r" (compare[1]), "r" (set[0]), "r" (set[1])

@@ -103,12 +103,12 @@ ck_pr_cas_ptr_2(void *target, void *compare, void *set)
 { \
	T previous; \
	T tmp; \
-	__asm__ __volatile__("1:" \
-	    "ldxr" W " %" R "0, [%2];" \
-	    "cmp %" R "0, %" R "4;" \
-	    "b.ne 2f;" \
-	    "stxr" W " %w1, %" R "3, [%2];" \
-	    "cbnz %w1, 1b;" \
+	__asm__ __volatile__("1:\n" \
+	    "ldxr" W " %" R "0, [%2]\n" \
+	    "cmp %" R "0, %" R "4\n" \
+	    "b.ne 2f\n" \
+	    "stxr" W " %w1, %" R "3, [%2]\n" \
+	    "cbnz %w1, 1b\n" \
	    "2:" \
	    : "=&r" (previous), \
	      "=&r" (tmp) \

@@ -126,11 +126,11 @@ ck_pr_cas_ptr_2(void *target, void *compare, void *set)
	T tmp; \
	__asm__ __volatile__( \
	    "1:" \
-	    "ldxr" W " %" R "0, [%2];" \
-	    "cmp %" R "0, %" R "4;" \
-	    "b.ne 2f;" \
-	    "stxr" W " %w1, %" R "3, [%2];" \
-	    "cbnz %w1, 1b;" \
+	    "ldxr" W " %" R "0, [%2]\n" \
+	    "cmp %" R "0, %" R "4\n" \
+	    "b.ne 2f\n" \
+	    "stxr" W " %w1, %" R "3, [%2]\n" \
+	    "cbnz %w1, 1b\n" \
	    "2:" \
	    : "=&r" (previous), \
	      "=&r" (tmp) \

@@ -167,9 +167,9 @@ CK_PR_CAS_S(char, char, "b", "w")
	T previous; \
	T tmp; \
	__asm__ __volatile__("1:" \
-	    "ldxr" W " %" R "0, [%2];" \
-	    "stxr" W " %w1, %" R "3, [%2];"\
-	    "cbnz %w1, 1b;" \
+	    "ldxr" W " %" R "0, [%2]\n"\
+	    "stxr" W " %w1, %" R "3, [%2]\n"\
+	    "cbnz %w1, 1b\n" \
	    : "=&r" (previous), \
	      "=&r" (tmp) \
	    : "r" (target), \

@@ -198,10 +198,10 @@ CK_PR_FAS(char, char, char, "b", "w")
	T previous = 0; \
	T tmp = 0; \
	__asm__ __volatile__("1:" \
-	    "ldxr" W " %" R "0, [%2];" \
-	    I ";" \
-	    "stxr" W " %w1, %" R "0, [%2];" \
-	    "cbnz %w1, 1b;" \
+	    "ldxr" W " %" R "0, [%2]\n"\
+	    I "\n" \
+	    "stxr" W " %w1, %" R "0, [%2]\n" \
+	    "cbnz %w1, 1b\n" \
	    : "=&r" (previous), \
	      "=&r" (tmp) \
	    : "r" (target) \

@@ -239,10 +239,10 @@ CK_PR_UNARY_S(char, char, "b")
	T previous; \
	T tmp; \
	__asm__ __volatile__("1:" \
-	    "ldxr" W " %" R "0, [%2];"\
-	    I " %" R "0, %" R "0, %" R "3;" \
-	    "stxr" W " %w1, %" R "0, [%2];" \
-	    "cbnz %w1, 1b;" \
+	    "ldxr" W " %" R "0, [%2]\n"\
+	    I " %" R "0, %" R "0, %" R "3\n" \
+	    "stxr" W " %w1, %" R "0, [%2]\n" \
+	    "cbnz %w1, 1b\n" \
	    : "=&r" (previous), \
	      "=&r" (tmp) \
	    : "r" (target), \

@@ -286,10 +286,10 @@ ck_pr_faa_ptr(void *target, uintptr_t delta)
	uintptr_t previous, r, tmp;

	__asm__ __volatile__("1:"
-	    "ldxr %0, [%3];"
-	    "add %1, %4, %0;"
-	    "stxr %w2, %1, [%3];"
-	    "cbnz %w2, 1b;"
+	    "ldxr %0, [%3]\n"
+	    "add %1, %4, %0\n"
+	    "stxr %w2, %1, [%3]\n"
+	    "cbnz %w2, 1b\n"
	    : "=&r" (previous),
	      "=&r" (r),
	      "=&r" (tmp)

@@ -306,9 +306,9 @@ ck_pr_faa_64(uint64_t *target, uint64_t delta)
	uint64_t previous, r, tmp;

	__asm__ __volatile__("1:"
-	    "ldxr %0, [%3];"
-	    "add %1, %4, %0;"
-	    "stxr %w2, %1, [%3];"
+	    "ldxr %0, [%3]\n"
+	    "add %1, %4, %0\n"
+	    "stxr %w2, %1, [%3]\n"
	    "cbnz %w2, 1b;"
	    : "=&r" (previous),
	      "=&r" (r),

@@ -326,10 +326,10 @@ ck_pr_faa_64(uint64_t *target, uint64_t delta)
 { \
	T previous, r, tmp; \
	__asm__ __volatile__("1:" \
-	    "ldxr" W " %w0, [%3];" \
-	    "add %w1, %w4, %w0;" \
-	    "stxr" W " %w2, %w1, [%3];" \
-	    "cbnz %w2, 1b;" \
+	    "ldxr" W " %w0, [%3]\n" \
+	    "add %w1, %w4, %w0\n" \
+	    "stxr" W " %w2, %w1, [%3]\n" \
+	    "cbnz %w2, 1b\n" \
	    : "=&r" (previous), \
	      "=&r" (r), \
	      "=&r" (tmp) \

include/gcc/aarch64/ck_pr_lse.h
@@ -29,6 +29,7 @@
 #ifndef CK_PR_AARCH64_LSE_H
 #define CK_PR_AARCH64_LSE_H

+#error bite
 #ifndef CK_PR_H
 #error Do not include this file directly, use ck_pr.h
 #endif

@@ -43,10 +44,10 @@ ck_pr_cas_64_2_value(uint64_t target[2], uint64_t compare[2], uint64_t set[2], u
	register uint64_t x2 __asm__ ("x2") = set[0];
	register uint64_t x3 __asm__ ("x3") = set[1];

-	__asm__ __volatile__("casp %0, %1, %4, %5, [%6];"
-	    "eor %2, %0, %7;"
-	    "eor %3, %1, %8;"
-	    "orr %2, %2, %3;"
+	__asm__ __volatile__("casp %0, %1, %4, %5, [%6]\n"
+	    "eor %2, %0, %7\n"
+	    "eor %3, %1, %8\n"
+	    "orr %2, %2, %3\n"
	    : "+&r" (x0), "+&r" (x1), "=&r" (tmp1), "=&r" (tmp2)
	    : "r" (x2), "r" (x3), "r" (target), "r" (compare[0]), "r" (compare[1])
	    : "memory");

@@ -74,10 +75,10 @@ ck_pr_cas_64_2(uint64_t target[2], uint64_t compare[2], uint64_t set[2])
	register uint64_t x2 __asm__ ("x2") = set[0];
	register uint64_t x3 __asm__ ("x3") = set[1];

-	__asm__ __volatile__("casp %0, %1, %2, %3, [%4];"
-	    "eor %0, %0, %5;"
-	    "eor %1, %1, %6;"
-	    "orr %0, %0, %1;"
+	__asm__ __volatile__("casp %0, %1, %2, %3, [%4]\n"
+	    "eor %0, %0, %5\n"
+	    "eor %1, %1, %6\n"
+	    "orr %0, %0, %1\n"
	    : "+&r" (x0), "+&r" (x1)
	    : "r" (x2), "r" (x3), "r" (target), "r" (compare[0]), "r" (compare[1])
	    : "memory");

@@ -99,7 +100,7 @@ ck_pr_cas_ptr_2(void *target, void *compare, void *set)
 { \
	*(T *)value = compare; \
	__asm__ __volatile__( \
-	    "cas" W " %" R "0, %" R "2, [%1];" \
+	    "cas" W " %" R "0, %" R "2, [%1]\n"\
	    : "+&r" (*(T *)value) \
	    : "r" (target), \
	      "r" (set) \

@@ -111,7 +112,7 @@ ck_pr_cas_ptr_2(void *target, void *compare, void *set)
 { \
	T previous = compare; \
	__asm__ __volatile__( \
-	    "cas" W " %" R "0, %" R "2, [%1];" \
+	    "cas" W " %" R "0, %" R "2, [%1]\n"\
	    : "+&r" (previous) \
	    : "r" (target), \
	      "r" (set) \

@@ -144,7 +145,7 @@ CK_PR_CAS_S(char, char, "b", "w")
 { \
	T previous; \
	__asm__ __volatile__( \
-	    "swp" W " %" R "2, %" R "0, [%1];" \
+	    "swp" W " %" R "2, %" R "0, [%1]\n"\
	    : "=&r" (previous) \
	    : "r" (target), \
	      "r" (v) \

@@ -169,8 +170,8 @@ CK_PR_FAS(char, char, char, "b", "w")
 CK_CC_INLINE static void \
 ck_pr_##O##_##N(M *target) \
 { \
-	__asm__ __volatile__(I ";" \
-	    "st" S W " " R "0, [%0];" \
+	__asm__ __volatile__(I "\n" \
+	    "st" S W " " R "0, [%0]\n" \
	    : \
	    : "r" (target) \
	    : "x0", "memory"); \

@@ -204,8 +205,8 @@ CK_PR_UNARY_S(char, char, "b")
 CK_CC_INLINE static void \
 ck_pr_##O##_##N(M *target, T delta) \
 { \
-	__asm__ __volatile__(I ";" \
-	    "st" S W " %" R "0, [%1];" \
+	__asm__ __volatile__(I "\n" \
+	    "st" S W " %" R "0, [%1]\n"\
	    : "+&r" (delta) \
	    : "r" (target) \
	    : "memory"); \

@@ -247,7 +248,7 @@ ck_pr_faa_ptr(void *target, uintptr_t delta)
	uintptr_t previous;

	__asm__ __volatile__(
-	    "ldadd %2, %0, [%1];"
+	    "ldadd %2, %0, [%1]\n"
	    : "=r" (previous)
	    : "r" (target),
	      "r" (delta)

@@ -262,7 +263,7 @@ ck_pr_faa_64(uint64_t *target, uint64_t delta)
	uint64_t previous;

	__asm__ __volatile__(
-	    "ldadd %2, %0, [%1];"
+	    "ldadd %2, %0, [%1]\n"
	    : "=r" (previous)
	    : "r" (target),
	      "r" (delta)

@@ -277,7 +278,7 @@ ck_pr_faa_64(uint64_t *target, uint64_t delta)
 { \
	T previous; \
	__asm__ __volatile__( \
-	    "ldadd" W " %w2, %w0, [%1];" \
+	    "ldadd" W " %w2, %w0, [%1]\n" \
	    : "=r" (previous) \
	    : "r" (target), \
	      "r" (delta) \

include/gcc/ck_cc.h
@@ -39,6 +39,15 @@
 #define CK_CC_UNUSED __attribute__((unused))
 #define CK_CC_USED __attribute__((used))
 #define CK_CC_IMM "i"

+#define CK_CC_CONTAINER(F, T, M, N) \
+	CK_CC_INLINE static T * \
+	N(F *p) \
+	{ \
+ \
+		return (T *)(void *)((char *)p - __builtin_offsetof(T, M)); \
+	}
+
 #if defined(__x86_64__) || defined(__x86__)
 #define CK_CC_IMM_U32 "Z"
 #define CK_CC_IMM_S32 "e"

@@ -103,28 +112,26 @@
 #define CK_CC_TYPEOF(X, DEFAULT) __typeof__(X)

 /*
- * Portability wrappers for bitwise ops.
+ * Portability wrappers for bitwise operations.
  */

 #ifndef CK_MD_CC_BUILTIN_DISABLE
 #define CK_F_CC_FFS
-#define CK_F_CC_CLZ
-#define CK_F_CC_CTZ
-#define CK_F_CC_POPCOUNT
-
 CK_CC_INLINE static int
 ck_cc_ffs(unsigned int x)
 {

-	return __builtin_ffs(x);
+	return __builtin_ffsl(x);
 }

+#define CK_F_CC_FFSL
 CK_CC_INLINE static int
-ck_cc_clz(unsigned int x)
+ck_cc_ffsl(unsigned long x)
 {

-	return __builtin_clz(x);
+	return __builtin_ffsll(x);
 }

+#define CK_F_CC_CTZ
 CK_CC_INLINE static int
 ck_cc_ctz(unsigned int x)
 {

@@ -132,11 +139,12 @@ ck_cc_ctz(unsigned int x)
	return __builtin_ctz(x);
 }

+#define CK_F_CC_POPCOUNT
 CK_CC_INLINE static int
 ck_cc_popcount(unsigned int x)
 {

	return __builtin_popcount(x);
 }

 #endif /* CK_MD_CC_BUILTIN_DISABLE */
 #endif /* CK_GCC_CC_H */

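A sketch of the builtin-backed CK_CC_CONTAINER: it emits an inline function that maps a pointer to an embedded member back to its enclosing object, with the offset computed at compile time. "struct inner", "struct outer" and outer_of are illustrative names.

struct inner {
	int x;
};

struct outer {
	int header;
	struct inner member;
};

/* Defines: static struct outer *outer_of(struct inner *p). */
CK_CC_CONTAINER(struct inner, struct outer, member, outer_of)

Given struct outer o, outer_of(&o.member) returns &o at no run-time cost beyond the subtraction.
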
@ -80,7 +80,7 @@ ck_pr_md_load_ptr(const void *target)
|
|||
void *r;
|
||||
|
||||
ck_pr_barrier();
|
||||
r = CK_CC_DECONST_PTR(CK_PR_ACCESS(target));
|
||||
r = CK_CC_DECONST_PTR(*(volatile void *const*)(target));
|
||||
ck_pr_barrier();
|
||||
|
||||
return r;
|
||||
|
@@ -91,7 +91,7 @@ ck_pr_md_store_ptr(void *target, const void *v)
{

    ck_pr_barrier();
    CK_PR_ACCESS(target) = CK_CC_DECONST_PTR(v);
    *(volatile void **)target = CK_CC_DECONST_PTR(v);
    ck_pr_barrier();
    return;
}

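Spelling the access out as a volatile dereference guarantees the compiler issues exactly one load or one store, which is the whole point of these wrappers. What they buy at the call site, sketched with the public ck_pr interface (shared_slot is illustrative):

#include <ck_pr.h>

static void *shared_slot;    /* hypothetical shared pointer */

void
publish(void *p)
{
    ck_pr_store_ptr(&shared_slot, p);    /* exactly one store, compiler-ordered */
}

void *
consume(void)
{
    return ck_pr_load_ptr(&shared_slot); /* exactly one load, compiler-ordered */
}
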
@@ -67,21 +67,29 @@ ck_pr_stall(void)
    __asm__ __volatile__(I ::: "memory");   \
}

CK_PR_FENCE(atomic, "lwsync")
CK_PR_FENCE(atomic_store, "lwsync")
#ifdef CK_MD_PPC32_LWSYNC
#define CK_PR_LWSYNCOP "lwsync"
#else /* CK_MD_PPC32_LWSYNC_DISABLE */
#define CK_PR_LWSYNCOP "sync"
#endif

CK_PR_FENCE(atomic, CK_PR_LWSYNCOP)
CK_PR_FENCE(atomic_store, CK_PR_LWSYNCOP)
CK_PR_FENCE(atomic_load, "sync")
CK_PR_FENCE(store_atomic, "lwsync")
CK_PR_FENCE(load_atomic, "lwsync")
CK_PR_FENCE(store, "lwsync")
CK_PR_FENCE(store_atomic, CK_PR_LWSYNCOP)
CK_PR_FENCE(load_atomic, CK_PR_LWSYNCOP)
CK_PR_FENCE(store, CK_PR_LWSYNCOP)
CK_PR_FENCE(store_load, "sync")
CK_PR_FENCE(load, "lwsync")
CK_PR_FENCE(load_store, "lwsync")
CK_PR_FENCE(load, CK_PR_LWSYNCOP)
CK_PR_FENCE(load_store, CK_PR_LWSYNCOP)
CK_PR_FENCE(memory, "sync")
CK_PR_FENCE(acquire, "lwsync")
CK_PR_FENCE(release, "lwsync")
CK_PR_FENCE(acqrel, "lwsync")
CK_PR_FENCE(lock, "lwsync")
CK_PR_FENCE(unlock, "lwsync")
CK_PR_FENCE(acquire, CK_PR_LWSYNCOP)
CK_PR_FENCE(release, CK_PR_LWSYNCOP)
CK_PR_FENCE(acqrel, CK_PR_LWSYNCOP)
CK_PR_FENCE(lock, CK_PR_LWSYNCOP)
CK_PR_FENCE(unlock, CK_PR_LWSYNCOP)

#undef CK_PR_LWSYNCOP

#undef CK_PR_FENCE

97
include/gcc/s390x/ck_f_pr.h
Normal file
@@ -0,0 +1,97 @@
/* DO NOT EDIT. This is auto-generated from feature.sh */
#define CK_F_PR_ADD_32
#define CK_F_PR_ADD_64
#define CK_F_PR_ADD_INT
#define CK_F_PR_ADD_PTR
#define CK_F_PR_ADD_UINT
#define CK_F_PR_AND_32
#define CK_F_PR_AND_64
#define CK_F_PR_AND_INT
#define CK_F_PR_AND_PTR
#define CK_F_PR_AND_UINT
#define CK_F_PR_CAS_32
#define CK_F_PR_CAS_32_VALUE
#define CK_F_PR_CAS_64
#define CK_F_PR_CAS_64_VALUE
#define CK_F_PR_CAS_INT
#define CK_F_PR_CAS_INT_VALUE
#define CK_F_PR_CAS_PTR
#define CK_F_PR_CAS_PTR_VALUE
#define CK_F_PR_CAS_UINT
#define CK_F_PR_CAS_UINT_VALUE
#define CK_F_PR_DEC_32
#define CK_F_PR_DEC_64
#define CK_F_PR_DEC_INT
#define CK_F_PR_DEC_PTR
#define CK_F_PR_DEC_UINT
#define CK_F_PR_FAA_32
#define CK_F_PR_FAA_64
#define CK_F_PR_FAA_INT
#define CK_F_PR_FAA_PTR
#define CK_F_PR_FAA_UINT
#define CK_F_PR_FAS_32
#define CK_F_PR_FAS_64
#define CK_F_PR_FAS_INT
#define CK_F_PR_FAS_PTR
#define CK_F_PR_FAS_UINT
#define CK_F_PR_FAS_DOUBLE
#define CK_F_PR_FENCE_LOAD
#define CK_F_PR_FENCE_LOAD_DEPENDS
#define CK_F_PR_FENCE_MEMORY
#define CK_F_PR_FENCE_STORE
#define CK_F_PR_FENCE_STRICT_LOAD
#define CK_F_PR_FENCE_STRICT_LOAD_DEPENDS
#define CK_F_PR_FENCE_STRICT_MEMORY
#define CK_F_PR_FENCE_STRICT_STORE
#define CK_F_PR_INC_32
#define CK_F_PR_INC_64
#define CK_F_PR_INC_INT
#define CK_F_PR_INC_PTR
#define CK_F_PR_INC_UINT
#define CK_F_PR_LOAD_16
#define CK_F_PR_LOAD_32
#define CK_F_PR_LOAD_64
#define CK_F_PR_LOAD_8
#define CK_F_PR_LOAD_CHAR
#define CK_F_PR_LOAD_DOUBLE
#define CK_F_PR_LOAD_INT
#define CK_F_PR_LOAD_PTR
#define CK_F_PR_LOAD_SHORT
#define CK_F_PR_LOAD_UINT
#define CK_F_PR_NEG_32
#define CK_F_PR_NEG_64
#define CK_F_PR_NEG_INT
#define CK_F_PR_NEG_PTR
#define CK_F_PR_NEG_UINT
#define CK_F_PR_NOT_32
#define CK_F_PR_NOT_64
#define CK_F_PR_NOT_INT
#define CK_F_PR_NOT_PTR
#define CK_F_PR_NOT_UINT
#define CK_F_PR_OR_32
#define CK_F_PR_OR_64
#define CK_F_PR_OR_INT
#define CK_F_PR_OR_PTR
#define CK_F_PR_OR_UINT
#define CK_F_PR_STALL
#define CK_F_PR_STORE_16
#define CK_F_PR_STORE_32
#define CK_F_PR_STORE_64
#define CK_F_PR_STORE_8
#define CK_F_PR_STORE_CHAR
#define CK_F_PR_STORE_DOUBLE
#define CK_F_PR_STORE_INT
#define CK_F_PR_STORE_PTR
#define CK_F_PR_STORE_SHORT
#define CK_F_PR_STORE_UINT
#define CK_F_PR_SUB_32
#define CK_F_PR_SUB_64
#define CK_F_PR_SUB_INT
#define CK_F_PR_SUB_PTR
#define CK_F_PR_SUB_UINT
#define CK_F_PR_XOR_32
#define CK_F_PR_XOR_64
#define CK_F_PR_XOR_INT
#define CK_F_PR_XOR_PTR
#define CK_F_PR_XOR_UINT

373
include/gcc/s390x/ck_pr.h
Normal file
@@ -0,0 +1,373 @@
/*
 * Copyright 2009-2015 Samy Al Bahra.
 * Copyright 2017 Neale Ferguson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef CK_PR_S390X_H
#define CK_PR_S390X_H

#ifndef CK_PR_H
#error Do not include this file directly, use ck_pr.h
#endif

#include <ck_cc.h>
#include <ck_md.h>

/*
 * The following represent supported atomic operations.
 * These operations may be emulated.
 */
#include "ck_f_pr.h"

/*
 * Minimum interface requirement met.
 */
#define CK_F_PR

/*
 * This bounces the hardware thread from low to medium
 * priority. I am unsure of the benefits of this approach
 * but it is used by the Linux kernel.
 */
CK_CC_INLINE static void
ck_pr_stall(void)
{
    __sync_synchronize();
    return;
}

#define CK_PR_FENCE(T)                  \
    CK_CC_INLINE static void            \
    ck_pr_fence_strict_##T(void)        \
    {                                   \
        __sync_synchronize();           \
    }

/*
 * These are derived from:
 *     http://www.ibm.com/developerworks/systems/articles/powerpc.html
 */
CK_PR_FENCE(atomic)
CK_PR_FENCE(atomic_store)
CK_PR_FENCE(atomic_load)
CK_PR_FENCE(store_atomic)
CK_PR_FENCE(load_atomic)
CK_PR_FENCE(store)
CK_PR_FENCE(store_load)
CK_PR_FENCE(load)
CK_PR_FENCE(load_store)
CK_PR_FENCE(memory)
CK_PR_FENCE(acquire)
CK_PR_FENCE(release)
CK_PR_FENCE(acqrel)
CK_PR_FENCE(lock)
CK_PR_FENCE(unlock)

#undef CK_PR_FENCE

#define CK_PR_LOAD(S, M, T, C, I)                       \
    CK_CC_INLINE static T                               \
    ck_pr_md_load_##S(const M *target)                  \
    {                                                   \
        T r;                                            \
        __asm__ __volatile__(I "\t%0, %1\n"             \
                             : "=r" (r)                 \
                             : "Q" (*(const C *)target) \
                             : "memory");               \
        return (r);                                     \
    }

CK_PR_LOAD(ptr, void, void *, uint64_t, "lg")

#define CK_PR_LOAD_S(S, T, I) CK_PR_LOAD(S, T, T, T, I)

CK_PR_LOAD_S(64, uint64_t, "lg")
CK_PR_LOAD_S(32, uint32_t, "llgf")
CK_PR_LOAD_S(16, uint16_t, "llgh")
CK_PR_LOAD_S(8, uint8_t, "llgc")
CK_PR_LOAD_S(uint, unsigned int, "llgf")
CK_PR_LOAD_S(int, int, "llgf")
CK_PR_LOAD_S(short, short, "lgh")
CK_PR_LOAD_S(char, char, "lgb")
#ifndef CK_PR_DISABLE_DOUBLE
CK_CC_INLINE static double
ck_pr_md_load_double(const double *target)
{
    double r;
    __asm__ __volatile__("ld %0, %1\n"
                         : "=f" (r)
                         : "Q" (*(const double *)target)
                         : "memory");
    return (r);
}
#endif

#undef CK_PR_LOAD_S
#undef CK_PR_LOAD

#define CK_PR_STORE(S, M, T, C, I)                      \
    CK_CC_INLINE static void                            \
    ck_pr_md_store_##S(M *target, T v)                  \
    {                                                   \
        __asm__ __volatile__(I "\t%1, %0\n"             \
                             : "=Q" (*(C *)target)      \
                             : "r" (v)                  \
                             : "memory");               \
        return;                                         \
    }

CK_PR_STORE(ptr, void, const void *, uint64_t, "stg")

#define CK_PR_STORE_S(S, T, I) CK_PR_STORE(S, T, T, T, I)

CK_PR_STORE_S(64, uint64_t, "stg")
CK_PR_STORE_S(32, uint32_t, "st")
CK_PR_STORE_S(16, uint16_t, "sth")
CK_PR_STORE_S(8, uint8_t, "stc")
CK_PR_STORE_S(uint, unsigned int, "st")
CK_PR_STORE_S(int, int, "st")
CK_PR_STORE_S(short, short, "sth")
CK_PR_STORE_S(char, char, "stc")
#ifndef CK_PR_DISABLE_DOUBLE
CK_CC_INLINE static void
ck_pr_md_store_double(double *target, double v)
{
    __asm__ __volatile__("  std %1, %0\n"
                         : "=Q" (*(double *)target)
                         : "f" (v)
                         : "0", "memory");
}
#endif

#undef CK_PR_STORE_S
#undef CK_PR_STORE

CK_CC_INLINE static bool
ck_pr_cas_64_value(uint64_t *target, uint64_t compare, uint64_t set, uint64_t *value)
{
    *value = __sync_val_compare_and_swap(target,compare,set);
    return (*value == compare);
}

CK_CC_INLINE static bool
ck_pr_cas_ptr_value(void *target, void *compare, void *set, void *value)
{
    uintptr_t previous;

    previous = __sync_val_compare_and_swap((uintptr_t *) target,
                                           (uintptr_t) compare,
                                           (uintptr_t) set);
    *((uintptr_t *) value) = previous;
    return (previous == (uintptr_t) compare);
}

CK_CC_INLINE static bool
ck_pr_cas_64(uint64_t *target, uint64_t compare, uint64_t set)
{
    return(__sync_bool_compare_and_swap(target,compare,set));
}

CK_CC_INLINE static bool
ck_pr_cas_ptr(void *target, void *compare, void *set)
{
    return(__sync_bool_compare_and_swap((uintptr_t *) target,
                                        (uintptr_t) compare,
                                        (uintptr_t) set));
}

#define CK_PR_CAS(N, T)                                             \
    CK_CC_INLINE static bool                                        \
    ck_pr_cas_##N##_value(T *target, T compare, T set, T *value)    \
    {                                                               \
        *value = __sync_val_compare_and_swap(target,                \
                                             compare,               \
                                             set);                  \
        return(*value == compare);                                  \
    }                                                               \
    CK_CC_INLINE static bool                                        \
    ck_pr_cas_##N(T *target, T compare, T set)                      \
    {                                                               \
        return(__sync_bool_compare_and_swap(target,                 \
                                            compare,                \
                                            set));                  \
    }

CK_PR_CAS(32, uint32_t)
CK_PR_CAS(uint, unsigned int)
CK_PR_CAS(int, int)

#undef CK_PR_CAS

CK_CC_INLINE static void *
ck_pr_fas_ptr(void *target, void *v)
{
    return((void *)__atomic_exchange_n((uintptr_t *) target, (uintptr_t) v, __ATOMIC_ACQUIRE));
}

#define CK_PR_FAS(N, M, T)                                              \
    CK_CC_INLINE static T                                               \
    ck_pr_fas_##N(M *target, T v)                                       \
    {                                                                   \
        return(__atomic_exchange_n(target, v, __ATOMIC_ACQUIRE));       \
    }

CK_PR_FAS(64, uint64_t, uint64_t)
CK_PR_FAS(32, uint32_t, uint32_t)
CK_PR_FAS(int, int, int)
CK_PR_FAS(uint, unsigned int, unsigned int)

#ifndef CK_PR_DISABLE_DOUBLE
CK_CC_INLINE static double
ck_pr_fas_double(double *target, double *v)
{
    double previous;

    __asm__ __volatile__ ("   lg  1,%2\n"
                          "0: lg  0,%1\n"
                          "   csg 0,1,%1\n"
                          "   jnz 0b\n"
                          "   ldgr %0,0\n"
                          : "=f" (previous)
                          : "Q" (target), "Q" (v)
                          : "0", "1", "cc", "memory");
    return (previous);
}
#endif

#undef CK_PR_FAS

/*
 * Atomic store-only binary operations.
 */
#define CK_PR_BINARY(K, S, M, T)                        \
    CK_CC_INLINE static void                            \
    ck_pr_##K##_##S(M *target, T d)                     \
    {                                                   \
        d = __sync_fetch_and_##K((T *)target, d);       \
        return;                                         \
    }

#define CK_PR_BINARY_S(K, S, T) CK_PR_BINARY(K, S, T, T)

#define CK_PR_GENERATE(K)                               \
    CK_PR_BINARY(K, ptr, void, void *)                  \
    CK_PR_BINARY_S(K, char, char)                       \
    CK_PR_BINARY_S(K, int, int)                         \
    CK_PR_BINARY_S(K, uint, unsigned int)               \
    CK_PR_BINARY_S(K, 64, uint64_t)                     \
    CK_PR_BINARY_S(K, 32, uint32_t)                     \
    CK_PR_BINARY_S(K, 16, uint16_t)                     \
    CK_PR_BINARY_S(K, 8, uint8_t)

CK_PR_GENERATE(add)
CK_PR_GENERATE(sub)
CK_PR_GENERATE(and)
CK_PR_GENERATE(or)
CK_PR_GENERATE(xor)

#undef CK_PR_GENERATE
#undef CK_PR_BINARY_S
#undef CK_PR_BINARY

#define CK_PR_UNARY(S, M, T)                \
    CK_CC_INLINE static void                \
    ck_pr_inc_##S(M *target)                \
    {                                       \
        ck_pr_add_##S(target, (T)1);        \
        return;                             \
    }                                       \
    CK_CC_INLINE static void                \
    ck_pr_dec_##S(M *target)                \
    {                                       \
        ck_pr_sub_##S(target, (T)1);        \
        return;                             \
    }

#define CK_PR_UNARY_X(S, M)                                     \
    CK_CC_INLINE static void                                    \
    ck_pr_not_##S(M *target)                                    \
    {                                                           \
        M newval;                                               \
        do {                                                    \
            newval = ~(*target);                                \
        } while (!__sync_bool_compare_and_swap(target,          \
                                               *target,         \
                                               newval));        \
    }                                                           \
    CK_CC_INLINE static void                                    \
    ck_pr_neg_##S(M *target)                                    \
    {                                                           \
        M newval;                                               \
        do {                                                    \
            newval = -(*target);                                \
        } while (!__sync_bool_compare_and_swap(target,          \
                                               *target,         \
                                               newval));        \
    }

#define CK_PR_UNARY_S(S, M) CK_PR_UNARY(S, M, M)    \
                            CK_PR_UNARY_X(S, M)

CK_PR_UNARY(ptr, void, void *)
CK_PR_UNARY_S(char, char)
CK_PR_UNARY_S(int, int)
CK_PR_UNARY_S(uint, unsigned int)
CK_PR_UNARY_S(64, uint64_t)
CK_PR_UNARY_S(32, uint32_t)
CK_PR_UNARY_S(16, uint16_t)
CK_PR_UNARY_S(8, uint8_t)

#undef CK_PR_UNARY_S
#undef CK_PR_UNARY

CK_CC_INLINE static void *
ck_pr_faa_ptr(void *target, uintptr_t delta)
{
    uintptr_t previous;

    previous = __sync_fetch_and_add((uintptr_t *) target, delta);

    return (void *)(previous);
}

#define CK_PR_FAA(S, T)                                         \
    CK_CC_INLINE static T                                       \
    ck_pr_faa_##S(T *target, T delta)                           \
    {                                                           \
        T previous;                                             \
                                                                \
        previous = __sync_fetch_and_add(target, delta);         \
                                                                \
        return (previous);                                      \
    }

CK_PR_FAA(64, uint64_t)
CK_PR_FAA(32, uint32_t)
CK_PR_FAA(uint, unsigned int)
CK_PR_FAA(int, int)

#undef CK_PR_FAA

#endif /* CK_PR_S390X_H */

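Because this port routes compare-and-swap through the __sync builtins, its observable behavior matches the hand-written ports. A minimal sketch of the value-returning CAS in a retry loop (names illustrative, not library code):

#include <stdbool.h>
#include <stdint.h>
#include <ck_pr.h>

/* Atomically bump a 64-bit generation counter, tolerating races. */
static void
bump_generation(uint64_t *gen)
{
    uint64_t old, unused;

    do {
        old = ck_pr_load_64(gen);
        /* On failure, *value still receives the observed old value. */
    } while (ck_pr_cas_64_value(gen, old, old + 1, &unused) == false);
}
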
@@ -76,7 +76,7 @@ CK_PR_FENCE(store, "membar #StoreStore")
CK_PR_FENCE(store_load, "membar #StoreLoad")
CK_PR_FENCE(load, "membar #LoadLoad")
CK_PR_FENCE(load_store, "membar #LoadStore")
CK_PR_FENCE(memory, "membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad")
CK_PR_FENCE(memory, "membar #MemIssue")
CK_PR_FENCE(acquire, "membar #LoadLoad | #LoadStore")
CK_PR_FENCE(release, "membar #LoadStore | #StoreStore")
CK_PR_FENCE(acqrel, "membar #LoadLoad | #LoadStore | #StoreStore")

@@ -136,11 +136,26 @@ CK_PR_STORE_S(int, int, "stsw")
#undef CK_PR_STORE_S
#undef CK_PR_STORE

/* Use the appropriate address space for atomics within the FreeBSD kernel. */
#if defined(__FreeBSD__) && defined(_KERNEL)
#include <sys/cdefs.h>
#include <machine/atomic.h>
#define CK_PR_INS_CAS "casa"
#define CK_PR_INS_CASX "casxa"
#define CK_PR_INS_SWAP "swapa"
#define CK_PR_ASI_ATOMIC __XSTRING(__ASI_ATOMIC)
#else
#define CK_PR_INS_CAS "cas"
#define CK_PR_INS_CASX "casx"
#define CK_PR_INS_SWAP "swap"
#define CK_PR_ASI_ATOMIC ""
#endif

CK_CC_INLINE static bool
ck_pr_cas_64_value(uint64_t *target, uint64_t compare, uint64_t set, uint64_t *value)
{

    __asm__ __volatile__("casx [%1], %2, %0"
    __asm__ __volatile__(CK_PR_INS_CASX " [%1] " CK_PR_ASI_ATOMIC ", %2, %0"
                         : "+&r" (set)
                         : "r" (target),
                           "r" (compare)

@@ -154,7 +169,7 @@ CK_CC_INLINE static bool
ck_pr_cas_64(uint64_t *target, uint64_t compare, uint64_t set)
{

    __asm__ __volatile__("casx [%1], %2, %0"
    __asm__ __volatile__(CK_PR_INS_CASX " [%1] " CK_PR_ASI_ATOMIC ", %2, %0"
                         : "+&r" (set)
                         : "r" (target),
                           "r" (compare)

@@ -181,7 +196,7 @@ ck_pr_cas_ptr_value(void *target, void *compare, void *set, void *previous)
    CK_CC_INLINE static bool                                            \
    ck_pr_cas_##N##_value(T *target, T compare, T set, T *value)        \
    {                                                                   \
        __asm__ __volatile__("cas [%1], %2, %0"                         \
        __asm__ __volatile__(CK_PR_INS_CAS " [%1] " CK_PR_ASI_ATOMIC ", %2, %0" \
                             : "+&r" (set)                              \
                             : "r" (target),                            \
                               "r" (compare)                            \

@@ -192,7 +207,7 @@ ck_pr_cas_ptr_value(void *target, void *compare, void *set, void *previous)
    CK_CC_INLINE static bool                                            \
    ck_pr_cas_##N(T *target, T compare, T set)                          \
    {                                                                   \
        __asm__ __volatile__("cas [%1], %2, %0"                         \
        __asm__ __volatile__(CK_PR_INS_CAS " [%1] " CK_PR_ASI_ATOMIC ", %2, %0" \
                             : "+&r" (set)                              \
                             : "r" (target),                            \
                               "r" (compare)                            \

@@ -211,7 +226,7 @@ CK_PR_CAS(int, int)
    ck_pr_fas_##N(T *target, T update)                                  \
    {                                                                   \
                                                                        \
        __asm__ __volatile__("swap [%1], %0"                            \
        __asm__ __volatile__(CK_PR_INS_SWAP " [%1] " CK_PR_ASI_ATOMIC ", %0" \
                             : "+&r" (update)                           \
                             : "r" (target)                             \
                             : "memory");                               \

@@ -224,5 +239,10 @@ CK_PR_FAS(32, uint32_t)

#undef CK_PR_FAS

#undef CK_PR_INS_CAS
#undef CK_PR_INS_CASX
#undef CK_PR_INS_SWAP
#undef CK_PR_ASI_ATOMIC

#endif /* CK_PR_SPARCV9_H */

@@ -45,15 +45,9 @@
/* Minimum requirements for the CK_PR interface are met. */
#define CK_F_PR

#ifdef CK_MD_UMP
#define CK_PR_LOCK_PREFIX
#else
#define CK_PR_LOCK_PREFIX "lock "
#endif

/*
 * Prevent speculative execution in busy-wait loops (P4 <=)
 * or "predefined delay".
 * Prevent speculative execution in busy-wait loops (P4 <=) or "predefined
 * delay".
 */
CK_CC_INLINE static void
ck_pr_stall(void)

@@ -62,28 +56,52 @@ ck_pr_stall(void)
    return;
}

#ifdef CK_MD_UMP
#define CK_PR_LOCK_PREFIX
#define CK_PR_FENCE(T, I)                               \
    CK_CC_INLINE static void                            \
    ck_pr_fence_strict_##T(void)                        \
    {                                                   \
        __asm__ __volatile__("" ::: "memory");          \
        return;                                         \
    }
#else
#define CK_PR_LOCK_PREFIX "lock "
#define CK_PR_FENCE(T, I)                               \
    CK_CC_INLINE static void                            \
    ck_pr_fence_strict_##T(void)                        \
    {                                                   \
        __asm__ __volatile__(I ::: "memory");           \
        return;                                         \
    }
#endif /* CK_MD_UMP */

CK_PR_FENCE(atomic, "sfence")
CK_PR_FENCE(atomic_store, "sfence")
CK_PR_FENCE(atomic_load, "mfence")
CK_PR_FENCE(store_atomic, "sfence")
CK_PR_FENCE(load_atomic, "mfence")
CK_PR_FENCE(load, "lfence")
CK_PR_FENCE(load_store, "mfence")
CK_PR_FENCE(store, "sfence")
CK_PR_FENCE(store_load, "mfence")
CK_PR_FENCE(memory, "mfence")
CK_PR_FENCE(release, "mfence")
CK_PR_FENCE(acquire, "mfence")
CK_PR_FENCE(acqrel, "mfence")
CK_PR_FENCE(lock, "mfence")
CK_PR_FENCE(unlock, "mfence")
#if defined(CK_MD_SSE_DISABLE)
/* If SSE is disabled, then use atomic operations for serialization. */
#define CK_MD_X86_MFENCE "lock addl $0, (%%esp)"
#define CK_MD_X86_SFENCE CK_MD_X86_MFENCE
#define CK_MD_X86_LFENCE CK_MD_X86_MFENCE
#else
#define CK_MD_X86_SFENCE "sfence"
#define CK_MD_X86_LFENCE "lfence"
#define CK_MD_X86_MFENCE "mfence"
#endif /* !CK_MD_SSE_DISABLE */

CK_PR_FENCE(atomic, "")
CK_PR_FENCE(atomic_store, "")
CK_PR_FENCE(atomic_load, "")
CK_PR_FENCE(store_atomic, "")
CK_PR_FENCE(load_atomic, "")
CK_PR_FENCE(load, CK_MD_X86_LFENCE)
CK_PR_FENCE(load_store, CK_MD_X86_MFENCE)
CK_PR_FENCE(store, CK_MD_X86_SFENCE)
CK_PR_FENCE(store_load, CK_MD_X86_MFENCE)
CK_PR_FENCE(memory, CK_MD_X86_MFENCE)
CK_PR_FENCE(release, CK_MD_X86_MFENCE)
CK_PR_FENCE(acquire, CK_MD_X86_MFENCE)
CK_PR_FENCE(acqrel, CK_MD_X86_MFENCE)
CK_PR_FENCE(lock, CK_MD_X86_MFENCE)
CK_PR_FENCE(unlock, CK_MD_X86_MFENCE)

#undef CK_PR_FENCE

@@ -215,18 +233,18 @@ CK_PR_FAA_S(8, uint8_t, "xaddb")
    }

#define CK_PR_UNARY_V(K, S, T, C, I)                                    \
    CK_CC_INLINE static void                                            \
    ck_pr_##K##_##S##_zero(T *target, bool *r)                          \
    CK_CC_INLINE static bool                                            \
    ck_pr_##K##_##S##_is_zero(T *target)                                \
    {                                                                   \
        bool ret;                                                       \
        __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0; setz %1"         \
                             : "+m" (*(C *)target),                     \
                               "=m" (*r)                                \
                               "=qm" (ret)                              \
                             :                                          \
                             : "memory", "cc");                         \
        return;                                                         \
        return ret;                                                     \
    }


#define CK_PR_UNARY_S(K, S, T, I) CK_PR_UNARY(K, S, T, T, I)

#define CK_PR_GENERATE(K)                                               \

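The interface change replaces the out-parameter with a return value, so the common reference-counting idiom reads naturally. A sketch, assuming the generated ck_pr_dec_uint_is_zero variant (the struct and names are illustrative):

#include <stdbool.h>
#include <stdlib.h>
#include <ck_pr.h>

struct object {
    unsigned int refcount;    /* hypothetical reference count */
};

/* Drop a reference; the last holder frees the object. */
void
object_release(struct object *o)
{
    if (ck_pr_dec_uint_is_zero(&o->refcount) == true)
        free(o);
}
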
@@ -289,8 +307,38 @@ CK_PR_GENERATE(xor)
#undef CK_PR_BINARY

/*
 * Atomic compare and swap.
 * Atomic compare and swap, with a variant that sets *v to the old value of target.
 */
#ifdef __GCC_ASM_FLAG_OUTPUTS__
#define CK_PR_CAS(S, M, T, C, I)                                        \
    CK_CC_INLINE static bool                                            \
    ck_pr_cas_##S(M *target, T compare, T set)                          \
    {                                                                   \
        bool z;                                                         \
        __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0"              \
                             : "+m" (*(C *)target),                     \
                               "=@ccz" (z),                             \
                               /* RAX is clobbered by cmpxchg. */       \
                               "+a" (compare)                           \
                             : "q" (set)                                \
                             : "memory", "cc");                         \
        return z;                                                       \
    }                                                                   \
                                                                        \
    CK_CC_INLINE static bool                                            \
    ck_pr_cas_##S##_value(M *target, T compare, T set, M *v)            \
    {                                                                   \
        bool z;                                                         \
        __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;"             \
                             : "+m" (*(C *)target),                     \
                               "=@ccz" (z),                             \
                               "+a" (compare)                           \
                             : "q" (set)                                \
                             : "memory", "cc");                         \
        *(T *)v = compare;                                              \
        return z;                                                       \
    }
#else
#define CK_PR_CAS(S, M, T, C, I)                                        \
    CK_CC_INLINE static bool                                            \
    ck_pr_cas_##S(M *target, T compare, T set)                          \

@@ -303,7 +351,23 @@ CK_PR_GENERATE(xor)
                               "a" (compare)                            \
                             : "memory", "cc");                         \
        return z;                                                       \
    }                                                                   \
                                                                        \
    CK_CC_INLINE static bool                                            \
    ck_pr_cas_##S##_value(M *target, T compare, T set, M *v)            \
    {                                                                   \
        bool z;                                                         \
        __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;"             \
                             "setz %1;"                                 \
                             : "+m" (*(C *)target),                     \
                               "=q" (z),                                \
                               "+a" (compare)                           \
                             : "q" (set)                                \
                             : "memory", "cc");                         \
        *(T *)v = compare;                                              \
        return z;                                                       \
    }
#endif

CK_PR_CAS(ptr, void, void *, char, "cmpxchgl")

@@ -319,41 +383,6 @@ CK_PR_CAS_S(8, uint8_t, "cmpxchgb")
#undef CK_PR_CAS_S
#undef CK_PR_CAS

/*
 * Compare and swap, set *v to old value of target.
 */
#define CK_PR_CAS_O(S, M, T, C, I, R)                                   \
    CK_CC_INLINE static bool                                            \
    ck_pr_cas_##S##_value(M *target, T compare, T set, M *v)            \
    {                                                                   \
        bool z;                                                         \
        __asm__ __volatile__(CK_PR_LOCK_PREFIX "cmpxchg" I " %3, %0;"   \
                             "mov %% " R ", %2;"                        \
                             "setz %1;"                                 \
                             : "+m" (*(C *)target),                     \
                               "=a" (z),                                \
                               "=m" (*(C *)v)                           \
                             : "q" (set),                               \
                               "a" (compare)                            \
                             : "memory", "cc");                         \
        return (bool)z;                                                 \
    }

CK_PR_CAS_O(ptr, void, void *, char, "l", "eax")

#define CK_PR_CAS_O_S(S, T, I, R)   \
    CK_PR_CAS_O(S, T, T, T, I, R)

CK_PR_CAS_O_S(char, char, "b", "al")
CK_PR_CAS_O_S(int, int, "l", "eax")
CK_PR_CAS_O_S(uint, unsigned int, "l", "eax")
CK_PR_CAS_O_S(32, uint32_t, "l", "eax")
CK_PR_CAS_O_S(16, uint16_t, "w", "ax")
CK_PR_CAS_O_S(8, uint8_t, "b", "al")

#undef CK_PR_CAS_O_S
#undef CK_PR_CAS_O

/*
 * Atomic bit test operations.
 */

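One place the value-returning CAS earns its keep is a Treiber-style push, where the old value observed on failure seeds the next attempt. A sketch (types and names illustrative, not library code):

#include <stdbool.h>
#include <ck_pr.h>

struct stack_node {
    struct stack_node *next;
};

static struct stack_node *stack_top;    /* hypothetical stack head */

void
stack_push(struct stack_node *n)
{
    struct stack_node *head = ck_pr_load_ptr(&stack_top);

    for (;;) {
        n->next = head;
        /* On failure, head is refreshed with the value actually observed. */
        if (ck_pr_cas_ptr_value(&stack_top, head, n, &head) == true)
            break;
        ck_pr_stall();
    }
}
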
@@ -58,8 +58,8 @@
#endif

/*
 * Prevent speculative execution in busy-wait loops (P4 <=)
 * or "predefined delay".
 * Prevent speculative execution in busy-wait loops (P4 <=) or "predefined
 * delay".
 */
CK_CC_INLINE static void
ck_pr_stall(void)

@@ -75,18 +75,39 @@ ck_pr_stall(void)
    __asm__ __volatile__(I ::: "memory");   \
}

CK_PR_FENCE(atomic, "sfence")
CK_PR_FENCE(atomic_store, "sfence")
CK_PR_FENCE(atomic_load, "mfence")
CK_PR_FENCE(store_atomic, "sfence")
CK_PR_FENCE(load_atomic, "mfence")
/* Atomic operations are always serializing. */
CK_PR_FENCE(atomic, "")
CK_PR_FENCE(atomic_store, "")
CK_PR_FENCE(atomic_load, "")
CK_PR_FENCE(store_atomic, "")
CK_PR_FENCE(load_atomic, "")

/* Traditional fence interface. */
CK_PR_FENCE(load, "lfence")
CK_PR_FENCE(load_store, "mfence")
CK_PR_FENCE(store, "sfence")
CK_PR_FENCE(store_load, "mfence")
CK_PR_FENCE(memory, "mfence")

/* Below are stdatomic-style fences. */

/*
 * Provides load-store and store-store ordering. However, Intel specifies that
 * the WC memory model is relaxed. It is likely an sfence *is* sufficient (in
 * particular, stores are not re-ordered with respect to prior loads and it is
 * really just the stores that are subject to re-ordering). However, we take
 * the conservative route as the manuals are too ambiguous for my taste.
 */
CK_PR_FENCE(release, "mfence")

/*
 * Provides load-load and load-store ordering. The lfence instruction ensures
 * all prior load operations are complete before any subsequent instructions
 * actually begin execution. However, the manual also ends up going to describe
 * WC memory as a relaxed model.
 */
CK_PR_FENCE(acquire, "mfence")

CK_PR_FENCE(acqrel, "mfence")
CK_PR_FENCE(lock, "mfence")
CK_PR_FENCE(unlock, "mfence")

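To make the stdatomic-style fence naming concrete, the canonical message-passing pattern these fences target looks like this (flag and payload are illustrative; a sketch, not library code):

#include <ck_pr.h>

static int payload;          /* hypothetical data */
static unsigned int flag;    /* hypothetical ready flag */

void
producer(void)
{
    payload = 42;
    ck_pr_fence_release();    /* order payload write before flag write */
    ck_pr_store_uint(&flag, 1);
}

int
consumer(void)
{
    while (ck_pr_load_uint(&flag) == 0)
        ck_pr_stall();
    ck_pr_fence_acquire();    /* order flag read before payload read */
    return payload;
}
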
@@ -311,18 +332,18 @@ CK_PR_FAA_S(8, uint8_t, "xaddb")
    }

#define CK_PR_UNARY_V(K, S, T, C, I)                                    \
    CK_CC_INLINE static void                                            \
    ck_pr_##K##_##S##_zero(T *target, bool *r)                          \
    CK_CC_INLINE static bool                                            \
    ck_pr_##K##_##S##_is_zero(T *target)                                \
    {                                                                   \
        bool ret;                                                       \
        __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0; setz %1"         \
                             : "+m" (*(C *)target),                     \
                               "=m" (*r)                                \
                               "=rm" (ret)                              \
                             :                                          \
                             : "memory", "cc");                         \
        return;                                                         \
        return ret;                                                     \
    }


#define CK_PR_UNARY_S(K, S, T, I) CK_PR_UNARY(K, S, T, T, I)

#define CK_PR_GENERATE(K)                                               \

@@ -387,8 +408,38 @@ CK_PR_GENERATE(xor)
#undef CK_PR_BINARY

/*
 * Atomic compare and swap.
 * Atomic compare and swap, with a variant that sets *v to the old value of target.
 */
#ifdef __GCC_ASM_FLAG_OUTPUTS__
#define CK_PR_CAS(S, M, T, C, I)                                        \
    CK_CC_INLINE static bool                                            \
    ck_pr_cas_##S(M *target, T compare, T set)                          \
    {                                                                   \
        bool z;                                                         \
        __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0"              \
                             : "+m" (*(C *)target),                     \
                               "=@ccz" (z),                             \
                               /* RAX is clobbered by cmpxchg. */       \
                               "+a" (compare)                           \
                             : "q" (set)                                \
                             : "memory", "cc");                         \
        return z;                                                       \
    }                                                                   \
                                                                        \
    CK_CC_INLINE static bool                                            \
    ck_pr_cas_##S##_value(M *target, T compare, T set, M *v)            \
    {                                                                   \
        bool z;                                                         \
        __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;"             \
                             : "+m" (*(C *)target),                     \
                               "=@ccz" (z),                             \
                               "+a" (compare)                           \
                             : "q" (set)                                \
                             : "memory", "cc");                         \
        *(T *)v = compare;                                              \
        return z;                                                       \
    }
#else
#define CK_PR_CAS(S, M, T, C, I)                                        \
    CK_CC_INLINE static bool                                            \
    ck_pr_cas_##S(M *target, T compare, T set)                          \

@@ -401,7 +452,23 @@ CK_PR_GENERATE(xor)
                               "a" (compare)                            \
                             : "memory", "cc");                         \
        return z;                                                       \
    }                                                                   \
                                                                        \
    CK_CC_INLINE static bool                                            \
    ck_pr_cas_##S##_value(M *target, T compare, T set, M *v)            \
    {                                                                   \
        bool z;                                                         \
        __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;"             \
                             "setz %1;"                                 \
                             : "+m" (*(C *)target),                     \
                               "=q" (z),                                \
                               "+a" (compare)                           \
                             : "q" (set)                                \
                             : "memory", "cc");                         \
        *(T *)v = compare;                                              \
        return z;                                                       \
    }
#endif

CK_PR_CAS(ptr, void, void *, char, "cmpxchgq")

@@ -421,45 +488,6 @@ CK_PR_CAS_S(8, uint8_t, "cmpxchgb")
#undef CK_PR_CAS_S
#undef CK_PR_CAS

/*
 * Compare and swap, set *v to old value of target.
 */
#define CK_PR_CAS_O(S, M, T, C, I, R)                                   \
    CK_CC_INLINE static bool                                            \
    ck_pr_cas_##S##_value(M *target, T compare, T set, M *v)            \
    {                                                                   \
        bool z;                                                         \
        __asm__ __volatile__(CK_PR_LOCK_PREFIX "cmpxchg" I " %3, %0;"   \
                             "mov %% " R ", %2;"                        \
                             "setz %1;"                                 \
                             : "+m" (*(C *)target),                     \
                               "=a" (z),                                \
                               "=m" (*(C *)v)                           \
                             : "q" (set),                               \
                               "a" (compare)                            \
                             : "memory", "cc");                         \
        return z;                                                       \
    }

CK_PR_CAS_O(ptr, void, void *, char, "q", "rax")

#define CK_PR_CAS_O_S(S, T, I, R)   \
    CK_PR_CAS_O(S, T, T, T, I, R)

CK_PR_CAS_O_S(char, char, "b", "al")
CK_PR_CAS_O_S(int, int, "l", "eax")
CK_PR_CAS_O_S(uint, unsigned int, "l", "eax")
#ifndef CK_PR_DISABLE_DOUBLE
CK_PR_CAS_O_S(double, double, "q", "rax")
#endif
CK_PR_CAS_O_S(64, uint64_t, "q", "rax")
CK_PR_CAS_O_S(32, uint32_t, "l", "eax")
CK_PR_CAS_O_S(16, uint16_t, "w", "ax")
CK_PR_CAS_O_S(8, uint8_t, "b", "al")

#undef CK_PR_CAS_O_S
#undef CK_PR_CAS_O

/*
 * Contrary to C-interface, alignment requirements are that of uint64_t[2].
 */

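The trailing comment refers to the double-width (cmpxchg16b) operations defined further down in this file: unlike what the C interface might suggest, the target must carry the alignment of uint64_t[2], i.e. 16 bytes. A declaration sketch (illustrative):

#include <stdint.h>

/* A 128-bit CAS target must be 16-byte aligned for cmpxchg16b. */
static uint64_t wide_target[2] __attribute__((aligned(16)));
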
@@ -111,7 +111,8 @@ ck_spinlock_dec_lock_eb(struct ck_spinlock_dec *lock)
        if (r == true)
            break;

        ck_backoff_eb(&backoff);
        while (ck_pr_load_uint(&lock->value) != 1)
            ck_backoff_eb(&backoff);
    }

    ck_pr_fence_lock();

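The retry path now spins on a plain load until the lock value returns to 1 (free) before paying for another atomic attempt, which keeps the cache line in shared state under contention. For reference, basic use of this lock flavor (a sketch):

#include <ck_spinlock.h>

static ck_spinlock_dec_t lock = CK_SPINLOCK_DEC_INITIALIZER;

void
critical(void)
{
    ck_spinlock_dec_lock_eb(&lock);    /* exponential-backoff variant */
    /* ...critical section... */
    ck_spinlock_dec_unlock(&lock);
}
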
@@ -77,10 +77,11 @@ CK_CC_INLINE static void
ck_spinlock_fas_lock(struct ck_spinlock_fas *lock)
{

    while (ck_pr_fas_uint(&lock->value, true) == true) {
        while (ck_pr_load_uint(&lock->value) == true)
            ck_pr_stall();
    }
    while (CK_CC_UNLIKELY(ck_pr_fas_uint(&lock->value, true) == true)) {
        do {
            ck_pr_stall();
        } while (ck_pr_load_uint(&lock->value) == true);
    }

    ck_pr_fence_lock();
    return;

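This is the classic test-and-test-and-set refinement: after a failed exchange, wait on cheap loads and only retry the atomic once the lock is observed free; CK_CC_UNLIKELY additionally biases code layout toward the uncontended path. Typical use of the lock, for context (a sketch):

#include <ck_spinlock.h>

static ck_spinlock_fas_t lock = CK_SPINLOCK_FAS_INITIALIZER;

void
critical(void)
{
    ck_spinlock_fas_lock(&lock);
    /* ...critical section: the line only bounces on acquisition... */
    ck_spinlock_fas_unlock(&lock);
}
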
@@ -81,6 +81,8 @@ ck_spinlock_hclh_lock(struct ck_spinlock_hclh **glob_queue,
    thread->wait = true;
    thread->splice = false;
    thread->cluster_id = (*local_queue)->cluster_id;
    /* Make sure previous->previous doesn't appear to be NULL */
    thread->previous = *local_queue;

    /* Serialize with respect to update of local queue. */
    ck_pr_fence_store_atomic();

@@ -91,13 +93,15 @@ ck_spinlock_hclh_lock(struct ck_spinlock_hclh **glob_queue,

    /* Wait until previous thread from the local queue is done with lock. */
    ck_pr_fence_load();
    if (previous->previous != NULL &&
        previous->cluster_id == thread->cluster_id) {
        while (ck_pr_load_uint(&previous->wait) == true)
    if (previous->previous != NULL) {
        while (ck_pr_load_uint(&previous->wait) == true &&
               ck_pr_load_int(&previous->cluster_id) == thread->cluster_id &&
               ck_pr_load_uint(&previous->splice) == false)
            ck_pr_stall();

        /* We're head of the global queue, we're done */
        if (ck_pr_load_uint(&previous->splice) == false)
        if (ck_pr_load_int(&previous->cluster_id) == thread->cluster_id &&
            ck_pr_load_uint(&previous->splice) == false)
            return;
    }

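The fix folds the cluster check into the wait loop: a waiter keeps spinning only while its predecessor is still waiting, still in the same cluster, and not yet spliced onto the global queue; testing cluster_id once, outside the loop, raced with splicing. Rough shape of the API involved (a sketch under assumptions; queue initialization is elided and the exact helper names should be checked against ck_spinlock.h):

#include <ck_spinlock.h>

/* One local queue per NUMA cluster plus one global queue (setup elided). */
static ck_spinlock_hclh_t *glob_queue;
static ck_spinlock_hclh_t *local_queue;    /* this cluster's tail */

void
with_lock(ck_spinlock_hclh_t *thread)
{
    ck_spinlock_hclh_lock(&glob_queue, &local_queue, thread);
    /* ...critical section... */
    ck_spinlock_hclh_unlock(&thread);
}
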