Merging upstream version 0.7.1 (Closes: #991419).
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
05c588e9d7
commit
9e09e0ef69
99 changed files with 6727 additions and 943 deletions
|
@ -58,8 +58,8 @@
|
|||
#endif
|
||||
|
||||
/*
|
||||
* Prevent speculative execution in busy-wait loops (P4 <=)
|
||||
* or "predefined delay".
|
||||
* Prevent speculative execution in busy-wait loops (P4 <=) or "predefined
|
||||
* delay".
|
||||
*/
|
||||
CK_CC_INLINE static void
|
||||
ck_pr_stall(void)
|
||||
|
@ -75,18 +75,39 @@ ck_pr_stall(void)
|
|||
__asm__ __volatile__(I ::: "memory"); \
|
||||
}
|
||||
|
||||
CK_PR_FENCE(atomic, "sfence")
|
||||
CK_PR_FENCE(atomic_store, "sfence")
|
||||
CK_PR_FENCE(atomic_load, "mfence")
|
||||
CK_PR_FENCE(store_atomic, "sfence")
|
||||
CK_PR_FENCE(load_atomic, "mfence")
|
||||
/* Atomic operations are always serializing. */
|
||||
CK_PR_FENCE(atomic, "")
|
||||
CK_PR_FENCE(atomic_store, "")
|
||||
CK_PR_FENCE(atomic_load, "")
|
||||
CK_PR_FENCE(store_atomic, "")
|
||||
CK_PR_FENCE(load_atomic, "")
|
||||
|
||||
/* Traditional fence interface. */
|
||||
CK_PR_FENCE(load, "lfence")
|
||||
CK_PR_FENCE(load_store, "mfence")
|
||||
CK_PR_FENCE(store, "sfence")
|
||||
CK_PR_FENCE(store_load, "mfence")
|
||||
CK_PR_FENCE(memory, "mfence")
|
||||
|
||||
/* Below are stdatomic-style fences. */
|
||||
|
||||
/*
|
||||
* Provides load-store and store-store ordering. However, Intel specifies that
|
||||
* the WC memory model is relaxed. It is likely an sfence *is* sufficient (in
|
||||
* particular, stores are not re-ordered with respect to prior loads and it is
|
||||
* really just the stores that are subject to re-ordering). However, we take
|
||||
* the conservative route as the manuals are too ambiguous for my taste.
|
||||
*/
|
||||
CK_PR_FENCE(release, "mfence")
|
||||
|
||||
/*
|
||||
* Provides load-load and load-store ordering. The lfence instruction ensures
|
||||
* all prior load operations are complete before any subsequent instructions
|
||||
* actually begin execution. However, the manual also goes on to describe
|
||||
* WC memory as a relaxed model.
|
||||
*/
|
||||
CK_PR_FENCE(acquire, "mfence")
|
||||
|
||||
CK_PR_FENCE(acqrel, "mfence")
|
||||
CK_PR_FENCE(lock, "mfence")
|
||||
CK_PR_FENCE(unlock, "mfence")
|
||||
|
@ -311,18 +332,18 @@ CK_PR_FAA_S(8, uint8_t, "xaddb")
|
|||
}
|
||||
|
||||
#define CK_PR_UNARY_V(K, S, T, C, I) \
|
||||
CK_CC_INLINE static void \
|
||||
ck_pr_##K##_##S##_zero(T *target, bool *r) \
|
||||
CK_CC_INLINE static bool \
|
||||
ck_pr_##K##_##S##_is_zero(T *target) \
|
||||
{ \
|
||||
bool ret; \
|
||||
__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0; setz %1" \
|
||||
: "+m" (*(C *)target), \
|
||||
"=m" (*r) \
|
||||
"=rm" (ret) \
|
||||
: \
|
||||
: "memory", "cc"); \
|
||||
return; \
|
||||
return ret; \
|
||||
}
|
||||
|
||||
|
||||
#define CK_PR_UNARY_S(K, S, T, I) CK_PR_UNARY(K, S, T, T, I)
|
||||
|
||||
#define CK_PR_GENERATE(K) \
|
||||
|
@ -387,8 +408,38 @@ CK_PR_GENERATE(xor)
|
|||
#undef CK_PR_BINARY
|
||||
|
||||
/*
|
||||
* Atomic compare and swap.
|
||||
* Atomic compare and swap, with a variant that sets *v to the old value of target.
|
||||
*/
|
||||
#ifdef __GCC_ASM_FLAG_OUTPUTS__
|
||||
#define CK_PR_CAS(S, M, T, C, I) \
|
||||
CK_CC_INLINE static bool \
|
||||
ck_pr_cas_##S(M *target, T compare, T set) \
|
||||
{ \
|
||||
bool z; \
|
||||
__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0" \
|
||||
: "+m" (*(C *)target), \
|
||||
"=@ccz" (z), \
|
||||
/* RAX is clobbered by cmpxchg. */ \
|
||||
"+a" (compare) \
|
||||
: "q" (set) \
|
||||
: "memory", "cc"); \
|
||||
return z; \
|
||||
} \
|
||||
\
|
||||
CK_CC_INLINE static bool \
|
||||
ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \
|
||||
{ \
|
||||
bool z; \
|
||||
__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;" \
|
||||
: "+m" (*(C *)target), \
|
||||
"=@ccz" (z), \
|
||||
"+a" (compare) \
|
||||
: "q" (set) \
|
||||
: "memory", "cc"); \
|
||||
*(T *)v = compare; \
|
||||
return z; \
|
||||
}
|
||||
#else
|
||||
#define CK_PR_CAS(S, M, T, C, I) \
|
||||
CK_CC_INLINE static bool \
|
||||
ck_pr_cas_##S(M *target, T compare, T set) \
|
||||
|
@ -401,7 +452,23 @@ CK_PR_GENERATE(xor)
|
|||
"a" (compare) \
|
||||
: "memory", "cc"); \
|
||||
return z; \
|
||||
} \
|
||||
\
|
||||
CK_CC_INLINE static bool \
|
||||
ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \
|
||||
{ \
|
||||
bool z; \
|
||||
__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;" \
|
||||
"setz %1;" \
|
||||
: "+m" (*(C *)target), \
|
||||
"=q" (z), \
|
||||
"+a" (compare) \
|
||||
: "q" (set) \
|
||||
: "memory", "cc"); \
|
||||
*(T *)v = compare; \
|
||||
return z; \
|
||||
}
|
||||
#endif
|
||||
|
||||
CK_PR_CAS(ptr, void, void *, char, "cmpxchgq")
|
||||
|
||||
|
@ -421,45 +488,6 @@ CK_PR_CAS_S(8, uint8_t, "cmpxchgb")
|
|||
#undef CK_PR_CAS_S
|
||||
#undef CK_PR_CAS
|
||||
|
||||
/*
|
||||
* Compare and swap, set *v to old value of target.
|
||||
*/
|
||||
#define CK_PR_CAS_O(S, M, T, C, I, R) \
|
||||
CK_CC_INLINE static bool \
|
||||
ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \
|
||||
{ \
|
||||
bool z; \
|
||||
__asm__ __volatile__(CK_PR_LOCK_PREFIX "cmpxchg" I " %3, %0;" \
|
||||
"mov %% " R ", %2;" \
|
||||
"setz %1;" \
|
||||
: "+m" (*(C *)target), \
|
||||
"=a" (z), \
|
||||
"=m" (*(C *)v) \
|
||||
: "q" (set), \
|
||||
"a" (compare) \
|
||||
: "memory", "cc"); \
|
||||
return z; \
|
||||
}
|
||||
|
||||
CK_PR_CAS_O(ptr, void, void *, char, "q", "rax")
|
||||
|
||||
#define CK_PR_CAS_O_S(S, T, I, R) \
|
||||
CK_PR_CAS_O(S, T, T, T, I, R)
|
||||
|
||||
CK_PR_CAS_O_S(char, char, "b", "al")
|
||||
CK_PR_CAS_O_S(int, int, "l", "eax")
|
||||
CK_PR_CAS_O_S(uint, unsigned int, "l", "eax")
|
||||
#ifndef CK_PR_DISABLE_DOUBLE
|
||||
CK_PR_CAS_O_S(double, double, "q", "rax")
|
||||
#endif
|
||||
CK_PR_CAS_O_S(64, uint64_t, "q", "rax")
|
||||
CK_PR_CAS_O_S(32, uint32_t, "l", "eax")
|
||||
CK_PR_CAS_O_S(16, uint16_t, "w", "ax")
|
||||
CK_PR_CAS_O_S(8, uint8_t, "b", "al")
|
||||
|
||||
#undef CK_PR_CAS_O_S
|
||||
#undef CK_PR_CAS_O
|
||||
|
||||
/*
|
||||
* Contrary to the C interface, the alignment requirements are those of uint64_t[2].
|
||||
*/
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue