
Fix __atomic to not implement atomic loads with CAS.

Message ID 1485802440.16721.118.camel@redhat.com
State New

Commit Message

Torvald Riegel Jan. 30, 2017, 6:54 p.m. UTC
This patch fixes the __atomic builtins to not implement supposedly
lock-free atomic loads based on just a compare-and-swap operation.

If there is no hardware-backed atomic load for a certain memory
location, the current implementation can implement the load with a CAS
while claiming that the access is lock-free.  This is a bug in the cases
of volatile atomic loads and atomic loads to read-only-mapped memory; it
also creates a lot of contention in case of concurrent atomic loads,
which results in at least counter-intuitive performance because most
users probably understand "lock-free" to mean hardware-backed (and thus
"fast") instead of just in the progress-criteria sense.

This patch implements option 3b of the choices described here:
https://gcc.gnu.org/ml/gcc/2017-01/msg00167.html

In a nutshell, this does change affected accesses to call libatomic
instead of inlining the CAS-based load emulation -- but libatomic will
continue to use the CAS-based approach.  Thus, there's no change in how
the accesses are actually performed (including compatibility with the
__sync builtins, which are not changed) -- but we do make it much easier
to fix this in the future, and we do ensure that less future code will
have the problematic code inlined into it (and thus unfixable).

Second, the return values of __atomic_always_lock_free and
__atomic_is_lock_free are changed to report false for the affected
accesses.  This aims at making users aware of the fact that they don't
get fast hardware-backed performance for the affected accesses.
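
For illustration, here is a small example of the user-visible effect
(not part of the patch; assumes x86_64 with -mcx16 and linking against
libatomic):

    /* Illustrative only.  Build with: gcc -mcx16 example.c -latomic  */
    #include <stdbool.h>
    #include <stdio.h>

    __int128 shared;  /* 16 bytes, naturally aligned per the x86_64 ABI.  */

    int
    main (void)
    {
      /* Before this patch, -mcx16 made this report true and the load
         below was inlined as a cmpxchg16b-based emulation; with the
         patch it reports false and the load becomes a call to
         libatomic's __atomic_load_16 (which for now still uses
         cmpxchg16b).  */
      bool lf = __atomic_is_lock_free (sizeof shared, &shared);
      __int128 val = __atomic_load_n (&shared, __ATOMIC_ACQUIRE);
      printf ("lock-free: %d, low 64 bits: %llu\n",
              (int) lf, (unsigned long long) val);
      return 0;
    }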

This patch does not yet change how OpenMP atomics support is
implemented.  Jakub said he would take care of that.  I suppose one
approach would be to check can_atomic_load_p (introduced by this patch)
to decide in expand_omp_atomic whether to just use the mutex-based
approach; I think that conservatively, OpenMP atomics have to assume
that there could be an atomic load to a particular memory location
elsewhere in the program, so the choice between mutex-backed or not has
to be consistent for a particular memory location.

Thanks to Richard Henderson for providing the libatomic part of this
patch, and thanks to Andrew MacLeod for helping with the compiler parts.

I've tested this on an x86_64-linux bootstrap build and see no
regressions, with the exception of two OpenMP failures, which Jakub
will take care of.  The other failures I saw didn't seem atomics-related
(e.g., OpenACC); I haven't compared against a full bootstrap build and
make check of trunk.

AFAICT, in practice only x86 with -mcx16 (or targets where this option
is implied) is affected.  The other architectures I looked at seemed to
have patterns for true hardware-backed atomic loads whenever they also
had a wider-than-word-sized CAS available.

I know we missed the stage3 deadline with this, unfortunately.  I think
it would be good to have this fix be part of GCC 7 though, because this
would ensure that less future code has the problematic load-via-CAS code
inlined.

Jakub: If this is OK for GCC 7, can you please take care of the OpenMP
bits and commit this?  Changelog entries are in the commit message.

If others could test on other hardware, this would also be appreciated.

Comments

Torvald Riegel Feb. 1, 2017, 5:26 p.m. UTC | #1
On Mon, 2017-01-30 at 19:54 +0100, Torvald Riegel wrote:
> This patch fixes the __atomic builtins to not implement supposedly
> lock-free atomic loads based on just a compare-and-swap operation.

After an off-list OK by Jakub, I have committed this as r245098.
Jakub will take care of the OpenMP side in a follow-up patch.
Thomas Schwinge Feb. 2, 2017, 12:58 p.m. UTC | #2
Hi!

On Mon, 30 Jan 2017 19:54:00 +0100, Torvald Riegel <triegel@redhat.com> wrote:
> This patch fixes the __atomic builtins to not implement supposedly
> lock-free atomic loads based on just a compare-and-swap operation.  [...]

> I've tested this on an x86_64-linux bootstrap build and see no
> regressions.  (With the exception of two OpenMP failures, which Jakub
> will take care of.

These are:

    [-PASS:-]{+FAIL:+} libgomp.c/atomic-2.c (test for excess errors)
    [-PASS:-]{+UNRESOLVED:+} libgomp.c/atomic-2.c [-execution test-]{+compilation failed to produce executable+}

    atomic-2.c:(.text+0x50): undefined reference to `__atomic_load_16'

    [-PASS:-]{+FAIL:+} libgomp.c/atomic-5.c (test for excess errors)
    [-PASS:-]{+UNRESOLVED:+} libgomp.c/atomic-5.c [-execution test-]{+compilation failed to produce executable+}

    atomic-5.c:(.text+0x55): undefined reference to `__atomic_load_16'

(Would've been nice to xfail these as part of your commit, until Jakub
gets to address that.)

> The other failures I saw didn't seem atomics related
> (eg, openacc)

I suppose you're testing without nvptx offloading -- which failures do
you see for OpenACC testing?  (There shouldn't be any for host fallback
testing.)

> I haven't compared it against a full bootstrap build and
> make check of trunk.).

I just happened to do that for x86_64-pc-linux-gnu, and I'm seeing the
following additional changes; posting this just in case that's not
expected to happen:

    -PASS: gcc.dg/atomic-compare-exchange-5.c (test for excess errors)
    -PASS: gcc.dg/atomic-compare-exchange-5.c execution test
    +UNSUPPORTED: gcc.dg/atomic-compare-exchange-5.c

    -PASS: gcc.dg/atomic-exchange-5.c (test for excess errors)
    -PASS: gcc.dg/atomic-exchange-5.c execution test
    +UNSUPPORTED: gcc.dg/atomic-exchange-5.c

    -PASS: gcc.dg/atomic-load-5.c (test for excess errors)
    -PASS: gcc.dg/atomic-load-5.c execution test
    +UNSUPPORTED: gcc.dg/atomic-load-5.c

    -PASS: gcc.dg/atomic-op-5.c (test for excess errors)
    -PASS: gcc.dg/atomic-op-5.c execution test
    +UNSUPPORTED: gcc.dg/atomic-op-5.c

    -PASS: gcc.dg/atomic-store-5.c (test for excess errors)
    -PASS: gcc.dg/atomic-store-5.c execution test
    +UNSUPPORTED: gcc.dg/atomic-store-5.c

    -PASS: gcc.dg/atomic-store-6.c (test for excess errors)
    -PASS: gcc.dg/atomic-store-6.c execution test
    +UNSUPPORTED: gcc.dg/atomic-store-6.c

    -PASS: gcc.dg/simulate-thread/atomic-load-int128.c   -O0 -g  (test for excess errors)
    -PASS: gcc.dg/simulate-thread/atomic-load-int128.c   -O0 -g  thread simulation test
    +UNSUPPORTED: gcc.dg/simulate-thread/atomic-load-int128.c   -O0 -g 
    -PASS: gcc.dg/simulate-thread/atomic-load-int128.c   -O2 -g  (test for excess errors)
    -PASS: gcc.dg/simulate-thread/atomic-load-int128.c   -O2 -g  thread simulation test
    +UNSUPPORTED: gcc.dg/simulate-thread/atomic-load-int128.c   -O2 -g 
    -PASS: gcc.dg/simulate-thread/atomic-load-int128.c   -O3 -g  (test for excess errors)
    -PASS: gcc.dg/simulate-thread/atomic-load-int128.c   -O3 -g  thread simulation test
    +UNSUPPORTED: gcc.dg/simulate-thread/atomic-load-int128.c   -O3 -g 

    -PASS: gcc.dg/simulate-thread/atomic-other-int128.c   -O0 -g  (test for excess errors)
    -PASS: gcc.dg/simulate-thread/atomic-other-int128.c   -O0 -g  thread simulation test
    +UNSUPPORTED: gcc.dg/simulate-thread/atomic-other-int128.c   -O0 -g 
    -PASS: gcc.dg/simulate-thread/atomic-other-int128.c   -O2 -g  (test for excess errors)
    -PASS: gcc.dg/simulate-thread/atomic-other-int128.c   -O2 -g  thread simulation test
    +UNSUPPORTED: gcc.dg/simulate-thread/atomic-other-int128.c   -O2 -g 
    -PASS: gcc.dg/simulate-thread/atomic-other-int128.c   -O3 -g  (test for excess errors)
    -PASS: gcc.dg/simulate-thread/atomic-other-int128.c   -O3 -g  thread simulation test
    +UNSUPPORTED: gcc.dg/simulate-thread/atomic-other-int128.c   -O3 -g 


Regards
 Thomas
Ramana Radhakrishnan Feb. 2, 2017, 2:48 p.m. UTC | #3
On 30/01/17 18:54, Torvald Riegel wrote:
> This patch fixes the __atomic builtins to not implement supposedly
> lock-free atomic loads based on just a compare-and-swap operation.
>
> If there is no hardware-backed atomic load for a certain memory
> location, the current implementation can implement the load with a CAS
> while claiming that the access is lock-free.  This is a bug in the cases
> of volatile atomic loads and atomic loads to read-only-mapped memory; it
> also creates a lot of contention in case of concurrent atomic loads,
> which results in at least counter-intuitive performance because most
> users probably understand "lock-free" to mean hardware-backed (and thus
> "fast") instead of just in the progress-criteria sense.
>
> This patch implements option 3b of the choices described here:
> https://gcc.gnu.org/ml/gcc/2017-01/msg00167.html


Will Deacon pointed me at this thread asking if something similar could 
be done on ARM.

On armv8-a we can implement an atomic load of 16 bytes using an LDXP /
STXP loop, as a 16-byte load isn't single copy atomic.  On armv8.1-a we
do have a CAS on 16 bytes.

This was discussed last year here.

https://gcc.gnu.org/ml/gcc/2016-06/msg00017.html

and the consensus seemed to be that we couldn't really do a CAS on 
readonly data as we would have no way of avoiding a trap.

I'm sure I'm missing some piece of the puzzle, so I'd be interested
in how you avoid this in the x86_64 implementation, given that what I
can see there is a CAS.

regards
Ramana


>
> In a nutshell, this does change affected accesses to call libatomic
> instead of inlining the CAS-based load emulation -- but libatomic will
> continue to do the CAS-based approach.  Thus, there's no change in how
> the changes are actually performed (including compatibility with the
> __sync builtins, which are not changed) -- but we do make it much easier
> to fix this in the future, and we do ensure that less future code will
> have the problematic code inlined into it (and thus unfixable).
>
> Second, the return of __atomic_always_lock_free and
> __atomic_is_lock_free are changed to report false for the affected
> accesses.  This aims at making users aware of the fact that they don't
> get fast hardware-backed performance for the affected accesses.
>
> This patch does not yet change how OpenMP atomics support is
> implemented.  Jakub said he would take care of that.  I suppose one
> approach would be to check can_atomic_load_p (introduced by this patch)
> to decide in expand_omp_atomic whether to just use the mutex-based
> approach; I think that conservatively, OpenMP atomics have to assume
> that there could be an atomic load to a particular memory location
> elsewhere in the program, so the choice between mutex-backed or not has
> to be consistent for a particular memory location.
>
> Thanks to Richard Henderson for providing the libatomic part of this
> patch, and thanks to Andrew MacLeod for helping with the compiler parts.
>
> I've tested this on an x86_64-linux bootstrap build and see no
> regressions.  (With the exception of two OpenMP failures, which Jakub
> will take care of.  The other failures I saw didn't seem atomics related
> (eg, openacc); I haven't compared it against a full bootstrap build and
> make check of trunk.).
>
> AFAICT, in practice only x86 with -mcx16 (or where this is implicitly
> implied) is affected.  The other architectures I looked at seemed to
> have patterns for true hardware-backed atomic loads whenever they also
> had a wider-than-word-sized CAS available.
>
> I know we missed the stage3 deadline with this, unfortunately.  I think
> it would be good to have this fix be part of GCC 7 though, because this
> would ensure that less future code has the problematic load-via-CAS code
> inlined.
>
> Jakub: If this is OK for GCC 7, can you please take care of the OpenMP
> bits and commit this?  Changelog entries are in the commit message.
>
> If others could test on other hardware, this would also be appreciated.
>
Jakub Jelinek Feb. 2, 2017, 2:52 p.m. UTC | #4
On Thu, Feb 02, 2017 at 02:48:42PM +0000, Ramana Radhakrishnan wrote:
> On 30/01/17 18:54, Torvald Riegel wrote:
> > This patch fixes the __atomic builtins to not implement supposedly
> > lock-free atomic loads based on just a compare-and-swap operation.
> > 
> > If there is no hardware-backed atomic load for a certain memory
> > location, the current implementation can implement the load with a CAS
> > while claiming that the access is lock-free.  This is a bug in the cases
> > of volatile atomic loads and atomic loads to read-only-mapped memory; it
> > also creates a lot of contention in case of concurrent atomic loads,
> > which results in at least counter-intuitive performance because most
> > users probably understand "lock-free" to mean hardware-backed (and thus
> > "fast") instead of just in the progress-criteria sense.
> > 
> > This patch implements option 3b of the choices described here:
> > https://gcc.gnu.org/ml/gcc/2017-01/msg00167.html
> 
> 
> Will Deacon pointed me at this thread asking if something similar could be
> done on ARM.
> 
> On armv8-a we can implement an atomic load of 16 bytes using an LDXP / STXP
> loop as a 16 byte load isnt' single copy atomic. On armv8.1-a we do have a
> CAS on 16 bytes.

If the AArch64 ISA guarantees LDXP is atomic, then yes, you can do that.
The problem we have on x86_64 is that I think neither Intel nor AMD has
given us guarantees that aligned SSE or AVX loads are atomic.

	Jakub
Ramana Radhakrishnan Feb. 2, 2017, 3:14 p.m. UTC | #5
On 02/02/17 14:52, Jakub Jelinek wrote:
> On Thu, Feb 02, 2017 at 02:48:42PM +0000, Ramana Radhakrishnan wrote:
>> On 30/01/17 18:54, Torvald Riegel wrote:
>>> This patch fixes the __atomic builtins to not implement supposedly
>>> lock-free atomic loads based on just a compare-and-swap operation.
>>>
>>> If there is no hardware-backed atomic load for a certain memory
>>> location, the current implementation can implement the load with a CAS
>>> while claiming that the access is lock-free.  This is a bug in the cases
>>> of volatile atomic loads and atomic loads to read-only-mapped memory; it
>>> also creates a lot of contention in case of concurrent atomic loads,
>>> which results in at least counter-intuitive performance because most
>>> users probably understand "lock-free" to mean hardware-backed (and thus
>>> "fast") instead of just in the progress-criteria sense.
>>>
>>> This patch implements option 3b of the choices described here:
>>> https://gcc.gnu.org/ml/gcc/2017-01/msg00167.html
>>
>>
>> Will Deacon pointed me at this thread asking if something similar could be
>> done on ARM.
>>
>> On armv8-a we can implement an atomic load of 16 bytes using an LDXP / STXP
>> loop as a 16 byte load isnt' single copy atomic. On armv8.1-a we do have a
>> CAS on 16 bytes.
>
> If the AArch64 ISA guarantees LDXP is atomic, then yes, you can do that.
> The problem we have on x86_64 is that I think neither Intel nor AMD gave us
> guarantees that aligned SSE or AVX loads are guaranteed to be atomic.


LDXP is not single copy atomic.

So this would become something like the following, with appropriate
additional barriers.

You need to write this as a loop of

.retry:
LDXP    x0, x1, [x2]
STXP    w3, x0, x1, [x2]
CBNZ    w3, .retry

You have to do the write again to guarantee atomicity on AArch64.
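
For reference, a compilable sketch of that loop as a GCC inline-asm
helper could look like this (purely illustrative: the function name and
operand constraints are mine, little-endian is assumed, and the barriers
needed for the chosen memory model are omitted):

    #include <stdint.h>

    /* Hypothetical 16-byte "load" on armv8-a via an LDXP/STXP loop.
       Note that it performs a store, which is exactly what makes this
       approach problematic for read-only or volatile-mapped memory.  */
    static inline __uint128_t
    load16_ldxp_stxp (__uint128_t *p)
    {
      uint64_t lo, hi;
      uint32_t fail;

      __asm__ volatile ("0: ldxp %0, %1, %3\n"
                        "stxp %w2, %0, %1, %3\n"
                        "cbnz %w2, 0b"
                        : "=&r" (lo), "=&r" (hi), "=&r" (fail), "+Q" (*p));
      return ((__uint128_t) hi << 64) | lo;
    }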

I missed the first paragraph in this thread on gcc@.

https://gcc.gnu.org/ml/gcc/2017-01/msg00167.html


> I consider this a bug because it can
> result in a store being issued (e.g., when loading from a read-only
> page, or when trying to do a volatile atomic load), and because it can
> increase contention (which makes atomic loads perform much different
> than HW load instructions would).  See the thread "GCC libatomic ABI
> specification draft" for more background.

OK, this means implementing such a change in libatomic for AArch64 will
introduce the bug that Torvald is worried about for x86_64.


regards
Ramana






>
> 	Jakub
>
Torvald Riegel Feb. 2, 2017, 3:21 p.m. UTC | #6
On Thu, 2017-02-02 at 14:48 +0000, Ramana Radhakrishnan wrote:
> On 30/01/17 18:54, Torvald Riegel wrote:
> > This patch fixes the __atomic builtins to not implement supposedly
> > lock-free atomic loads based on just a compare-and-swap operation.
> >
> > If there is no hardware-backed atomic load for a certain memory
> > location, the current implementation can implement the load with a CAS
> > while claiming that the access is lock-free.  This is a bug in the cases
> > of volatile atomic loads and atomic loads to read-only-mapped memory; it
> > also creates a lot of contention in case of concurrent atomic loads,
> > which results in at least counter-intuitive performance because most
> > users probably understand "lock-free" to mean hardware-backed (and thus
> > "fast") instead of just in the progress-criteria sense.
> >
> > This patch implements option 3b of the choices described here:
> > https://gcc.gnu.org/ml/gcc/2017-01/msg00167.html
> 
> 
> Will Deacon pointed me at this thread asking if something similar could 
> be done on ARM.

It would be nice if someone more familiar with ARM could double-check
that ARM is not affected.  I guess ARM isn't, but that's based on me
looking at machine descriptions, which I hadn't ever done before working
on this patch...

The patch introduces a can_atomic_load_p, which checks that an entry
exists in optabs and the machine descriptions for an atomic load of the
particular mode.

IIRC, arm and aarch64 had atomic load patterns defined whenever they had
a CAS defined.  It would be nice if you could double-check that.  If
that's the case, nothing should change with my patch because
can_atomic_load_p would always return true if a CAS could be issued.
If that's not the case, arm/aarch64 could be in the same boat as x86_64
with cmpxchg16b; then, using Option 3b might be a useful (intermediary)
solution for ARM as well (OTOH, compatibility with existing code and
__sync builtins might perhaps not be as much of a factor as it might be
on x86_64).

The atomic load patterns you have could still be wrong, for example by
generating a LDXP/STXP loop or something else that can store on an
atomic load.  In that case, the problem is similar as not having a
custom load pattern, and the second case in the previous paragraph
applies.

> On armv8-a we can implement an atomic load of 16 bytes using an LDXP / 
> STXP loop as a 16 byte load isnt' single copy atomic. On armv8.1-a we do 
> have a CAS on 16 bytes.
> 
> This was discussed last year here.
> 
> https://gcc.gnu.org/ml/gcc/2016-06/msg00017.html
> 
> and the consensus seemed to be that we couldn't really do a CAS on 
> readonly data as we would have no way of avoiding a trap.

Yes, and I agree that not doing a CAS (or anything that can store on
atomic load) is the right thing to do.

BTW, on the ARM targets, is it possible that the __sync builtins would
use LDXP/STXP on 16-byte accesses and the __atomic loads would fall back
to using libatomic and locks?  Or do you simply not provide 16-byte
__sync builtins?

> I'm sure I'm missing something piece of the puzzle, so I'd be interested 
> in how you avoid this in this implementation on x86_64 with what I can 
> see is a CAS.

If we'd start from scratch, we wouldn't use cmpxchg16b if we don't have
a 16-byte atomic load.  However, we did do that, and there might be code
out there that does have inlined cmpxchg16b.  As discussed in the thread
you cited, changing GCC 7 back to libatomics *and* the use of locks
would be an effective ABI break.  Therefore, my patch (and Option 3b)
makes a compromise and delegates to libatomic but libatomic gets custom
code to actually keep using cmpxchg16b.  That means that the actual
synchronization doesn't change, but we constrain this problem to
libatomic and prevent application code generated with GCC 7 from being
directly affected.

Does this clarify the x86_64 situation?
Torvald Riegel Feb. 2, 2017, 5:37 p.m. UTC | #7
On Thu, 2017-02-02 at 13:58 +0100, Thomas Schwinge wrote:
> > The other failures I saw didn't seem atomics related
> > (eg, openacc)
> 
> I suppose you're testing without nvptx offloading -- which failures do
> you see for OpenACC testing?  (There shouldn't be any for host fallback
> testing.)

Sorry, false alarm.  Those tests print out "FAILED", but they don't
actually fail.

> > I haven't compared it against a full bootstrap build and
> > make check of trunk.).
> 
> I just happened to do that for x86_64-pc-linux-gnu, and I'm seeing the
> following additional changes; posting this just in case that's not
> expected to happen:
> 
>     -PASS: gcc.dg/atomic-compare-exchange-5.c (test for excess errors)
>     -PASS: gcc.dg/atomic-compare-exchange-5.c execution test
>     +UNSUPPORTED: gcc.dg/atomic-compare-exchange-5.c

Those changes are expected because we no longer expect that cmpxchg16b
availability also means that 16-byte atomics will be inlined.
Ramana Radhakrishnan Feb. 3, 2017, 1:44 p.m. UTC | #8
On 02/02/17 15:21, Torvald Riegel wrote:
> On Thu, 2017-02-02 at 14:48 +0000, Ramana Radhakrishnan wrote:
>> On 30/01/17 18:54, Torvald Riegel wrote:
>>> This patch fixes the __atomic builtins to not implement supposedly
>>> lock-free atomic loads based on just a compare-and-swap operation.
>>>
>>> If there is no hardware-backed atomic load for a certain memory
>>> location, the current implementation can implement the load with a CAS
>>> while claiming that the access is lock-free.  This is a bug in the cases
>>> of volatile atomic loads and atomic loads to read-only-mapped memory; it
>>> also creates a lot of contention in case of concurrent atomic loads,
>>> which results in at least counter-intuitive performance because most
>>> users probably understand "lock-free" to mean hardware-backed (and thus
>>> "fast") instead of just in the progress-criteria sense.
>>>
>>> This patch implements option 3b of the choices described here:
>>> https://gcc.gnu.org/ml/gcc/2017-01/msg00167.html
>>
>>
>> Will Deacon pointed me at this thread asking if something similar could
>> be done on ARM.
>
> It would be nice if someone more familiar with ARM could double-check
> that ARM is not affected.  I guess ARM isn't, but that's based on me
> looking at machine descriptions, which I hadn't ever done before working
> on this patch...
>

ARM doesn't have __int128 support, so I don't think the problem exists 
there.

On ARM, on architecture levels (i.e. arch < armv6k) that do not have
single copy atomic routines, we end up calling the kernel helper
routines, where the appropriate handling is done by the kernel depending
on whether you are multicore or not.

__atomic_load on ARM appears to be ok as well

except for

__atomic_load_di which should really be the ldrexd / strexd loop but we 
could ameliorate that similar to your option 3b.



On AArch64

* <16 byte loads have always been fine. The architecture allows single 
copy atomic loads using single load instructions for all other sizes and 
memory models, so we are fine there.

* we have gone through the libatomic locks from day one of the port for 
16 byte loads.  This has been a bit of a bugbear for a number of users 
within ARM who would really like to get performance without heavy weight 
locks for 16 byte atomic ops.
We could never switch this around on AArch64 to using the loop (or CAS) 
by default as this would be the reverse issue (i.e. old compilers 
calling libatomic locks and new code using the inlined instruction 
sequence).


My interest in this patch was piqued because if we were to switch 
aarch64 to not use the locks in libatomic and go to a CAS or the loop 
sequence I showed in my reply to Jakub, I don't believe we have an ABI 
issue as I would expect there to be a single copy of libatomic on the 
system and everyone would simply be using that.

However we would end up with the situation of generating stores for 
__atomic_loads as you described above.

We could still in theory do that and gain the same bug for rodata and 
volatile atomic pointers on AArch64 - I don't see a way of avoiding that 
on aarch32 as we have a similar ABI issue to you.


> The patch introduces a can_atomic_load_p, which checks that an entry
> exists in optabs and the machine descriptions for an atomic load of the
> particular mode.
>
> IIRC, arm and aarch64 had atomic load patterns defined whenever they had
> a CAS defined.  It would be nice if you could double-check that.  If
> that's the case, nothing should change with my patch because
> can_atomic_load_p would always return true if a CAS could be issued.
> If that's not the case, arm/aarch64 could be in the same boat as x86_64
> with cmpxchg16b; then, using Option 3b might be a useful (intermediary)
> solution for ARM as well (OTOH, compatibility with existing code and
> __sync builtins might perhaps not be as much of a factor as it might be
> on x86_64).

On AArch64 IIRC only those instructions that are single copy atomic as 
per the architecture are allowed to be atomic loads. Thus we do not 
generate such loops anywhere.


>
> The atomic load patterns you have could still be wrong, for example by
> generating a LDXP/STXP loop or something else that can store on an
> atomic load.  In that case, the problem is similar as not having a
> custom load pattern, and the second case in the previous paragraph
> applies.


>
>> On armv8-a we can implement an atomic load of 16 bytes using an LDXP /
>> STXP loop as a 16 byte load isnt' single copy atomic. On armv8.1-a we do
>> have a CAS on 16 bytes.
>>
>> This was discussed last year here.
>>
>> https://gcc.gnu.org/ml/gcc/2016-06/msg00017.html
>>
>> and the consensus seemed to be that we couldn't really do a CAS on
>> readonly data as we would have no way of avoiding a trap.
>
> Yes, and I agree that not doing a CAS (or anything that can store on
> atomic load) is the right thing to do.
>
> BTW, on the ARM targets, is it possible that the __sync builtins would
> use LDXP/STXP on 16 byte and the __atomic loads would fall back to using
> libatomic and locks?  Or do you simply not provide 16-byte __sync
> builtins?

* Atomic loads always use libatomic and locks for 16 bytes on AArch64.
* There are no sync primitives provided for 16-byte operations on
AArch64, and it's been that way forever.  Thus __sync_fetch_and_add on a
TImode value will cause a link error (a short illustration follows
below).  Arguably that's a bug, but no one seems to have actually
complained so far over the years.
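
To illustrate that asymmetry, here is a minimal example (mine, not from
this thread) for AArch64:

    /* Illustrative only.  Build with: gcc example.c -latomic
       The __atomic load resolves to libatomic's (lock-based)
       __atomic_load_16, whereas the 16-byte __sync builtin has no
       library fallback and fails to link (undefined reference to
       __sync_fetch_and_add_16).  */
    unsigned __int128 x;

    unsigned __int128
    atomic_read (void)
    {
      return __atomic_load_n (&x, __ATOMIC_SEQ_CST);
    }

    unsigned __int128
    sync_add (void)
    {
      return __sync_fetch_and_add (&x, 1);
    }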

>
>> I'm sure I'm missing something piece of the puzzle, so I'd be interested
>> in how you avoid this in this implementation on x86_64 with what I can
>> see is a CAS.
>
> If we'd start from scratch, we wouldn't use cmpxchg16b if we don't have
> a 16-byte atomic load.  However, we did do that, and there might be code
> out there that does have inlined cmpxchg16b.  As discussed in the thread
> you cited, changing GCC 7 back to libatomics *and* the use of locks
> would be an effective ABI break.  Therefore, my patch (and Option 3b)
> makes a compromise and delegates to libatomic but libatomic gets custom
> code to actually keep using cmpxchg16b.  That means that the actual
> synchronization doesn't change, but we constrain this problem to
> libatomic and prevent application code generated with GCC 7 from being
> directly affected.
>
> Does this clarify the x86_64 situation?
>

Thanks for clarifying the x86_64 situation.

I hope this helps in explaining the ARM situation.

regards
Ramana
Torvald Riegel Feb. 3, 2017, 3:07 p.m. UTC | #9
On Fri, 2017-02-03 at 13:44 +0000, Ramana Radhakrishnan wrote:
> __atomic_load on ARM appears to be ok as well
> 
> except for
> 
> __atomic_load_di which should really be the ldrexd / strexd loop but we 
> could ameliorate that similar to your option 3b.

This uses just ldrexd now, and thus is not guaranteed to be atomic?

> On AArch64
> 
> * <16 byte loads have always been fine. The architecture allows single 
> copy atomic loads using single load instructions for all other sizes and 
> memory models, so we are fine there.
> 
> * we have gone through the libatomic locks from day one of the port for 
> 16 byte loads.  This has been a bit of a bugbear for a number of users 
> within ARM who would really like to get performance without heavy weight 
> locks for 16 byte atomic ops.

Would it be acceptable for those users to have loads that perform like
CAS loops, especially under contention?  Or are these users more
concerned about aarch64 not offering a true atomic 16-byte load?

> We could never switch this around on AArch64 to using the loop (or CAS) 
> by default as this would be the reverse issue (i.e. old compilers 
> calling libatomic locks and new code using the inlined instruction 
> sequence).

I think there's an implicit assumption that whenever one uses code
generated with a particular compiler, the libatomic version being used
at runtime is at least as recent as that particular compiler.  In a mix
of old and new code, this would mean that new libatomic has to be used.
However, AFAIK we haven't specified that explicitly yet.  (Richard
Henderson said we'd make similar implicit assumptions for libgcc, if I
understood him correctly.)

If that assumption holds, then switching from locks in libatomic to
inlined instructions is possible, provided that libatomic switches too.

> 
> My interest in this patch was piqued because if we were to switch 
> aarch64 to not use the locks in libatomic and go to a CAS or the loop 
> sequence I showed in my reply to Jakub, I don't believe we have an ABI 
> issue as I would expect there to be a single copy of libatomic on the 
> system and everyone would simply be using that.

Right, and it would have to be at least as recent as the most recent
compiler being used (eg, the system compiler on that system).

> However we would end up with the situation of generating stores for 
> __atomic_loads as you described above.
> 
> We could still in theory do that and gain the same bug for rodata and 
> volatile atomic pointers on AArch64 - I don't see a way of avoiding that 
> on aarch32 as we have a similar ABI issue to you.

Agreed.

> 
> > The patch introduces a can_atomic_load_p, which checks that an entry
> > exists in optabs and the machine descriptions for an atomic load of the
> > particular mode.
> >
> > IIRC, arm and aarch64 had atomic load patterns defined whenever they had
> > a CAS defined.  It would be nice if you could double-check that.  If
> > that's the case, nothing should change with my patch because
> > can_atomic_load_p would always return true if a CAS could be issued.
> > If that's not the case, arm/aarch64 could be in the same boat as x86_64
> > with cmpxchg16b; then, using Option 3b might be a useful (intermediary)
> > solution for ARM as well (OTOH, compatibility with existing code and
> > __sync builtins might perhaps not be as much of a factor as it might be
> > on x86_64).
> 
> On AArch64 IIRC only those instructions that are single copy atomic as 
> per the architecture are allowed to be atomic loads. Thus we do not 
> generate such loops anywhere.

I think the condition we'd have to check is rather that whenever there
is an atomic CAS available, there is also an atomic load available.
That is not quite the same as only having truly atomic loads available,
because a CAS could still be available regardless of the availability
of a matching load.
Jakub Jelinek Feb. 3, 2017, 3:13 p.m. UTC | #10
On Fri, Feb 03, 2017 at 04:07:22PM +0100, Torvald Riegel wrote:
> On Fri, 2017-02-03 at 13:44 +0000, Ramana Radhakrishnan wrote:
> > __atomic_load on ARM appears to be ok as well
> > 
> > except for
> > 
> > __atomic_load_di which should really be the ldrexd / strexd loop but we 
> > could ameliorate that similar to your option 3b.
> 
> This uses just ldrexd now, and thus is not guaranteed to be atomic?
> 
> > On AArch64
> > 
> > * <16 byte loads have always been fine. The architecture allows single 
> > copy atomic loads using single load instructions for all other sizes and 
> > memory models, so we are fine there.
> > 
> > * we have gone through the libatomic locks from day one of the port for 
> > 16 byte loads.  This has been a bit of a bugbear for a number of users 
> > within ARM who would really like to get performance without heavy weight 
> > locks for 16 byte atomic ops.
> 
> Would it be acceptable for those users to have loads that perform like
> CAS loops, especially under contention?  Or are these users more
> concerned about aarch64 not offering a true atomic 16-byte load?

Can the store you need for atomicity be into an automatic var on the stack?
Then there really shouldn't be any contention on that var (especially if it
is padded so the cache line doesn't contain anything else).  Does load
contention on the atomic var matter if there are no stores to it?

	Jakub
Ramana Radhakrishnan Feb. 3, 2017, 4:19 p.m. UTC | #11
On 03/02/17 15:13, Jakub Jelinek wrote:
> On Fri, Feb 03, 2017 at 04:07:22PM +0100, Torvald Riegel wrote:
>> On Fri, 2017-02-03 at 13:44 +0000, Ramana Radhakrishnan wrote:
>>> __atomic_load on ARM appears to be ok as well
>>>
>>> except for
>>>
>>> __atomic_load_di which should really be the ldrexd / strexd loop but we
>>> could ameliorate that similar to your option 3b.
>>
>> This uses just ldrexd now, and thus is not guaranteed to be atomic?
>>
>>> On AArch64
>>>
>>> * <16 byte loads have always been fine. The architecture allows single
>>> copy atomic loads using single load instructions for all other sizes and
>>> memory models, so we are fine there.
>>>
>>> * we have gone through the libatomic locks from day one of the port for
>>> 16 byte loads.  This has been a bit of a bugbear for a number of users
>>> within ARM who would really like to get performance without heavy weight
>>> locks for 16 byte atomic ops.
>>
>> Would it be acceptable for those users to have loads that perform like
>> CAS loops, especially under contention?  Or are these users more
>> concerned about aarch64 not offering a true atomic 16-byte load?
>
> Can the store you need for atomicity be into an automatic var on the stack?

No, it has to be to the same location.

Ramana
Jakub Jelinek Feb. 3, 2017, 4:21 p.m. UTC | #12
On Fri, Feb 03, 2017 at 04:19:58PM +0000, Ramana Radhakrishnan wrote:
> > > Would it be acceptable for those users to have loads that perform like
> > > CAS loops, especially under contention?  Or are these users more
> > > concerned about aarch64 not offering a true atomic 16-byte load?
> > 
> > Can the store you need for atomicity be into an automatic var on the stack?
> 
> No, it has to be to the same location.

But then it is the same problem as using cmpxchg16b on x86_64: the
location could be read-only, or it could be too slow for what users
expect from an atomic load.

	Jakub
Torvald Riegel Feb. 3, 2017, 4:27 p.m. UTC | #13
On Fri, 2017-02-03 at 17:21 +0100, Jakub Jelinek wrote:
> On Fri, Feb 03, 2017 at 04:19:58PM +0000, Ramana Radhakrishnan wrote:
> > > > Would it be acceptable for those users to have loads that perform like
> > > > CAS loops, especially under contention?  Or are these users more
> > > > concerned about aarch64 not offering a true atomic 16-byte load?
> > > 
> > > Can the store you need for atomicity be into an automatic var on the stack?
> > 
> > No, it has to be to the same location.
> 
> But then it is the same problem as using cmpxchg16b on x86_64, the location
> could be read-only, or that it is too slow otherwise for what users expect
> for atomic load.

It would be the same problem.

I was merely interested in the needs and concerns of those users that
Ramana mentioned, regardless of whether these needs could be addressed
in the scope of the __atomic builtins.

For example, if those users just need fast atomic read-modify-write
operations but not actually pure loads in their use cases (e.g.,
reductions in a parallel workload), then something other than __atomic
could provide that.
Richard Henderson Feb. 6, 2017, 8:19 p.m. UTC | #14
On 02/03/2017 05:44 AM, Ramana Radhakrishnan wrote:
> On 02/02/17 15:21, Torvald Riegel wrote:
>> On Thu, 2017-02-02 at 14:48 +0000, Ramana Radhakrishnan wrote:
>>> On 30/01/17 18:54, Torvald Riegel wrote:
>>>> This patch fixes the __atomic builtins to not implement supposedly
>>>> lock-free atomic loads based on just a compare-and-swap operation.
>>>>
>>>> If there is no hardware-backed atomic load for a certain memory
>>>> location, the current implementation can implement the load with a CAS
>>>> while claiming that the access is lock-free.  This is a bug in the cases
>>>> of volatile atomic loads and atomic loads to read-only-mapped memory; it
>>>> also creates a lot of contention in case of concurrent atomic loads,
>>>> which results in at least counter-intuitive performance because most
>>>> users probably understand "lock-free" to mean hardware-backed (and thus
>>>> "fast") instead of just in the progress-criteria sense.
>>>>
>>>> This patch implements option 3b of the choices described here:
>>>> https://gcc.gnu.org/ml/gcc/2017-01/msg00167.html
>>>
>>>
>>> Will Deacon pointed me at this thread asking if something similar could
>>> be done on ARM.
>>
>> It would be nice if someone more familiar with ARM could double-check
>> that ARM is not affected.  I guess ARM isn't, but that's based on me
>> looking at machine descriptions, which I hadn't ever done before working
>> on this patch...
>>
>
> ARM doesn't have __int128 support, so I don't think the problem exists there.
>
> On ARM, on architecture levels (i.e arch < armv6k) that do not have single copy
> atomic routines we end up with calling the kernel helper routines where the
> appropriate handling is done by the kernel depending on whether you are
> multicore or not.
>
> __atomic_load on ARM appears to be ok as well
>
> except for
>
> __atomic_load_di which should really be the ldrexd / strexd loop but we could
> ameliorate that similar to your option 3b.

No, look again.  ldrexd has 64-bit single-copy semantics WITHOUT requiring the 
strexd.  It's only the AArch64 (64-bit) LDXP that requires the store.


r~
Tom de Vries April 11, 2018, 9:38 a.m. UTC | #15
On 01/30/2017 07:54 PM, Torvald Riegel wrote:
> This patch fixes the __atomic builtins to not implement supposedly
> lock-free atomic loads based on just a compare-and-swap operation.

Hi,

The internals doc still lists CAS
(https://gcc.gnu.org/onlinedocs/gccint/Standard-Names.html#index-atomic_005floadmode-instruction-pattern):
...
‘atomic_loadmode’

     This pattern implements an atomic load operation with memory model
     semantics.  Operand 1 is the memory address being loaded from.
     Operand 0 is the result of the load.  Operand 2 is the memory model
     to be used for the load operation.

     If not present, the __atomic_load built-in function will either
     resort to a normal load with memory barriers, or a compare-and-swap
     operation if a normal load would not be atomic.
...

Thanks,
- Tom

Patch

commit 1db13cb386e673d5265bcaf2d70fc25dda22e5fd
Author: Torvald Riegel <triegel@redhat.com>
Date:   Fri Jan 27 17:40:44 2017 +0100

    Fix __atomic to not implement atomic loads with CAS.
    
    gcc/
    	* builtins.c (fold_builtin_atomic_always_lock_free): Make "lock-free"
    	conditional on existence of a fast atomic load.
    	* optabs-query.c (can_atomic_load_p): New function.
    	* optabs-query.h (can_atomic_load_p): Declare it.
    	* optabs.c (expand_atomic_exchange): Always delegate to libatomic if
    	no fast atomic load is available for the particular size of access.
    	(expand_atomic_compare_and_swap): Likewise.
    	(expand_atomic_load): Likewise.
    	(expand_atomic_store): Likewise.
    	(expand_atomic_fetch_op): Likewise.
    	* testsuite/lib/target-supports.exp
    	(check_effective_target_sync_int_128): Remove x86 because it provides
    	no fast atomic load.
    	(check_effective_target_sync_int_128_runtime): Likewise.
    
    libatomic/
    	* acinclude.m4: Add #define FAST_ATOMIC_LDST_*.
    	* auto-config.h.in: Regenerate.
    	* config/x86/host-config.h (FAST_ATOMIC_LDST_16): Define to 0.
    	(atomic_compare_exchange_n): New.
    	* glfree.c (EXACT, LARGER): Change condition and add comments.

diff --git a/gcc/builtins.c b/gcc/builtins.c
index bf68e31..0a0e8b9 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -6157,8 +6157,9 @@  fold_builtin_atomic_always_lock_free (tree arg0, tree arg1)
 
   /* Check if a compare_and_swap pattern exists for the mode which represents
      the required size.  The pattern is not allowed to fail, so the existence
-     of the pattern indicates support is present.  */
-  if (can_compare_and_swap_p (mode, true))
+     of the pattern indicates support is present.  Also require that an
+     atomic load exists for the required size.  */
+  if (can_compare_and_swap_p (mode, true) && can_atomic_load_p (mode))
     return boolean_true_node;
   else
     return boolean_false_node;
diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c
index 6c34a4e..4899333 100644
--- a/gcc/optabs-query.c
+++ b/gcc/optabs-query.c
@@ -584,6 +584,25 @@  can_atomic_exchange_p (machine_mode mode, bool allow_libcall)
   return can_compare_and_swap_p (mode, allow_libcall);
 }
 
+/* Return true if an atomic load can be performed without falling back to
+   a compare-and-swap.  */
+
+bool
+can_atomic_load_p (machine_mode mode)
+{
+  enum insn_code icode;
+
+  /* Does the target support the load directly?  */
+  icode = direct_optab_handler (atomic_load_optab, mode);
+  if (icode != CODE_FOR_nothing)
+    return true;
+
+  /* If the size of the object is greater than word size on this target,
+     then we assume that a load will not be atomic.  Also see
+     expand_atomic_load.  */
+  return GET_MODE_PRECISION (mode) <= BITS_PER_WORD;
+}
+
 /* Determine whether "1 << x" is relatively cheap in word_mode.  */
 
 bool
diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
index a80a0e7..e85a7f1 100644
--- a/gcc/optabs-query.h
+++ b/gcc/optabs-query.h
@@ -176,6 +176,7 @@  int can_mult_highpart_p (machine_mode, bool);
 bool can_vec_mask_load_store_p (machine_mode, machine_mode, bool);
 bool can_compare_and_swap_p (machine_mode, bool);
 bool can_atomic_exchange_p (machine_mode, bool);
+bool can_atomic_load_p (machine_mode);
 bool lshift_cheap_p (bool);
 
 #endif
diff --git a/gcc/optabs.c b/gcc/optabs.c
index d8831a8..1afd593 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -6086,8 +6086,15 @@  expand_atomic_test_and_set (rtx target, rtx mem, enum memmodel model)
 rtx
 expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model)
 {
+  machine_mode mode = GET_MODE (mem);
   rtx ret;
 
+  /* If loads are not atomic for the required size and we are not called to
+     provide a __sync builtin, do not do anything so that we stay consistent
+     with atomic loads of the same size.  */
+  if (!can_atomic_load_p (mode) && !is_mm_sync (model))
+    return NULL_RTX;
+
   ret = maybe_emit_atomic_exchange (target, mem, val, model);
 
   /* Next try a compare-and-swap loop for the exchange.  */
@@ -6121,6 +6128,12 @@  expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval,
   rtx target_oval, target_bool = NULL_RTX;
   rtx libfunc;
 
+  /* If loads are not atomic for the required size and we are not called to
+     provide a __sync builtin, do not do anything so that we stay consistent
+     with atomic loads of the same size.  */
+  if (!can_atomic_load_p (mode) && !is_mm_sync (succ_model))
+    return false;
+
   /* Load expected into a register for the compare and swap.  */
   if (MEM_P (expected))
     expected = copy_to_reg (expected);
@@ -6316,19 +6329,13 @@  expand_atomic_load (rtx target, rtx mem, enum memmodel model)
     }
 
   /* If the size of the object is greater than word size on this target,
-     then we assume that a load will not be atomic.  */
+     then we assume that a load will not be atomic.  We could try to
+     emulate a load with a compare-and-swap operation, but the store that
+     doing this could result in would be incorrect if this is a volatile
+     atomic load or targeting read-only-mapped memory.  */
   if (GET_MODE_PRECISION (mode) > BITS_PER_WORD)
-    {
-      /* Issue val = compare_and_swap (mem, 0, 0).
-	 This may cause the occasional harmless store of 0 when the value is
-	 already 0, but it seems to be OK according to the standards guys.  */
-      if (expand_atomic_compare_and_swap (NULL, &target, mem, const0_rtx,
-					  const0_rtx, false, model, model))
-	return target;
-      else
-      /* Otherwise there is no atomic load, leave the library call.  */
-        return NULL_RTX;
-    }
+    /* If there is no atomic load, leave the library call.  */
+    return NULL_RTX;
 
   /* Otherwise assume loads are atomic, and emit the proper barriers.  */
   if (!target || target == const0_rtx)
@@ -6370,7 +6377,9 @@  expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release)
 	return const0_rtx;
     }
 
-  /* If using __sync_lock_release is a viable alternative, try it.  */
+  /* If using __sync_lock_release is a viable alternative, try it.
+     Note that this will not be set to true if we are expanding a generic
+     __atomic_store_n.  */
   if (use_release)
     {
       icode = direct_optab_handler (sync_lock_release_optab, mode);
@@ -6389,16 +6398,22 @@  expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release)
     }
 
   /* If the size of the object is greater than word size on this target,
-     a default store will not be atomic, Try a mem_exchange and throw away
-     the result.  If that doesn't work, don't do anything.  */
+     a default store will not be atomic.  */
   if (GET_MODE_PRECISION (mode) > BITS_PER_WORD)
     {
-      rtx target = maybe_emit_atomic_exchange (NULL_RTX, mem, val, model);
-      if (!target)
-        target = maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem, val);
-      if (target)
-        return const0_rtx;
-      else
+      /* If loads are atomic or we are called to provide a __sync builtin,
+	 we can try an atomic_exchange and throw away the result.  Otherwise,
+	 don't do anything so that we do not create an inconsistency between
+	 loads and stores.  */
+      if (can_atomic_load_p (mode) || is_mm_sync (model))
+	{
+	  rtx target = maybe_emit_atomic_exchange (NULL_RTX, mem, val, model);
+	  if (!target)
+	    target = maybe_emit_compare_and_swap_exchange_loop (NULL_RTX, mem,
+								val);
+	  if (target)
+	    return const0_rtx;
+	}
         return NULL_RTX;
     }
 
@@ -6713,6 +6728,12 @@  expand_atomic_fetch_op (rtx target, rtx mem, rtx val, enum rtx_code code,
   rtx result;
   bool unused_result = (target == const0_rtx);
 
+  /* If loads are not atomic for the required size and we are not called to
+     provide a __sync builtin, do not do anything so that we stay consistent
+     with atomic loads of the same size.  */
+  if (!can_atomic_load_p (mode) && !is_mm_sync (model))
+    return NULL_RTX;
+
   result = expand_atomic_fetch_op_no_fallback (target, mem, val, code, model,
 					       after);
   
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 95a1c50..7a26008 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -6514,9 +6514,7 @@  proc check_effective_target_section_anchors { } {
 # Return 1 if the target supports atomic operations on "int_128" values.
 
 proc check_effective_target_sync_int_128 { } {
-    if { (([istarget i?86-*-*] || [istarget x86_64-*-*])
-	  && ![is-effective-target ia32])
-	 || [istarget spu-*-*] } {
+    if { [istarget spu-*-*] } {
 	return 1
     } else {
 	return 0
@@ -6525,23 +6523,10 @@  proc check_effective_target_sync_int_128 { } {
 
 # Return 1 if the target supports atomic operations on "int_128" values
 # and can execute them.
+# This requires support for both compare-and-swap and true atomic loads.
 
 proc check_effective_target_sync_int_128_runtime { } {
-    if { (([istarget i?86-*-*] || [istarget x86_64-*-*])
-	  && ![is-effective-target ia32]
-	  && [check_cached_effective_target sync_int_128_available {
-	      check_runtime_nocache sync_int_128_available {
-		  #include "cpuid.h"
-		  int main ()
-		  {
-		      unsigned int eax, ebx, ecx, edx;
-		      if (__get_cpuid (1, &eax, &ebx, &ecx, &edx))
-			return !(ecx & bit_CMPXCHG16B);
-		      return 1;
-		  }
-	      } ""
-	  }])
-	 || [istarget spu-*-*] } {
+    if { [istarget spu-*-*] } {
 	return 1
     } else {
 	return 0
diff --git a/libatomic/acinclude.m4 b/libatomic/acinclude.m4
index a86e52b..485d731 100644
--- a/libatomic/acinclude.m4
+++ b/libatomic/acinclude.m4
@@ -96,6 +96,7 @@  AC_DEFUN([LIBAT_HAVE_ATOMIC_LOADSTORE],[
   LIBAT_DEFINE_YESNO([HAVE_ATOMIC_LDST_$2], [$libat_cv_have_at_ldst_$2],
 	[Have __atomic_load/store for $2 byte integers.])
   AH_BOTTOM([#define MAYBE_HAVE_ATOMIC_LDST_$2 HAVE_ATOMIC_LDST_$2])
+  AH_BOTTOM([#define FAST_ATOMIC_LDST_$2 HAVE_ATOMIC_LDST_$2])
 ])
 
 dnl
diff --git a/libatomic/auto-config.h.in b/libatomic/auto-config.h.in
index 83e54e2..d5b8a26 100644
--- a/libatomic/auto-config.h.in
+++ b/libatomic/auto-config.h.in
@@ -222,6 +222,16 @@ 
 
 #define MAYBE_HAVE_ATOMIC_LDST_1 HAVE_ATOMIC_LDST_1
 
+#define FAST_ATOMIC_LDST_16 HAVE_ATOMIC_LDST_16
+
+#define MAYBE_HAVE_ATOMIC_TAS_1 HAVE_ATOMIC_TAS_1
+
+#define MAYBE_HAVE_ATOMIC_TAS_2 HAVE_ATOMIC_TAS_2
+
+#define MAYBE_HAVE_ATOMIC_TAS_4 HAVE_ATOMIC_TAS_4
+
+#define MAYBE_HAVE_ATOMIC_TAS_8 HAVE_ATOMIC_TAS_8
+
 #define MAYBE_HAVE_ATOMIC_TAS_16 HAVE_ATOMIC_TAS_16
 
 #define MAYBE_HAVE_ATOMIC_EXCHANGE_1 HAVE_ATOMIC_EXCHANGE_1
@@ -232,6 +242,8 @@ 
 
 #define MAYBE_HAVE_ATOMIC_EXCHANGE_8 HAVE_ATOMIC_EXCHANGE_8
 
+#define FAST_ATOMIC_LDST_1 HAVE_ATOMIC_LDST_1
+
 #define MAYBE_HAVE_ATOMIC_EXCHANGE_16 HAVE_ATOMIC_EXCHANGE_16
 
 #define MAYBE_HAVE_ATOMIC_CAS_1 HAVE_ATOMIC_CAS_1
@@ -242,8 +254,6 @@ 
 
 #define MAYBE_HAVE_ATOMIC_CAS_8 HAVE_ATOMIC_CAS_8
 
-#define MAYBE_HAVE_ATOMIC_LDST_2 HAVE_ATOMIC_LDST_2
-
 #define MAYBE_HAVE_ATOMIC_CAS_16 HAVE_ATOMIC_CAS_16
 
 #define MAYBE_HAVE_ATOMIC_FETCH_ADD_1 HAVE_ATOMIC_FETCH_ADD_1
@@ -254,6 +264,8 @@ 
 
 #define MAYBE_HAVE_ATOMIC_FETCH_ADD_8 HAVE_ATOMIC_FETCH_ADD_8
 
+#define MAYBE_HAVE_ATOMIC_LDST_2 HAVE_ATOMIC_LDST_2
+
 #define MAYBE_HAVE_ATOMIC_FETCH_ADD_16 HAVE_ATOMIC_FETCH_ADD_16
 
 #define MAYBE_HAVE_ATOMIC_FETCH_OP_1 HAVE_ATOMIC_FETCH_OP_1
@@ -264,22 +276,20 @@ 
 
 #define MAYBE_HAVE_ATOMIC_FETCH_OP_8 HAVE_ATOMIC_FETCH_OP_8
 
-#define MAYBE_HAVE_ATOMIC_LDST_4 HAVE_ATOMIC_LDST_4
-
 #define MAYBE_HAVE_ATOMIC_FETCH_OP_16 HAVE_ATOMIC_FETCH_OP_16
 
 #ifndef WORDS_BIGENDIAN
 #define WORDS_BIGENDIAN 0
 #endif
 
-#define MAYBE_HAVE_ATOMIC_LDST_8 HAVE_ATOMIC_LDST_8
+#define FAST_ATOMIC_LDST_2 HAVE_ATOMIC_LDST_2
 
-#define MAYBE_HAVE_ATOMIC_LDST_16 HAVE_ATOMIC_LDST_16
+#define MAYBE_HAVE_ATOMIC_LDST_4 HAVE_ATOMIC_LDST_4
 
-#define MAYBE_HAVE_ATOMIC_TAS_1 HAVE_ATOMIC_TAS_1
+#define FAST_ATOMIC_LDST_4 HAVE_ATOMIC_LDST_4
 
-#define MAYBE_HAVE_ATOMIC_TAS_2 HAVE_ATOMIC_TAS_2
+#define MAYBE_HAVE_ATOMIC_LDST_8 HAVE_ATOMIC_LDST_8
 
-#define MAYBE_HAVE_ATOMIC_TAS_4 HAVE_ATOMIC_TAS_4
+#define FAST_ATOMIC_LDST_8 HAVE_ATOMIC_LDST_8
 
-#define MAYBE_HAVE_ATOMIC_TAS_8 HAVE_ATOMIC_TAS_8
+#define MAYBE_HAVE_ATOMIC_LDST_16 HAVE_ATOMIC_LDST_16
diff --git a/libatomic/config/x86/host-config.h b/libatomic/config/x86/host-config.h
index 5754db4..2e9f85a 100644
--- a/libatomic/config/x86/host-config.h
+++ b/libatomic/config/x86/host-config.h
@@ -47,6 +47,9 @@  extern unsigned int libat_feat1_edx HIDDEN;
 # define MAYBE_HAVE_ATOMIC_EXCHANGE_16	IFUNC_COND_1
 # undef MAYBE_HAVE_ATOMIC_LDST_16
 # define MAYBE_HAVE_ATOMIC_LDST_16	IFUNC_COND_1
+/* Since load and store are implemented with CAS, they are not fast.  */
+# undef FAST_ATOMIC_LDST_16
+# define FAST_ATOMIC_LDST_16		0
 # if IFUNC_ALT == 1
 #  undef HAVE_ATOMIC_CAS_16
 #  define HAVE_ATOMIC_CAS_16 1
@@ -64,6 +67,21 @@  extern unsigned int libat_feat1_edx HIDDEN;
 # endif
 #endif
 
+#if defined(__x86_64__) && N == 16 && IFUNC_ALT == 1
+static inline bool
+atomic_compare_exchange_n (UTYPE *mptr, UTYPE *eptr, UTYPE newval,
+                           bool weak_p UNUSED, int sm UNUSED, int fm UNUSED)
+{
+  UTYPE cmpval = *eptr;
+  UTYPE oldval = __sync_val_compare_and_swap_16 (mptr, cmpval, newval);
+  if (oldval == cmpval)
+    return true;
+  *eptr = oldval;
+  return false;
+}
+# define atomic_compare_exchange_n atomic_compare_exchange_n
+#endif /* Have CAS 16 */
+
 #endif /* HAVE_IFUNC */
 
 #include_next <host-config.h>
diff --git a/libatomic/glfree.c b/libatomic/glfree.c
index b68dec7..59fe533 100644
--- a/libatomic/glfree.c
+++ b/libatomic/glfree.c
@@ -24,26 +24,41 @@ 
 
 #include "libatomic_i.h"
 
-
+/* Accesses with a power-of-two size are not lock-free if we don't have an
+   integer type of this size or if they are not naturally aligned.  They
+   are lock-free if such a naturally aligned access is always lock-free
+   according to the compiler, which requires that both atomic loads and CAS
+   are available.
+   In all other cases, we fall through to LARGER (see below).  */
 #define EXACT(N)						\
   do {								\
     if (!C2(HAVE_INT,N)) break;					\
     if ((uintptr_t)ptr & (N - 1)) break;			\
     if (__atomic_always_lock_free(N, 0)) return true;		\
-    if (C2(MAYBE_HAVE_ATOMIC_CAS_,N)) return true;		\
+    if (!C2(MAYBE_HAVE_ATOMIC_CAS_,N)) break;			\
+    if (C2(FAST_ATOMIC_LDST_,N)) return true;			\
   } while (0)
 
 
+/* We next check to see if an access of a larger size is lock-free.  We use
+   a similar check as in EXACT, except that we also check that the alignment
+   of the access is so that the data to be accessed is completely covered
+   by the larger access.  */
 #define LARGER(N)						\
   do {								\
     uintptr_t r = (uintptr_t)ptr & (N - 1);			\
     if (!C2(HAVE_INT,N)) break;					\
-    if (!C2(HAVE_ATOMIC_LDST_,N)) break;			\
+    if (!C2(FAST_ATOMIC_LDST_,N)) break;			\
     if (!C2(MAYBE_HAVE_ATOMIC_CAS_,N)) break;			\
     if (r + n <= N) return true;				\
   } while (0)
 
 
+/* Note that this can return that a size/alignment is not lock-free even if
+   all the operations that we use to implement the respective accesses provide
+   lock-free forward progress as specified in C++14:  Users likely expect
+   "lock-free" to also mean "fast", which is why we do not return true if, for
+   example, we implement loads with this size/alignment using a CAS.  */
 bool
 libat_is_lock_free (size_t n, void *ptr)
 {