Patchwork Support __sync libcalls

login
register
mail settings
Submitter Richard Henderson
Date Nov. 7, 2011, 11:25 p.m.
Message ID <4EB868E4.90101@redhat.com>
Download mbox | patch
Permalink /patch/124214/
State New
Headers show

Comments

Richard Henderson - Nov. 7, 2011, 11:25 p.m.
A slight revision of the previous version.  Updated for mainline,
and tested on x86_64 and arm.

This time I also saw some more references to the sync_c_a_s optab
in the java front end.  Now updated to use can_compare_and_swap_p.

Follow-on patches will invoke init_sync_libfuncs for arm, pa, mips, sh.

Committed.


r~
commit 29139cdc71a9f22d6eee8c3d7a5bcc1539c63b0b
Author: rth <rth@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Mon Nov 7 23:11:55 2011 +0000

    Allow libcalls to be installed for legacy __sync optabs.
    
    This allows a target which implements the __sync interfaces
    in libgcc to continue to use them transparently with the
    new __atomic builtins.
    
    It is assumed that these libgcc routines DO NOT use spinlocks.
    This is true of all extant libgcc instances.
    
            * optabs.h (OTI_sync_compare_and_swap, OTI_sync_lock_test_and_set,
            OTI_sync_old_add, OTI_sync_old_sub, OTI_sync_old_ior,
            OTI_sync_old_and, OTI_sync_old_xor, OTI_sync_old_nand,
            OTI_sync_new_add, OTI_sync_new_sub, OTI_sync_new_ior,
            OTI_sync_new_and, OTI_sync_new_xor, OTI_sync_new_nand): Move and
            rename from the direct_optab_index enum.
            (sync_compare_and_swap_optab, sync_lock_test_and_set_optab,
            sync_old_add_optab, sync_old_sub_optab, sync_old_ior_optab,
            sync_old_and_optab, sync_old_xor_optab, sync_old_nand_optab,
            sync_new_add_optab, sync_new_sub_optab, sync_new_ior_optab,
            sync_new_and_optab, sync_new_xor_optab, sync_new_nand_optab): Read
            from the optab_table, not the direct_optab_table.
            (init_sync_libfuncs): Declare.
            (can_compare_and_swap_p): Update parameters.
            * optabs.c (init_sync_libfuncs_1, init_sync_libfuncs): New.
            (can_compare_and_swap_p): Add allow_libcall parameter; if true,
            test for the legacy compare-and-swap libcall.
            (expand_atomic_exchange): Use the legacy test-and-set libcall.
            (expand_atomic_compare_and_swap): Use the legacy CAS libcall.
            (struct atomic_op_functions): Update for optab type changes.
            (maybe_emit_op): Likewise.
            (expand_atomic_fetch_op): Use the legacy fetch-op libcalls.
            * builtins.c (fold_builtin_atomic_always_lock_free): Update call
            to can_compare_and_swap_p.
            * omp-low.c (expand_omp_atomic_fetch_op): Likewise.
            (expand_omp_atomic_pipeline): Likewise.
            * genopinit.c (optabs): Make sync_old_*_optab, sync_new_*_optab,
            sync_compare_and_swap_optab, sync_lock_test_and_set_optab regular
            optabs.
	    * doc/md.texi (sync_compare_and_swap): Update docs for libcalls.

Patch

diff --git a/gcc/builtins.c b/gcc/builtins.c
index 205d586..5162927 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -5512,7 +5512,7 @@  fold_builtin_atomic_always_lock_free (tree arg0, tree arg1)
   /* Check if a compare_and_swap pattern exists for the mode which represents
      the required size.  The pattern is not allowed to fail, so the existence
      of the pattern indicates support is present.  */
-  if (can_compare_and_swap_p (mode))
+  if (can_compare_and_swap_p (mode, true))
     return integer_one_node;
   else
     return integer_zero_node;
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 6b75f2b..64ad833 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5595,6 +5595,17 @@  be able to take the destination of the @code{MODE_CC} set and pass it
 to the @code{cbranchcc4} or @code{cstorecc4} pattern as the first
 operand of the comparison (the second will be @code{(const_int 0)}).
 
+For targets where the operating system may provide support for this
+operation via library calls, the @code{sync_compare_and_swap_optab}
+may be initialized to a function with the same interface as the
+@code{__sync_val_compare_and_swap_@var{n}} built-in.  If the entire
+set of @code{__sync} builtins is supported via library calls, the
+target can initialize all of the optabs at once with
+@code{init_sync_libfuncs}.
+For the purposes of C++11 @code{std::atomic::is_lock_free}, it is
+assumed that these library calls do @emph{not} use any kind of
+interruptible locking.
+
 @cindex @code{sync_add@var{mode}} instruction pattern
 @cindex @code{sync_sub@var{mode}} instruction pattern
 @cindex @code{sync_ior@var{mode}} instruction pattern
diff --git a/gcc/genopinit.c b/gcc/genopinit.c
index 44eba24..63c58a8 100644
--- a/gcc/genopinit.c
+++ b/gcc/genopinit.c
@@ -228,20 +228,20 @@  static const char * const optabs[] =
   "set_direct_optab_handler (sync_and_optab, $A, CODE_FOR_$(sync_and$I$a$))",
   "set_direct_optab_handler (sync_xor_optab, $A, CODE_FOR_$(sync_xor$I$a$))",
   "set_direct_optab_handler (sync_nand_optab, $A, CODE_FOR_$(sync_nand$I$a$))",
-  "set_direct_optab_handler (sync_old_add_optab, $A, CODE_FOR_$(sync_old_add$I$a$))",
-  "set_direct_optab_handler (sync_old_sub_optab, $A, CODE_FOR_$(sync_old_sub$I$a$))",
-  "set_direct_optab_handler (sync_old_ior_optab, $A, CODE_FOR_$(sync_old_ior$I$a$))",
-  "set_direct_optab_handler (sync_old_and_optab, $A, CODE_FOR_$(sync_old_and$I$a$))",
-  "set_direct_optab_handler (sync_old_xor_optab, $A, CODE_FOR_$(sync_old_xor$I$a$))",
-  "set_direct_optab_handler (sync_old_nand_optab, $A, CODE_FOR_$(sync_old_nand$I$a$))",
-  "set_direct_optab_handler (sync_new_add_optab, $A, CODE_FOR_$(sync_new_add$I$a$))",
-  "set_direct_optab_handler (sync_new_sub_optab, $A, CODE_FOR_$(sync_new_sub$I$a$))",
-  "set_direct_optab_handler (sync_new_ior_optab, $A, CODE_FOR_$(sync_new_ior$I$a$))",
-  "set_direct_optab_handler (sync_new_and_optab, $A, CODE_FOR_$(sync_new_and$I$a$))",
-  "set_direct_optab_handler (sync_new_xor_optab, $A, CODE_FOR_$(sync_new_xor$I$a$))",
-  "set_direct_optab_handler (sync_new_nand_optab, $A, CODE_FOR_$(sync_new_nand$I$a$))",
-  "set_direct_optab_handler (sync_compare_and_swap_optab, $A, CODE_FOR_$(sync_compare_and_swap$I$a$))",
-  "set_direct_optab_handler (sync_lock_test_and_set_optab, $A, CODE_FOR_$(sync_lock_test_and_set$I$a$))",
+  "set_optab_handler (sync_old_add_optab, $A, CODE_FOR_$(sync_old_add$I$a$))",
+  "set_optab_handler (sync_old_sub_optab, $A, CODE_FOR_$(sync_old_sub$I$a$))",
+  "set_optab_handler (sync_old_ior_optab, $A, CODE_FOR_$(sync_old_ior$I$a$))",
+  "set_optab_handler (sync_old_and_optab, $A, CODE_FOR_$(sync_old_and$I$a$))",
+  "set_optab_handler (sync_old_xor_optab, $A, CODE_FOR_$(sync_old_xor$I$a$))",
+  "set_optab_handler (sync_old_nand_optab, $A, CODE_FOR_$(sync_old_nand$I$a$))",
+  "set_optab_handler (sync_new_add_optab, $A, CODE_FOR_$(sync_new_add$I$a$))",
+  "set_optab_handler (sync_new_sub_optab, $A, CODE_FOR_$(sync_new_sub$I$a$))",
+  "set_optab_handler (sync_new_ior_optab, $A, CODE_FOR_$(sync_new_ior$I$a$))",
+  "set_optab_handler (sync_new_and_optab, $A, CODE_FOR_$(sync_new_and$I$a$))",
+  "set_optab_handler (sync_new_xor_optab, $A, CODE_FOR_$(sync_new_xor$I$a$))",
+  "set_optab_handler (sync_new_nand_optab, $A, CODE_FOR_$(sync_new_nand$I$a$))",
+  "set_optab_handler (sync_compare_and_swap_optab, $A, CODE_FOR_$(sync_compare_and_swap$I$a$))",
+  "set_optab_handler (sync_lock_test_and_set_optab, $A, CODE_FOR_$(sync_lock_test_and_set$I$a$))",
   "set_direct_optab_handler (sync_lock_release_optab, $A, CODE_FOR_$(sync_lock_release$I$a$))",
   "set_direct_optab_handler (atomic_exchange_optab, $A, CODE_FOR_$(atomic_exchange$I$a$))",
   "set_direct_optab_handler (atomic_compare_and_swap_optab, $A, CODE_FOR_$(atomic_compare_and_swap$I$a$))",
diff --git a/gcc/java/ChangeLog b/gcc/java/ChangeLog
index 715d1f5..27a44f1 100644
--- a/gcc/java/ChangeLog
+++ b/gcc/java/ChangeLog
@@ -1,3 +1,10 @@ 
+2011-11-07  Richard Henderson  <rth@redhat.com>
+
+	* builtins.c (compareAndSwapInt_builtin): Use can_compare_and_swap_p.
+	(compareAndSwapLong_builtin): Likewise.
+	(compareAndSwapObject_builtin): Likewise.
+	(VMSupportsCS8_builtin): Likewise.
+
 2011-11-02  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>
 
 	* Make-lang.in (jvspec.o): Pass SHLIB instead of SHLIB_LINK.
diff --git a/gcc/java/builtins.c b/gcc/java/builtins.c
index 5ab345d..66addcb 100644
--- a/gcc/java/builtins.c
+++ b/gcc/java/builtins.c
@@ -319,9 +319,7 @@  compareAndSwapInt_builtin (tree method_return_type ATTRIBUTE_UNUSED,
 			   tree orig_call)
 {
   enum machine_mode mode = TYPE_MODE (int_type_node);
-  if (direct_optab_handler (sync_compare_and_swap_optab, mode)
-      != CODE_FOR_nothing
-      || flag_use_atomic_builtins)
+  if (can_compare_and_swap_p (mode, flag_use_atomic_builtins))
     {
       tree addr, stmt;
       enum built_in_function fncode = BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4;
@@ -342,13 +340,12 @@  compareAndSwapLong_builtin (tree method_return_type ATTRIBUTE_UNUSED,
 			    tree orig_call)
 {
   enum machine_mode mode = TYPE_MODE (long_type_node);
-  if (direct_optab_handler (sync_compare_and_swap_optab, mode)
-      != CODE_FOR_nothing
-      || (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode)
-	  && flag_use_atomic_builtins))
-    /* We don't trust flag_use_atomic_builtins for multi-word
-       compareAndSwap.  Some machines such as ARM have atomic libfuncs
-       but not the multi-word versions.  */
+  /* We don't trust flag_use_atomic_builtins for multi-word compareAndSwap.
+     Some machines such as ARM have atomic libfuncs but not the multi-word
+     versions.  */
+  if (can_compare_and_swap_p (mode,
+			      (flag_use_atomic_builtins
+			       && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)))
     {
       tree addr, stmt;
       enum built_in_function fncode = BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8;
@@ -368,9 +365,7 @@  compareAndSwapObject_builtin (tree method_return_type ATTRIBUTE_UNUSED,
 			      tree orig_call)
 {
   enum machine_mode mode = TYPE_MODE (ptr_type_node);
-  if (direct_optab_handler (sync_compare_and_swap_optab, mode)
-      != CODE_FOR_nothing
-      || flag_use_atomic_builtins)
+  if (can_compare_and_swap_p (mode, flag_use_atomic_builtins))
   {
     tree addr, stmt;
     enum built_in_function builtin;
@@ -448,8 +443,7 @@  VMSupportsCS8_builtin (tree method_return_type,
 {
   enum machine_mode mode = TYPE_MODE (long_type_node);
   gcc_assert (method_return_type == boolean_type_node);
-  if (direct_optab_handler (sync_compare_and_swap_optab, mode)
-      != CODE_FOR_nothing)
+  if (can_compare_and_swap_p (mode, false))
     return boolean_true_node;
   else
     return boolean_false_node;
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index d8e7ce3..8145957 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -5097,7 +5097,7 @@  expand_omp_atomic_fetch_op (basic_block load_bb,
      matter is that (with the exception of i486 vs i586 and xadd) all targets
      that support any atomic operaton optab also implements compare-and-swap.
      Let optabs.c take care of expanding any compare-and-swap loop.  */
-  if (!can_compare_and_swap_p (imode))
+  if (!can_compare_and_swap_p (imode, true))
     return false;
 
   gsi = gsi_last_bb (load_bb);
@@ -5168,7 +5168,7 @@  expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
   type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
   itype = TREE_TYPE (TREE_TYPE (cmpxchg));
 
-  if (!can_compare_and_swap_p (TYPE_MODE (itype)))
+  if (!can_compare_and_swap_p (TYPE_MODE (itype), true))
     return false;
 
   /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
diff --git a/gcc/optabs.c b/gcc/optabs.c
index a466e56..b2388ec 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -6586,6 +6586,57 @@  init_optabs (void)
   targetm.init_libfuncs ();
 }
 
+/* A helper function for init_sync_libfuncs.  Using the basename BASE, install
+   libfuncs into TAB for BASE_N, for each power-of-two N in [1, MAX].  */
+
+static void
+init_sync_libfuncs_1 (optab tab, const char *base, int max)
+{
+  enum machine_mode mode;
+  char buf[64];
+  size_t len = strlen (base);
+  int i;
+
+  gcc_assert (max <= 8);
+  gcc_assert (len + 3 < sizeof (buf));
+
+  memcpy (buf, base, len);
+  buf[len] = '_';
+  buf[len + 1] = '0';
+  buf[len + 2] = '\0';
+  /* Walk the sizes 1, 2, 4, 8 through MAX inclusive, widening MODE in step.  */
+  mode = QImode;
+  for (i = 1; i <= max; i *= 2)
+    {
+      buf[len + 1] = '0' + i;
+      set_optab_libfunc (tab, mode, buf);
+      mode = GET_MODE_2XWIDER_MODE (mode);
+    }
+}
+
+void
+init_sync_libfuncs (int max)
+{
+  init_sync_libfuncs_1 (sync_compare_and_swap_optab,
+			"__sync_val_compare_and_swap", max);
+  init_sync_libfuncs_1 (sync_lock_test_and_set_optab,
+			"__sync_lock_test_and_set", max);
+
+  init_sync_libfuncs_1 (sync_old_add_optab, "__sync_fetch_and_add", max);
+  init_sync_libfuncs_1 (sync_old_sub_optab, "__sync_fetch_and_sub", max);
+  init_sync_libfuncs_1 (sync_old_ior_optab, "__sync_fetch_and_ior", max);
+  init_sync_libfuncs_1 (sync_old_and_optab, "__sync_fetch_and_and", max);
+  init_sync_libfuncs_1 (sync_old_xor_optab, "__sync_fetch_and_xor", max);
+  init_sync_libfuncs_1 (sync_old_nand_optab, "__sync_fetch_and_nand", max);
+
+  init_sync_libfuncs_1 (sync_new_add_optab, "__sync_add_and_fetch", max);
+  init_sync_libfuncs_1 (sync_new_sub_optab, "__sync_sub_and_fetch", max);
+  init_sync_libfuncs_1 (sync_new_ior_optab, "__sync_ior_and_fetch", max);
+  init_sync_libfuncs_1 (sync_new_and_optab, "__sync_and_and_fetch", max);
+  init_sync_libfuncs_1 (sync_new_xor_optab, "__sync_xor_and_fetch", max);
+  init_sync_libfuncs_1 (sync_new_nand_optab, "__sync_nand_and_fetch", max);
+}
+
 /* Print information about the current contents of the optabs on
    STDERR.  */
 
@@ -7165,19 +7216,21 @@  expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
 /* Return true if there is a compare_and_swap pattern.  */
 
 bool
-can_compare_and_swap_p (enum machine_mode mode)
+can_compare_and_swap_p (enum machine_mode mode, bool allow_libcall)
 {
   enum insn_code icode;
 
-  /* Check for __sync_compare_and_swap.  */
-  icode = direct_optab_handler (sync_compare_and_swap_optab, mode);
-  if (icode != CODE_FOR_nothing)
-      return true;
-
   /* Check for __atomic_compare_and_swap.  */
   icode = direct_optab_handler (atomic_compare_and_swap_optab, mode);
   if (icode != CODE_FOR_nothing)
-      return true;
+    return true;
+
+  /* Check for __sync_compare_and_swap.  */
+  icode = optab_handler (sync_compare_and_swap_optab, mode);
+  if (icode != CODE_FOR_nothing)
+    return true;
+  if (allow_libcall && optab_libfunc (sync_compare_and_swap_optab, mode))
+    return true;
 
   /* No inline compare and swap.  */
   return false;
@@ -7266,7 +7319,6 @@  expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model,
 {
   enum machine_mode mode = GET_MODE (mem);
   enum insn_code icode;
-  rtx last_insn;
 
   /* If the target supports the exchange directly, great.  */
   icode = direct_optab_handler (atomic_exchange_optab, mode);
@@ -7296,16 +7348,17 @@  expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model,
 
   if (use_test_and_set)
     {
-      icode = direct_optab_handler (sync_lock_test_and_set_optab, mode);
-      last_insn = get_last_insn ();
-      if ((icode != CODE_FOR_nothing) && (model == MEMMODEL_SEQ_CST || 
-					  model == MEMMODEL_RELEASE ||
-					  model == MEMMODEL_ACQ_REL))
-	expand_builtin_mem_thread_fence (model);
+      icode = optab_handler (sync_lock_test_and_set_optab, mode);
 
       if (icode != CODE_FOR_nothing)
 	{
 	  struct expand_operand ops[3];
+	  rtx last_insn = get_last_insn ();
+
+	  if (model == MEMMODEL_SEQ_CST
+	      || model == MEMMODEL_RELEASE
+	      || model == MEMMODEL_ACQ_REL)
+	    expand_builtin_mem_thread_fence (model);
 
 	  create_output_operand (&ops[0], target, mode);
 	  create_fixed_operand (&ops[1], mem);
@@ -7313,16 +7366,35 @@  expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model,
 	  create_convert_operand_to (&ops[2], val, mode, true);
 	  if (maybe_expand_insn (icode, 3, ops))
 	    return ops[0].value;
+
+	  delete_insns_since (last_insn);
 	}
 
-      /* Remove any fence that was inserted since a compare and swap loop is
-	 already a full memory barrier.  */
-      if (last_insn != get_last_insn ())
-	delete_insns_since (last_insn);
+      /* If an external test-and-set libcall is provided, use that instead of
+	 any external compare-and-swap that we might get from the compare-and-
+	 swap-loop expansion below.  */
+      if (!can_compare_and_swap_p (mode, false))
+	{
+	  rtx libfunc = optab_libfunc (sync_lock_test_and_set_optab, mode);
+	  if (libfunc != NULL)
+	    {
+	      rtx addr;
+
+	      if (model == MEMMODEL_SEQ_CST
+		  || model == MEMMODEL_RELEASE
+		  || model == MEMMODEL_ACQ_REL)
+		expand_builtin_mem_thread_fence (model);
+
+	      addr = convert_memory_address (ptr_mode, XEXP (mem, 0));
+	      return emit_library_call_value (libfunc, target, LCT_NORMAL,
+					      mode, 2, addr, ptr_mode,
+					      val, mode);
+	    }
+	}
     }
 
   /* Otherwise, use a compare-and-swap loop for the exchange.  */
-  if (can_compare_and_swap_p (mode))
+  if (can_compare_and_swap_p (mode, true))
     {
       if (!target || !register_operand (target, mode))
 	target = gen_reg_rtx (mode);
@@ -7356,7 +7428,8 @@  expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval,
   enum machine_mode mode = GET_MODE (mem);
   struct expand_operand ops[8];
   enum insn_code icode;
-  rtx target_bool, target_oval;
+  rtx target_oval, target_bool = NULL_RTX;
+  rtx libfunc;
 
   /* Load expected into a register for the compare and swap.  */
   if (MEM_P (expected))
@@ -7400,7 +7473,7 @@  expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval,
 
   /* Otherwise fall back to the original __sync_val_compare_and_swap
      which is always seq-cst.  */
-  icode = direct_optab_handler (sync_compare_and_swap_optab, mode);
+  icode = optab_handler (sync_compare_and_swap_optab, mode);
   if (icode != CODE_FOR_nothing)
     {
       rtx cc_reg;
@@ -7413,7 +7486,6 @@  expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval,
 	return false;
 
       target_oval = ops[0].value;
-      target_bool = NULL_RTX;
 
       /* If the caller isn't interested in the boolean return value,
 	 skip the computation of it.  */
@@ -7424,17 +7496,37 @@  expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval,
       cc_reg = NULL_RTX;
       if (have_insn_for (COMPARE, CCmode))
 	note_stores (PATTERN (get_last_insn ()), find_cc_set, &cc_reg);
+      if (cc_reg)
+	{
+	  target_bool = emit_store_flag_force (target_bool, EQ, cc_reg,
+					       const0_rtx, VOIDmode, 0, 1);
+	  goto success;
+	}
+      goto success_bool_from_val;
+    }
 
-      target_bool
-	= (cc_reg
-	   ? emit_store_flag_force (target_bool, EQ, cc_reg,
-				    const0_rtx, VOIDmode, 0, 1)
-	   : emit_store_flag_force (target_bool, EQ, target_oval,
-				    expected, VOIDmode, 1, 1));
-      goto success;
+  /* Also check for library support for __sync_val_compare_and_swap.  */
+  libfunc = optab_libfunc (sync_compare_and_swap_optab, mode);
+  if (libfunc != NULL)
+    {
+      rtx addr = convert_memory_address (ptr_mode, XEXP (mem, 0));
+      target_oval = emit_library_call_value (libfunc, target_oval, LCT_NORMAL,
+					     mode, 3, addr, ptr_mode,
+					     expected, mode, desired, mode);
+
+      /* Compute the boolean return value only if requested.  */
+      if (ptarget_bool)
+	goto success_bool_from_val;
+      else
+	goto success;
     }
+
+  /* Failure.  */
   return false;
 
+ success_bool_from_val:
+   target_bool = emit_store_flag_force (target_bool, EQ, target_oval,
+					expected, VOIDmode, 1, 1);
  success:
   /* Make sure that the oval output winds up where the caller asked.  */
   if (ptarget_oval)
@@ -7570,12 +7662,12 @@  expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release)
 
 struct atomic_op_functions
 {
-  struct direct_optab_d *mem_fetch_before;
-  struct direct_optab_d *mem_fetch_after;
-  struct direct_optab_d *mem_no_result;
-  struct direct_optab_d *fetch_before;
-  struct direct_optab_d *fetch_after;
-  struct direct_optab_d *no_result;
+  direct_optab mem_fetch_before;
+  direct_optab mem_fetch_after;
+  direct_optab mem_no_result;
+  optab fetch_before;
+  optab fetch_after;
+  direct_optab no_result;
   enum rtx_code reverse_code;
 };
 
@@ -7666,7 +7758,6 @@  maybe_emit_op (const struct atomic_op_functions *optab, rtx target, rtx mem,
 	       rtx val, bool use_memmodel, enum memmodel model, bool after)
 {
   enum machine_mode mode = GET_MODE (mem);
-  struct direct_optab_d *this_optab;
   struct expand_operand ops[4];
   enum insn_code icode;
   int op_counter = 0;
@@ -7677,13 +7768,13 @@  maybe_emit_op (const struct atomic_op_functions *optab, rtx target, rtx mem,
     {
       if (use_memmodel)
         {
-	  this_optab = optab->mem_no_result;
+	  icode = direct_optab_handler (optab->mem_no_result, mode);
 	  create_integer_operand (&ops[2], model);
 	  num_ops = 3;
 	}
       else
         {
-	  this_optab = optab->no_result;
+	  icode = direct_optab_handler (optab->no_result, mode);
 	  num_ops = 2;
 	}
     }
@@ -7692,19 +7783,19 @@  maybe_emit_op (const struct atomic_op_functions *optab, rtx target, rtx mem,
     {
       if (use_memmodel)
         {
-	  this_optab = after ? optab->mem_fetch_after : optab->mem_fetch_before;
+	  icode = direct_optab_handler (after ? optab->mem_fetch_after
+					: optab->mem_fetch_before, mode);
 	  create_integer_operand (&ops[3], model);
-	  num_ops= 4;
+	  num_ops = 4;
 	}
       else
 	{
-	  this_optab = after ? optab->fetch_after : optab->fetch_before;
+	  icode = optab_handler (after ? optab->fetch_after
+				 : optab->fetch_before, mode);
 	  num_ops = 3;
 	}
       create_output_operand (&ops[op_counter++], target, mode);
     }
-
-  icode = direct_optab_handler (this_optab, mode);
   if (icode == CODE_FOR_nothing)
     return NULL_RTX;
 
@@ -7713,7 +7804,7 @@  maybe_emit_op (const struct atomic_op_functions *optab, rtx target, rtx mem,
   create_convert_operand_to (&ops[op_counter++], val, mode, true);
 
   if (maybe_expand_insn (icode, num_ops, ops))
-    return ((target == const0_rtx) ? const0_rtx : ops[0].value);
+    return (target == const0_rtx ? const0_rtx : ops[0].value);
 
   return NULL_RTX;
 } 
@@ -7767,7 +7858,7 @@  expand_atomic_fetch_op (rtx target, rtx mem, rtx val, enum rtx_code code,
 
   /* If the fetch value can be calculated from the other variation of fetch,
      try that operation.  */
-  if (after || optab.reverse_code != UNKNOWN || target == const0_rtx) 
+  if (after || unused_result || optab.reverse_code != UNKNOWN)
     {
       /* Try the __atomic version, then the older __sync version.  */
       result = maybe_emit_op (&optab, target, mem, val, true, model, !after);
@@ -7784,14 +7875,46 @@  expand_atomic_fetch_op (rtx target, rtx mem, rtx val, enum rtx_code code,
 	     Fetch_before == after REVERSE_OP val.  */
 	  if (!after)
 	    code = optab.reverse_code;
-	  result = expand_simple_binop (mode, code, result, val, NULL_RTX, true,
+	  result = expand_simple_binop (mode, code, result, val, target, true,
 					OPTAB_LIB_WIDEN);
 	  return result;
 	}
     }
 
+  /* Try the __sync libcalls only if we can't do compare-and-swap inline.  */
+  if (!can_compare_and_swap_p (mode, false))
+    {
+      rtx libfunc;
+      bool fixup = false;
+
+      libfunc = optab_libfunc (after ? optab.fetch_after
+			       : optab.fetch_before, mode);
+      if (libfunc == NULL
+	  && (after || unused_result || optab.reverse_code != UNKNOWN))
+	{
+	  fixup = true;
+	  if (!after)
+	    code = optab.reverse_code;
+	  libfunc = optab_libfunc (after ? optab.fetch_before
+				   : optab.fetch_after, mode);
+	}
+      if (libfunc != NULL)
+	{
+	  rtx addr = convert_memory_address (ptr_mode, XEXP (mem, 0));
+	  result = emit_library_call_value (libfunc, NULL, LCT_NORMAL, mode,
+					    2, addr, ptr_mode, val, mode);
+
+	  if (unused_result)
+	    return target;
+	  if (fixup)
+	    result = expand_simple_binop (mode, code, result, val, target,
+					  true, OPTAB_LIB_WIDEN);
+	  return result;
+	}
+    }
+
   /* If nothing else has succeeded, default to a compare and swap loop.  */
-  if (can_compare_and_swap_p (mode))
+  if (can_compare_and_swap_p (mode, true))
     {
       rtx insn;
       rtx t0 = gen_reg_rtx (mode), t1;
diff --git a/gcc/optabs.h b/gcc/optabs.h
index d70b3fa..bc705dc 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -386,6 +386,30 @@  enum optab_index
   /* Perform a raise to the power of integer.  */
   OTI_powi,
 
+  /* Atomic compare and swap.  */
+  OTI_sync_compare_and_swap,
+
+  /* Atomic exchange with acquire semantics.  */
+  OTI_sync_lock_test_and_set,
+
+  /* This second set is atomic operations in which we return the value
+     that existed in memory before the operation.  */
+  OTI_sync_old_add,
+  OTI_sync_old_sub,
+  OTI_sync_old_ior,
+  OTI_sync_old_and,
+  OTI_sync_old_xor,
+  OTI_sync_old_nand,
+
+  /* This third set is atomic operations in which we return the value
+     that resulted after performing the operation.  */
+  OTI_sync_new_add,
+  OTI_sync_new_sub,
+  OTI_sync_new_ior,
+  OTI_sync_new_and,
+  OTI_sync_new_xor,
+  OTI_sync_new_nand,
+
   OTI_MAX
 };
 
@@ -570,6 +594,23 @@  enum optab_index
 
 #define powi_optab (&optab_table[OTI_powi])
 
+#define sync_compare_and_swap_optab \
+  (&optab_table[(int) OTI_sync_compare_and_swap])
+#define sync_lock_test_and_set_optab \
+  (&optab_table[(int) OTI_sync_lock_test_and_set])
+#define sync_old_add_optab (&optab_table[(int) OTI_sync_old_add])
+#define sync_old_sub_optab (&optab_table[(int) OTI_sync_old_sub])
+#define sync_old_ior_optab (&optab_table[(int) OTI_sync_old_ior])
+#define sync_old_and_optab (&optab_table[(int) OTI_sync_old_and])
+#define sync_old_xor_optab (&optab_table[(int) OTI_sync_old_xor])
+#define sync_old_nand_optab (&optab_table[(int) OTI_sync_old_nand])
+#define sync_new_add_optab (&optab_table[(int) OTI_sync_new_add])
+#define sync_new_sub_optab (&optab_table[(int) OTI_sync_new_sub])
+#define sync_new_ior_optab (&optab_table[(int) OTI_sync_new_ior])
+#define sync_new_and_optab (&optab_table[(int) OTI_sync_new_and])
+#define sync_new_xor_optab (&optab_table[(int) OTI_sync_new_xor])
+#define sync_new_nand_optab (&optab_table[(int) OTI_sync_new_nand])
+
 /* Conversion optabs have their own table and indexes.  */
 enum convert_optab_index
 {
@@ -659,8 +700,10 @@  enum direct_optab_index
   DOI_cmpstrn,
   DOI_cmpmem,
 
-  /* Synchronization primitives.  This first set is atomic operation for
-     which we don't care about the resulting value.  */
+  /* Atomic clear with release semantics.  */
+  DOI_sync_lock_release,
+
+  /* Atomic operation with no resulting value.  */
   DOI_sync_add,
   DOI_sync_sub,
   DOI_sync_ior,
@@ -668,33 +711,6 @@  enum direct_optab_index
   DOI_sync_xor,
   DOI_sync_nand,
 
-  /* This second set is atomic operations in which we return the value
-     that existed in memory before the operation.  */
-  DOI_sync_old_add,
-  DOI_sync_old_sub,
-  DOI_sync_old_ior,
-  DOI_sync_old_and,
-  DOI_sync_old_xor,
-  DOI_sync_old_nand,
-
-  /* This third set is atomic operations in which we return the value
-     that resulted after performing the operation.  */
-  DOI_sync_new_add,
-  DOI_sync_new_sub,
-  DOI_sync_new_ior,
-  DOI_sync_new_and,
-  DOI_sync_new_xor,
-  DOI_sync_new_nand,
-
-  /* Atomic compare and swap.  */
-  DOI_sync_compare_and_swap,
-
-  /* Atomic exchange with acquire semantics.  */
-  DOI_sync_lock_test_and_set,
-
-  /* Atomic clear with release semantics.  */
-  DOI_sync_lock_release,
-
   /* Atomic operations with memory model parameters. */
   DOI_atomic_exchange,
   DOI_atomic_compare_and_swap,
@@ -748,30 +764,14 @@  typedef struct direct_optab_d *direct_optab;
 #define cmpstr_optab (&direct_optab_table[(int) DOI_cmpstr])
 #define cmpstrn_optab (&direct_optab_table[(int) DOI_cmpstrn])
 #define cmpmem_optab (&direct_optab_table[(int) DOI_cmpmem])
+#define sync_lock_release_optab \
+  (&direct_optab_table[(int) DOI_sync_lock_release])
 #define sync_add_optab (&direct_optab_table[(int) DOI_sync_add])
 #define sync_sub_optab (&direct_optab_table[(int) DOI_sync_sub])
 #define sync_ior_optab (&direct_optab_table[(int) DOI_sync_ior])
 #define sync_and_optab (&direct_optab_table[(int) DOI_sync_and])
 #define sync_xor_optab (&direct_optab_table[(int) DOI_sync_xor])
 #define sync_nand_optab (&direct_optab_table[(int) DOI_sync_nand])
-#define sync_old_add_optab (&direct_optab_table[(int) DOI_sync_old_add])
-#define sync_old_sub_optab (&direct_optab_table[(int) DOI_sync_old_sub])
-#define sync_old_ior_optab (&direct_optab_table[(int) DOI_sync_old_ior])
-#define sync_old_and_optab (&direct_optab_table[(int) DOI_sync_old_and])
-#define sync_old_xor_optab (&direct_optab_table[(int) DOI_sync_old_xor])
-#define sync_old_nand_optab (&direct_optab_table[(int) DOI_sync_old_nand])
-#define sync_new_add_optab (&direct_optab_table[(int) DOI_sync_new_add])
-#define sync_new_sub_optab (&direct_optab_table[(int) DOI_sync_new_sub])
-#define sync_new_ior_optab (&direct_optab_table[(int) DOI_sync_new_ior])
-#define sync_new_and_optab (&direct_optab_table[(int) DOI_sync_new_and])
-#define sync_new_xor_optab (&direct_optab_table[(int) DOI_sync_new_xor])
-#define sync_new_nand_optab (&direct_optab_table[(int) DOI_sync_new_nand])
-#define sync_compare_and_swap_optab \
-  (&direct_optab_table[(int) DOI_sync_compare_and_swap])
-#define sync_lock_test_and_set_optab \
-  (&direct_optab_table[(int) DOI_sync_lock_test_and_set])
-#define sync_lock_release_optab \
-  (&direct_optab_table[(int) DOI_sync_lock_release])
 
 #define atomic_exchange_optab \
   (&direct_optab_table[(int) DOI_atomic_exchange])
@@ -956,6 +956,9 @@  extern void set_optab_libfunc (optab, enum machine_mode, const char *);
 extern void set_conv_libfunc (convert_optab, enum machine_mode,
 			      enum machine_mode, const char *);
 
+/* Call this to install all of the __sync libcalls up to size MAX.  */
+extern void init_sync_libfuncs (int max);
+
 /* Generate code for a FIXED_CONVERT_EXPR.  */
 extern void expand_fixed_convert (rtx, rtx, int, int);
 
@@ -966,7 +969,7 @@  extern void expand_float (rtx, rtx, int);
 enum insn_code can_float_p (enum machine_mode, enum machine_mode, int);
 
 /* Return true if there is an inline compare and swap pattern.  */
-extern bool can_compare_and_swap_p (enum machine_mode);
+extern bool can_compare_and_swap_p (enum machine_mode, bool);
 
 /* Generate code for a compare and swap.  */
 extern bool expand_atomic_compare_and_swap (rtx *, rtx *, rtx, rtx, rtx, bool,