Index: doc/extend.texi
===================================================================
--- doc/extend.texi	(revision 173831)
+++ doc/extend.texi	(working copy)
@@ -6719,6 +6719,22 @@ speculated to) before the builtin, but p
 be globally visible yet, and previous memory loads may not yet be
 satisfied.
 
+@item @var{type} __sync_swap_full (@var{type} *ptr, @var{type} value, ...)
+@itemx @var{type} __sync_swap_acq (@var{type} *ptr, @var{type} value, ...)
+@itemx @var{type} __sync_swap_rel (@var{type} *ptr, @var{type} value, ...)
+@findex __sync_swap_full
+@findex __sync_swap_acq
+@findex __sync_swap_rel
+These builtins implement an atomic exchange operation.  They write
+@var{value} into @code{*@var{ptr}}, and return the previous contents
+of @code{*@var{ptr}}.  The different variants provide a full barrier,
+acquire barrier, or a release barrier respectively depending on the
+suffix.
+
+If the acquire or release variants of these operations are not
+available on the given target, the compiler will fall back to a full
+barrier.
+
 @item void __sync_lock_release (@var{type} *ptr, ...)
 @findex __sync_lock_release
 This builtin releases the lock acquired by @code{__sync_lock_test_and_set}.
Index: c-family/c-common.c
===================================================================
--- c-family/c-common.c	(revision 173831)
+++ c-family/c-common.c	(working copy)
@@ -9035,6 +9035,9 @@ resolve_overloaded_builtin (location_t l
     case BUILT_IN_VAL_COMPARE_AND_SWAP_N:
     case BUILT_IN_LOCK_TEST_AND_SET_N:
     case BUILT_IN_LOCK_RELEASE_N:
+    case BUILT_IN_SWAP_FULL_N:
+    case BUILT_IN_SWAP_ACQ_N:
+    case BUILT_IN_SWAP_REL_N:
       {
 	int n = sync_resolve_size (function, params);
 	tree new_function, first_param, result;
Index: optabs.c
===================================================================
--- optabs.c	(revision 173831)
+++ optabs.c	(working copy)
@@ -6988,6 +6988,70 @@ expand_sync_lock_test_and_set (rtx mem, 
 
   return NULL_RTX;
 }
+
+/* This function expands an atomic exchange operation: atomically store
+   VAL in MEM and return the previous value in MEM.
+
+   TARGET is an option place to stick the return value.
+   MBMODE is the memory barrier type to use for the operation.  */
+
+rtx
+expand_sync_swap (rtx mem, rtx val, rtx target, enum membar_mode mbmode)
+{
+  enum machine_mode mode = GET_MODE (mem);
+  enum insn_code icode;
+  direct_optab op;
+
+  switch (mbmode)
+    {
+    case MEMBAR_MODE_ACQUIRE:
+      op = sync_swap_acq_optab;
+      break;
+    case MEMBAR_MODE_RELEASE:
+      op = sync_swap_rel_optab;
+      break;
+    case MEMBAR_MODE_FULL:
+      op = sync_swap_full_optab;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  /* If no variant is found, try the full barrier.  */
+  if (direct_optab_handler (op, mode) == CODE_FOR_nothing)
+    op = sync_swap_full_optab;
+
+  /* If the target supports the swap directly, great.  */
+  icode = direct_optab_handler (op, mode);
+  if (icode != CODE_FOR_nothing)
+    {
+      struct expand_operand ops[3];
+
+      create_output_operand (&ops[0], target, mode);
+      create_fixed_operand (&ops[1], mem);
+      /* VAL may have been promoted to a wider mode.  Shrink it if so.  */
+      create_convert_operand_to (&ops[2], val, mode, true);
+      if (maybe_expand_insn (icode, 3, ops))
+	return ops[0].value;
+    }
+
+  /* Otherwise, use a compare-and-swap loop for the exchange.  */
+  if (direct_optab_handler (sync_compare_and_swap_optab, mode)
+      != CODE_FOR_nothing)
+    {
+      if (!target || !register_operand (target, mode))
+	target = gen_reg_rtx (mode);
+      if (GET_MODE (val) != VOIDmode && GET_MODE (val) != mode)
+	val = convert_modes (mode, GET_MODE (val), val, 1);
+      if (expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
+	{
+	  /* Issue a full barrier.  */
+	  expand_builtin_synchronize ();
+	  return target;
+	}
+    }
+
+  return NULL_RTX;
+}
 
 /* Return true if OPERAND is suitable for operand number OPNO of
    instruction ICODE.  */
Index: optabs.h
===================================================================
--- optabs.h	(revision 173831)
+++ optabs.h	(working copy)
@@ -669,9 +669,19 @@ enum direct_optab_index
   /* Atomic compare and swap.  */
   DOI_sync_compare_and_swap,
 
-  /* Atomic exchange with acquire semantics.  */
+  /* Atomic exchange with acquire semantics.  Exchange not fully
+     guaranteed.  Some targets may only support a store of 1.  */
   DOI_sync_lock_test_and_set,
 
+  /* Atomic exchange with acquire semantics.  */
+  DOI_sync_swap_acq,
+
+  /* Atomic exchange with release semantics.  */
+  DOI_sync_swap_rel,
+
+  /* Atomic exchange with full barrier semantics.  */
+  DOI_sync_swap_full,
+
   /* Atomic clear with release semantics.  */
   DOI_sync_lock_release,
 
@@ -720,6 +730,12 @@ typedef struct direct_optab_d *direct_op
   (&direct_optab_table[(int) DOI_sync_compare_and_swap])
 #define sync_lock_test_and_set_optab \
   (&direct_optab_table[(int) DOI_sync_lock_test_and_set])
+#define sync_swap_acq_optab \
+  (&direct_optab_table[(int) DOI_sync_swap_acq])
+#define sync_swap_rel_optab \
+  (&direct_optab_table[(int) DOI_sync_swap_rel])
+#define sync_swap_full_optab \
+  (&direct_optab_table[(int) DOI_sync_swap_full])
 #define sync_lock_release_optab \
   (&direct_optab_table[(int) DOI_sync_lock_release])
 
Index: genopinit.c
===================================================================
--- genopinit.c	(revision 173831)
+++ genopinit.c	(working copy)
@@ -239,6 +239,9 @@ static const char * const optabs[] =
   "set_direct_optab_handler (sync_new_nand_optab, $A, CODE_FOR_$(sync_new_nand$I$a$))",
   "set_direct_optab_handler (sync_compare_and_swap_optab, $A, CODE_FOR_$(sync_compare_and_swap$I$a$))",
   "set_direct_optab_handler (sync_lock_test_and_set_optab, $A, CODE_FOR_$(sync_lock_test_and_set$I$a$))",
+  "set_direct_optab_handler (sync_swap_acq_optab, $A, CODE_FOR_$(sync_swap_acq$I$a$))",
+  "set_direct_optab_handler (sync_swap_rel_optab, $A, CODE_FOR_$(sync_swap_rel$I$a$))",
+  "set_direct_optab_handler (sync_swap_full_optab, $A, CODE_FOR_$(sync_swap_full$I$a$))",
   "set_direct_optab_handler (sync_lock_release_optab, $A, CODE_FOR_$(sync_lock_release$I$a$))",
   "set_optab_handler (vec_set_optab, $A, CODE_FOR_$(vec_set$a$))",
   "set_optab_handler (vec_extract_optab, $A, CODE_FOR_$(vec_extract$a$))",
Index: builtins.c
===================================================================
--- builtins.c	(revision 173831)
+++ builtins.c	(working copy)
@@ -5682,9 +5682,35 @@ expand_builtin_lock_test_and_set (enum m
   return expand_sync_lock_test_and_set (mem, val, target);
 }
 
+/* Expand the __sync_swap_* intrinsics.
+
+   EXP is the CALL_EXPR.
+   TARGET is an optional place for us to store the results.
+   MBMODE is the memory barrier mode to use.  */
+
+static rtx
+expand_builtin_swap (enum machine_mode mode, tree exp, rtx target,
+		     enum membar_mode mbmode)
+{
+  rtx val, mem;
+  enum machine_mode old_mode;
+
+  /* Expand the operands.  */
+  mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
+  val = expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, mode, EXPAND_NORMAL);
+  /* If VAL is promoted to a wider mode, convert it back to MODE.  Take care
+     of CONST_INTs, where we know the old_mode only from the call argument.  */
+  old_mode = GET_MODE (val);
+  if (old_mode == VOIDmode)
+    old_mode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp, 1)));
+  val = convert_modes (mode, old_mode, val, 1);
+
+  return expand_sync_swap (mem, val, target, mbmode);
+}
+
 /* Expand the __sync_synchronize intrinsic.  */
 
-static void
+void
 expand_builtin_synchronize (void)
 {
   gimple x;
@@ -6495,6 +6521,39 @@ expand_builtin (tree exp, rtx target, rt
 	return target;
       break;
 
+    case BUILT_IN_SWAP_ACQ_1:
+    case BUILT_IN_SWAP_ACQ_2:
+    case BUILT_IN_SWAP_ACQ_4:
+    case BUILT_IN_SWAP_ACQ_8:
+    case BUILT_IN_SWAP_ACQ_16:
+      mode = get_builtin_sync_mode (fcode - BUILT_IN_SWAP_ACQ_1);
+      target = expand_builtin_swap (mode, exp, target, MEMBAR_MODE_ACQUIRE);
+      if (target)
+	return target;
+      break;
+
+    case BUILT_IN_SWAP_REL_1:
+    case BUILT_IN_SWAP_REL_2:
+    case BUILT_IN_SWAP_REL_4:
+    case BUILT_IN_SWAP_REL_8:
+    case BUILT_IN_SWAP_REL_16:
+      mode = get_builtin_sync_mode (fcode - BUILT_IN_SWAP_REL_1);
+      target = expand_builtin_swap (mode, exp, target, MEMBAR_MODE_RELEASE);
+      if (target)
+	return target;
+      break;
+
+    case BUILT_IN_SWAP_FULL_1:
+    case BUILT_IN_SWAP_FULL_2:
+    case BUILT_IN_SWAP_FULL_4:
+    case BUILT_IN_SWAP_FULL_8:
+    case BUILT_IN_SWAP_FULL_16:
+      mode = get_builtin_sync_mode (fcode - BUILT_IN_SWAP_FULL_1);
+      target = expand_builtin_swap (mode, exp, target, MEMBAR_MODE_FULL);
+      if (target)
+	return target;
+      break;
+
     case BUILT_IN_LOCK_TEST_AND_SET_1:
     case BUILT_IN_LOCK_TEST_AND_SET_2:
     case BUILT_IN_LOCK_TEST_AND_SET_4:
Index: sync-builtins.def
===================================================================
--- sync-builtins.def	(revision 173831)
+++ sync-builtins.def	(working copy)
@@ -235,6 +235,63 @@ DEF_SYNC_BUILTIN (BUILT_IN_LOCK_TEST_AND
 DEF_SYNC_BUILTIN (BUILT_IN_LOCK_TEST_AND_SET_16, "__sync_lock_test_and_set_16",
 		  BT_FN_I16_VPTR_I16, ATTR_NOTHROW_LEAF_LIST)
 
+DEF_SYNC_BUILTIN (BUILT_IN_SWAP_ACQ_N,
+		  "__sync_swap_acq",
+		  BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST)
+DEF_SYNC_BUILTIN (BUILT_IN_SWAP_ACQ_1,
+		  "__sync_swap_acq_1",
+		  BT_FN_I1_VPTR_I1, ATTR_NOTHROW_LEAF_LIST)
+DEF_SYNC_BUILTIN (BUILT_IN_SWAP_ACQ_2,
+		  "__sync_swap_acq_2",
+		  BT_FN_I2_VPTR_I2, ATTR_NOTHROW_LEAF_LIST)
+DEF_SYNC_BUILTIN (BUILT_IN_SWAP_ACQ_4,
+		  "__sync_swap_acq_4",
+		  BT_FN_I4_VPTR_I4, ATTR_NOTHROW_LEAF_LIST)
+DEF_SYNC_BUILTIN (BUILT_IN_SWAP_ACQ_8,
+		  "__sync_swap_acq_8",
+		  BT_FN_I8_VPTR_I8, ATTR_NOTHROW_LEAF_LIST)
+DEF_SYNC_BUILTIN (BUILT_IN_SWAP_ACQ_16,
+		  "__sync_swap_acq_16",
+		  BT_FN_I16_VPTR_I16, ATTR_NOTHROW_LEAF_LIST)
+
+DEF_SYNC_BUILTIN (BUILT_IN_SWAP_REL_N,
+		  "__sync_swap_rel",
+		  BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST)
+DEF_SYNC_BUILTIN (BUILT_IN_SWAP_REL_1,
+		  "__sync_swap_rel_1",
+		  BT_FN_I1_VPTR_I1, ATTR_NOTHROW_LEAF_LIST)
+DEF_SYNC_BUILTIN (BUILT_IN_SWAP_REL_2,
+		  "__sync_swap_rel_2",
+		  BT_FN_I2_VPTR_I2, ATTR_NOTHROW_LEAF_LIST)
+DEF_SYNC_BUILTIN (BUILT_IN_SWAP_REL_4,
+		  "__sync_swap_rel_4",
+		  BT_FN_I4_VPTR_I4, ATTR_NOTHROW_LEAF_LIST)
+DEF_SYNC_BUILTIN (BUILT_IN_SWAP_REL_8,
+		  "__sync_swap_rel_8",
+		  BT_FN_I8_VPTR_I8, ATTR_NOTHROW_LEAF_LIST)
+DEF_SYNC_BUILTIN (BUILT_IN_SWAP_REL_16,
+		  "__sync_swap_rel_16",
+		  BT_FN_I16_VPTR_I16, ATTR_NOTHROW_LEAF_LIST)
+
+DEF_SYNC_BUILTIN (BUILT_IN_SWAP_FULL_N,
+		  "__sync_swap_full",
+		  BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST)
+DEF_SYNC_BUILTIN (BUILT_IN_SWAP_FULL_1,
+		  "__sync_swap_full_1",
+		  BT_FN_I1_VPTR_I1, ATTR_NOTHROW_LEAF_LIST)
+DEF_SYNC_BUILTIN (BUILT_IN_SWAP_FULL_2,
+		  "__sync_swap_full_2",
+		  BT_FN_I2_VPTR_I2, ATTR_NOTHROW_LEAF_LIST)
+DEF_SYNC_BUILTIN (BUILT_IN_SWAP_FULL_4,
+		  "__sync_swap_full_4",
+		  BT_FN_I4_VPTR_I4, ATTR_NOTHROW_LEAF_LIST)
+DEF_SYNC_BUILTIN (BUILT_IN_SWAP_FULL_8,
+		  "__sync_swap_full_8",
+		  BT_FN_I8_VPTR_I8, ATTR_NOTHROW_LEAF_LIST)
+DEF_SYNC_BUILTIN (BUILT_IN_SWAP_FULL_16,
+		  "__sync_swap_full_16",
+		  BT_FN_I16_VPTR_I16, ATTR_NOTHROW_LEAF_LIST)
+
 DEF_SYNC_BUILTIN (BUILT_IN_LOCK_RELEASE_N, "__sync_lock_release",
 		  BT_FN_VOID_VAR, ATTR_NOTHROW_LEAF_LIST)
 DEF_SYNC_BUILTIN (BUILT_IN_LOCK_RELEASE_1, "__sync_lock_release_1",
Index: expr.h
===================================================================
--- expr.h	(revision 173831)
+++ expr.h	(working copy)
@@ -161,6 +161,14 @@ enum optab_methods
   OPTAB_MUST_WIDEN
 };
 
+/* Memory barrier type.  */
+enum membar_mode
+{
+  MEMBAR_MODE_RELEASE,
+  MEMBAR_MODE_ACQUIRE,
+  MEMBAR_MODE_FULL
+};
+
 /* Generate code for a simple binary or unary operation.  "Simple" in
    this case means "can be unambiguously described by a (mode, code)
    pair and mapped to a single optab."  */
@@ -217,6 +225,7 @@ rtx expand_bool_compare_and_swap (rtx, r
 rtx expand_sync_operation (rtx, rtx, enum rtx_code);
 rtx expand_sync_fetch_operation (rtx, rtx, enum rtx_code, bool, rtx);
 rtx expand_sync_lock_test_and_set (rtx, rtx, rtx);
+rtx expand_sync_swap (rtx, rtx, rtx, enum membar_mode);
 
 /* Functions from expmed.c:  */
 
@@ -248,6 +257,7 @@ extern void expand_builtin_setjmp_receiv
 extern rtx expand_builtin_saveregs (void);
 extern void expand_builtin_trap (void);
 extern rtx builtin_strncpy_read_str (void *, HOST_WIDE_INT, enum machine_mode);
+extern void expand_builtin_synchronize (void);
 
 /* Functions from expr.c:  */
 
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 173831)
+++ config/i386/i386.md	(working copy)
@@ -250,6 +250,7 @@
   UNSPECV_MWAIT
   UNSPECV_CMPXCHG
   UNSPECV_XCHG
+  UNSPECV_SWAP_FULL
   UNSPECV_LOCK
   UNSPECV_PROLOGUE_USE
   UNSPECV_CLD
Index: config/i386/sync.md
===================================================================
--- config/i386/sync.md	(revision 173831)
+++ config/i386/sync.md	(working copy)
@@ -232,6 +232,15 @@
   return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}";
 })
 
+(define_insn "sync_swap_full<mode>"
+  [(set (match_operand:SWI 0 "register_operand" "=<r>")
+	(unspec_volatile:SWI
+	  [(match_operand:SWI 1 "memory_operand" "+m")] UNSPECV_SWAP_FULL))
+   (set (match_dup 1)
+	(match_operand:SWI 2 "register_operand" "0"))]
+  ""
+  "xchg{<imodesuffix>}\t{%1, %0|%0, %1}")
+
 ;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space.
 (define_insn "sync_lock_test_and_set<mode>"
   [(set (match_operand:SWI 0 "register_operand" "=<r>")
Index: libgcc-std.ver
===================================================================
--- libgcc-std.ver	(revision 173831)
+++ libgcc-std.ver	(working copy)
@@ -1919,3 +1919,26 @@ GCC_4.6.0 {
   __morestack_initial_sp
   __splitstack_find
 }
+
+%inherit GCC_4.7.0 GCC_4.6.0
+GCC_4.7.0 {
+  __sync_swap_acq_1
+  __sync_swap_rel_1
+  __sync_swap_full_1
+
+  __sync_swap_acq_2
+  __sync_swap_rel_2
+  __sync_swap_full_2
+
+  __sync_swap_acq_4
+  __sync_swap_rel_4
+  __sync_swap_full_4
+
+  __sync_swap_acq_8
+  __sync_swap_rel_8
+  __sync_swap_full_8
+
+  __sync_swap_acq_16
+  __sync_swap_rel_16
+  __sync_swap_full_16
+}
