Patchwork [1/5] arm: Convert to atomic optabs.

login
register
mail settings
Submitter Richard Henderson
Date Dec. 1, 2011, 12:44 a.m.
Message ID <1322700249-4693-2-git-send-email-rth@redhat.com>
Download mbox | patch
Permalink /patch/128617/
State New
Headers show

Comments

Richard Henderson - Dec. 1, 2011, 12:44 a.m.
At the same time, perform post-reload splitting.
---
 gcc/config/arm/arm-protos.h   |    7 +-
 gcc/config/arm/arm.c          |  816 +++++++++++++++--------------------------
 gcc/config/arm/arm.h          |   18 -
 gcc/config/arm/arm.md         |   26 +-
 gcc/config/arm/constraints.md |    5 +
 gcc/config/arm/predicates.md  |    4 +
 gcc/config/arm/sync.md        |  668 +++++++++++++--------------------
 7 files changed, 582 insertions(+), 962 deletions(-)

Patch

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 23a29c6..8774c8c 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -155,12 +155,11 @@  extern const char *vfp_output_fstmd (rtx *);
 extern void arm_set_return_address (rtx, rtx);
 extern int arm_eliminable_register (rtx);
 extern const char *arm_output_shift(rtx *, int);
-extern void arm_expand_sync (enum machine_mode, struct arm_sync_generator *,
- 			     rtx, rtx, rtx, rtx);
-extern const char *arm_output_memory_barrier (rtx *);
-extern const char *arm_output_sync_insn (rtx, rtx *);
 extern unsigned int arm_sync_loop_insns (rtx , rtx *);
 extern int arm_attr_length_push_multi(rtx, rtx);
+extern void arm_expand_compare_and_swap (rtx op[]);
+extern void arm_split_compare_and_swap (rtx op[]);
+extern void arm_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);
 
 #if defined TREE_CODE
 extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index a57494c..da99496 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -24271,520 +24271,6 @@  arm_have_conditional_execution (void)
   return !TARGET_THUMB1;
 }
 
-/* Legitimize a memory reference for sync primitive implemented using
-   ldrex / strex.  We currently force the form of the reference to be
-   indirect without offset.  We do not yet support the indirect offset
-   addressing supported by some ARM targets for these
-   instructions.  */
-static rtx
-arm_legitimize_sync_memory (rtx memory)
-{
-  rtx addr = force_reg (Pmode, XEXP (memory, 0));
-  rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
-
-  set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
-  MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
-  return legitimate_memory;
-}
-
-/* An instruction emitter. */
-typedef void (* emit_f) (int label, const char *, rtx *);
-
-/* An instruction emitter that emits via the conventional
-   output_asm_insn.  */
-static void
-arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
-{
-  output_asm_insn (pattern, operands);
-}
-
-/* Count the number of emitted synchronization instructions.  */
-static unsigned arm_insn_count;
-
-/* An emitter that counts emitted instructions but does not actually
-   emit instruction into the instruction stream.  */
-static void
-arm_count (int label,
-	   const char *pattern ATTRIBUTE_UNUSED,
-	   rtx *operands ATTRIBUTE_UNUSED)
-{
-  if (! label)
-    ++ arm_insn_count;
-}
-
-/* Construct a pattern using conventional output formatting and feed
-   it to output_asm_insn.  Provides a mechanism to construct the
-   output pattern on the fly.  Note the hard limit on the pattern
-   buffer size.  */
-static void ATTRIBUTE_PRINTF_4
-arm_output_asm_insn (emit_f emit, int label, rtx *operands,
-		     const char *pattern, ...)
-{
-  va_list ap;
-  char buffer[256];
-
-  va_start (ap, pattern);
-  vsprintf (buffer, pattern, ap);
-  va_end (ap);
-  emit (label, buffer, operands);
-}
-
-/* Emit the memory barrier instruction, if any, provided by this
-   target to a specified emitter.  */
-static void
-arm_process_output_memory_barrier (emit_f emit, rtx *operands)
-{
-  if (TARGET_HAVE_DMB)
-    {
-      /* Note we issue a system level barrier. We should consider
-         issuing a inner shareabilty zone barrier here instead, ie.
-         "DMB ISH".  */
-      emit (0, "dmb\tsy", operands);
-      return;
-    }
-
-  if (TARGET_HAVE_DMB_MCR)
-    {
-      emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
-      return;
-    }
-
-  gcc_unreachable ();
-}
-
-/* Emit the memory barrier instruction, if any, provided by this
-   target.  */
-const char *
-arm_output_memory_barrier (rtx *operands)
-{
-  arm_process_output_memory_barrier (arm_emit, operands);
-  return "";
-}
-
-/* Helper to figure out the instruction suffix required on ldrex/strex
-   for operations on an object of the specified mode.  */
-static const char *
-arm_ldrex_suffix (enum machine_mode mode)
-{
-  switch (mode)
-    {
-    case QImode: return "b";
-    case HImode: return "h";
-    case SImode: return "";
-    case DImode: return "d";
-    default:
-      gcc_unreachable ();
-    }
-  return "";
-}
-
-/* Emit an ldrex{b,h,d, } instruction appropriate for the specified
-   mode.  */
-static void
-arm_output_ldrex (emit_f emit,
-		  enum machine_mode mode,
-		  rtx target,
-		  rtx memory)
-{
-  rtx operands[3];
-
-  operands[0] = target;
-  if (mode != DImode)
-    {
-      const char *suffix = arm_ldrex_suffix (mode);
-      operands[1] = memory;
-      arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
-    }
-  else
-    {
-      /* The restrictions on target registers in ARM mode are that the two
-	 registers are consecutive and the first one is even; Thumb is
-	 actually more flexible, but DI should give us this anyway.
-	 Note that the 1st register always gets the lowest word in memory.  */
-      gcc_assert ((REGNO (target) & 1) == 0);
-      operands[1] = gen_rtx_REG (SImode, REGNO (target) + 1);
-      operands[2] = memory;
-      arm_output_asm_insn (emit, 0, operands, "ldrexd\t%%0, %%1, %%C2");
-    }
-}
-
-/* Emit a strex{b,h,d, } instruction appropriate for the specified
-   mode.  */
-static void
-arm_output_strex (emit_f emit,
-		  enum machine_mode mode,
-		  const char *cc,
-		  rtx result,
-		  rtx value,
-		  rtx memory)
-{
-  rtx operands[4];
-
-  operands[0] = result;
-  operands[1] = value;
-  if (mode != DImode)
-    {
-      const char *suffix = arm_ldrex_suffix (mode);
-      operands[2] = memory;
-      arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2",
-			  suffix, cc);
-    }
-  else
-    {
-      /* The restrictions on target registers in ARM mode are that the two
-	 registers are consecutive and the first one is even; Thumb is
-	 actually more flexible, but DI should give us this anyway.
-	 Note that the 1st register always gets the lowest word in memory.  */
-      gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2);
-      operands[2] = gen_rtx_REG (SImode, REGNO (value) + 1);
-      operands[3] = memory;
-      arm_output_asm_insn (emit, 0, operands, "strexd%s\t%%0, %%1, %%2, %%C3",
-			   cc);
-    }
-}
-
-/* Helper to emit an it instruction in Thumb2 mode only; although the assembler
-   will ignore it in ARM mode, emitting it will mess up instruction counts we
-   sometimes keep 'flags' are the extra t's and e's if it's more than one
-   instruction that is conditional.  */
-static void
-arm_output_it (emit_f emit, const char *flags, const char *cond)
-{
-  rtx operands[1]; /* Don't actually use the operand.  */
-  if (TARGET_THUMB2)
-    arm_output_asm_insn (emit, 0, operands, "it%s\t%s", flags, cond);
-}
-
-/* Helper to emit a two operand instruction.  */
-static void
-arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
-{
-  rtx operands[2];
-
-  operands[0] = d;
-  operands[1] = s;
-  arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
-}
-
-/* Helper to emit a three operand instruction.  */
-static void
-arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
-{
-  rtx operands[3];
-
-  operands[0] = d;
-  operands[1] = a;
-  operands[2] = b;
-  arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
-}
-
-/* Emit a load store exclusive synchronization loop.
-
-   do
-     old_value = [mem]
-     if old_value != required_value
-       break;
-     t1 = sync_op (old_value, new_value)
-     [mem] = t1, t2 = [0|1]
-   while ! t2
-
-   Note:
-     t1 == t2 is not permitted
-     t1 == old_value is permitted
-
-   required_value:
-
-   RTX register representing the required old_value for
-   the modify to continue, if NULL no comparsion is performed.  */
-static void
-arm_output_sync_loop (emit_f emit,
-		      enum machine_mode mode,
-		      rtx old_value,
-		      rtx memory,
-		      rtx required_value,
-		      rtx new_value,
-		      rtx t1,
-		      rtx t2,
-		      enum attr_sync_op sync_op,
-		      int early_barrier_required)
-{
-  rtx operands[2];
-  /* We'll use the lo for the normal rtx in the none-DI case
-     as well as the least-sig word in the DI case.  */
-  rtx old_value_lo, required_value_lo, new_value_lo, t1_lo;
-  rtx old_value_hi, required_value_hi, new_value_hi, t1_hi;
-
-  bool is_di = mode == DImode;
-
-  gcc_assert (t1 != t2);
-
-  if (early_barrier_required)
-    arm_process_output_memory_barrier (emit, NULL);
-
-  arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
-
-  arm_output_ldrex (emit, mode, old_value, memory);
-
-  if (is_di)
-    {
-      old_value_lo = gen_lowpart (SImode, old_value);
-      old_value_hi = gen_highpart (SImode, old_value);
-      if (required_value)
-	{
-	  required_value_lo = gen_lowpart (SImode, required_value);
-	  required_value_hi = gen_highpart (SImode, required_value);
-	}
-      else
-	{
-	  /* Silence false potentially unused warning.  */
-	  required_value_lo = NULL_RTX;
-	  required_value_hi = NULL_RTX;
-	}
-      new_value_lo = gen_lowpart (SImode, new_value);
-      new_value_hi = gen_highpart (SImode, new_value);
-      t1_lo = gen_lowpart (SImode, t1);
-      t1_hi = gen_highpart (SImode, t1);
-    }
-  else
-    {
-      old_value_lo = old_value;
-      new_value_lo = new_value;
-      required_value_lo = required_value;
-      t1_lo = t1;
-
-      /* Silence false potentially unused warning.  */
-      t1_hi = NULL_RTX;
-      new_value_hi = NULL_RTX;
-      required_value_hi = NULL_RTX;
-      old_value_hi = NULL_RTX;
-    }
-
-  if (required_value)
-    {
-      operands[0] = old_value_lo;
-      operands[1] = required_value_lo;
-
-      arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
-      if (is_di)
-        {
-          arm_output_it (emit, "", "eq");
-          arm_output_op2 (emit, "cmpeq", old_value_hi, required_value_hi);
-        }
-      arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
-    }
-
-  switch (sync_op)
-    {
-    case SYNC_OP_ADD:
-      arm_output_op3 (emit, is_di ? "adds" : "add",
-		      t1_lo, old_value_lo, new_value_lo);
-      if (is_di)
-	arm_output_op3 (emit, "adc", t1_hi, old_value_hi, new_value_hi);
-      break;
-
-    case SYNC_OP_SUB:
-      arm_output_op3 (emit, is_di ? "subs" : "sub",
-		      t1_lo, old_value_lo, new_value_lo);
-      if (is_di)
-	arm_output_op3 (emit, "sbc", t1_hi, old_value_hi, new_value_hi);
-      break;
-
-    case SYNC_OP_IOR:
-      arm_output_op3 (emit, "orr", t1_lo, old_value_lo, new_value_lo);
-      if (is_di)
-	arm_output_op3 (emit, "orr", t1_hi, old_value_hi, new_value_hi);
-      break;
-
-    case SYNC_OP_XOR:
-      arm_output_op3 (emit, "eor", t1_lo, old_value_lo, new_value_lo);
-      if (is_di)
-	arm_output_op3 (emit, "eor", t1_hi, old_value_hi, new_value_hi);
-      break;
-
-    case SYNC_OP_AND:
-      arm_output_op3 (emit,"and", t1_lo, old_value_lo, new_value_lo);
-      if (is_di)
-	arm_output_op3 (emit, "and", t1_hi, old_value_hi, new_value_hi);
-      break;
-
-    case SYNC_OP_NAND:
-      arm_output_op3 (emit, "and", t1_lo, old_value_lo, new_value_lo);
-      if (is_di)
-	arm_output_op3 (emit, "and", t1_hi, old_value_hi, new_value_hi);
-      arm_output_op2 (emit, "mvn", t1_lo, t1_lo);
-      if (is_di)
-	arm_output_op2 (emit, "mvn", t1_hi, t1_hi);
-      break;
-
-    case SYNC_OP_NONE:
-      t1 = new_value;
-      t1_lo = new_value_lo;
-      if (is_di)
-	t1_hi = new_value_hi;
-      break;
-    }
-
-  /* Note that the result of strex is a 0/1 flag that's always 1 register.  */
-  if (t2)
-    {
-      arm_output_strex (emit, mode, "", t2, t1, memory);
-      operands[0] = t2;
-      arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
-      arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
-			   LOCAL_LABEL_PREFIX);
-    }
-  else
-    {
-      /* Use old_value for the return value because for some operations
-	 the old_value can easily be restored.  This saves one register.  */
-      arm_output_strex (emit, mode, "", old_value_lo, t1, memory);
-      operands[0] = old_value_lo;
-      arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
-      arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
-			   LOCAL_LABEL_PREFIX);
-
-      /* Note that we only used the _lo half of old_value as a temporary
-	 so in DI we don't have to restore the _hi part.  */
-      switch (sync_op)
-	{
-	case SYNC_OP_ADD:
-	  arm_output_op3 (emit, "sub", old_value_lo, t1_lo, new_value_lo);
-	  break;
-
-	case SYNC_OP_SUB:
-	  arm_output_op3 (emit, "add", old_value_lo, t1_lo, new_value_lo);
-	  break;
-
-	case SYNC_OP_XOR:
-	  arm_output_op3 (emit, "eor", old_value_lo, t1_lo, new_value_lo);
-	  break;
-
-	case SYNC_OP_NONE:
-	  arm_output_op2 (emit, "mov", old_value_lo, required_value_lo);
-	  break;
-
-	default:
-	  gcc_unreachable ();
-	}
-    }
-
-  /* Note: label is before barrier so that in cmp failure case we still get
-     a barrier to stop subsequent loads floating upwards past the ldrex
-     PR target/48126.  */
-  arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
-  arm_process_output_memory_barrier (emit, NULL);
-}
-
-static rtx
-arm_get_sync_operand (rtx *operands, int index, rtx default_value)
-{
-  if (index > 0)
-    default_value = operands[index - 1];
-
-  return default_value;
-}
-
-#define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
-  arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
-
-/* Extract the operands for a synchroniztion instruction from the
-   instructions attributes and emit the instruction.  */
-static void
-arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
-{
-  rtx result, memory, required_value, new_value, t1, t2;
-  int early_barrier;
-  enum machine_mode mode;
-  enum attr_sync_op sync_op;
-
-  result = FETCH_SYNC_OPERAND(result, 0);
-  memory = FETCH_SYNC_OPERAND(memory, 0);
-  required_value = FETCH_SYNC_OPERAND(required_value, 0);
-  new_value = FETCH_SYNC_OPERAND(new_value, 0);
-  t1 = FETCH_SYNC_OPERAND(t1, 0);
-  t2 = FETCH_SYNC_OPERAND(t2, 0);
-  early_barrier =
-    get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
-  sync_op = get_attr_sync_op (insn);
-  mode = GET_MODE (memory);
-
-  arm_output_sync_loop (emit, mode, result, memory, required_value,
-			new_value, t1, t2, sync_op, early_barrier);
-}
-
-/* Emit a synchronization instruction loop.  */
-const char *
-arm_output_sync_insn (rtx insn, rtx *operands)
-{
-  arm_process_output_sync_insn (arm_emit, insn, operands);
-  return "";
-}
-
-/* Count the number of machine instruction that will be emitted for a
-   synchronization instruction.  Note that the emitter used does not
-   emit instructions, it just counts instructions being carefull not
-   to count labels.  */
-unsigned int
-arm_sync_loop_insns (rtx insn, rtx *operands)
-{
-  arm_insn_count = 0;
-  arm_process_output_sync_insn (arm_count, insn, operands);
-  return arm_insn_count;
-}
-
-/* Helper to call a target sync instruction generator, dealing with
-   the variation in operands required by the different generators.  */
-static rtx
-arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
-  		    rtx memory, rtx required_value, rtx new_value)
-{
-  switch (generator->op)
-    {
-    case arm_sync_generator_omn:
-      gcc_assert (! required_value);
-      return generator->u.omn (old_value, memory, new_value);
-
-    case arm_sync_generator_omrn:
-      gcc_assert (required_value);
-      return generator->u.omrn (old_value, memory, required_value, new_value);
-    }
-
-  return NULL;
-}
-
-/* Expand a synchronization loop. The synchronization loop is expanded
-   as an opaque block of instructions in order to ensure that we do
-   not subsequently get extraneous memory accesses inserted within the
-   critical region. The exclusive access property of ldrex/strex is
-   only guaranteed in there are no intervening memory accesses. */
-void
-arm_expand_sync (enum machine_mode mode,
-		 struct arm_sync_generator *generator,
-		 rtx target, rtx memory, rtx required_value, rtx new_value)
-{
-  if (target == NULL)
-    target = gen_reg_rtx (mode);
-
-  memory = arm_legitimize_sync_memory (memory);
-  if (mode != SImode && mode != DImode)
-    {
-      rtx load_temp = gen_reg_rtx (SImode);
-
-      if (required_value)
-	required_value = convert_modes (SImode, mode, required_value, true);
-
-      new_value = convert_modes (SImode, mode, new_value, true);
-      emit_insn (arm_call_generator (generator, load_temp, memory,
-				     required_value, new_value));
-      emit_move_insn (target, gen_lowpart (mode, load_temp));
-    }
-  else
-    {
-      emit_insn (arm_call_generator (generator, target, memory, required_value,
-				     new_value));
-    }
-}
-
 static unsigned int
 arm_autovectorize_vector_sizes (void)
 {
@@ -24982,4 +24468,306 @@  arm_count_output_move_double_insns (rtx *operands)
   return count;
 }
 
+/* Emit a memory barrier around an atomic sequence according to MODEL.  */
+
+static void
+arm_pre_atomic_barrier (enum memmodel model)
+{
+  /* A leading barrier is wanted for the release-class models: RELEASE,
+     ACQ_REL and SEQ_CST.  */
+  bool release_p = (model == MEMMODEL_RELEASE
+		    || model == MEMMODEL_ACQ_REL
+		    || model == MEMMODEL_SEQ_CST);
+
+  if (release_p)
+    emit_insn (gen_memory_barrier ());
+  /* RELAXED, CONSUME and ACQUIRE need no leading barrier; any other
+     value is a compiler bug.  */
+  else if (model != MEMMODEL_RELAXED
+	   && model != MEMMODEL_CONSUME
+	   && model != MEMMODEL_ACQUIRE)
+    gcc_unreachable ();
+}
+
+static void
+arm_post_atomic_barrier (enum memmodel model)
+{
+  /* A trailing barrier is wanted for the acquire-class models: ACQUIRE,
+     ACQ_REL and SEQ_CST.  */
+  bool acquire_p = (model == MEMMODEL_ACQUIRE
+		    || model == MEMMODEL_ACQ_REL
+		    || model == MEMMODEL_SEQ_CST);
+
+  if (acquire_p)
+    emit_insn (gen_memory_barrier ());
+  /* RELAXED, CONSUME and RELEASE need no trailing barrier; any other
+     value is a compiler bug.  */
+  else if (model != MEMMODEL_RELAXED
+	   && model != MEMMODEL_CONSUME
+	   && model != MEMMODEL_RELEASE)
+    gcc_unreachable ();
+}
+
+/* Emit the load-exclusive and store-exclusive instructions.  */
+
+static void
+arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem)
+{
+  rtx insn;
+  /* There is one load-exclusive generator per supported integer mode.  */
+  if (mode == QImode)
+    insn = gen_arm_load_exclusiveqi (rval, mem);
+  else if (mode == HImode)
+    insn = gen_arm_load_exclusivehi (rval, mem);
+  else if (mode == SImode)
+    insn = gen_arm_load_exclusivesi (rval, mem);
+  else if (mode == DImode)
+    insn = gen_arm_load_exclusivedi (rval, mem);
+  else
+    gcc_unreachable ();
+  emit_insn (insn);
+}
+
+static void
+arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem)
+{
+  rtx insn;
+  /* NOTE(review): callers pass the MEM as RVAL and the new value as MEM.  */
+  if (mode == QImode)
+    insn = gen_arm_store_exclusiveqi (bval, rval, mem);
+  else if (mode == HImode)
+    insn = gen_arm_store_exclusivehi (bval, rval, mem);
+  else if (mode == SImode)
+    insn = gen_arm_store_exclusivesi (bval, rval, mem);
+  else if (mode == DImode)
+    insn = gen_arm_store_exclusivedi (bval, rval, mem);
+  else
+    gcc_unreachable ();
+  emit_insn (insn);
+}
+
+/* Emit INSN as a jump and attach a REG_BR_PROB note of just under 1%.  */
+
+static void
+emit_unlikely_jump (rtx insn)
+{
+  rtx jump = emit_jump_insn (insn);
+  rtx prob = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
+
+  add_reg_note (jump, REG_BR_PROB, prob);
+}
+
+/* Expand a compare and swap pattern.  OPERANDS are: success flag, value
+   output, memory, expected value, new value, is-weak flag, success model,
+   failure model.  QImode and HImode values are compared in SImode.  */
+
+void
+arm_expand_compare_and_swap (rtx operands[])
+{
+  rtx bval = operands[0];
+  rtx rval = operands[1];
+  rtx mem = operands[2];
+  rtx oldval = operands[3];
+  rtx newval = operands[4];
+  rtx is_weak = operands[5];
+  rtx mod_s = operands[6];
+  rtx mod_f = operands[7];
+  enum machine_mode mode = GET_MODE (mem);
+  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
+
+  switch (mode)
+    {
+    case QImode:
+    case HImode:
+      /* For narrow modes, we're going to perform the comparison in SImode,
+	 so do the zero-extension now.  */
+      rval = gen_reg_rtx (SImode);
+      oldval = convert_modes (SImode, mode, oldval, true);
+      /* FALLTHRU */
+
+    case SImode:
+      /* Force the value into a register if needed.  We waited until after
+	 the zero-extension above to do this properly; OLDVAL is an SImode
+	 value by now even for narrow MODE, so check and force it as such.  */
+      if (!arm_add_operand (oldval, SImode))
+	oldval = force_reg (SImode, oldval);
+      break;
+
+    case DImode:
+      if (!cmpdi_operand (oldval, mode))
+	oldval = force_reg (mode, oldval);
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  switch (mode)
+    {
+    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
+    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
+    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
+    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
+    default:
+      gcc_unreachable ();
+    }
+
+  emit_insn (gen (bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
+
+  if (mode == QImode || mode == HImode)
+    emit_move_insn (operands[1], gen_lowpart (mode, rval));
+}
+
+/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
+   another memory store between the load-exclusive and store-exclusive can
+   reset the monitor from Exclusive to Open state.  This means we must wait
+   until after reload to split the pattern, lest we get a register spill in
+   the middle of the atomic sequence.  */
+
+void
+arm_split_compare_and_swap (rtx operands[])
+{
+  rtx bval, rval, mem, oldval, newval;
+  enum machine_mode mode;
+  enum memmodel mod_s, mod_f;
+  bool is_weak;
+  rtx label1, label2, x, cond;
+
+  bval = operands[0];
+  rval = operands[1];
+  mem = operands[2];
+  oldval = operands[3];
+  newval = operands[4];
+  is_weak = (operands[5] != const0_rtx);
+  mod_s = (enum memmodel) INTVAL (operands[6]);
+  mod_f = (enum memmodel) INTVAL (operands[7]);
+  mode = GET_MODE (mem);
+  /* BVAL stays zero on the path that exits because RVAL != OLDVAL.  */
+  emit_move_insn (bval, const0_rtx);
+
+  arm_pre_atomic_barrier (mod_s);
+
+  label1 = NULL_RTX;
+  if (!is_weak)
+    {
+      label1 = gen_label_rtx ();	/* Loop head, only for the strong form.  */
+      emit_label (label1);
+    }
+  label2 = gen_label_rtx ();		/* Exit taken when the compare fails.  */
+
+  arm_emit_load_exclusive (mode, rval, mem);
+  /* Exit through LABEL2 when the loaded value is not OLDVAL.  */
+  x = gen_rtx_NE (VOIDmode, rval, oldval);
+  if (mode == DImode)
+    x = gen_cbranchdi4 (x, rval, oldval, label2);
+  else
+    x = gen_cbranchsi4 (x, rval, oldval, label2);
+  emit_unlikely_jump (x);
+
+  arm_emit_store_exclusive (mode, bval, mem, newval);
+
+  /* Thumb1 does not have LDREX, so the insns emitted below only need
+     to be valid for TARGET_32BIT.  */
+  gcc_assert (TARGET_32BIT);
+  /* Flip the store-exclusive status in BVAL with an XOR against 1.  */
+  if (is_weak)
+    emit_insn (gen_xorsi3 (bval, bval, const1_rtx));
+  else
+    {
+      emit_insn (gen_xorsi3_compare0 (bval, bval, const1_rtx));
+      /* Strong form: go back to LABEL1 when the flipped result is zero.  */
+      cond = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
+      x = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
+      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
+      emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
+    }
+  /* A relaxed failure model lets the LABEL2 exit skip the post barrier.  */
+  if (mod_f != MEMMODEL_RELAXED)
+    emit_label (label2);
+
+  arm_post_atomic_barrier (mod_s);	/* Chosen by the success model.  */
+
+  if (mod_f == MEMMODEL_RELAXED)
+    emit_label (label2);
+}
+
+void
+arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
+		     rtx value, rtx model_rtx, rtx cond)
+{
+  enum memmodel model = (enum memmodel) INTVAL (model_rtx);
+  enum machine_mode mode = GET_MODE (mem);
+  enum machine_mode wmode = (mode == DImode ? DImode : SImode);
+  rtx label, x;
+  /* Emit the retry loop: load-exclusive, apply CODE, store-exclusive.  */
+  arm_pre_atomic_barrier (model);
+
+  label = gen_label_rtx ();
+  emit_label (label);
+  /* Operate in WMODE: view the outputs and VALUE in that mode.  */
+  if (new_out)
+    new_out = gen_lowpart (wmode, new_out);
+  if (old_out)
+    old_out = gen_lowpart (wmode, old_out);
+  else
+    old_out = new_out;	/* No separate old-value output: load into NEW_OUT.  */
+  value = simplify_gen_subreg (wmode, value, mode, 0);
+
+  arm_emit_load_exclusive (mode, old_out, mem);
+
+  switch (code)
+    {
+    case SET:
+      new_out = value;	/* Nothing to compute: VALUE itself is stored.  */
+      break;
+
+    case NOT:		/* Computes ~(OLD_OUT & VALUE) in two steps.  */
+      x = gen_rtx_AND (wmode, old_out, value);
+      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
+      x = gen_rtx_NOT (wmode, new_out);
+      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
+      break;
+
+    case MINUS:
+      if (CONST_INT_P (value))	/* Constant: add its negation instead.  */
+	{
+	  value = GEN_INT (-INTVAL (value));
+	  code = PLUS;
+	}
+      /* FALLTHRU */
+
+    case PLUS:
+      if (mode == DImode)
+	{
+	  /* DImode plus/minus need to clobber flags.  */
+	  /* The adddi3 and subdi3 patterns are incorrectly written so that
+	     they require matching operands, even when we could easily support
+	     three operands.  Thankfully, this can be fixed up post-splitting,
+	     as the individual add+adc patterns do accept three operands and
+	     post-reload cprop can make these moves go away.  */
+	  emit_move_insn (new_out, old_out);
+	  if (code == PLUS)
+	    x = gen_adddi3 (new_out, new_out, value);
+	  else
+	    x = gen_subdi3 (new_out, new_out, value);
+	  emit_insn (x);
+	  break;
+	}
+      /* FALLTHRU */
+
+    default:
+      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
+      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
+      break;
+    }
+
+  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out));
+  /* Retry from LABEL while the status written to COND is nonzero.  */
+  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
+
+  arm_post_atomic_barrier (model);
+}
+
 #include "gt-arm.h"
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 85e2b99..31f4856 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -123,24 +123,6 @@  enum target_cpus
 /* The processor for which instructions should be scheduled.  */
 extern enum processor_type arm_tune;
 
-enum arm_sync_generator_tag
-  {
-    arm_sync_generator_omn,
-    arm_sync_generator_omrn
-  };
-
-/* Wrapper to pass around a polymorphic pointer to a sync instruction
-   generator and.  */
-struct arm_sync_generator
-{
-  enum arm_sync_generator_tag op;
-  union
-  {
-    rtx (* omn) (rtx, rtx, rtx);
-    rtx (* omrn) (rtx, rtx, rtx, rtx);
-  } u;
-};
-
 typedef enum arm_cond_code
 {
   ARM_EQ = 0, ARM_NE, ARM_CS, ARM_CC, ARM_MI, ARM_PL, ARM_VS, ARM_VC,
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 8ec9b22..f006495 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -151,11 +151,11 @@ 
   VUNSPEC_WCMP_GT       ; Used by the iwMMXT WCMPGT instructions
   VUNSPEC_EH_RETURN     ; Use to override the return address for exception
                         ; handling.
-  VUNSPEC_SYNC_COMPARE_AND_SWAP    ; Represent an atomic compare swap.
-  VUNSPEC_SYNC_LOCK                ; Represent a sync_lock_test_and_set.
-  VUNSPEC_SYNC_OP                  ; Represent a sync_<op>
-  VUNSPEC_SYNC_NEW_OP              ; Represent a sync_new_<op>
-  VUNSPEC_SYNC_OLD_OP              ; Represent a sync_old_<op>
+  VUNSPEC_ATOMIC_CAS	; Represent an atomic compare swap.
+  VUNSPEC_ATOMIC_XCHG	; Represent an atomic exchange.
+  VUNSPEC_ATOMIC_OP	; Represent an atomic operation.
+  VUNSPEC_LL		; Represent a load-register-exclusive.
+  VUNSPEC_SC		; Represent a store-register-exclusive.
 ])
 
 ;;---------------------------------------------------------------------------
@@ -185,21 +185,9 @@ 
 (define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp"
   (const (symbol_ref "arm_fpu_attr")))
 
-(define_attr "sync_result"          "none,0,1,2,3,4,5" (const_string "none"))
-(define_attr "sync_memory"          "none,0,1,2,3,4,5" (const_string "none"))
-(define_attr "sync_required_value"  "none,0,1,2,3,4,5" (const_string "none"))
-(define_attr "sync_new_value"       "none,0,1,2,3,4,5" (const_string "none"))
-(define_attr "sync_t1"              "none,0,1,2,3,4,5" (const_string "none"))
-(define_attr "sync_t2"              "none,0,1,2,3,4,5" (const_string "none"))
-(define_attr "sync_release_barrier" "yes,no"           (const_string "yes"))
-(define_attr "sync_op"              "none,add,sub,ior,xor,and,nand"
-                                    (const_string "none"))
-
 ; LENGTH of an instruction (in bytes)
 (define_attr "length" ""
-  (cond [(not (eq_attr "sync_memory" "none"))
- 	   (symbol_ref "arm_sync_loop_insns (insn, operands) * 4")
-	] (const_int 4)))
+  (const_int 4))
 
 ; The architecture which supports the instruction (or alternative).
 ; This can be "a" for ARM, "t" for either of the Thumbs, "32" for
@@ -3074,7 +3062,7 @@ 
   [(set_attr "length" "2")
    (set_attr "conds" "set")])
 
-(define_insn "*xorsi3_compare0"
+(define_insn "xorsi3_compare0"
   [(set (reg:CC_NOOV CC_REGNUM)
 	(compare:CC_NOOV (xor:SI (match_operand:SI 1 "s_register_operand" "r")
 				 (match_operand:SI 2 "arm_rhs_operand" "rI"))
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index d8ce982..5215337 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -291,6 +291,11 @@ 
  (and (match_code "const_double")
       (match_test "TARGET_32BIT && TARGET_VFP_DOUBLE && vfp3_const_double_rtx (op)")))
 
+(define_memory_constraint "Ua"
+ "@internal
+  An address valid for loading/storing register exclusive"
+ (match_operand 0 "mem_noofs_operand"))	; accepts exactly what that predicate accepts
+
 (define_memory_constraint "Ut"
  "@internal
   In ARM/Thumb-2 state an address valid for loading/storing opaque structure
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 92eb004..e486404 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -764,3 +764,7 @@ 
 
 (define_special_predicate "add_operator"
   (match_code "plus"))
+
+(define_predicate "mem_noofs_operand"	; "no offset" memory operand
+  (and (match_code "mem")
+       (match_code "reg" "0")))		; sub-operand 0 of the MEM must be a REG
diff --git a/gcc/config/arm/sync.md b/gcc/config/arm/sync.md
index 40ee93c..124ebf0 100644
--- a/gcc/config/arm/sync.md
+++ b/gcc/config/arm/sync.md
@@ -1,5 +1,5 @@ 
 ;; Machine description for ARM processor synchronization primitives.
-;; Copyright (C) 2010 Free Software Foundation, Inc.
+;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
 ;; Written by Marcus Shawcroft (marcus.shawcroft@arm.com)
 ;; 64bit Atomics by Dave Gilbert (david.gilbert@linaro.org)
 ;;
@@ -19,11 +19,20 @@ 
 ;; along with GCC; see the file COPYING3.  If not see
 ;; <http://www.gnu.org/licenses/>.  */
 
-;; ARMV6 introduced ldrex and strex instruction. These instruction
-;; access SI width data. In order to implement synchronization
-;; primitives for the narrower QI and HI modes we insert appropriate
-;; AND/OR sequences into the synchronization loop to mask out the
-;; relevant component of an SI access.
+(define_mode_attr sync_predtab		; C condition string for each mode
+  [(QI "TARGET_HAVE_LDREXBH && TARGET_HAVE_MEMORY_BARRIER")
+   (HI "TARGET_HAVE_LDREXBH && TARGET_HAVE_MEMORY_BARRIER")
+   (SI "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER")
+   (DI "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN
+	&& TARGET_HAVE_MEMORY_BARRIER")])
+
+(define_code_iterator syncop [plus minus ior xor and])	; RTL codes iterated over
+
+(define_code_attr sync_optab		; name fragment for each syncop code
+  [(ior "ior") (xor "xor") (and "and") (plus "add") (minus "sub")])
+
+(define_mode_attr sync_sfx		; per-mode suffix; presumably for ldrex/strex mnemonics -- confirm at use sites
+  [(QI "b") (HI "h") (SI "") (DI "d")])
 
 (define_expand "memory_barrier"
   [(set (match_dup 0)
@@ -34,463 +43,308 @@ 
   MEM_VOLATILE_P (operands[0]) = 1;
 })
 
-
-(define_mode_attr sync_predtab [(SI "TARGET_HAVE_LDREX &&
-					TARGET_HAVE_MEMORY_BARRIER")
-				(QI "TARGET_HAVE_LDREXBH &&
-					TARGET_HAVE_MEMORY_BARRIER")
-				(HI "TARGET_HAVE_LDREXBH &&
-					TARGET_HAVE_MEMORY_BARRIER")
-				(DI "TARGET_HAVE_LDREXD &&
-					ARM_DOUBLEWORD_ALIGN &&
-					TARGET_HAVE_MEMORY_BARRIER")])
-
-(define_expand "sync_compare_and_swap<mode>"
-  [(set (match_operand:QHSD 0 "s_register_operand")
-        (unspec_volatile:QHSD [(match_operand:QHSD 1 "memory_operand")
-			     (match_operand:QHSD 2 "s_register_operand")
-			     (match_operand:QHSD 3 "s_register_operand")]
-			     VUNSPEC_SYNC_COMPARE_AND_SWAP))]
-  "<sync_predtab>"
-  {
-    struct arm_sync_generator generator;
-    generator.op = arm_sync_generator_omrn;
-    generator.u.omrn = gen_arm_sync_compare_and_swap<mode>;
-    arm_expand_sync (<MODE>mode, &generator, operands[0], operands[1],
-                     operands[2], operands[3]);
-    DONE;
-  })
-
-(define_expand "sync_lock_test_and_set<mode>"
-  [(match_operand:QHSD 0 "s_register_operand")
-   (match_operand:QHSD 1 "memory_operand")
-   (match_operand:QHSD 2 "s_register_operand")]
-  "<sync_predtab>"
+(define_insn "*memory_barrier"
+  [(set (match_operand:BLK 0 "" "")
+	(unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
+  "TARGET_HAVE_MEMORY_BARRIER"
   {
-    struct arm_sync_generator generator;
-    generator.op = arm_sync_generator_omn;
-    generator.u.omn = gen_arm_sync_lock_test_and_set<mode>;
-    arm_expand_sync (<MODE>mode, &generator, operands[0], operands[1], NULL,
-                     operands[2]);
-    DONE;
-  })
-
-(define_code_iterator syncop [plus minus ior xor and])
-
-(define_code_attr sync_optab [(ior "ior")
-			      (xor "xor")
-			      (and "and")
-			      (plus "add")
-			      (minus "sub")])
+    if (TARGET_HAVE_DMB)
+      {
+	/* Note we issue a system level barrier. We should consider issuing
+	   an inner shareability zone barrier here instead, i.e. "DMB ISH".  */
+	/* ??? Differentiate based on SEQ_CST vs less strict?  */
+	return "dmb\tsy";
+      }
 
-(define_code_attr sync_clobber [(ior "=&r")
-				(and "=&r")
-				(xor "X")
-				(plus "X")
-				(minus "X")])
+    if (TARGET_HAVE_DMB_MCR)
+      return "mcr\tp15, 0, r0, c7, c10, 5";
 
-(define_code_attr sync_t2_reqd [(ior "4")
-				(and "4")
-				(xor "*")
-				(plus "*")
-				(minus "*")])
-
-(define_expand "sync_<sync_optab><mode>"
-  [(match_operand:QHSD 0 "memory_operand")
-   (match_operand:QHSD 1 "s_register_operand")
-   (syncop:QHSD (match_dup 0) (match_dup 1))]
-  "<sync_predtab>"
-  {
-    struct arm_sync_generator generator;
-    generator.op = arm_sync_generator_omn;
-    generator.u.omn = gen_arm_sync_new_<sync_optab><mode>;
-    arm_expand_sync (<MODE>mode, &generator, NULL, operands[0], NULL,
-		     operands[1]);
-    DONE;
-  })
+    gcc_unreachable ();
+  }
+  [(set_attr "length" "4")
+   (set_attr "conds" "unconditional")
+   (set_attr "predicable" "no")])
 
-(define_expand "sync_nand<mode>"
-  [(match_operand:QHSD 0 "memory_operand")
-   (match_operand:QHSD 1 "s_register_operand")
-   (not:QHSD (and:QHSD (match_dup 0) (match_dup 1)))]
+(define_expand "atomic_compare_and_swap<mode>"
+  [(match_operand:SI 0 "s_register_operand" "")		;; bool out
+   (match_operand:QHSD 1 "s_register_operand" "")	;; val out
+   (match_operand:QHSD 2 "mem_noofs_operand" "")	;; memory
+   (match_operand:QHSD 3 "general_operand" "")		;; expected
+   (match_operand:QHSD 4 "s_register_operand" "")	;; desired
+   (match_operand:SI 5 "const_int_operand")		;; is_weak
+   (match_operand:SI 6 "const_int_operand")		;; mod_s
+   (match_operand:SI 7 "const_int_operand")]		;; mod_f
   "<sync_predtab>"
-  {
-    struct arm_sync_generator generator;
-    generator.op = arm_sync_generator_omn;
-    generator.u.omn = gen_arm_sync_new_nand<mode>;
-    arm_expand_sync (<MODE>mode, &generator, NULL, operands[0], NULL,
-                     operands[1]);
-    DONE;
-  })
+{
+  arm_expand_compare_and_swap (operands);
+  DONE;
+})
 
-(define_expand "sync_new_<sync_optab><mode>"
-  [(match_operand:QHSD 0 "s_register_operand")
-   (match_operand:QHSD 1 "memory_operand")
-   (match_operand:QHSD 2 "s_register_operand")
-   (syncop:QHSD (match_dup 1) (match_dup 2))]
+(define_insn_and_split "atomic_compare_and_swap<mode>_1"
+  [(set (match_operand:SI 0 "s_register_operand" "=&r")		;; bool out
+	(unspec_volatile:SI [(const_int 0)] VUNSPEC_ATOMIC_CAS))
+   (set (match_operand:SI 1 "s_register_operand" "=&r")		;; val out
+	(zero_extend:SI
+	  (match_operand:NARROW 2 "mem_noofs_operand" "+Ua")))	;; memory
+   (set (match_dup 2)
+	(unspec_volatile:NARROW
+	  [(match_operand:SI 3 "arm_add_operand" "rIL")		;; expected
+	   (match_operand:NARROW 4 "s_register_operand" "r")	;; desired
+	   (match_operand:SI 5 "const_int_operand")		;; is_weak
+	   (match_operand:SI 6 "const_int_operand")		;; mod_s
+	   (match_operand:SI 7 "const_int_operand")]		;; mod_f
+	  VUNSPEC_ATOMIC_CAS))
+   (clobber (reg:CC CC_REGNUM))]
   "<sync_predtab>"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
   {
-    struct arm_sync_generator generator;
-    generator.op = arm_sync_generator_omn;
-    generator.u.omn = gen_arm_sync_new_<sync_optab><mode>;
-    arm_expand_sync (<MODE>mode, &generator, operands[0], operands[1],
-		     NULL, operands[2]);
+    arm_split_compare_and_swap (operands);
     DONE;
   })
 
-(define_expand "sync_new_nand<mode>"
-  [(match_operand:QHSD 0 "s_register_operand")
-   (match_operand:QHSD 1 "memory_operand")
-   (match_operand:QHSD 2 "s_register_operand")
-   (not:QHSD (and:QHSD (match_dup 1) (match_dup 2)))]
-  "<sync_predtab>"
-  {
-    struct arm_sync_generator generator;
-    generator.op = arm_sync_generator_omn;
-    generator.u.omn = gen_arm_sync_new_nand<mode>;
-    arm_expand_sync (<MODE>mode, &generator, operands[0], operands[1],
-    		     NULL, operands[2]);
-    DONE;
-  });
+(define_mode_attr cas_cmp_operand
+  [(SI "arm_add_operand") (DI "cmpdi_operand")])
+(define_mode_attr cas_cmp_str
+  [(SI "rIL") (DI "rDi")])
 
-(define_expand "sync_old_<sync_optab><mode>"
-  [(match_operand:QHSD 0 "s_register_operand")
-   (match_operand:QHSD 1 "memory_operand")
-   (match_operand:QHSD 2 "s_register_operand")
-   (syncop:QHSD (match_dup 1) (match_dup 2))]
+(define_insn_and_split "atomic_compare_and_swap<mode>_1"
+  [(set (match_operand:SI 0 "s_register_operand" "=&r")		;; bool out
+	(unspec_volatile:SI [(const_int 0)] VUNSPEC_ATOMIC_CAS))
+   (set (match_operand:SIDI 1 "s_register_operand" "=&r")	;; val out
+	(match_operand:SIDI 2 "mem_noofs_operand" "+Ua"))	;; memory
+   (set (match_dup 2)
+	(unspec_volatile:SIDI
+	  [(match_operand:SIDI 3 "<cas_cmp_operand>" "<cas_cmp_str>") ;; expect
+	   (match_operand:SIDI 4 "s_register_operand" "r")	;; desired
+	   (match_operand:SI 5 "const_int_operand")		;; is_weak
+	   (match_operand:SI 6 "const_int_operand")		;; mod_s
+	   (match_operand:SI 7 "const_int_operand")]		;; mod_f
+	  VUNSPEC_ATOMIC_CAS))
+   (clobber (reg:CC CC_REGNUM))]
   "<sync_predtab>"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
   {
-    struct arm_sync_generator generator;
-    generator.op = arm_sync_generator_omn;
-    generator.u.omn = gen_arm_sync_old_<sync_optab><mode>;
-    arm_expand_sync (<MODE>mode, &generator, operands[0], operands[1],
-		     NULL, operands[2]);
+    arm_split_compare_and_swap (operands);
     DONE;
   })
 
-(define_expand "sync_old_nand<mode>"
-  [(match_operand:QHSD 0 "s_register_operand")
-   (match_operand:QHSD 1 "memory_operand")
-   (match_operand:QHSD 2 "s_register_operand")
-   (not:QHSD (and:QHSD (match_dup 1) (match_dup 2)))]
+(define_insn_and_split "atomic_exchange<mode>"
+  [(set (match_operand:QHSD 0 "s_register_operand" "=&r")	;; output
+	(match_operand:QHSD 1 "mem_noofs_operand" "+Ua"))	;; memory
+   (set (match_dup 1)
+	(unspec_volatile:QHSD
+	  [(match_operand:QHSD 2 "s_register_operand" "r")	;; input
+	   (match_operand:SI 3 "const_int_operand" "")]		;; model
+	  VUNSPEC_ATOMIC_XCHG))
+   (clobber (reg:CC CC_REGNUM))
+   (clobber (match_scratch:SI 4 "=&r"))]
   "<sync_predtab>"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
   {
-    struct arm_sync_generator generator;
-    generator.op = arm_sync_generator_omn;
-    generator.u.omn = gen_arm_sync_old_nand<mode>;
-    arm_expand_sync (<MODE>mode, &generator, operands[0], operands[1],
-                     NULL, operands[2]);
+    arm_split_atomic_op (SET, operands[0], NULL, operands[1],
+			 operands[2], operands[3], operands[4]);
     DONE;
   })
 
-(define_insn "arm_sync_compare_and_swap<mode>"
-  [(set (match_operand:SIDI 0 "s_register_operand" "=&r")
-        (unspec_volatile:SIDI
-	 [(match_operand:SIDI 1 "arm_sync_memory_operand" "+Q")
-	  (match_operand:SIDI 2 "s_register_operand" "r")
-	  (match_operand:SIDI 3 "s_register_operand" "r")]
-	 VUNSPEC_SYNC_COMPARE_AND_SWAP))
-   (set (match_dup 1) (unspec_volatile:SIDI [(match_dup 2)]
-                                          VUNSPEC_SYNC_COMPARE_AND_SWAP))
-   (set (reg:CC CC_REGNUM) (unspec_volatile:CC [(match_dup 1)]
-                                                VUNSPEC_SYNC_COMPARE_AND_SWAP))
-   ]
-  "<sync_predtab>"
-  {
-    return arm_output_sync_insn (insn, operands);
-  }
-  [(set_attr "sync_result"          "0")
-   (set_attr "sync_memory"          "1")
-   (set_attr "sync_required_value"  "2")
-   (set_attr "sync_new_value"       "3")
-   (set_attr "sync_t1"              "0")
-   (set_attr "conds" "clob")
-   (set_attr "predicable" "no")])
+(define_mode_attr atomic_op_operand
+  [(QI "reg_or_int_operand")
+   (HI "reg_or_int_operand")
+   (SI "reg_or_int_operand")
+   (DI "s_register_operand")])
 
-(define_insn "arm_sync_compare_and_swap<mode>"
-  [(set (match_operand:SI 0 "s_register_operand" "=&r")
-        (zero_extend:SI
-	  (unspec_volatile:NARROW
-	    [(match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")
-	     (match_operand:SI 2 "s_register_operand" "r")
-	     (match_operand:SI 3 "s_register_operand" "r")]
-	    VUNSPEC_SYNC_COMPARE_AND_SWAP)))
-   (set (match_dup 1) (unspec_volatile:NARROW [(match_dup 2)]
-                                          VUNSPEC_SYNC_COMPARE_AND_SWAP))
-   (set (reg:CC CC_REGNUM) (unspec_volatile:CC [(match_dup 1)]
-                                                VUNSPEC_SYNC_COMPARE_AND_SWAP))
-   ]
-  "<sync_predtab>"
-  {
-    return arm_output_sync_insn (insn, operands);
-  }
-  [(set_attr "sync_result"          "0")
-   (set_attr "sync_memory"          "1")
-   (set_attr "sync_required_value"  "2")
-   (set_attr "sync_new_value"       "3")
-   (set_attr "sync_t1"              "0")
-   (set_attr "conds" "clob")
-   (set_attr "predicable" "no")])
+(define_mode_attr atomic_op_str
+  [(QI "rn") (HI "rn") (SI "rn") (DI "r")])
 
-(define_insn "arm_sync_lock_test_and_set<mode>"
-  [(set (match_operand:SIDI 0 "s_register_operand" "=&r")
-	(match_operand:SIDI 1 "arm_sync_memory_operand" "+Q"))
-   (set (match_dup 1)
-	(unspec_volatile:SIDI [(match_operand:SIDI 2 "s_register_operand" "r")]
-	VUNSPEC_SYNC_LOCK))
+(define_insn_and_split "atomic_<sync_optab><mode>"
+  [(set (match_operand:QHSD 0 "mem_noofs_operand" "+Ua")
+	(unspec_volatile:QHSD
+	  [(syncop:QHSD (match_dup 0)
+	     (match_operand:QHSD 1 "<atomic_op_operand>" "<atomic_op_str>"))
+	   (match_operand:SI 2 "const_int_operand")]		;; model
+	  VUNSPEC_ATOMIC_OP))
    (clobber (reg:CC CC_REGNUM))
-   (clobber (match_scratch:SI 3 "=&r"))]
+   (clobber (match_scratch:QHSD 3 "=&r"))
+   (clobber (match_scratch:SI 4 "=&r"))]
   "<sync_predtab>"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
   {
-    return arm_output_sync_insn (insn, operands);
-  }
-  [(set_attr "sync_release_barrier" "no")
-   (set_attr "sync_result"          "0")
-   (set_attr "sync_memory"          "1")
-   (set_attr "sync_new_value"       "2")
-   (set_attr "sync_t1"              "0")
-   (set_attr "sync_t2"              "3")
-   (set_attr "conds" "clob")
-   (set_attr "predicable" "no")])
+    arm_split_atomic_op (<CODE>, NULL, operands[3], operands[0],
+			 operands[1], operands[2], operands[4]);
+    DONE;
+  })
 
-(define_insn "arm_sync_lock_test_and_set<mode>"
-  [(set (match_operand:SI 0 "s_register_operand" "=&r")
-        (zero_extend:SI (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")))
-   (set (match_dup 1)
-        (unspec_volatile:NARROW [(match_operand:SI 2 "s_register_operand" "r")]
-				VUNSPEC_SYNC_LOCK))
+(define_insn_and_split "atomic_nand<mode>"
+  [(set (match_operand:QHSD 0 "mem_noofs_operand" "+Ua")
+	(unspec_volatile:QHSD
+	  [(not:QHSD
+	     (and:QHSD (match_dup 0)
+	       (match_operand:QHSD 1 "<atomic_op_operand>" "<atomic_op_str>")))
+	   (match_operand:SI 2 "const_int_operand")]		;; model
+	  VUNSPEC_ATOMIC_OP))
    (clobber (reg:CC CC_REGNUM))
-   (clobber (match_scratch:SI 3 "=&r"))]
+   (clobber (match_scratch:QHSD 3 "=&r"))
+   (clobber (match_scratch:SI 4 "=&r"))]
   "<sync_predtab>"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
   {
-    return arm_output_sync_insn (insn, operands);
-  } 
-  [(set_attr "sync_release_barrier" "no")
-   (set_attr "sync_result"          "0")
-   (set_attr "sync_memory"          "1")
-   (set_attr "sync_new_value"       "2")
-   (set_attr "sync_t1"              "0")
-   (set_attr "sync_t2"              "3")
-   (set_attr "conds" "clob")
-   (set_attr "predicable" "no")])
+    arm_split_atomic_op (NOT, NULL, operands[3], operands[0],
+			 operands[1], operands[2], operands[4]);
+    DONE;
+  })
 
-(define_insn "arm_sync_new_<sync_optab><mode>"
-  [(set (match_operand:SIDI 0 "s_register_operand" "=&r")
-        (unspec_volatile:SIDI [(syncop:SIDI
-			       (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q")
-			       (match_operand:SIDI 2 "s_register_operand" "r"))
-			    ]
-			    VUNSPEC_SYNC_NEW_OP))
+(define_insn_and_split "atomic_fetch_<sync_optab><mode>"
+  [(set (match_operand:QHSD 0 "s_register_operand" "=&r")
+	(match_operand:QHSD 1 "mem_noofs_operand" "+Ua"))
    (set (match_dup 1)
-	(unspec_volatile:SIDI [(match_dup 1) (match_dup 2)]
-			    VUNSPEC_SYNC_NEW_OP))
+	(unspec_volatile:QHSD
+	  [(syncop:QHSD (match_dup 1)
+	     (match_operand:QHSD 2 "<atomic_op_operand>" "<atomic_op_str>"))
+	   (match_operand:SI 3 "const_int_operand")]		;; model
+	  VUNSPEC_ATOMIC_OP))
    (clobber (reg:CC CC_REGNUM))
-   (clobber (match_scratch:SI 3 "=&r"))]
+   (clobber (match_scratch:QHSD 4 "=&r"))
+   (clobber (match_scratch:SI 5 "=&r"))]
   "<sync_predtab>"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
   {
-    return arm_output_sync_insn (insn, operands);
-  }
-  [(set_attr "sync_result"          "0")
-   (set_attr "sync_memory"          "1")
-   (set_attr "sync_new_value"       "2")
-   (set_attr "sync_t1"              "0")
-   (set_attr "sync_t2"              "3")
-   (set_attr "sync_op"              "<sync_optab>")
-   (set_attr "conds" "clob")
-   (set_attr "predicable" "no")])
+    arm_split_atomic_op (<CODE>, operands[0], operands[4], operands[1],
+			 operands[2], operands[3], operands[5]);
+    DONE;
+  })
 
-(define_insn "arm_sync_new_<sync_optab><mode>"
-  [(set (match_operand:SI 0 "s_register_operand" "=&r")
-        (unspec_volatile:SI [(syncop:SI
-			       (zero_extend:SI
-				 (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
-			       (match_operand:SI 2 "s_register_operand" "r"))
-			    ]
-			    VUNSPEC_SYNC_NEW_OP))
+(define_insn_and_split "atomic_fetch_nand<mode>"
+  [(set (match_operand:QHSD 0 "s_register_operand" "=&r")
+	(match_operand:QHSD 1 "mem_noofs_operand" "+Ua"))
    (set (match_dup 1)
-	(unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
-				VUNSPEC_SYNC_NEW_OP))
+	(unspec_volatile:QHSD
+	  [(not:QHSD
+	     (and:QHSD (match_dup 1)
+	       (match_operand:QHSD 2 "<atomic_op_operand>" "<atomic_op_str>")))
+	   (match_operand:SI 3 "const_int_operand")]		;; model
+	  VUNSPEC_ATOMIC_OP))
    (clobber (reg:CC CC_REGNUM))
-   (clobber (match_scratch:SI 3 "=&r"))]
+   (clobber (match_scratch:QHSD 4 "=&r"))
+   (clobber (match_scratch:SI 5 "=&r"))]
   "<sync_predtab>"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
   {
-    return arm_output_sync_insn (insn, operands);
-  }
-  [(set_attr "sync_result"          "0")
-   (set_attr "sync_memory"          "1")
-   (set_attr "sync_new_value"       "2")
-   (set_attr "sync_t1"              "0")
-   (set_attr "sync_t2"              "3")
-   (set_attr "sync_op"              "<sync_optab>")
-   (set_attr "conds" "clob")
-   (set_attr "predicable" "no")])
+    arm_split_atomic_op (NOT, operands[0], operands[4], operands[1],
+			 operands[2], operands[3], operands[5]);
+    DONE;
+  })
 
-(define_insn "arm_sync_new_nand<mode>"
-  [(set (match_operand:SIDI 0 "s_register_operand" "=&r")
-        (unspec_volatile:SIDI [(not:SIDI (and:SIDI
-			       (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q")
-			       (match_operand:SIDI 2 "s_register_operand" "r")))
-			    ]
-			    VUNSPEC_SYNC_NEW_OP))
+(define_insn_and_split "atomic_<sync_optab>_fetch<mode>"
+  [(set (match_operand:QHSD 0 "s_register_operand" "=&r")
+	(syncop:QHSD
+	  (match_operand:QHSD 1 "mem_noofs_operand" "+Ua")
+	  (match_operand:QHSD 2 "<atomic_op_operand>" "<atomic_op_str>")))
    (set (match_dup 1)
-	(unspec_volatile:SIDI [(match_dup 1) (match_dup 2)]
-			    VUNSPEC_SYNC_NEW_OP))
+	(unspec_volatile:QHSD
+	  [(match_dup 1) (match_dup 2)
+	   (match_operand:SI 3 "const_int_operand")]		;; model
+	  VUNSPEC_ATOMIC_OP))
    (clobber (reg:CC CC_REGNUM))
-   (clobber (match_scratch:SI 3 "=&r"))]
+   (clobber (match_scratch:SI 4 "=&r"))]
   "<sync_predtab>"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
   {
-    return arm_output_sync_insn (insn, operands);
-  }
-  [(set_attr "sync_result"          "0")
-   (set_attr "sync_memory"          "1")
-   (set_attr "sync_new_value"       "2")
-   (set_attr "sync_t1"              "0")
-   (set_attr "sync_t2"              "3")
-   (set_attr "sync_op"              "nand")
-   (set_attr "conds" "clob")
-   (set_attr "predicable" "no")])
+    arm_split_atomic_op (<CODE>, NULL, operands[0], operands[1],
+			 operands[2], operands[3], operands[4]);
+    DONE;
+  })
 
-(define_insn "arm_sync_new_nand<mode>"
-  [(set (match_operand:SI 0 "s_register_operand" "=&r")
-        (unspec_volatile:SI
-	  [(not:SI
-	     (and:SI
-	       (zero_extend:SI
-		 (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
-	       (match_operand:SI 2 "s_register_operand" "r")))
-	  ] VUNSPEC_SYNC_NEW_OP))
+(define_insn_and_split "atomic_nand_fetch<mode>"
+  [(set (match_operand:QHSD 0 "s_register_operand" "=&r")
+	(not:QHSD
+	  (and:QHSD
+	    (match_operand:QHSD 1 "mem_noofs_operand" "+Ua")
+	    (match_operand:QHSD 2 "<atomic_op_operand>" "<atomic_op_str>"))))
    (set (match_dup 1)
-        (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
-				VUNSPEC_SYNC_NEW_OP))
+	(unspec_volatile:QHSD
+	  [(match_dup 1) (match_dup 2)
+	   (match_operand:SI 3 "const_int_operand")]		;; model
+	  VUNSPEC_ATOMIC_OP))
    (clobber (reg:CC CC_REGNUM))
-   (clobber (match_scratch:SI 3 "=&r"))]
+   (clobber (match_scratch:SI 4 "=&r"))]
   "<sync_predtab>"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
   {
-    return arm_output_sync_insn (insn, operands);
-  }
-  [(set_attr "sync_result"          "0")
-   (set_attr "sync_memory"          "1")
-   (set_attr "sync_new_value"       "2")
-   (set_attr "sync_t1"              "0")
-   (set_attr "sync_t2"              "3")
-   (set_attr "sync_op"              "nand")
-   (set_attr "conds" "clob")
-   (set_attr "predicable" "no")])
+    arm_split_atomic_op (NOT, NULL, operands[0], operands[1],
+			 operands[2], operands[3], operands[4]);
+    DONE;
+  })
 
-(define_insn "arm_sync_old_<sync_optab><mode>"
-  [(set (match_operand:SIDI 0 "s_register_operand" "=&r")
-	(unspec_volatile:SIDI [(syncop:SIDI
-			       (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q")
-			       (match_operand:SIDI 2 "s_register_operand" "r"))
-			    ]
-			    VUNSPEC_SYNC_OLD_OP))
-   (set (match_dup 1)
-        (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)]
-			      VUNSPEC_SYNC_OLD_OP))
-   (clobber (reg:CC CC_REGNUM))
-   (clobber (match_scratch:SIDI 3 "=&r"))
-   (clobber (match_scratch:SI 4 "<sync_clobber>"))]
-  "<sync_predtab>"
-  {
-    return arm_output_sync_insn (insn, operands);
-  } 
-  [(set_attr "sync_result"          "0")
-   (set_attr "sync_memory"          "1")
-   (set_attr "sync_new_value"       "2")
-   (set_attr "sync_t1"              "3")
-   (set_attr "sync_t2"              "<sync_t2_reqd>")
-   (set_attr "sync_op"              "<sync_optab>")
-   (set_attr "conds" "clob")
-   (set_attr "predicable" "no")])
+(define_insn "arm_load_exclusive<mode>"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+        (zero_extend:SI
+	  (unspec_volatile:NARROW
+	    [(match_operand:NARROW 1 "mem_noofs_operand" "Ua")]
+	    VUNSPEC_LL)))]
+  "TARGET_HAVE_LDREXBH"
+  "ldrex<sync_sfx>\t%0, %C1")
 
-(define_insn "arm_sync_old_<sync_optab><mode>"
-  [(set (match_operand:SI 0 "s_register_operand" "=&r")
-        (unspec_volatile:SI [(syncop:SI
-			       (zero_extend:SI
-				 (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
-			       (match_operand:SI 2 "s_register_operand" "r"))
-			    ]
-			    VUNSPEC_SYNC_OLD_OP))
-   (set (match_dup 1)
-	(unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
-			    VUNSPEC_SYNC_OLD_OP))
-   (clobber (reg:CC CC_REGNUM))
-   (clobber (match_scratch:SI 3 "=&r"))
-   (clobber (match_scratch:SI 4 "<sync_clobber>"))]
-  "<sync_predtab>"
-  {
-    return arm_output_sync_insn (insn, operands);
-  } 
-  [(set_attr "sync_result"          "0")
-   (set_attr "sync_memory"          "1")
-   (set_attr "sync_new_value"       "2")
-   (set_attr "sync_t1"              "3")
-   (set_attr "sync_t2"              "<sync_t2_reqd>")
-   (set_attr "sync_op"              "<sync_optab>")
-   (set_attr "conds" 		    "clob")
-   (set_attr "predicable" "no")])
+(define_insn "arm_load_exclusivesi"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+	(unspec_volatile:SI
+	  [(match_operand:SI 1 "mem_noofs_operand" "Ua")]
+	  VUNSPEC_LL))]
+  "TARGET_HAVE_LDREX"
+  "ldrex\t%0, %C1")
 
-(define_insn "arm_sync_old_nand<mode>"
-  [(set (match_operand:SIDI 0 "s_register_operand" "=&r")
-	(unspec_volatile:SIDI [(not:SIDI (and:SIDI
-			       (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q")
-			       (match_operand:SIDI 2 "s_register_operand" "r")))
-			    ]
-			    VUNSPEC_SYNC_OLD_OP))
-   (set (match_dup 1)
-        (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)]
-	                    VUNSPEC_SYNC_OLD_OP))
-   (clobber (reg:CC CC_REGNUM))
-   (clobber (match_scratch:SIDI 3 "=&r"))
-   (clobber (match_scratch:SI 4 "=&r"))]
-  "<sync_predtab>"
+(define_insn "arm_load_exclusivedi"
+  [(set (match_operand:DI 0 "s_register_operand" "=r")
+	(unspec_volatile:DI
+	  [(match_operand:DI 1 "mem_noofs_operand" "Ua")]
+	  VUNSPEC_LL))]
+  "TARGET_HAVE_LDREXD"
   {
-    return arm_output_sync_insn (insn, operands);
-  } 
-  [(set_attr "sync_result"          "0")
-   (set_attr "sync_memory"          "1")
-   (set_attr "sync_new_value"       "2")
-   (set_attr "sync_t1"              "3")
-   (set_attr "sync_t2"              "4")
-   (set_attr "sync_op"              "nand")
-   (set_attr "conds" 		    "clob")
-   (set_attr "predicable" "no")])
+    rtx target = operands[0];
+    /* The restrictions on target registers in ARM mode are that the two
+       registers are consecutive and the first one is even; Thumb is
+       actually more flexible, but DI should give us this anyway.
+       Note that the 1st register always gets the lowest word in memory.  */
+    gcc_assert ((REGNO (target) & 1) == 0);
+    operands[2] = gen_rtx_REG (SImode, REGNO (target) + 1);
+    return "ldrexd\t%0, %2, %C1";
+  })
 
-(define_insn "arm_sync_old_nand<mode>"
+(define_insn "arm_store_exclusive<mode>"
   [(set (match_operand:SI 0 "s_register_operand" "=&r")
-	(unspec_volatile:SI [(not:SI (and:SI
-			       (zero_extend:SI
-				 (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
-			       (match_operand:SI 2 "s_register_operand" "r")))
-			    ]
-			    VUNSPEC_SYNC_OLD_OP))
-   (set (match_dup 1)
-	(unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
-			    VUNSPEC_SYNC_OLD_OP))
-   (clobber (reg:CC CC_REGNUM))
-   (clobber (match_scratch:SI 3 "=&r"))
-   (clobber (match_scratch:SI 4 "=&r"))]
+	(unspec_volatile:SI [(const_int 0)] VUNSPEC_SC))
+   (set (match_operand:QHSD 1 "mem_noofs_operand" "=Ua")
+	(unspec_volatile:QHSD
+	  [(match_operand:QHSD 2 "s_register_operand" "r")]
+	  VUNSPEC_SC))]
   "<sync_predtab>"
   {
-    return arm_output_sync_insn (insn, operands);
-  } 
-  [(set_attr "sync_result"          "0")
-   (set_attr "sync_memory"          "1")
-   (set_attr "sync_new_value"       "2")
-   (set_attr "sync_t1"              "3")
-   (set_attr "sync_t2"              "4")
-   (set_attr "sync_op"              "nand")
-   (set_attr "conds"                "clob")
-   (set_attr "predicable" "no")])
-
-(define_insn "*memory_barrier"
-  [(set (match_operand:BLK 0 "" "")
-	(unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
-  "TARGET_HAVE_MEMORY_BARRIER"
-  {
-    return arm_output_memory_barrier (operands);
-  }
-  [(set_attr "length" "4")
-   (set_attr "conds" "unconditional")
-   (set_attr "predicable" "no")])
-
+    if (<MODE>mode == DImode)
+      {
+	rtx value = operands[2];
+	/* The restrictions on source registers in ARM mode are that the two
+	   registers are consecutive and the first one is even; Thumb is
+	   actually more flexible, but DI should give us this anyway.
+	   Note that the 1st register always gets the lowest word in memory.  */
+	gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2);
+	operands[3] = gen_rtx_REG (SImode, REGNO (value) + 1);
+	return "strexd\t%0, %2, %3, %C1";
+      }
+    return "strex<sync_sfx>\t%0, %2, %C1";
+  })