
[AArch64] Implement sync gen and atomic builtins.

Message ID 1353081802-16018-1-git-send-email-james.greenhalgh@arm.com
State New

Commit Message

James Greenhalgh Nov. 16, 2012, 4:03 p.m. UTC
Hi,

I'm posting this patch on behalf of Sofiane Naci, who is
currently on holiday.

This patch does two things:

1. Rewrites the AArch64 sync generation code to follow Richard
Henderson's ARM implementation (see
http://gcc.gnu.org/ml/gcc-patches/2011-12/msg00005.html). The
implementation here, however, uses load-acquire and store-release
instructions to avoid issuing explicit memory barriers (see the sketch
after this list).

2. Implements support for atomic_* patterns and deletes the old implementation
of sync_* patterns.
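
As an illustration (a minimal sketch under the new expansion, not one
of the testcases added below), a builtin such as

  /* Hypothetical example, not part of the patch.  */
  int
  atomic_fetch_add_acquire (int *p, int v)
  {
    return __atomic_fetch_add (p, v, __ATOMIC_ACQUIRE);
  }

should now expand to a load-exclusive/store-exclusive loop
(ldaxr / add / stxr / cbnz) in which the acquire semantics come from
the ldaxr itself, with no separate dmb barrier.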

This patch has been tested with a full regression run against
aarch64-none-elf and there were no regressions.

Is this OK to commit?

Thanks,
James Greenhalgh

---
gcc/
2012-11-13  Sofiane Naci  <sofiane.naci@arm.com>

	* config/aarch64/aarch64.md
	(define_attr "sync_*") Remove.
	(define_attr "length"): Update.
	Include atomics.md.

	* config/aarch64/aarch64-protos.h
	(aarch64_expand_compare_and_swap): Add function prototype.
	(aarch64_split_compare_and_swap): Likewise.
	(aarch64_split_atomic_op): Likewise.
	(aarch64_expand_sync): Remove function prototype.
	(aarch64_output_sync_insn): Likewise.
	(aarch64_output_sync_lock_release): Likewise.
	(aarch64_sync_loop_insns): Likewise.
	(struct aarch64_sync_generator): Remove.
	(enum aarch64_sync_generator_tag): Likewise.

	* config/aarch64/aarch64.c
	(aarch64_legitimize_sync_memory): Remove function.
	(aarch64_emit): Likewise.
	(aarch64_insn_count): Likewise.
	(aarch64_output_asm_insn): Likewise.
	(aarch64_load_store_suffix): Likewise.
	(aarch64_output_sync_load): Likewise.
	(aarch64_output_sync_store): Likewise.
	(aarch64_output_op2): Likewise.
	(aarch64_output_op3): Likewise.
	(aarch64_output_sync_loop): Likewise.
	(aarch64_get_sync_operand): Likewise.
	(aarch64_process_output_sync_insn): Likewise.
	(aarch64_output_sync_insn): Likewise.
	(aarch64_output_sync_lock_release): Likewise.
	(aarch64_sync_loop_insns): Likewise.
	(aarch64_call_generator): Likewise.
	(aarch64_expand_sync): Likewise.
	(* emit_f): Remove variable.
	(aarch64_insn_count): Likewise.
	(FETCH_SYNC_OPERAND): Likewise.
	(aarch64_emit_load_exclusive): New function.
	(aarch64_emit_store_exclusive): Likewise.
	(aarch64_emit_unlikely_jump): Likewise.
	(aarch64_expand_compare_and_swap): Likewise.
	(aarch64_split_compare_and_swap): Likewise.
	(aarch64_split_atomic_op): Likewise.

	* config/aarch64/iterators.md
	(atomic_sfx): New mode attribute.
	(atomic_optab): New code attribute.
	(atomic_op_operand): Likewise.
	(atomic_op_str): Likewise.
	(syncop): Rename to atomic_op.

	* config/aarch64/sync.md: Delete.

	* config/aarch64/atomics.md: New file.

gcc/testsuite/

2012-11-13  Sofiane Naci  <sofiane.naci@arm.com>

	* gcc.target/aarch64/atomic-comp-swap-release-acquire.c: New testcase.
	* gcc.target/aarch64/atomic-op-acq_rel.c: Likewise.
	* gcc.target/aarch64/atomic-op-acquire.c: Likewise.
	* gcc.target/aarch64/atomic-op-char.c: Likewise.
	* gcc.target/aarch64/atomic-op-consume.c: Likewise.
	* gcc.target/aarch64/atomic-op-imm.c: Likewise.
	* gcc.target/aarch64/atomic-op-int.c: Likewise.
	* gcc.target/aarch64/atomic-op-long.c: Likewise.
	* gcc.target/aarch64/atomic-op-relaxed.c: Likewise.
	* gcc.target/aarch64/atomic-op-release.c: Likewise.
	* gcc.target/aarch64/atomic-op-seq_cst.c: Likewise.
	* gcc.target/aarch64/atomic-op-short.c: Likewise.
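
The new tests cover each memory model. For the relaxed model no
acquire or release semantics are required at all; a minimal sketch of
the shape these tests take (hypothetical, not the committed file
contents):

  /* Hypothetical example in the style of atomic-op-relaxed.c.  */
  int v;

  int
  atomic_fetch_sub_RELAXED (int a)
  {
    return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
  }

Per the load/store-exclusive patterns in the patch, this should expand
to a plain ldxr/sub/stxr loop, with no dmb and no acquire or release
variants of the exclusive accesses.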

Comments

Marcus Shawcroft Nov. 19, 2012, 6:30 p.m. UTC | #1
On 16 Nov 2012, at 16:03, James Greenhalgh <James.Greenhalgh@arm.com> wrote:

> <0001-Patch-AArch64-Implementent-sync-gen-and-atomic-built.patch>

OK, and please backport to ARM/aarch64-4.7-branch.

/Marcus

Patch

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index ca4e306..4414df4 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -22,35 +22,6 @@ 
 #ifndef GCC_AARCH64_PROTOS_H
 #define GCC_AARCH64_PROTOS_H
 
- /* This generator struct and enum is used to wrap a function pointer
-    to a function that generates an RTX fragment but takes either 3 or
-    4 operands.
-
-    The omn flavour, wraps a function that generates a synchronization
-    instruction from 3 operands: old value, memory and new value.
-
-    The omrn flavour, wraps a function that generates a synchronization
-    instruction from 4 operands: old value, memory, required value and
-    new value.  */
-
-enum aarch64_sync_generator_tag
-{
-  aarch64_sync_generator_omn,
-  aarch64_sync_generator_omrn
-};
-
- /* Wrapper to pass around a polymorphic pointer to a sync instruction
-    generator and.  */
-struct aarch64_sync_generator
-{
-  enum aarch64_sync_generator_tag op;
-  union
-  {
-    rtx (*omn) (rtx, rtx, rtx);
-    rtx (*omrn) (rtx, rtx, rtx, rtx);
-  } u;
-};
-
 /*
   SYMBOL_CONTEXT_ADR
   The symbol is used in a load-address operation.
@@ -186,8 +157,6 @@  bool aarch64_symbolic_constant_p (rtx, enum aarch64_symbol_context,
 				  enum aarch64_symbol_type *);
 bool aarch64_uimm12_shift (HOST_WIDE_INT);
 const char *aarch64_output_casesi (rtx *);
-const char *aarch64_output_sync_insn (rtx, rtx *);
-const char *aarch64_output_sync_lock_release (rtx, rtx);
 enum aarch64_symbol_type aarch64_classify_symbol (rtx,
 						  enum aarch64_symbol_context);
 enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx);
@@ -210,14 +179,11 @@  rtx aarch64_simd_vect_par_cnst_half (enum machine_mode, bool);
 rtx aarch64_tls_get_addr (void);
 unsigned aarch64_dbx_register_number (unsigned);
 unsigned aarch64_trampoline_size (void);
-unsigned aarch64_sync_loop_insns (rtx, rtx *);
 void aarch64_asm_output_labelref (FILE *, const char *);
 void aarch64_elf_asm_named_section (const char *, unsigned, tree);
 void aarch64_expand_epilogue (bool);
 void aarch64_expand_mov_immediate (rtx, rtx);
 void aarch64_expand_prologue (void);
-void aarch64_expand_sync (enum machine_mode, struct aarch64_sync_generator *,
-			  rtx, rtx, rtx, rtx);
 void aarch64_function_profiler (FILE *, int);
 void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx,
 				   const_tree, unsigned);
@@ -256,6 +222,10 @@  enum machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx);
 rtx aarch64_gen_compare_reg (RTX_CODE, rtx, rtx);
 rtx aarch64_load_tp (rtx);
 
+void aarch64_expand_compare_and_swap (rtx op[]);
+void aarch64_split_compare_and_swap (rtx op[]);
+void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);
+
 #endif /* RTX_CODE */
 
 #endif /* GCC_AARCH64_PROTOS_H */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 4437fef..485ea28 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -5859,382 +5859,6 @@  aarch64_preferred_simd_mode (enum machine_mode mode)
   return word_mode;
 }
 
-/* Legitimize a memory reference for sync primitive implemented using
-   LDXR/STXR instructions.  We currently force the form of the reference
-   to be indirect without offset.  */
-static rtx
-aarch64_legitimize_sync_memory (rtx memory)
-{
-  rtx addr = force_reg (Pmode, XEXP (memory, 0));
-  rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
-
-  set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
-  MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
-  return legitimate_memory;
-}
-
-/* An instruction emitter.  */
-typedef void (* emit_f) (int label, const char *, rtx *);
-
-/* An instruction emitter that emits via the conventional
-   output_asm_insn.  */
-static void
-aarch64_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
-{
-  output_asm_insn (pattern, operands);
-}
-
-/* Count the number of emitted synchronization instructions.  */
-static unsigned aarch64_insn_count;
-
-/* An emitter that counts emitted instructions but does not actually
-   emit instruction into the the instruction stream.  */
-static void
-aarch64_count (int label,
-	       const char *pattern ATTRIBUTE_UNUSED,
-	       rtx *operands ATTRIBUTE_UNUSED)
-{
-  if (! label)
-    ++ aarch64_insn_count;
-}
-
-static void
-aarch64_output_asm_insn (emit_f, int, rtx *,
-			 const char *, ...) ATTRIBUTE_PRINTF_4;
-
-/* Construct a pattern using conventional output formatting and feed
-   it to output_asm_insn.  Provides a mechanism to construct the
-   output pattern on the fly.  Note the hard limit on the pattern
-   buffer size.  */
-static void
-aarch64_output_asm_insn (emit_f emit, int label, rtx *operands,
-			 const char *pattern, ...)
-{
-  va_list ap;
-  char buffer[256];
-
-  va_start (ap, pattern);
-  vsnprintf (buffer, sizeof (buffer), pattern, ap);
-  va_end (ap);
-  emit (label, buffer, operands);
-}
-
-/* Helper to figure out the instruction suffix required on LDXR/STXR
-   instructions for operations on an object of the specified mode.  */
-static const char *
-aarch64_load_store_suffix (enum machine_mode mode)
-{
-  switch (mode)
-    {
-    case QImode: return "b";
-    case HImode: return "h";
-    case SImode: return "";
-    case DImode: return "";
-    default:
-      gcc_unreachable ();
-    }
-  return "";
-}
-
-/* Emit an excluive load instruction appropriate for the specified
-   mode.  */
-static void
-aarch64_output_sync_load (emit_f emit,
-			  enum machine_mode mode,
-			  rtx target,
-			  rtx memory,
-			  bool with_barrier)
-{
-  const char *suffix = aarch64_load_store_suffix (mode);
-  rtx operands[2];
-
-  operands[0] = target;
-  operands[1] = memory;
-  aarch64_output_asm_insn (emit, 0, operands, "ld%sxr%s\t%%%s0, %%1",
-			   with_barrier ? "a" : "", suffix,
-			   mode == DImode ? "x" : "w");
-}
-
-/* Emit an exclusive store instruction appropriate for the specified
-   mode.  */
-static void
-aarch64_output_sync_store (emit_f emit,
-			   enum machine_mode mode,
-			   rtx result,
-			   rtx value,
-			   rtx memory,
-			   bool with_barrier)
-{
-  const char *suffix = aarch64_load_store_suffix (mode);
-  rtx operands[3];
-
-  operands[0] = result;
-  operands[1] = value;
-  operands[2] = memory;
-  aarch64_output_asm_insn (emit, 0, operands,
-			   "st%sxr%s\t%%w0, %%%s1, %%2",
-			   with_barrier ? "l" : "",
-			   suffix,
-			   mode == DImode ? "x" : "w");
-}
-
-/* Helper to emit a two operand instruction.  */
-static void
-aarch64_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
-{
-  rtx operands[2];
-  enum machine_mode mode;
-  const char *constraint;
-
-  mode = GET_MODE (d);
-  operands[0] = d;
-  operands[1] = s;
-  constraint = mode == DImode ? "" : "w";
-  aarch64_output_asm_insn (emit, 0, operands, "%s\t%%%s0, %%%s1", mnemonic,
-			   constraint, constraint);
-}
-
-/* Helper to emit a three operand instruction.  */
-static void
-aarch64_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
-{
-  rtx operands[3];
-  enum machine_mode mode;
-  const char *constraint;
-
-  mode = GET_MODE (d);
-  operands[0] = d;
-  operands[1] = a;
-  operands[2] = b;
-
-  constraint = mode == DImode ? "" : "w";
-  aarch64_output_asm_insn (emit, 0, operands, "%s\t%%%s0, %%%s1, %%%s2",
-			   mnemonic, constraint, constraint, constraint);
-}
-
-/* Emit a load store exclusive synchronization loop.
-
-   do
-     old_value = [mem]
-     if old_value != required_value
-       break;
-     t1 = sync_op (old_value, new_value)
-     [mem] = t1, t2 = [0|1]
-   while ! t2
-
-   Note:
-     t1 == t2 is not permitted
-     t1 == old_value is permitted
-
-   required_value:
-
-   RTX register or const_int representing the required old_value for
-   the modify to continue, if NULL no comparsion is performed.  */
-static void
-aarch64_output_sync_loop (emit_f emit,
-			  enum machine_mode mode,
-			  rtx old_value,
-			  rtx memory,
-			  rtx required_value,
-			  rtx new_value,
-			  rtx t1,
-			  rtx t2,
-			  enum attr_sync_op sync_op,
-			  int acquire_barrier,
-			  int release_barrier)
-{
-  rtx operands[1];
-
-  gcc_assert (t1 != t2);
-
-  aarch64_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
-
-  aarch64_output_sync_load (emit, mode, old_value, memory, acquire_barrier);
-
-  if (required_value)
-    {
-      rtx operands[2];
-
-      operands[0] = old_value;
-      operands[1] = required_value;
-      aarch64_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
-      aarch64_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=",
-			       LOCAL_LABEL_PREFIX);
-    }
-
-  switch (sync_op)
-    {
-    case SYNC_OP_ADD:
-      aarch64_output_op3 (emit, "add", t1, old_value, new_value);
-      break;
-
-    case SYNC_OP_SUB:
-      aarch64_output_op3 (emit, "sub", t1, old_value, new_value);
-      break;
-
-    case SYNC_OP_IOR:
-      aarch64_output_op3 (emit, "orr", t1, old_value, new_value);
-      break;
-
-    case SYNC_OP_XOR:
-      aarch64_output_op3 (emit, "eor", t1, old_value, new_value);
-      break;
-
-    case SYNC_OP_AND:
-      aarch64_output_op3 (emit,"and", t1, old_value, new_value);
-      break;
-
-    case SYNC_OP_NAND:
-      aarch64_output_op3 (emit, "and", t1, old_value, new_value);
-      aarch64_output_op2 (emit, "mvn", t1, t1);
-      break;
-
-    case SYNC_OP_NONE:
-      t1 = new_value;
-      break;
-    }
-
-  aarch64_output_sync_store (emit, mode, t2, t1, memory, release_barrier);
-  operands[0] = t2;
-  aarch64_output_asm_insn (emit, 0, operands, "cbnz\t%%w0, %sLSYT%%=",
-			   LOCAL_LABEL_PREFIX);
-
-  aarch64_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
-}
-
-static rtx
-aarch64_get_sync_operand (rtx *operands, int index, rtx default_value)
-{
-  if (index > 0)
-    default_value = operands[index - 1];
-
-  return default_value;
-}
-
-#define FETCH_SYNC_OPERAND(NAME, DEFAULT)                                \
-  aarch64_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), \
-			    DEFAULT);
-
-/* Extract the operands for a synchroniztion instruction from the
-   instructions attributes and emit the instruction.  */
-static void
-aarch64_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
-{
-  rtx result, memory, required_value, new_value, t1, t2;
-  int release_barrier;
-  int acquire_barrier = 1;
-  enum machine_mode mode;
-  enum attr_sync_op sync_op;
-
-  result = FETCH_SYNC_OPERAND (result, 0);
-  memory = FETCH_SYNC_OPERAND (memory, 0);
-  required_value = FETCH_SYNC_OPERAND (required_value, 0);
-  new_value = FETCH_SYNC_OPERAND (new_value, 0);
-  t1 = FETCH_SYNC_OPERAND (t1, 0);
-  t2 = FETCH_SYNC_OPERAND (t2, 0);
-  release_barrier =
-    get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
-  sync_op = get_attr_sync_op (insn);
-  mode = GET_MODE (memory);
-
-  aarch64_output_sync_loop (emit, mode, result, memory, required_value,
-			    new_value, t1, t2, sync_op, acquire_barrier,
-			    release_barrier);
-}
-
-/* Emit a synchronization instruction loop.  */
-const char *
-aarch64_output_sync_insn (rtx insn, rtx *operands)
-{
-  aarch64_process_output_sync_insn (aarch64_emit, insn, operands);
-  return "";
-}
-
-/* Emit a store release instruction appropriate for the specified
-   mode.  */
-const char *
-aarch64_output_sync_lock_release (rtx value, rtx memory)
-{
-  const char *suffix;
-  enum machine_mode mode;
-  rtx operands[2];
-  operands[0] = value;
-  operands[1] = memory;
-  mode = GET_MODE (memory);
-  suffix = aarch64_load_store_suffix (mode);
-  aarch64_output_asm_insn (aarch64_emit, 0, operands,
-			   "stlr%s\t%%%s0, %%1",
-			   suffix,
-			   mode == DImode ? "x" : "w");
-  return "";
-}
-
-/* Count the number of machine instruction that will be emitted for a
-   synchronization instruction.  Note that the emitter used does not
-   emit instructions, it just counts instructions being careful not
-   to count labels.  */
-unsigned int
-aarch64_sync_loop_insns (rtx insn, rtx *operands)
-{
-  aarch64_insn_count = 0;
-  aarch64_process_output_sync_insn (aarch64_count, insn, operands);
-  return aarch64_insn_count;
-}
-
-/* Helper to call a target sync instruction generator, dealing with
-   the variation in operands required by the different generators.  */
-static rtx
-aarch64_call_generator (struct aarch64_sync_generator *generator, rtx old_value,
-			rtx memory, rtx required_value, rtx new_value)
-{
-  switch (generator->op)
-    {
-    case aarch64_sync_generator_omn:
-      gcc_assert (! required_value);
-      return generator->u.omn (old_value, memory, new_value);
-
-    case aarch64_sync_generator_omrn:
-      gcc_assert (required_value);
-      return generator->u.omrn (old_value, memory, required_value, new_value);
-    }
-
-  return NULL;
-}
-
-/* Expand a synchronization loop.  The synchronization loop is
-   expanded as an opaque block of instructions in order to ensure that
-   we do not subsequently get extraneous memory accesses inserted
-   within the critical region.  The exclusive access property of
-   LDXR/STXR instructions is only guaranteed if there are no intervening
-   memory accesses.  */
-void
-aarch64_expand_sync (enum machine_mode mode,
-		     struct aarch64_sync_generator *generator,
-		     rtx target, rtx memory, rtx required_value, rtx new_value)
-{
-  if (target == NULL)
-    target = gen_reg_rtx (mode);
-
-  memory = aarch64_legitimize_sync_memory (memory);
-  if (mode != SImode && mode != DImode)
-    {
-      rtx load_temp = gen_reg_rtx (SImode);
-
-      if (required_value)
-	required_value = convert_modes (SImode, mode, required_value, true);
-
-      new_value = convert_modes (SImode, mode, new_value, true);
-      emit_insn (aarch64_call_generator (generator, load_temp, memory,
-					 required_value, new_value));
-      emit_move_insn (target, gen_lowpart (mode, load_temp));
-    }
-  else
-    {
-      emit_insn (aarch64_call_generator (generator, target, memory,
-					 required_value, new_value));
-    }
-}
-
 /* Return the equivalent letter for size.  */
 static unsigned char
 sizetochar (int size)
@@ -6795,6 +6419,243 @@  aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
    return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
 }
 
+/* Emit load exclusive.  */
+
+static void
+aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
+			     rtx mem, rtx model_rtx)
+{
+  rtx (*gen) (rtx, rtx, rtx);
+
+  switch (mode)
+    {
+    case QImode: gen = gen_aarch64_load_exclusiveqi; break;
+    case HImode: gen = gen_aarch64_load_exclusivehi; break;
+    case SImode: gen = gen_aarch64_load_exclusivesi; break;
+    case DImode: gen = gen_aarch64_load_exclusivedi; break;
+    default:
+      gcc_unreachable ();
+    }
+
+  emit_insn (gen (rval, mem, model_rtx));
+}
+
+/* Emit store exclusive.  */
+
+static void
+aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
+			      rtx rval, rtx mem, rtx model_rtx)
+{
+  rtx (*gen) (rtx, rtx, rtx, rtx);
+
+  switch (mode)
+    {
+    case QImode: gen = gen_aarch64_store_exclusiveqi; break;
+    case HImode: gen = gen_aarch64_store_exclusivehi; break;
+    case SImode: gen = gen_aarch64_store_exclusivesi; break;
+    case DImode: gen = gen_aarch64_store_exclusivedi; break;
+    default:
+      gcc_unreachable ();
+    }
+
+  emit_insn (gen (bval, rval, mem, model_rtx));
+}
+
+/* Mark the previous jump instruction as unlikely.  */
+
+static void
+aarch64_emit_unlikely_jump (rtx insn)
+{
+  rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
+
+  insn = emit_jump_insn (insn);
+  add_reg_note (insn, REG_BR_PROB, very_unlikely);
+}
+
+/* Expand a compare and swap pattern.  */
+
+void
+aarch64_expand_compare_and_swap (rtx operands[])
+{
+  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
+  enum machine_mode mode, cmp_mode;
+  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
+
+  bval = operands[0];
+  rval = operands[1];
+  mem = operands[2];
+  oldval = operands[3];
+  newval = operands[4];
+  is_weak = operands[5];
+  mod_s = operands[6];
+  mod_f = operands[7];
+  mode = GET_MODE (mem);
+  cmp_mode = mode;
+
+  /* Normally the succ memory model must be stronger than fail, but in the
+     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
+     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */
+
+  if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
+      && INTVAL (mod_s) == MEMMODEL_RELEASE)
+    mod_s = GEN_INT (MEMMODEL_ACQ_REL);
+
+  switch (mode)
+    {
+    case QImode:
+    case HImode:
+      /* For short modes, we're going to perform the comparison in SImode,
+	 so do the zero-extension now.  */
+      cmp_mode = SImode;
+      rval = gen_reg_rtx (SImode);
+      oldval = convert_modes (SImode, mode, oldval, true);
+      /* Fall through.  */
+
+    case SImode:
+    case DImode:
+      /* Force the value into a register if needed.  */
+      if (!aarch64_plus_operand (oldval, mode))
+	oldval = force_reg (cmp_mode, oldval);
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  switch (mode)
+    {
+    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
+    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
+    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
+    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
+    default:
+      gcc_unreachable ();
+    }
+
+  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
+
+  if (mode == QImode || mode == HImode)
+    emit_move_insn (operands[1], gen_lowpart (mode, rval));
+
+  x = gen_rtx_REG (CCmode, CC_REGNUM);
+  x = gen_rtx_EQ (SImode, x, const0_rtx);
+  emit_insn (gen_rtx_SET (VOIDmode, bval, x));
+}
+
+/* Split a compare and swap pattern.  */
+
+void
+aarch64_split_compare_and_swap (rtx operands[])
+{
+  rtx rval, mem, oldval, newval, scratch;
+  enum machine_mode mode;
+  enum memmodel mod_s;
+  bool is_weak;
+  rtx label1, label2, x, cond;
+
+  rval = operands[0];
+  mem = operands[1];
+  oldval = operands[2];
+  newval = operands[3];
+  is_weak = (operands[4] != const0_rtx);
+  mod_s = (enum memmodel) INTVAL (operands[5]);
+  scratch = operands[7];
+  mode = GET_MODE (mem);
+
+  label1 = NULL_RTX;
+  if (!is_weak)
+    {
+      label1 = gen_label_rtx ();
+      emit_label (label1);
+    }
+  label2 = gen_label_rtx ();
+
+  aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
+
+  cond = aarch64_gen_compare_reg (NE, rval, oldval);
+  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
+  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
+
+  aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
+
+  if (!is_weak)
+    {
+      x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
+      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
+      aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
+    }
+  else
+    {
+      cond = gen_rtx_REG (CCmode, CC_REGNUM);
+      x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
+      emit_insn (gen_rtx_SET (VOIDmode, cond, x));
+    }
+
+  emit_label (label2);
+}
+
+/* Split an atomic operation.  */
+
+void
+aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
+		     rtx value, rtx model_rtx, rtx cond)
+{
+  enum machine_mode mode = GET_MODE (mem);
+  enum machine_mode wmode = (mode == DImode ? DImode : SImode);
+  rtx label, x;
+
+  label = gen_label_rtx ();
+  emit_label (label);
+
+  if (new_out)
+    new_out = gen_lowpart (wmode, new_out);
+  if (old_out)
+    old_out = gen_lowpart (wmode, old_out);
+  else
+    old_out = new_out;
+  value = simplify_gen_subreg (wmode, value, mode, 0);
+
+  aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
+
+  switch (code)
+    {
+    case SET:
+      new_out = value;
+      break;
+
+    case NOT:
+      x = gen_rtx_AND (wmode, old_out, value);
+      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
+      x = gen_rtx_NOT (wmode, new_out);
+      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
+      break;
+
+    case MINUS:
+      if (CONST_INT_P (value))
+	{
+	  value = GEN_INT (-INTVAL (value));
+	  code = PLUS;
+	}
+      /* Fall through.  */
+
+    default:
+      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
+      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
+      break;
+    }
+
+  aarch64_emit_store_exclusive (mode, cond, mem,
+				gen_lowpart (mode, new_out), model_rtx);
+
+  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
+  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
+}
+
 static void
 aarch64_start_file (void)
 {
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 804d7e7..739d94c 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -104,60 +104,6 @@ 
 (include "iterators.md")
 
 ;; -------------------------------------------------------------------
-;; Synchronization Builtins
-;; -------------------------------------------------------------------
-
-;; The following sync_* attributes are applied to sychronization
-;; instruction patterns to control the way in which the
-;; synchronization loop is expanded.
-;; All instruction patterns that call aarch64_output_sync_insn ()
-;; should define these attributes.  Refer to the comment above
-;; aarch64.c:aarch64_output_sync_loop () for more detail on the use of
-;; these attributes.
-
-;; Attribute specifies the operand number which contains the
-;; result of a synchronization operation.  The result is the old value
-;; loaded from SYNC_MEMORY.
-(define_attr "sync_result"          "none,0,1,2,3,4,5" (const_string "none"))
-
-;; Attribute specifies the operand number which contains the memory
-;; address to which the synchronization operation is being applied.
-(define_attr "sync_memory"          "none,0,1,2,3,4,5" (const_string "none"))
-
-;; Attribute specifies the operand number which contains the required
-;; old value expected in the memory location.  This attribute may be
-;; none if no required value test should be performed in the expanded
-;; code.
-(define_attr "sync_required_value"  "none,0,1,2,3,4,5" (const_string "none"))
-
-;; Attribute specifies the operand number of the new value to be stored
-;; into the memory location identitifed by the sync_memory attribute.
-(define_attr "sync_new_value"       "none,0,1,2,3,4,5" (const_string "none"))
-
-;; Attribute specifies the operand number of a temporary register
-;; which can be clobbered by the synchronization instruction sequence.
-;; The register provided byn SYNC_T1 may be the same as SYNC_RESULT is
-;; which case the result value will be clobbered and not available
-;; after the synchronization loop exits.
-(define_attr "sync_t1"              "none,0,1,2,3,4,5" (const_string "none"))
-
-;; Attribute specifies the operand number of a temporary register
-;; which can be clobbered by the synchronization instruction sequence.
-;; This register is used to collect the result of a store exclusive
-;; instruction.
-(define_attr "sync_t2"              "none,0,1,2,3,4,5" (const_string "none"))
-
-;; Attribute that specifies whether or not the emitted synchronization
-;; loop must contain a release barrier.
-(define_attr "sync_release_barrier" "yes,no"           (const_string "yes"))
-
-;; Attribute that specifies the operation that the synchronization
-;; loop should apply to the old and new values to generate the value
-;; written back to memory.
-(define_attr "sync_op"              "none,add,sub,ior,xor,and,nand"
-                                    (const_string "none"))
-
-;; -------------------------------------------------------------------
 ;; Instruction types and attributes
 ;; -------------------------------------------------------------------
 
@@ -370,9 +316,7 @@ 
 (define_attr "simd" "no,yes" (const_string "no"))
 
 (define_attr "length" ""
-  (cond [(not (eq_attr "sync_memory" "none"))
-	   (symbol_ref "aarch64_sync_loop_insns (insn, operands) * 4")
-	] (const_int 4)))
+  (const_int 4))
 
 ;; Attribute that controls whether an alternative is enabled or not.
 ;; Currently it is only used to disable alternatives which touch fp or simd
@@ -2937,5 +2881,5 @@ 
 ;; AdvSIMD Stuff
 (include "aarch64-simd.md")
 
-;; Synchronization Builtins
-(include "sync.md")
+;; Atomic Operations
+(include "atomics.md")
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
new file mode 100644
index 0000000..a9a1844
--- /dev/null
+++ b/gcc/config/aarch64/atomics.md
@@ -0,0 +1,382 @@ 
+;; Machine description for AArch64 processor synchronization primitives.
+;; Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+;; Contributed by ARM Ltd.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_c_enum "unspecv"
+ [
+    UNSPECV_LX				; Represent a load-exclusive.
+    UNSPECV_SX				; Represent a store-exclusive.
+    UNSPECV_LDA				; Represent an atomic load or load-acquire.
+    UNSPECV_STL				; Represent an atomic store or store-release.
+    UNSPECV_ATOMIC_CMPSW		; Represent an atomic compare swap.
+    UNSPECV_ATOMIC_EXCHG		; Represent an atomic exchange.
+    UNSPECV_ATOMIC_OP			; Represent an atomic operation.
+])
+
+(define_expand "atomic_compare_and_swap<mode>"
+  [(match_operand:SI 0 "register_operand" "")			;; bool out
+   (match_operand:ALLI 1 "register_operand" "")			;; val out
+   (match_operand:ALLI 2 "aarch64_sync_memory_operand" "")	;; memory
+   (match_operand:ALLI 3 "general_operand" "")			;; expected
+   (match_operand:ALLI 4 "register_operand" "")			;; desired
+   (match_operand:SI 5 "const_int_operand")			;; is_weak
+   (match_operand:SI 6 "const_int_operand")			;; mod_s
+   (match_operand:SI 7 "const_int_operand")]			;; mod_f
+  ""
+  {
+    aarch64_expand_compare_and_swap (operands);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_compare_and_swap<mode>_1"
+  [(set (reg:CC CC_REGNUM)					;; bool out
+    (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
+   (set (match_operand:SI 0 "register_operand" "=&r")		;; val out
+    (zero_extend:SI
+      (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory
+   (set (match_dup 1)
+    (unspec_volatile:SHORT
+      [(match_operand:SI 2 "aarch64_plus_operand" "rI")	;; expected
+       (match_operand:SHORT 3 "register_operand" "r")	;; desired
+       (match_operand:SI 4 "const_int_operand")		;; is_weak
+       (match_operand:SI 5 "const_int_operand")		;; mod_s
+       (match_operand:SI 6 "const_int_operand")]		;; mod_f
+      UNSPECV_ATOMIC_CMPSW))
+   (clobber (match_scratch:SI 7 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_compare_and_swap (operands);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_compare_and_swap<mode>_1"
+  [(set (reg:CC CC_REGNUM)					;; bool out
+    (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
+   (set (match_operand:GPI 0 "register_operand" "=&r")		;; val out
+    (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory
+   (set (match_dup 1)
+    (unspec_volatile:GPI
+      [(match_operand:GPI 2 "aarch64_plus_operand" "rI")	;; expect
+       (match_operand:GPI 3 "register_operand" "r")		;; desired
+       (match_operand:SI 4 "const_int_operand")		;; is_weak
+       (match_operand:SI 5 "const_int_operand")		;; mod_s
+       (match_operand:SI 6 "const_int_operand")]		;; mod_f
+      UNSPECV_ATOMIC_CMPSW))
+   (clobber (match_scratch:SI 7 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_compare_and_swap (operands);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_exchange<mode>"
+  [(set (match_operand:ALLI 0 "register_operand" "=&r")		;; output
+    (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory
+   (set (match_dup 1)
+    (unspec_volatile:ALLI
+      [(match_operand:ALLI 2 "register_operand" "r")	;; input
+       (match_operand:SI 3 "const_int_operand" "")]		;; model
+      UNSPECV_ATOMIC_EXCHG))
+   (clobber (reg:CC CC_REGNUM))
+   (clobber (match_scratch:SI 4 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_atomic_op (SET, operands[0], NULL, operands[1],
+			    operands[2], operands[3], operands[4]);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_<atomic_optab><mode>"
+  [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
+    (unspec_volatile:ALLI
+      [(atomic_op:ALLI (match_dup 0)
+	(match_operand:ALLI 1 "<atomic_op_operand>" "rn"))
+       (match_operand:SI 2 "const_int_operand")]		;; model
+      UNSPECV_ATOMIC_OP))
+       (clobber (reg:CC CC_REGNUM))
+   (clobber (match_scratch:ALLI 3 "=&r"))
+   (clobber (match_scratch:SI 4 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_atomic_op (<CODE>, NULL, operands[3], operands[0],
+			    operands[1], operands[2], operands[4]);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_nand<mode>"
+  [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
+    (unspec_volatile:ALLI
+      [(not:ALLI
+	(and:ALLI (match_dup 0)
+	  (match_operand:ALLI 1 "aarch64_logical_operand" "rn")))
+       (match_operand:SI 2 "const_int_operand")]		;; model
+      UNSPECV_ATOMIC_OP))
+   (clobber (reg:CC CC_REGNUM))
+   (clobber (match_scratch:ALLI 3 "=&r"))
+   (clobber (match_scratch:SI 4 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+     aarch64_split_atomic_op (NOT, NULL, operands[3], operands[0],
+			     operands[1], operands[2], operands[4]);
+     DONE;
+  }
+)
+
+(define_insn_and_split "atomic_fetch_<atomic_optab><mode>"
+  [(set (match_operand:ALLI 0 "register_operand" "=&r")
+    (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
+   (set (match_dup 1)
+    (unspec_volatile:ALLI
+      [(atomic_op:ALLI (match_dup 1)
+	(match_operand:ALLI 2 "<atomic_op_operand>" "rn"))
+       (match_operand:SI 3 "const_int_operand")]		;; model
+      UNSPECV_ATOMIC_OP))
+   (clobber (reg:CC CC_REGNUM))
+   (clobber (match_scratch:ALLI 4 "=&r"))
+   (clobber (match_scratch:SI 5 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_atomic_op (<CODE>, operands[0], operands[4], operands[1],
+			     operands[2], operands[3], operands[5]);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_fetch_nand<mode>"
+  [(set (match_operand:ALLI 0 "register_operand" "=&r")
+    (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
+   (set (match_dup 1)
+    (unspec_volatile:ALLI
+      [(not:ALLI
+	 (and:ALLI (match_dup 1)
+	   (match_operand:ALLI 2 "aarch64_logical_operand" "rn")))
+       (match_operand:SI 3 "const_int_operand")]		;; model
+      UNSPECV_ATOMIC_OP))
+   (clobber (reg:CC CC_REGNUM))
+   (clobber (match_scratch:ALLI 4 "=&r"))
+   (clobber (match_scratch:SI 5 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_atomic_op (NOT, operands[0], operands[4], operands[1],
+			    operands[2], operands[3], operands[5]);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_<atomic_optab>_fetch<mode>"
+  [(set (match_operand:ALLI 0 "register_operand" "=&r")
+    (atomic_op:ALLI
+      (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")
+      (match_operand:ALLI 2 "<atomic_op_operand>" "rn")))
+   (set (match_dup 1)
+    (unspec_volatile:ALLI
+      [(match_dup 1) (match_dup 2)
+       (match_operand:SI 3 "const_int_operand")]		;; model
+      UNSPECV_ATOMIC_OP))
+    (clobber (reg:CC CC_REGNUM))
+   (clobber (match_scratch:SI 4 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_atomic_op (<CODE>, NULL, operands[0], operands[1],
+			     operands[2], operands[3], operands[4]);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_nand_fetch<mode>"
+  [(set (match_operand:ALLI 0 "register_operand" "=&r")
+    (not:ALLI
+      (and:ALLI
+	(match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")
+	(match_operand:ALLI 2 "aarch64_logical_operand" "rn"))))
+   (set (match_dup 1)
+    (unspec_volatile:ALLI
+      [(match_dup 1) (match_dup 2)
+       (match_operand:SI 3 "const_int_operand")]		;; model
+      UNSPECV_ATOMIC_OP))
+   (clobber (reg:CC CC_REGNUM))
+   (clobber (match_scratch:SI 4 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_atomic_op (NOT, NULL, operands[0], operands[1],
+			    operands[2], operands[3], operands[4]);
+    DONE;
+  }
+)
+
+(define_insn "atomic_load<mode>"
+  [(set (match_operand:ALLI 0 "register_operand" "=r")
+    (unspec_volatile:ALLI
+      [(match_operand:ALLI 1 "aarch64_sync_memory_operand" "Q")
+       (match_operand:SI 2 "const_int_operand")]			;; model
+      UNSPECV_LDA))]
+  ""
+  {
+    enum memmodel model = (enum memmodel) INTVAL (operands[2]);
+    if (model == MEMMODEL_RELAXED
+	|| model == MEMMODEL_CONSUME
+	|| model == MEMMODEL_RELEASE)
+      return "ldr<atomic_sfx>\t%<w>0, %1";
+    else
+      return "ldar<atomic_sfx>\t%<w>0, %1";
+  }
+)
+
+(define_insn "atomic_store<mode>"
+  [(set (match_operand:ALLI 0 "memory_operand" "=Q")
+    (unspec_volatile:ALLI
+      [(match_operand:ALLI 1 "general_operand" "rZ")
+       (match_operand:SI 2 "const_int_operand")]			;; model
+      UNSPECV_STL))]
+  ""
+  {
+    enum memmodel model = (enum memmodel) INTVAL (operands[2]);
+    if (model == MEMMODEL_RELAXED
+	|| model == MEMMODEL_CONSUME
+	|| model == MEMMODEL_ACQUIRE)
+      return "str<atomic_sfx>\t%<w>1, %0";
+    else
+      return "stlr<atomic_sfx>\t%<w>1, %0";
+  }
+)
+
+(define_insn "aarch64_load_exclusive<mode>"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+    (zero_extend:SI
+      (unspec_volatile:SHORT
+	[(match_operand:SHORT 1 "aarch64_sync_memory_operand" "Q")
+	 (match_operand:SI 2 "const_int_operand")]
+	UNSPECV_LX)))]
+  ""
+  {
+    enum memmodel model = (enum memmodel) INTVAL (operands[2]);
+    if (model == MEMMODEL_RELAXED
+	|| model == MEMMODEL_CONSUME
+	|| model == MEMMODEL_RELEASE)
+      return "ldxr<atomic_sfx>\t%w0, %1";
+    else
+      return "ldaxr<atomic_sfx>\t%w0, %1";
+  }
+)
+
+(define_insn "aarch64_load_exclusive<mode>"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+    (unspec_volatile:GPI
+      [(match_operand:GPI 1 "aarch64_sync_memory_operand" "Q")
+       (match_operand:SI 2 "const_int_operand")]
+      UNSPECV_LX))]
+  ""
+  {
+    enum memmodel model = (enum memmodel) INTVAL (operands[2]);
+    if (model == MEMMODEL_RELAXED
+	|| model == MEMMODEL_CONSUME
+	|| model == MEMMODEL_RELEASE)
+      return "ldxr\t%<w>0, %1";
+    else
+      return "ldaxr\t%<w>0, %1";
+  }
+)
+
+(define_insn "aarch64_store_exclusive<mode>"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+    (unspec_volatile:SI [(const_int 0)] UNSPECV_SX))
+   (set (match_operand:ALLI 1 "aarch64_sync_memory_operand" "=Q")
+    (unspec_volatile:ALLI
+      [(match_operand:ALLI 2 "register_operand" "r")
+       (match_operand:SI 3 "const_int_operand")]
+      UNSPECV_SX))]
+  ""
+  {
+    enum memmodel model = (enum memmodel) INTVAL (operands[3]);
+    if (model == MEMMODEL_RELAXED
+	|| model == MEMMODEL_CONSUME
+	|| model == MEMMODEL_ACQUIRE)
+      return "stxr<atomic_sfx>\t%w0, %<w>2, %1";
+    else
+      return "stlxr<atomic_sfx>\t%w0, %<w>2, %1";
+  }
+)
+
+(define_expand "mem_thread_fence"
+  [(match_operand:SI 0 "const_int_operand" "")]
+  ""
+  {
+    enum memmodel model = (enum memmodel) INTVAL (operands[0]);
+    if (model != MEMMODEL_RELAXED && model != MEMMODEL_CONSUME)
+      emit_insn (gen_dmb (operands[0]));
+    DONE;
+  }
+)
+
+(define_expand "dmb"
+  [(set (match_dup 1)
+    (unspec:BLK [(match_dup 1) (match_operand:SI 0 "const_int_operand")]
+     UNSPEC_MB))]
+   ""
+   {
+    operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+    MEM_VOLATILE_P (operands[1]) = 1;
+  }
+)
+
+(define_insn "*dmb"
+  [(set (match_operand:BLK 0 "" "")
+    (unspec:BLK [(match_dup 0) (match_operand:SI 1 "const_int_operand")]
+     UNSPEC_MB))]
+  ""
+  {
+    enum memmodel model = (enum memmodel) INTVAL (operands[1]);
+    if (model == MEMMODEL_ACQUIRE)
+      return "dmb\\tishld";
+    else
+      return "dmb\\tish";
+  }
+)
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 8d5d4b0..7a1cdc8 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -449,6 +449,10 @@ 
 
 (define_mode_attr VSTRUCT_DREG [(OI "TI") (CI "EI") (XI "OI")])
 
+;; Mode for atomic operation suffixes
+(define_mode_attr atomic_sfx
+  [(QI "b") (HI "h") (SI "") (DI "")])
+
 ;; -------------------------------------------------------------------
 ;; Code Iterators
 ;; -------------------------------------------------------------------
@@ -480,7 +484,7 @@ 
 ;; Iterator for __sync_<op> operations that where the operation can be
 ;; represented directly RTL.  This is all of the sync operations bar
 ;; nand.
-(define_code_iterator syncop [plus minus ior xor and])
+(define_code_iterator atomic_op [plus minus ior xor and])
 
 ;; Iterator for integer conversions
 (define_code_iterator FIXUORS [fix unsigned_fix])
@@ -575,6 +579,16 @@ 
 ;; MLA/MLS attributes.
 (define_code_attr as [(ss_plus "a") (ss_minus "s")])
 
+;; Atomic operations
+(define_code_attr atomic_optab
+  [(ior "or") (xor "xor") (and "and") (plus "add") (minus "sub")])
+
+(define_code_attr atomic_op_operand
+  [(ior "aarch64_logical_operand")
+   (xor "aarch64_logical_operand")
+   (and "aarch64_logical_operand")
+   (plus "aarch64_plus_operand")
+   (minus "aarch64_plus_operand")])
 
 ;; -------------------------------------------------------------------
 ;; Int Iterators.
diff --git a/gcc/config/aarch64/sync.md b/gcc/config/aarch64/sync.md
deleted file mode 100644
index 61f1f1b..0000000
--- a/gcc/config/aarch64/sync.md
+++ /dev/null
@@ -1,467 +0,0 @@ 
-;; Machine description for AArch64 processor synchronization primitives.
-;; Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
-;; Contributed by ARM Ltd.
-;;
-;; This file is part of GCC.
-;;
-;; GCC is free software; you can redistribute it and/or modify it
-;; under the terms of the GNU General Public License as published by
-;; the Free Software Foundation; either version 3, or (at your option)
-;; any later version.
-;;
-;; GCC is distributed in the hope that it will be useful, but
-;; WITHOUT ANY WARRANTY; without even the implied warranty of
-;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-;; General Public License for more details.
-;;
-;; You should have received a copy of the GNU General Public License
-;; along with GCC; see the file COPYING3.  If not see
-;; <http://www.gnu.org/licenses/>.
-
-(define_c_enum "unspecv"
- [
-    UNSPECV_SYNC_COMPARE_AND_SWAP       ; Represent a sync_compare_and_swap.
-    UNSPECV_SYNC_LOCK			; Represent a sync_lock_test_and_set.
-    UNSPECV_SYNC_LOCK_RELEASE		; Represent a sync_lock_release.
-    UNSPECV_SYNC_OP			; Represent a sync_<op>
-    UNSPECV_SYNC_NEW_OP			; Represent a sync_new_<op>
-    UNSPECV_SYNC_OLD_OP			; Represent a sync_old_<op>
-])
-
-(define_expand "sync_compare_and_swap<mode>"
-  [(set (match_operand:ALLI 0 "register_operand")
-        (unspec_volatile:ALLI [(match_operand:ALLI 1 "memory_operand")
-			       (match_operand:ALLI 2 "register_operand")
-			       (match_operand:ALLI 3 "register_operand")]
-			       UNSPECV_SYNC_COMPARE_AND_SWAP))]
-  ""
-  {
-    struct aarch64_sync_generator generator;
-    generator.op = aarch64_sync_generator_omrn;
-    generator.u.omrn = gen_aarch64_sync_compare_and_swap<mode>;
-    aarch64_expand_sync (<MODE>mode, &generator, operands[0], operands[1],
-    			 operands[2], operands[3]);
-    DONE;
-  })
-
-(define_expand "sync_lock_test_and_set<mode>"
-  [(match_operand:ALLI 0 "register_operand")
-   (match_operand:ALLI 1 "memory_operand")
-   (match_operand:ALLI 2 "register_operand")]
-  ""
-  {
-    struct aarch64_sync_generator generator;
-    generator.op = aarch64_sync_generator_omn;
-    generator.u.omn = gen_aarch64_sync_lock_test_and_set<mode>;
-    aarch64_expand_sync (<MODE>mode, &generator, operands[0], operands[1],
-                         NULL, operands[2]);
-    DONE;
-  })
-
-(define_expand "sync_<optab><mode>"
-  [(match_operand:ALLI 0 "memory_operand")
-   (match_operand:ALLI 1 "register_operand")
-   (syncop:ALLI (match_dup 0) (match_dup 1))]
-  ""
-  {
-    struct aarch64_sync_generator generator;
-    generator.op = aarch64_sync_generator_omn;
-    generator.u.omn = gen_aarch64_sync_new_<optab><mode>;
-    aarch64_expand_sync (<MODE>mode, &generator, NULL, operands[0], NULL,
-                         operands[1]);
-    DONE;
-  })
-
-(define_expand "sync_nand<mode>"
-  [(match_operand:ALLI 0 "memory_operand")
-   (match_operand:ALLI 1 "register_operand")
-   (not:ALLI (and:ALLI (match_dup 0) (match_dup 1)))]
-  ""
-  {
-    struct aarch64_sync_generator generator;
-    generator.op = aarch64_sync_generator_omn;
-    generator.u.omn = gen_aarch64_sync_new_nand<mode>;
-    aarch64_expand_sync (<MODE>mode, &generator, NULL, operands[0], NULL,
-                         operands[1]);
-    DONE;
-  })
-
-(define_expand "sync_new_<optab><mode>"
-  [(match_operand:ALLI 0 "register_operand")
-   (match_operand:ALLI 1 "memory_operand")
-   (match_operand:ALLI 2 "register_operand")
-   (syncop:ALLI (match_dup 1) (match_dup 2))]
-  ""
-  {
-    struct aarch64_sync_generator generator;
-    generator.op = aarch64_sync_generator_omn;
-    generator.u.omn = gen_aarch64_sync_new_<optab><mode>;
-    aarch64_expand_sync (<MODE>mode, &generator, operands[0], operands[1],
-    		    	 NULL, operands[2]);
-    DONE;
-  })
-
-(define_expand "sync_new_nand<mode>"
-  [(match_operand:ALLI 0 "register_operand")
-   (match_operand:ALLI 1 "memory_operand")
-   (match_operand:ALLI 2 "register_operand")
-   (not:ALLI (and:ALLI (match_dup 1) (match_dup 2)))]
-  ""
-  {
-    struct aarch64_sync_generator generator;
-    generator.op = aarch64_sync_generator_omn;
-    generator.u.omn = gen_aarch64_sync_new_nand<mode>;
-    aarch64_expand_sync (<MODE>mode, &generator, operands[0], operands[1],
-    			 NULL, operands[2]);
-    DONE;
-  });
-
-(define_expand "sync_old_<optab><mode>"
-  [(match_operand:ALLI 0 "register_operand")
-   (match_operand:ALLI 1 "memory_operand")
-   (match_operand:ALLI 2 "register_operand")
-   (syncop:ALLI (match_dup 1) (match_dup 2))]
-  ""
-  {
-    struct aarch64_sync_generator generator;
-    generator.op = aarch64_sync_generator_omn;
-    generator.u.omn = gen_aarch64_sync_old_<optab><mode>;
-    aarch64_expand_sync (<MODE>mode, &generator, operands[0], operands[1],
-    		         NULL, operands[2]);
-    DONE;
-  })
-
-(define_expand "sync_old_nand<mode>"
-  [(match_operand:ALLI 0 "register_operand")
-   (match_operand:ALLI 1 "memory_operand")
-   (match_operand:ALLI 2 "register_operand")
-   (not:ALLI (and:ALLI (match_dup 1) (match_dup 2)))]
-  ""
-  {
-    struct aarch64_sync_generator generator;
-    generator.op = aarch64_sync_generator_omn;
-    generator.u.omn = gen_aarch64_sync_old_nand<mode>;
-    aarch64_expand_sync (<MODE>mode, &generator, operands[0], operands[1],
-                         NULL, operands[2]);
-    DONE;
-  })
-
-(define_expand "memory_barrier"
-  [(set (match_dup 0) (unspec:BLK [(match_dup 0)] UNSPEC_MB))]
-  ""
-{
-  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
-  MEM_VOLATILE_P (operands[0]) = 1;
-})
-
-(define_insn "aarch64_sync_compare_and_swap<mode>"
-  [(set (match_operand:GPI 0 "register_operand" "=&r")
-        (unspec_volatile:GPI
-	  [(match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")
-   	   (match_operand:GPI 2 "register_operand" "r")
-	   (match_operand:GPI 3 "register_operand" "r")]
-	  UNSPECV_SYNC_COMPARE_AND_SWAP))
-   (set (match_dup 1) (unspec_volatile:GPI [(match_dup 2)]
-                                          UNSPECV_SYNC_COMPARE_AND_SWAP))
-   (clobber:GPI (match_scratch:GPI 4 "=&r"))
-   (set (reg:CC CC_REGNUM) (unspec_volatile:CC [(match_dup 1)]
-                                                UNSPECV_SYNC_COMPARE_AND_SWAP))
-   ]
-  ""
-  {
-    return aarch64_output_sync_insn (insn, operands);
-  }
-  [(set_attr "sync_result"          "0")
-   (set_attr "sync_memory"          "1")
-   (set_attr "sync_required_value"  "2")
-   (set_attr "sync_new_value"       "3")
-   (set_attr "sync_t1"              "0")
-   (set_attr "sync_t2"              "4")
-   ])
-
-(define_insn "aarch64_sync_compare_and_swap<mode>"
-  [(set (match_operand:SI 0 "register_operand" "=&r")
-        (zero_extend:SI
-	  (unspec_volatile:SHORT
-	    [(match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q")
-   	     (match_operand:SI 2 "register_operand" "r")
-	     (match_operand:SI 3 "register_operand" "r")]
-	    UNSPECV_SYNC_COMPARE_AND_SWAP)))
-   (set (match_dup 1) (unspec_volatile:SHORT [(match_dup 2)]
-                                             UNSPECV_SYNC_COMPARE_AND_SWAP))
-   (clobber:SI (match_scratch:SI 4 "=&r"))
-   (set (reg:CC CC_REGNUM) (unspec_volatile:CC [(match_dup 1)]
-                                                UNSPECV_SYNC_COMPARE_AND_SWAP))
-   ]
-  ""
-  {
-    return aarch64_output_sync_insn (insn, operands);
-  }
-  [(set_attr "sync_result"          "0")
-   (set_attr "sync_memory"          "1")
-   (set_attr "sync_required_value"  "2")
-   (set_attr "sync_new_value"       "3")
-   (set_attr "sync_t1"              "0")
-   (set_attr "sync_t2"              "4")
-   ])
-
-(define_insn "aarch64_sync_lock_test_and_set<mode>"
-  [(set (match_operand:GPI 0 "register_operand" "=&r")
-        (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q"))
-   (set (match_dup 1)
-        (unspec_volatile:GPI [(match_operand:GPI 2 "register_operand" "r")]
-	                     UNSPECV_SYNC_LOCK))
-   (clobber (reg:CC CC_REGNUM))
-   (clobber (match_scratch:GPI 3 "=&r"))]
-  ""
-  {
-    return aarch64_output_sync_insn (insn, operands);
-  }
-  [(set_attr "sync_release_barrier" "no")
-   (set_attr "sync_result"          "0")
-   (set_attr "sync_memory"          "1")
-   (set_attr "sync_new_value"       "2")
-   (set_attr "sync_t1"              "0")
-   (set_attr "sync_t2"              "3")
-   ])
-
-(define_insn "aarch64_sync_lock_test_and_set<mode>"
-  [(set (match_operand:SI 0 "register_operand" "=&r")
-        (zero_extend:SI (match_operand:SHORT 1
-	                  "aarch64_sync_memory_operand" "+Q")))
-   (set (match_dup 1)
-        (unspec_volatile:SHORT [(match_operand:SI 2 "register_operand" "r")]
-                               UNSPECV_SYNC_LOCK))
-   (clobber (reg:CC CC_REGNUM))
-   (clobber (match_scratch:SI 3 "=&r"))]
-  ""
-  {
-    return aarch64_output_sync_insn (insn, operands);
-  }
-  [(set_attr "sync_release_barrier" "no")
-   (set_attr "sync_result"          "0")
-   (set_attr "sync_memory"          "1")
-   (set_attr "sync_new_value"       "2")
-   (set_attr "sync_t1"              "0")
-   (set_attr "sync_t2"              "3")
-   ])
-
-(define_insn "aarch64_sync_new_<optab><mode>"
-  [(set (match_operand:GPI 0 "register_operand" "=&r")
-        (unspec_volatile:GPI
-	  [(syncop:GPI
-	     (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")
-             (match_operand:GPI 2 "register_operand" "r"))]
-           UNSPECV_SYNC_NEW_OP))
-   (set (match_dup 1)
-        (unspec_volatile:GPI [(match_dup 1) (match_dup 2)]
-	                    UNSPECV_SYNC_NEW_OP))
-   (clobber (reg:CC CC_REGNUM))
-   (clobber (match_scratch:GPI 3 "=&r"))]
-  ""
-  {
-    return aarch64_output_sync_insn (insn, operands);
-  }
-  [(set_attr "sync_result"          "0")
-   (set_attr "sync_memory"          "1")
-   (set_attr "sync_new_value"       "2")
-   (set_attr "sync_t1"              "0")
-   (set_attr "sync_t2"              "3")
-   (set_attr "sync_op"              "<optab>")
-   ])
-
-(define_insn "aarch64_sync_new_nand<mode>"
-  [(set (match_operand:GPI 0 "register_operand" "=&r")
-        (unspec_volatile:GPI
-	  [(not:GPI (and:GPI
-                     (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")
-                     (match_operand:GPI 2 "register_operand" "r")))]
-          UNSPECV_SYNC_NEW_OP))
-   (set (match_dup 1)
-        (unspec_volatile:GPI [(match_dup 1) (match_dup 2)]
-	                    UNSPECV_SYNC_NEW_OP))
-   (clobber (reg:CC CC_REGNUM))
-   (clobber (match_scratch:GPI 3 "=&r"))]
-  ""
-  {
-    return aarch64_output_sync_insn (insn, operands);
-  }
-  [(set_attr "sync_result"          "0")
-   (set_attr "sync_memory"          "1")
-   (set_attr "sync_new_value"       "2")
-   (set_attr "sync_t1"              "0")
-   (set_attr "sync_t2"              "3")
-   (set_attr "sync_op"              "nand")
-   ])
-
-(define_insn "aarch64_sync_new_<optab><mode>"
-  [(set (match_operand:SI 0 "register_operand" "=&r")
-        (unspec_volatile:SI
-	  [(syncop:SI
-             (zero_extend:SI
-	       (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))
-               (match_operand:SI 2 "register_operand" "r"))]
-          UNSPECV_SYNC_NEW_OP))
-   (set (match_dup 1)
-        (unspec_volatile:SHORT [(match_dup 1) (match_dup 2)]
-	                       UNSPECV_SYNC_NEW_OP))
-   (clobber (reg:CC CC_REGNUM))
-   (clobber (match_scratch:SI 3 "=&r"))]
-  ""
-  {
-    return aarch64_output_sync_insn (insn, operands);
-  }
-  [(set_attr "sync_result"          "0")
-   (set_attr "sync_memory"          "1")
-   (set_attr "sync_new_value"       "2")
-   (set_attr "sync_t1"              "0")
-   (set_attr "sync_t2"              "3")
-   (set_attr "sync_op"              "<optab>")
-   ])
-
-(define_insn "aarch64_sync_new_nand<mode>"
-  [(set (match_operand:SI 0 "register_operand" "=&r")
-        (unspec_volatile:SI
-	  [(not:SI
-	     (and:SI
-               (zero_extend:SI
-	         (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))
-               (match_operand:SI 2 "register_operand" "r")))
-	  ] UNSPECV_SYNC_NEW_OP))
-   (set (match_dup 1)
-        (unspec_volatile:SHORT [(match_dup 1) (match_dup 2)]
-	                       UNSPECV_SYNC_NEW_OP))
-   (clobber (reg:CC CC_REGNUM))
-   (clobber (match_scratch:SI 3 "=&r"))]
-  ""
-  {
-    return aarch64_output_sync_insn (insn, operands);
-  }
-  [(set_attr "sync_result"          "0")
-   (set_attr "sync_memory"          "1")
-   (set_attr "sync_new_value"       "2")
-   (set_attr "sync_t1"              "0")
-   (set_attr "sync_t2"              "3")
-   (set_attr "sync_op"              "nand")
-   ])
-
-(define_insn "aarch64_sync_old_<optab><mode>"
-  [(set (match_operand:GPI 0 "register_operand" "=&r")
-        (unspec_volatile:GPI
-          [(syncop:GPI
-             (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")
-             (match_operand:GPI 2 "register_operand" "r"))]
-          UNSPECV_SYNC_OLD_OP))
-   (set (match_dup 1)
-        (unspec_volatile:GPI [(match_dup 1) (match_dup 2)]
-	                     UNSPECV_SYNC_OLD_OP))
-   (clobber (reg:CC CC_REGNUM))
-   (clobber (match_scratch:GPI 3 "=&r"))
-   (clobber (match_scratch:GPI 4 "=&r"))]
-  ""
-  {
-    return aarch64_output_sync_insn (insn, operands);
-  }
-  [(set_attr "sync_result"          "0")
-   (set_attr "sync_memory"          "1")
-   (set_attr "sync_new_value"       "2")
-   (set_attr "sync_t1"              "3")
-   (set_attr "sync_t2"              "4")
-   (set_attr "sync_op"              "<optab>")
-   ])
-
-(define_insn "aarch64_sync_old_nand<mode>"
-  [(set (match_operand:GPI 0 "register_operand" "=&r")
-        (unspec_volatile:GPI
-	  [(not:GPI (and:GPI
-                     (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")
-                     (match_operand:GPI 2 "register_operand" "r")))]
-          UNSPECV_SYNC_OLD_OP))
-   (set (match_dup 1)
-        (unspec_volatile:GPI [(match_dup 1) (match_dup 2)]
-	                     UNSPECV_SYNC_OLD_OP))
-   (clobber (reg:CC CC_REGNUM))
-   (clobber (match_scratch:GPI 3 "=&r"))
-   (clobber (match_scratch:GPI 4 "=&r"))]
-  ""
-  {
-    return aarch64_output_sync_insn (insn, operands);
-  }
-  [(set_attr "sync_result"          "0")
-   (set_attr "sync_memory"          "1")
-   (set_attr "sync_new_value"       "2")
-   (set_attr "sync_t1"              "3")
-   (set_attr "sync_t2"              "4")
-   (set_attr "sync_op"              "nand")
-   ])
-
-(define_insn "aarch64_sync_old_<optab><mode>"
-  [(set (match_operand:SI 0 "register_operand" "=&r")
-        (unspec_volatile:SI
-	  [(syncop:SI
-             (zero_extend:SI
-	       (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))
-               (match_operand:SI 2 "register_operand" "r"))]
-           UNSPECV_SYNC_OLD_OP))
-   (set (match_dup 1)
-        (unspec_volatile:SHORT [(match_dup 1) (match_dup 2)]
-	                       UNSPECV_SYNC_OLD_OP))
-   (clobber (reg:CC CC_REGNUM))
-   (clobber (match_scratch:SI 3 "=&r"))
-   (clobber (match_scratch:SI 4 "=&r"))]
-  ""
-  {
-    return aarch64_output_sync_insn (insn, operands);
-  }
-  [(set_attr "sync_result"          "0")
-   (set_attr "sync_memory"          "1")
-   (set_attr "sync_new_value"       "2")
-   (set_attr "sync_t1"              "3")
-   (set_attr "sync_t2"              "4")
-   (set_attr "sync_op"              "<optab>")
-   ])
-
-(define_insn "aarch64_sync_old_nand<mode>"
-  [(set (match_operand:SI 0 "register_operand" "=&r")
-        (unspec_volatile:SI
-	  [(not:SI
-	     (and:SI
-               (zero_extend:SI
-		 (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))
-                 (match_operand:SI 2 "register_operand" "r")))]
-          UNSPECV_SYNC_OLD_OP))
-   (set (match_dup 1)
-        (unspec_volatile:SHORT [(match_dup 1) (match_dup 2)]
-	                       UNSPECV_SYNC_OLD_OP))
-   (clobber (reg:CC CC_REGNUM))
-   (clobber (match_scratch:SI 3 "=&r"))
-   (clobber (match_scratch:SI 4 "=&r"))]
-  ""
-  {
-    return aarch64_output_sync_insn (insn, operands);
-  }
-  [(set_attr "sync_result"          "0")
-   (set_attr "sync_memory"          "1")
-   (set_attr "sync_new_value"       "2")
-   (set_attr "sync_t1"              "3")
-   (set_attr "sync_t2"              "4")
-   (set_attr "sync_op"              "nand")
-   ])
-
-(define_insn "*memory_barrier"
-  [(set (match_operand:BLK 0 "" "")
-	(unspec:BLK [(match_dup 0)] UNSPEC_MB))]
-  ""
-  "dmb\\tish"
-)
-
-(define_insn "sync_lock_release<mode>"
-  [(set (match_operand:ALLI 0 "memory_operand" "+Q")
-  	(unspec_volatile:ALLI [(match_operand:ALLI 1 "register_operand" "r")]
-	                      UNSPECV_SYNC_LOCK_RELEASE))]
-
-  ""
-  {
-    return aarch64_output_sync_lock_release (operands[1], operands[0]);
-  })
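Note: the deleted patterns above all funnel through
aarch64_output_sync_insn and rely on the standalone "dmb ish"
barrier pattern for ordering.  The atomics.md expansion replacing
them emits an exclusive load/store retry loop instead.  As a rough
sketch (register numbers and label are illustrative, not taken from
the patch), an ACQ_REL fetch-and-add is expected to become:

        .Lretry:
        ldaxr   w1, [x0]        // load-acquire exclusive of the old value
        add     w2, w1, w3      // apply the operation
        stlxr   w4, w2, [x0]    // store-release exclusive of the result
        cbnz    w4, .Lretry     // nonzero status: exclusive pair failed, retry

There is no separate dmb; the acquire/release halves of the
exclusive pair carry the ordering, which is exactly what the new
tests below scan for.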
-
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c b/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c
new file mode 100644
index 0000000..1492e25
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c
@@ -0,0 +1,41 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#define STRONG 0
+#define WEAK 1
+int v = 0;
+
+int
+atomic_compare_exchange_STRONG_RELEASE_ACQUIRE (int a, int b)
+{
+  return __atomic_compare_exchange (&v, &a, &b,
+				    STRONG, __ATOMIC_RELEASE,
+				    __ATOMIC_ACQUIRE);
+}
+
+int
+atomic_compare_exchange_WEAK_RELEASE_ACQUIRE (int a, int b)
+{
+  return __atomic_compare_exchange (&v, &a, &b,
+				    WEAK, __ATOMIC_RELEASE,
+				    __ATOMIC_ACQUIRE);
+}
+
+int
+atomic_compare_exchange_n_STRONG_RELEASE_ACQUIRE (int a, int b)
+{
+  return __atomic_compare_exchange_n (&v, &a, b,
+				      STRONG, __ATOMIC_RELEASE,
+				      __ATOMIC_ACQUIRE);
+}
+
+int
+atomic_compare_exchange_n_WEAK_RELEASE_ACQUIRE (int a, int b)
+{
+  return __atomic_compare_exchange_n (&v, &a, b,
+				      WEAK, __ATOMIC_RELEASE,
+				      __ATOMIC_ACQUIRE);
+}
+
+/* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 4 } } */
+/* { dg-final { scan-assembler-times "stlxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 4 } } */
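Note: each of the four functions above is expected to expand to one
exclusive pair, hence the counts of 4.  In a sketch (names
illustrative), the acquire load covers the __ATOMIC_ACQUIRE failure
ordering and the release store the __ATOMIC_RELEASE success
ordering:

        .Lretry:
        ldaxr   w3, [x0]        // load-acquire the current value
        cmp     w3, w1          // compare with the expected value
        b.ne    .Lfail          // mismatch: fail without storing
        stlxr   w4, w2, [x0]    // store-release the desired value
        cbnz    w4, .Lretry     // STRONG retries; WEAK may simply fail
        .Lfail: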
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c
new file mode 100644
index 0000000..be6682f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c
@@ -0,0 +1,43 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int v = 0;
+
+int
+atomic_fetch_add_ACQ_REL (int a)
+{
+  return __atomic_fetch_add (&v, a, __ATOMIC_ACQ_REL);
+}
+
+int
+atomic_fetch_sub_ACQ_REL (int a)
+{
+  return __atomic_fetch_sub (&v, a, __ATOMIC_ACQ_REL);
+}
+
+int
+atomic_fetch_and_ACQ_REL (int a)
+{
+  return __atomic_fetch_and (&v, a, __ATOMIC_ACQ_REL);
+}
+
+int
+atomic_fetch_nand_ACQ_REL (int a)
+{
+  return __atomic_fetch_nand (&v, a, __ATOMIC_ACQ_REL);
+}
+
+int
+atomic_fetch_xor_ACQ_REL (int a)
+{
+  return __atomic_fetch_xor (&v, a, __ATOMIC_ACQ_REL);
+}
+
+int
+atomic_fetch_or_ACQ_REL (int a)
+{
+  return __atomic_fetch_or (&v, a, __ATOMIC_ACQ_REL);
+}
+
+/* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
+/* { dg-final { scan-assembler-times "stlxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
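Note: taken together, the atomic-op-* tests in this series pin down
the memory-order-to-instruction mapping through their scan-assembler
pairs:

        __ATOMIC_RELAXED, __ATOMIC_CONSUME  ->  ldxr  / stxr
        __ATOMIC_ACQUIRE                    ->  ldaxr / stxr
        __ATOMIC_RELEASE                    ->  ldxr  / stlxr
        __ATOMIC_ACQ_REL, __ATOMIC_SEQ_CST  ->  ldaxr / stlxr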
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c
new file mode 100644
index 0000000..023797e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c
@@ -0,0 +1,43 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int v = 0;
+
+int
+atomic_fetch_add_ACQUIRE (int a)
+{
+  return __atomic_fetch_add (&v, a, __ATOMIC_ACQUIRE);
+}
+
+int
+atomic_fetch_sub_ACQUIRE (int a)
+{
+  return __atomic_fetch_sub (&v, a, __ATOMIC_ACQUIRE);
+}
+
+int
+atomic_fetch_and_ACQUIRE (int a)
+{
+  return __atomic_fetch_and (&v, a, __ATOMIC_ACQUIRE);
+}
+
+int
+atomic_fetch_nand_ACQUIRE (int a)
+{
+  return __atomic_fetch_nand (&v, a, __ATOMIC_ACQUIRE);
+}
+
+int
+atomic_fetch_xor_ACQUIRE (int a)
+{
+  return __atomic_fetch_xor (&v, a, __ATOMIC_ACQUIRE);
+}
+
+int
+atomic_fetch_or_ACQUIRE (int a)
+{
+  return __atomic_fetch_or (&v, a, __ATOMIC_ACQUIRE);
+}
+
+/* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
+/* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c
new file mode 100644
index 0000000..8dcc4c8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c
@@ -0,0 +1,43 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+char v = 0;
+
+char
+atomic_fetch_add_RELAXED (char a)
+{
+  return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
+}
+
+char
+atomic_fetch_sub_RELAXED (char a)
+{
+  return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
+}
+
+char
+atomic_fetch_and_RELAXED (char a)
+{
+  return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
+}
+
+char
+atomic_fetch_nand_RELAXED (char a)
+{
+  return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
+}
+
+char
+atomic_fetch_xor_RELAXED (char a)
+{
+  return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
+}
+
+char
+atomic_fetch_or_RELAXED (char a)
+{
+  return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
+}
+
+/* { dg-final { scan-assembler-times "ldxrb\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
+/* { dg-final { scan-assembler-times "stxrb\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
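Note: for the sub-word types the same retry loop is expected, but
using the byte-sized exclusives, with the value kept zero-extended
in a w register (as the deleted SHORT patterns above also did).
Sketch, register numbers illustrative:

        .Lretry:
        ldxrb   w1, [x0]        // exclusive byte load, zero-extended
        add     w2, w1, w3      // operate on the widened value
        stxrb   w4, w2, [x0]    // exclusive store of the low 8 bits
        cbnz    w4, .Lretry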
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
new file mode 100644
index 0000000..e3afde2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
@@ -0,0 +1,43 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int v = 0;
+
+int
+atomic_fetch_add_CONSUME (int a)
+{
+  return __atomic_fetch_add (&v, a, __ATOMIC_CONSUME);
+}
+
+int
+atomic_fetch_sub_CONSUME (int a)
+{
+  return __atomic_fetch_sub (&v, a, __ATOMIC_CONSUME);
+}
+
+int
+atomic_fetch_and_CONSUME (int a)
+{
+  return __atomic_fetch_and (&v, a, __ATOMIC_CONSUME);
+}
+
+int
+atomic_fetch_nand_CONSUME (int a)
+{
+  return __atomic_fetch_nand (&v, a, __ATOMIC_CONSUME);
+}
+
+int
+atomic_fetch_xor_CONSUME (int a)
+{
+  return __atomic_fetch_xor (&v, a, __ATOMIC_CONSUME);
+}
+
+int
+atomic_fetch_or_CONSUME (int a)
+{
+  return __atomic_fetch_or (&v, a, __ATOMIC_CONSUME);
+}
+
+/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
+/* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
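Note: __ATOMIC_CONSUME is scanned for as plain ldxr/stxr here, i.e.
the same sequence as __ATOMIC_RELAXED.  Presumably the address
dependency out of the exclusive load is taken to satisfy consume
ordering on AArch64, so no acquire (ldaxr) is required.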
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c
new file mode 100644
index 0000000..6c6f7e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c
@@ -0,0 +1,78 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int v = 0;
+
+int
+atomic_fetch_add_RELAXED ()
+{
+  return __atomic_fetch_add (&v, 4096, __ATOMIC_RELAXED);
+}
+
+int
+atomic_fetch_sub_ACQUIRE ()
+{
+  return __atomic_fetch_sub (&v, 4096, __ATOMIC_ACQUIRE);
+}
+
+int
+atomic_fetch_and_SEQ_CST ()
+{
+  return __atomic_fetch_and (&v, 4096, __ATOMIC_SEQ_CST);
+}
+
+int
+atomic_fetch_nand_ACQ_REL ()
+{
+  return __atomic_fetch_nand (&v, 4096, __ATOMIC_ACQ_REL);
+}
+
+int
+atomic_fetch_xor_CONSUME ()
+{
+  return __atomic_fetch_xor (&v, 4096, __ATOMIC_CONSUME);
+}
+
+int
+atomic_fetch_or_RELAXED ()
+{
+  return __atomic_fetch_or (&v, 4096, __ATOMIC_RELAXED);
+}
+
+int
+atomic_add_fetch_ACQUIRE ()
+{
+  return __atomic_add_fetch (&v, 4096, __ATOMIC_ACQUIRE);
+}
+
+int
+atomic_sub_fetch_RELAXED ()
+{
+  return __atomic_sub_fetch (&v, 4096, __ATOMIC_RELAXED);
+}
+
+int
+atomic_and_fetch_SEQ_CST ()
+{
+  return __atomic_and_fetch (&v, 4096, __ATOMIC_SEQ_CST);
+}
+
+int
+atomic_nand_fetch_ACQUIRE ()
+{
+  return __atomic_nand_fetch (&v, 4096, __ATOMIC_ACQUIRE);
+}
+
+int
+atomic_xor_fetch_RELEASE ()
+{
+  return __atomic_xor_fetch (&v, 4096, __ATOMIC_RELEASE);
+}
+
+int
+atomic_or_fetch_CONSUME ()
+{
+  return __atomic_or_fetch (&v, 4096, __ATOMIC_CONSUME);
+}
+
+/* { dg-final { scan-assembler-times "\tw\[0-9\]+, w\[0-9\]+, #*4096" 12 } } */
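Note: this test counts the immediate operand rather than the
exclusives.  4096 (1 << 12) is encodable both as an arithmetic
immediate and as a logical bitmask immediate, so all twelve
functions should fold the constant directly into the
data-processing instruction inside the loop, e.g. (illustrative):

        add     w2, w1, #4096   // no separate constant move needed

The "#*" in the pattern tolerates assemblers printing the immediate
with or without a leading '#'.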
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c
new file mode 100644
index 0000000..065ccf5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c
@@ -0,0 +1,43 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int v = 0;
+
+int
+atomic_fetch_add_RELAXED (int a)
+{
+  return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
+}
+
+int
+atomic_fetch_sub_RELAXED (int a)
+{
+  return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
+}
+
+int
+atomic_fetch_and_RELAXED (int a)
+{
+  return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
+}
+
+int
+atomic_fetch_nand_RELAXED (int a)
+{
+  return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
+}
+
+int
+atomic_fetch_xor_RELAXED (int a)
+{
+  return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
+}
+
+int
+atomic_fetch_or_RELAXED (int a)
+{
+  return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
+}
+
+/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
+/* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c
new file mode 100644
index 0000000..9468ef4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c
@@ -0,0 +1,43 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+long v = 0;
+
+long
+atomic_fetch_add_RELAXED (long a)
+{
+  return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
+}
+
+long
+atomic_fetch_sub_RELAXED (long a)
+{
+  return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
+}
+
+long
+atomic_fetch_and_RELAXED (long a)
+{
+  return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
+}
+
+long
+atomic_fetch_nand_RELAXED (long a)
+{
+  return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
+}
+
+long
+atomic_fetch_xor_RELAXED (long a)
+{
+  return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
+}
+
+long
+atomic_fetch_or_RELAXED (long a)
+{
+  return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
+}
+
+/* { dg-final { scan-assembler-times "ldxr\tx\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
+/* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, x\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
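Note: for 64-bit data the exclusives take the value in x registers,
while the status result of stxr is always a w register, which is
what the two patterns above encode.  Sketch (names illustrative):

        .Lretry:
        ldxr    x1, [x0]        // 64-bit exclusive load
        add     x2, x1, x3      // 64-bit operation
        stxr    w4, x2, [x0]    // status result is 32-bit
        cbnz    w4, .Lretry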
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c
new file mode 100644
index 0000000..065ccf5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c
@@ -0,0 +1,43 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int v = 0;
+
+int
+atomic_fetch_add_RELAXED (int a)
+{
+  return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
+}
+
+int
+atomic_fetch_sub_RELAXED (int a)
+{
+  return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
+}
+
+int
+atomic_fetch_and_RELAXED (int a)
+{
+  return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
+}
+
+int
+atomic_fetch_nand_RELAXED (int a)
+{
+  return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
+}
+
+int
+atomic_fetch_xor_RELAXED (int a)
+{
+  return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
+}
+
+int
+atomic_fetch_or_RELAXED (int a)
+{
+  return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
+}
+
+/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
+/* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c
new file mode 100644
index 0000000..3d8c49c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c
@@ -0,0 +1,43 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int v = 0;
+
+int
+atomic_fetch_add_RELEASE (int a)
+{
+  return __atomic_fetch_add (&v, a, __ATOMIC_RELEASE);
+}
+
+int
+atomic_fetch_sub_RELEASE (int a)
+{
+  return __atomic_fetch_sub (&v, a, __ATOMIC_RELEASE);
+}
+
+int
+atomic_fetch_and_RELEASE (int a)
+{
+  return __atomic_fetch_and (&v, a, __ATOMIC_RELEASE);
+}
+
+int
+atomic_fetch_nand_RELEASE (int a)
+{
+  return __atomic_fetch_nand (&v, a, __ATOMIC_RELEASE);
+}
+
+int
+atomic_fetch_xor_RELEASE (int a)
+{
+  return __atomic_fetch_xor (&v, a, __ATOMIC_RELEASE);
+}
+
+int
+atomic_fetch_or_RELEASE (int a)
+{
+  return __atomic_fetch_or (&v, a, __ATOMIC_RELEASE);
+}
+
+/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
+/* { dg-final { scan-assembler-times "stlxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c
new file mode 100644
index 0000000..a8ad4f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c
@@ -0,0 +1,43 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int v = 0;
+
+int
+atomic_fetch_add_SEQ_CST (int a)
+{
+  return __atomic_fetch_add (&v, a, __ATOMIC_SEQ_CST);
+}
+
+int
+atomic_fetch_sub_SEQ_CST (int a)
+{
+  return __atomic_fetch_sub (&v, a, __ATOMIC_SEQ_CST);
+}
+
+int
+atomic_fetch_and_SEQ_CST (int a)
+{
+  return __atomic_fetch_and (&v, a, __ATOMIC_SEQ_CST);
+}
+
+int
+atomic_fetch_nand_SEQ_CST (int a)
+{
+  return __atomic_fetch_nand (&v, a, __ATOMIC_SEQ_CST);
+}
+
+int
+atomic_fetch_xor_SEQ_CST (int a)
+{
+  return __atomic_fetch_xor (&v, a, __ATOMIC_SEQ_CST);
+}
+
+int
+atomic_fetch_or_SEQ_CST (int a)
+{
+  return __atomic_fetch_or (&v, a, __ATOMIC_SEQ_CST);
+}
+
+/* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
+/* { dg-final { scan-assembler-times "stlxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c
new file mode 100644
index 0000000..30db340
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c
@@ -0,0 +1,43 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+short v = 0;
+
+short
+atomic_fetch_add_RELAXED (short a)
+{
+  return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED);
+}
+
+short
+atomic_fetch_sub_RELAXED (short a)
+{
+  return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED);
+}
+
+short
+atomic_fetch_and_RELAXED (short a)
+{
+  return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED);
+}
+
+short
+atomic_fetch_nand_RELAXED (short a)
+{
+  return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED);
+}
+
+short
+atomic_fetch_xor_RELAXED (short a)
+{
+  return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED);
+}
+
+short
+atomic_fetch_or_RELAXED (short a)
+{
+  return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED);
+}
+
+/* { dg-final { scan-assembler-times "ldxrh\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */
+/* { dg-final { scan-assembler-times "stxrh\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */