
[ARC] Add support for atomic memory built-in.

Message ID 1447669119-17513-1-git-send-email-claziss@synopsys.com
State New

Commit Message

Claudiu Zissulescu Nov. 16, 2015, 10:18 a.m. UTC
This patch adds support for the atomic memory built-in functions for ARCHS and ARC700. Tested with dg.exp.
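
For reference, here is a minimal sketch (not part of the patch; the
command line is my assumption) of the kind of C code these built-ins
enable, e.g. compiled with "arc-elf32-gcc -mcpu=ARC700 -matomic -O2":

/* Atomic increment: expands to an llock/scond loop via the
   atomic_add_fetchsi pattern.  */
int
add_and_fetch (int *counter, int n)
{
  return __atomic_add_fetch (counter, n, __ATOMIC_SEQ_CST);
}

/* Strong compare-and-swap: expands via atomic_compare_and_swapsi.  */
int
compare_swap (int *p, int expected, int desired)
{
  return __atomic_compare_exchange_n (p, &expected, desired,
                                      0 /* strong */, __ATOMIC_ACQ_REL,
                                      __ATOMIC_ACQUIRE);
}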

OK to apply?

Thanks,
Claudiu

ChangeLogs:
gcc/

2015-11-12  Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc-protos.h (arc_expand_atomic_op): Prototype.
	(arc_split_compare_and_swap): Likewise.
	(arc_expand_compare_and_swap): Likewise.
	* config/arc/arc.c (arc_init): Check usage atomic option.
	(arc_pre_atomic_barrier): New function.
	(arc_post_atomic_barrier): Likewise.
	(emit_unlikely_jump): Likewise.
	(arc_expand_compare_and_swap_qh): Likewise.
	(arc_expand_compare_and_swap): Likewise.
	(arc_split_compare_and_swap): Likewise.
	(arc_expand_atomic_op): Likewise.
	* config/arc/arc.h (TARGET_CPU_CPP_BUILTINS): New C macro.
	(ASM_SPEC): Enable mlock option when matomic is used.
	* config/arc/arc.md (UNSPEC_ARC_MEMBAR): Define.
	(VUNSPEC_ARC_CAS): Likewise.
	(VUNSPEC_ARC_LL): Likewise.
	(VUNSPEC_ARC_SC): Likewise.
	(VUNSPEC_ARC_EX): Likewise.
	* config/arc/arc.opt (matomic): New option.
	* config/arc/constraints.md (ATO): New constraint.
	* config/arc/predicates.md (mem_noofs_operand): New predicate.
	* doc/invoke.texi: Document -matomic.
	* config/arc/atomic.md: New file.

gcc/testsuite

2015-11-12  Claudiu Zissulescu  <claziss@synopsys.com>

	* lib/target-supports.exp (check_effective_target_arc_atomic): New
	function.
	(check_effective_target_sync_int_long): Add checks for ARC atomic
	feature.
	(check_effective_target_sync_char_short): Likewise.
---
 gcc/config/arc/arc-protos.h           |   4 +
 gcc/config/arc/arc.c                  | 391 ++++++++++++++++++++++++++++++++++
 gcc/config/arc/arc.h                  |   6 +-
 gcc/config/arc/arc.md                 |   9 +
 gcc/config/arc/arc.opt                |   3 +
 gcc/config/arc/atomic.md              | 235 ++++++++++++++++++++
 gcc/config/arc/constraints.md         |   6 +
 gcc/config/arc/predicates.md          |   4 +
 gcc/doc/invoke.texi                   |   8 +-
 gcc/testsuite/lib/target-supports.exp |  11 +
 10 files changed, 675 insertions(+), 2 deletions(-)
 create mode 100644 gcc/config/arc/atomic.md

Comments

Claudiu Zissulescu Ianculescu Nov. 30, 2015, 4:33 p.m. UTC | #1
Ping. This patch has been waiting for review for two weeks.

Thanks,
Claudiu

On Mon, Nov 16, 2015 at 11:18 AM, Claudiu Zissulescu
<Claudiu.Zissulescu@synopsys.com> wrote:
> This patch adds support for the atomic memory built-in functions for ARCHS and ARC700. Tested with dg.exp.
>
> OK to apply?
>
> Thanks,
> Claudiu
>
> ChangeLogs:
> gcc/
>
> 2015-11-12  Claudiu Zissulescu  <claziss@synopsys.com>
>
>         * config/arc/arc-protos.h (arc_expand_atomic_op): Prototype.
>         (arc_split_compare_and_swap): Likewise.
>         (arc_expand_compare_and_swap): Likewise.
>         * config/arc/arc.c (arc_init): Check usage atomic option.
>         (arc_pre_atomic_barrier): New function.
>         (arc_post_atomic_barrier): Likewise.
>         (emit_unlikely_jump): Likewise.
>         (arc_expand_compare_and_swap_qh): Likewise.
>         (arc_expand_compare_and_swap): Likewise.
>         (arc_split_compare_and_swap): Likewise.
>         (arc_expand_atomic_op): Likewise.
>         * config/arc/arc.h (TARGET_CPU_CPP_BUILTINS): New C macro.
>         (ASM_SPEC): Enable mlock option when matomic is used.
>         * config/arc/arc.md (UNSPEC_ARC_MEMBAR): Define.
>         (VUNSPEC_ARC_CAS): Likewise.
>         (VUNSPEC_ARC_LL): Likewise.
>         (VUNSPEC_ARC_SC): Likewise.
>         (VUNSPEC_ARC_EX): Likewise.
>         * config/arc/arc.opt (matomic): New option.
>         * config/arc/constraints.md (ATO): New constraint.
>         * config/arc/predicates.md (mem_noofs_operand): New predicate.
>         * doc/invoke.texi: Document -matomic.
>         * config/arc/atomic.md: New file.
>
> gcc/testsuite
>
> 2015-11-12  Claudiu Zissulescu  <claziss@synopsys.com>
>
>         * lib/target-supports.exp (check_effective_target_arc_atomic): New
>         function.
>         (check_effective_target_sync_int_long): Add checks for ARC atomic
>         feature.
>         (check_effective_target_sync_char_short): Likewise.
> ---
>  gcc/config/arc/arc-protos.h           |   4 +
>  gcc/config/arc/arc.c                  | 391 ++++++++++++++++++++++++++++++++++
>  gcc/config/arc/arc.h                  |   6 +-
>  gcc/config/arc/arc.md                 |   9 +
>  gcc/config/arc/arc.opt                |   3 +
>  gcc/config/arc/atomic.md              | 235 ++++++++++++++++++++
>  gcc/config/arc/constraints.md         |   6 +
>  gcc/config/arc/predicates.md          |   4 +
>  gcc/doc/invoke.texi                   |   8 +-
>  gcc/testsuite/lib/target-supports.exp |  11 +
>  10 files changed, 675 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/config/arc/atomic.md
>
> diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
> index 6e04351..3581bb0 100644
> --- a/gcc/config/arc/arc-protos.h
> +++ b/gcc/config/arc/arc-protos.h
> @@ -41,6 +41,10 @@ extern int arc_output_commutative_cond_exec (rtx *operands, bool);
>  extern bool arc_expand_movmem (rtx *operands);
>  extern bool prepare_move_operands (rtx *operands, machine_mode mode);
>  extern void emit_shift (enum rtx_code, rtx, rtx, rtx);
> +extern void arc_expand_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
> +extern void arc_split_compare_and_swap (rtx *);
> +extern void arc_expand_compare_and_swap (rtx *);
> +
>  #endif /* RTX_CODE */
>
>  #ifdef TREE_CODE
> diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
> index 8bb0969..d47bbe4 100644
> --- a/gcc/config/arc/arc.c
> +++ b/gcc/config/arc/arc.c
> @@ -61,6 +61,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "context.h"
>  #include "builtins.h"
>  #include "rtl-iter.h"
> +#include "alias.h"
>
>  /* Which cpu we're compiling for (ARC600, ARC601, ARC700).  */
>  static const char *arc_cpu_string = "";
> @@ -884,6 +885,9 @@ arc_init (void)
>        flag_pic = 0;
>      }
>
> +  if (TARGET_ATOMIC && !(TARGET_ARC700 || TARGET_HS))
> +    error ("-matomic is only supported for ARC700 or ARC HS cores");
> +
>    arc_init_reg_tables ();
>
>    /* Initialize array for PRINT_OPERAND_PUNCT_VALID_P.  */
> @@ -9650,6 +9654,393 @@ arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
>    return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
>  }
>
> +/* Emit a (pre) memory barrier around an atomic sequence according to
> +   MODEL.  */
> +
> +static void
> +arc_pre_atomic_barrier (enum memmodel model)
> +{
> + switch (model & MEMMODEL_MASK)
> +    {
> +    case MEMMODEL_RELAXED:
> +    case MEMMODEL_CONSUME:
> +    case MEMMODEL_ACQUIRE:
> +    case MEMMODEL_SYNC_ACQUIRE:
> +      break;
> +    case MEMMODEL_RELEASE:
> +    case MEMMODEL_ACQ_REL:
> +    case MEMMODEL_SYNC_RELEASE:
> +      emit_insn (gen_membar (const0_rtx));
> +      break;
> +    case MEMMODEL_SEQ_CST:
> +    case MEMMODEL_SYNC_SEQ_CST:
> +      emit_insn (gen_sync (const1_rtx));
> +      break;
> +    default:
> +      gcc_unreachable ();
> +    }
> +}
> +
> +/* Emit a (post) memory barrier around an atomic sequence according to
> +   MODEL.  */
> +
> +static void
> +arc_post_atomic_barrier (enum memmodel model)
> +{
> + switch (model & MEMMODEL_MASK)
> +    {
> +    case MEMMODEL_RELAXED:
> +    case MEMMODEL_CONSUME:
> +    case MEMMODEL_RELEASE:
> +    case MEMMODEL_SYNC_RELEASE:
> +      break;
> +    case MEMMODEL_ACQUIRE:
> +    case MEMMODEL_ACQ_REL:
> +    case MEMMODEL_SYNC_ACQUIRE:
> +      emit_insn (gen_membar (const0_rtx));
> +      break;
> +    case MEMMODEL_SEQ_CST:
> +    case MEMMODEL_SYNC_SEQ_CST:
> +      emit_insn (gen_sync (const1_rtx));
> +      break;
> +    default:
> +      gcc_unreachable ();
> +    }
> +}
> +
> +/* Expand a compare and swap pattern.  */
> +
> +static void
> +emit_unlikely_jump (rtx insn)
> +{
> +  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
> +
> +  insn = emit_jump_insn (insn);
> +  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
> +}
> +
> +/* Expand code to perform a 8 or 16-bit compare and swap by doing
> +   32-bit compare and swap on the word containing the byte or
> +   half-word.  The difference between a weak and a strong CAS is that
> +   the weak version may simply fail.  The strong version relays on two
> +   loops, one checks if the SCOND op is succsfully or not, the other
> +   checks if the 32 bit accessed location which contains the 8 or 16
> +   bit datum is not changed by other thread.  The first loop is
> +   implemented by the atomic_compare_and_swapsi_1 pattern.  The second
> +   loops is implemented by this routine.  */
> +
> +static void
> +arc_expand_compare_and_swap_qh (rtx bool_result, rtx result, rtx mem,
> +                               rtx oldval, rtx newval, rtx weak,
> +                               rtx mod_s, rtx mod_f)
> +{
> +  rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
> +  rtx addr = gen_reg_rtx (Pmode);
> +  rtx off = gen_reg_rtx (SImode);
> +  rtx oldv = gen_reg_rtx (SImode);
> +  rtx newv = gen_reg_rtx (SImode);
> +  rtx oldvalue = gen_reg_rtx (SImode);
> +  rtx newvalue = gen_reg_rtx (SImode);
> +  rtx res = gen_reg_rtx (SImode);
> +  rtx resv = gen_reg_rtx (SImode);
> +  rtx memsi, val, mask, end_label, loop_label, cc, x;
> +  machine_mode mode;
> +  bool is_weak = (weak != const0_rtx);
> +
> +  /* Truncate the address.  */
> +  emit_insn (gen_rtx_SET (addr,
> +                         gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
> +
> +  /* Compute the datum offset.  */
> +  emit_insn (gen_rtx_SET (off,
> +                         gen_rtx_AND (SImode, addr1, GEN_INT (3))));
> +  if (TARGET_BIG_ENDIAN)
> +    emit_insn (gen_rtx_SET (off,
> +                           gen_rtx_MINUS (SImode,
> +                                          (GET_MODE (mem) == QImode) ?
> +                                          GEN_INT (3) : GEN_INT (2), off)));
> +
> +  /* Normal read from truncated address.  */
> +  memsi = gen_rtx_MEM (SImode, addr);
> +  set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
> +  MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
> +
> +  val = copy_to_reg (memsi);
> +
> +  /* Convert the offset in bits.  */
> +  emit_insn (gen_rtx_SET (off,
> +                         gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
> +
> +  /* Get the proper mask.  */
> +  if (GET_MODE (mem) == QImode)
> +    mask = force_reg (SImode, GEN_INT (0xff));
> +  else
> +    mask = force_reg (SImode, GEN_INT (0xffff));
> +
> +  emit_insn (gen_rtx_SET (mask,
> +                         gen_rtx_ASHIFT (SImode, mask, off)));
> +
> +  /* Prepare the old and new values.  */
> +  emit_insn (gen_rtx_SET (val,
> +                         gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
> +                                      val)));
> +
> +  oldval = gen_lowpart (SImode, oldval);
> +  emit_insn (gen_rtx_SET (oldv,
> +                         gen_rtx_ASHIFT (SImode, oldval, off)));
> +
> +  newval = gen_lowpart_common (SImode, newval);
> +  emit_insn (gen_rtx_SET (newv,
> +                         gen_rtx_ASHIFT (SImode, newval, off)));
> +
> +  emit_insn (gen_rtx_SET (oldv,
> +                         gen_rtx_AND (SImode, oldv, mask)));
> +
> +  emit_insn (gen_rtx_SET (newv,
> +                         gen_rtx_AND (SImode, newv, mask)));
> +
> +  if (!is_weak)
> +    {
> +      end_label = gen_label_rtx ();
> +      loop_label = gen_label_rtx ();
> +      emit_label (loop_label);
> +    }
> +
> +  /* Make the old and new values.  */
> +  emit_insn (gen_rtx_SET (oldvalue,
> +                         gen_rtx_IOR (SImode, oldv, val)));
> +
> +  emit_insn (gen_rtx_SET (newvalue,
> +                         gen_rtx_IOR (SImode, newv, val)));
> +
> +  /* Try an 32bit atomic compare and swap.  It clobbers the CC
> +     register.  */
> +  emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue,
> +                                             weak, mod_s, mod_f));
> +
> +  /* Regardless of the weakness of the operation, a proper boolean
> +     result needs to be provided.  */
> +  x = gen_rtx_REG (CC_Zmode, CC_REG);
> +  x = gen_rtx_EQ (SImode, x, const0_rtx);
> +  emit_insn (gen_rtx_SET (bool_result, x));
> +
> +  if (!is_weak)
> +    {
> +      /* Check the results: if the atomic op is successfully the goto
> +        to end label.  */
> +      x = gen_rtx_REG (CC_Zmode, CC_REG);
> +      x = gen_rtx_EQ (VOIDmode, x, const0_rtx);
> +      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
> +                               gen_rtx_LABEL_REF (Pmode, end_label), pc_rtx);
> +      emit_jump_insn (gen_rtx_SET (pc_rtx, x));
> +
> +      /* Wait for the right moment when the accessed 32-bit location
> +        is stable.  */
> +      emit_insn (gen_rtx_SET (resv,
> +                             gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
> +                                          res)));
> +      mode = SELECT_CC_MODE (NE, resv, val);
> +      cc = gen_rtx_REG (mode, CC_REG);
> +      emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, resv, val)));
> +
> +      /* Set the new value of the 32 bit location, proper masked.  */
> +      emit_insn (gen_rtx_SET (val, resv));
> +
> +      /* Try again if location is unstable.  Fall through if only
> +        scond op failed.  */
> +      x = gen_rtx_NE (VOIDmode, cc, const0_rtx);
> +      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
> +                               gen_rtx_LABEL_REF (Pmode, loop_label), pc_rtx);
> +      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
> +
> +      emit_label (end_label);
> +    }
> +
> +  /* End: proper return the result for the given mode.  */
> +  emit_insn (gen_rtx_SET (res,
> +                         gen_rtx_AND (SImode, res, mask)));
> +
> +  emit_insn (gen_rtx_SET (res,
> +                         gen_rtx_LSHIFTRT (SImode, res, off)));
> +
> +  emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
> +}
> +
> +/* Helper function used by "atomic_compare_and_swap" expand
> +   pattern.  */
> +
> +void
> +arc_expand_compare_and_swap (rtx operands[])
> +{
> +  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
> +  machine_mode mode;
> +
> +  bval = operands[0];
> +  rval = operands[1];
> +  mem = operands[2];
> +  oldval = operands[3];
> +  newval = operands[4];
> +  is_weak = operands[5];
> +  mod_s = operands[6];
> +  mod_f = operands[7];
> +  mode = GET_MODE (mem);
> +
> +  if (reg_overlap_mentioned_p (rval, oldval))
> +    oldval = copy_to_reg (oldval);
> +
> +  if (mode == SImode)
> +    {
> +      emit_insn (gen_atomic_compare_and_swapsi_1 (rval, mem, oldval, newval,
> +                                                 is_weak, mod_s, mod_f));
> +      x = gen_rtx_REG (CC_Zmode, CC_REG);
> +      x = gen_rtx_EQ (SImode, x, const0_rtx);
> +      emit_insn (gen_rtx_SET (bval, x));
> +    }
> +  else
> +    {
> +      arc_expand_compare_and_swap_qh (bval, rval, mem, oldval, newval,
> +                                     is_weak, mod_s, mod_f);
> +    }
> +}
> +
> +/* Helper function used by the "atomic_compare_and_swapsi_1"
> +   pattern.  */
> +
> +void
> +arc_split_compare_and_swap (rtx operands[])
> +{
> +  rtx rval, mem, oldval, newval;
> +  machine_mode mode;
> +  enum memmodel mod_s, mod_f;
> +  bool is_weak;
> +  rtx label1, label2, x, cond;
> +
> +  rval = operands[0];
> +  mem = operands[1];
> +  oldval = operands[2];
> +  newval = operands[3];
> +  is_weak = (operands[4] != const0_rtx);
> +  mod_s = (enum memmodel) INTVAL (operands[5]);
> +  mod_f = (enum memmodel) INTVAL (operands[6]);
> +  mode = GET_MODE (mem);
> +
> +  /* ARC atomic ops work only with 32-bit aligned memories.  */
> +  gcc_assert (mode == SImode);
> +
> +  arc_pre_atomic_barrier (mod_s);
> +
> +  label1 = NULL_RTX;
> +  if (!is_weak)
> +    {
> +      label1 = gen_label_rtx ();
> +      emit_label (label1);
> +    }
> +  label2 = gen_label_rtx ();
> +
> +  /* Load exclusive.  */
> +  emit_insn (gen_arc_load_exclusivesi (rval, mem));
> +
> +  /* Check if it is oldval.  */
> +  mode = SELECT_CC_MODE (NE, rval, oldval);
> +  cond = gen_rtx_REG (mode, CC_REG);
> +  emit_insn (gen_rtx_SET (cond, gen_rtx_COMPARE (mode, rval, oldval)));
> +
> +  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
> +  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
> +                           gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
> +  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
> +
> +  /* Exclusively store new item.  Store clobbers CC reg.  */
> +  emit_insn (gen_arc_store_exclusivesi (mem, newval));
> +
> +  if (!is_weak)
> +    {
> +      /* Check the result of the store.  */
> +      cond = gen_rtx_REG (CC_Zmode, CC_REG);
> +      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
> +      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
> +                               gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
> +      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
> +    }
> +
> +  if (mod_f != MEMMODEL_RELAXED)
> +    emit_label (label2);
> +
> +  arc_post_atomic_barrier (mod_s);
> +
> +  if (mod_f == MEMMODEL_RELAXED)
> +    emit_label (label2);
> +}
> +
> +/* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
> +   to perform.  MEM is the memory on which to operate.  VAL is the second
> +   operand of the binary operator.  BEFORE and AFTER are optional locations to
> +   return the value of MEM either before of after the operation.  MODEL_RTX
> +   is a CONST_INT containing the memory model to use.  */
> +
> +void
> +arc_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
> +                        rtx orig_before, rtx orig_after, rtx model_rtx)
> +{
> +  enum memmodel model = (enum memmodel) INTVAL (model_rtx);
> +  machine_mode mode = GET_MODE (mem);
> +  rtx label, x, cond;
> +  rtx before = orig_before, after = orig_after;
> +
> +  /* ARC atomic ops work only with 32-bit aligned memories.  */
> +  gcc_assert (mode == SImode);
> +
> +  arc_pre_atomic_barrier (model);
> +
> +  label = gen_label_rtx ();
> +  emit_label (label);
> +  label = gen_rtx_LABEL_REF (VOIDmode, label);
> +
> +  if (before == NULL_RTX)
> +    before = gen_reg_rtx (mode);
> +
> +  if (after == NULL_RTX)
> +    after = gen_reg_rtx (mode);
> +
> +  /* Load exclusive.  */
> +  emit_insn (gen_arc_load_exclusivesi (before, mem));
> +
> +  switch (code)
> +    {
> +    case NOT:
> +      x = gen_rtx_AND (mode, before, val);
> +      emit_insn (gen_rtx_SET (after, x));
> +      x = gen_rtx_NOT (mode, after);
> +      emit_insn (gen_rtx_SET (after, x));
> +      break;
> +
> +    case MINUS:
> +      if (CONST_INT_P (val))
> +       {
> +         val = GEN_INT (-INTVAL (val));
> +         code = PLUS;
> +       }
> +
> +      /* FALLTHRU.  */
> +    default:
> +      x = gen_rtx_fmt_ee (code, mode, before, val);
> +      emit_insn (gen_rtx_SET (after, x));
> +      break;
> +   }
> +
> +  /* Exclusively store new item.  Store clobbers CC reg.  */
> +  emit_insn (gen_arc_store_exclusivesi (mem, after));
> +
> +  /* Check the result of the store.  */
> +  cond = gen_rtx_REG (CC_Zmode, CC_REG);
> +  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
> +  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
> +                           label, pc_rtx);
> +  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
> +
> +  arc_post_atomic_barrier (model);
> +}
> +
>  struct gcc_target targetm = TARGET_INITIALIZER;
>
>  #include "gt-arc.h"
> diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h
> index d312f9f..c895725 100644
> --- a/gcc/config/arc/arc.h
> +++ b/gcc/config/arc/arc.h
> @@ -88,6 +88,10 @@ along with GCC; see the file COPYING3.  If not see
>        {                                        \
>         builtin_define ("__HS__");      \
>        }                                        \
> +    if (TARGET_ATOMIC)                 \
> +      {                                        \
> +       builtin_define ("__ARC_ATOMIC__");      \
> +      }                                        \
>      if (TARGET_NORM)                   \
>        {                                        \
>         builtin_define ("__ARC_NORM__");\
> @@ -153,7 +157,7 @@ along with GCC; see the file COPYING3.  If not see
>  %{mcpu=ARC700|!mcpu=*:%{mrtsc}} \
>  %{mcpu=ARCHS:-mHS} \
>  %{mcpu=ARCEM:-mEM} \
> -"
> +%{matomic:-mlock}"
>
>  #if DEFAULT_LIBC == LIBC_UCLIBC
>  /* Note that the default is to link against dynamic libraries, if they are
> diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
> index 1d070a3..ac181a9 100644
> --- a/gcc/config/arc/arc.md
> +++ b/gcc/config/arc/arc.md
> @@ -128,6 +128,12 @@
>     (VUNSPEC_UNIMP_S 28) ; blockage insn for unimp_s generation
>     (VUNSPEC_NOP 29) ; volatile NOP
>
> +   (UNSPEC_ARC_MEMBAR 30)
> +   (VUNSPEC_ARC_CAS 31)
> +   (VUNSPEC_ARC_LL 32)
> +   (VUNSPEC_ARC_SC 33)
> +   (VUNSPEC_ARC_EX 34)
> +
>     (R0_REG 0)
>     (R1_REG 1)
>     (R2_REG 2)
> @@ -5531,3 +5537,6 @@
>  (include "fpx.md")
>
>  (include "simdext.md")
> +
> +;; include atomic extensions
> +(include "atomic.md")
> diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt
> index 0c10c67..c4d7306 100644
> --- a/gcc/config/arc/arc.opt
> +++ b/gcc/config/arc/arc.opt
> @@ -414,3 +414,6 @@ Target Joined
>  mmac_
>  Target Joined
>
> +matomic
> +Target Report Mask(ATOMIC)
> +Enable atomic instructions.
> diff --git a/gcc/config/arc/atomic.md b/gcc/config/arc/atomic.md
> new file mode 100644
> index 0000000..13bcb76
> --- /dev/null
> +++ b/gcc/config/arc/atomic.md
> @@ -0,0 +1,235 @@
> +;; GCC machine description for ARC atomic instructions.
> +;; Copyright (C) 2015 Free Software Foundation, Inc.
> +;;
> +;; This file is part of GCC.
> +;;
> +;; GCC is free software; you can redistribute it and/or modify
> +;; it under the terms of the GNU General Public License as published by
> +;; the Free Software Foundation; either version 3, or (at your option)
> +;; any later version.
> +;;
> +;; GCC is distributed in the hope that it will be useful,
> +;; but WITHOUT ANY WARRANTY; without even the implied warranty of
> +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +;; GNU General Public License for more details.
> +;;
> +;; You should have received a copy of the GNU General Public License
> +;; along with GCC; see the file COPYING3.  If not see
> +;; <http://www.gnu.org/licenses/>.
> +
> +(define_mode_iterator QHSI [QI HI SI])
> +(define_code_iterator atomicop [plus minus ior xor and])
> +(define_code_attr atomic_optab
> +  [(ior "or") (xor "xor") (and "and") (plus "add") (minus "sub")])
> +
> +(define_expand "mem_thread_fence"
> +  [(match_operand:SI 0 "const_int_operand")]
> +  ""
> +{
> +  enum memmodel model = (enum memmodel) INTVAL (operands[0]);
> +  switch (model)
> +    {
> +    case MEMMODEL_RELAXED:
> +      break;
> +    case MEMMODEL_CONSUME:
> +    case MEMMODEL_ACQUIRE:
> +    case MEMMODEL_RELEASE:
> +    case MEMMODEL_ACQ_REL:
> +    case MEMMODEL_SYNC_ACQUIRE:
> +    case MEMMODEL_SYNC_RELEASE:
> +      emit_insn (gen_membar (const0_rtx));
> +      break;
> +    case MEMMODEL_SEQ_CST:
> +    case MEMMODEL_SYNC_SEQ_CST:
> +      emit_insn (gen_sync (const1_rtx));
> +      break;
> +    default:
> +      gcc_unreachable ();
> +    }
> +  DONE;
> +})
> +
> +(define_expand "membar"
> +  [(set (match_dup 1)
> +       (unspec:BLK [(match_dup 1) (match_operand:SI 0 "const_int_operand")]
> +                   UNSPEC_ARC_MEMBAR))]
> +  ""
> +{
> +  operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
> +  MEM_VOLATILE_P (operands[1]) = 1;
> +})
> +
> +;; A compiler-only memory barrier.  Generic code, when checking for the
> +;; existence of various named patterns, uses asm("":::"memory") when we
> +;; don't need an actual instruction.
> +(define_insn "*membar_empty"
> +  [(set (match_operand:BLK 0 "" "")
> +       (unspec:BLK [(match_dup 0) (const_int 0)]
> +                   UNSPEC_ARC_MEMBAR))]
> +  ""
> +  ""
> +  [(set_attr "type" "multi")
> +   (set_attr "length" "0")])
> +
> +(define_expand "atomic_compare_and_swap<mode>"
> +  [(match_operand:SI 0 "register_operand" "")  ;; bool out
> +   (match_operand:QHSI 1 "register_operand" "")        ;; val out
> +   (match_operand:QHSI 2 "mem_noofs_operand" "");; memory
> +   (match_operand:QHSI 3 "register_operand" "")        ;; expected
> +   (match_operand:QHSI 4 "register_operand" "")        ;; desired
> +   (match_operand:SI 5 "const_int_operand")    ;; is_weak
> +   (match_operand:SI 6 "const_int_operand")    ;; mod_s
> +   (match_operand:SI 7 "const_int_operand")]   ;; mod_f
> +  "TARGET_ATOMIC"
> +{
> +  arc_expand_compare_and_swap (operands);
> +  DONE;
> +})
> +
> +(define_insn_and_split "atomic_compare_and_swapsi_1"
> +  [(set (reg:CC_Z CC_REG)                                      ;; bool out
> +       (unspec_volatile:CC_Z [(const_int 0)] VUNSPEC_ARC_CAS))
> +   (set (match_operand:SI 0 "register_operand"      "=&r")     ;; val out
> +       (match_operand:SI 1 "mem_noofs_operand"      "+ATO"))   ;; memory
> +   (set (match_dup 1)
> +       (unspec_volatile:SI
> +         [(match_operand:SI 2 "register_operand"     "r") ;; expect
> +          (match_operand:SI 3 "register_operand"     "r") ;; desired
> +          (match_operand:SI 4 "const_int_operand")        ;; is_weak
> +          (match_operand:SI 5 "const_int_operand")        ;; mod_s
> +          (match_operand:SI 6 "const_int_operand")]       ;; mod_f
> +         VUNSPEC_ARC_CAS))]
> +  "TARGET_ATOMIC"
> +  "#"
> +  "&& reload_completed"
> +  [(const_int 0)]
> +  {
> +    arc_split_compare_and_swap (operands);
> +    DONE;
> +  })
> +
> +(define_insn "arc_load_exclusivesi"
> +  [(set (match_operand:SI 0 "register_operand" "=r")
> +       (unspec_volatile:SI
> +         [(match_operand:SI 1 "mem_noofs_operand" "ATO")]
> +         VUNSPEC_ARC_LL))]
> +  "TARGET_ATOMIC"
> +  "llock %0,%1"
> +  [(set_attr "type" "load")
> +   (set_attr "iscompact" "false")
> +   (set_attr "predicable" "no")
> +   (set_attr "length" "*")])
> +
> +(define_insn "arc_store_exclusivesi"
> +  [(set (match_operand:SI 0 "mem_noofs_operand"     "=ATO")
> +       (unspec_volatile:SI[(match_operand:SI 1 "register_operand" "r")]
> +                          VUNSPEC_ARC_SC))
> +   (clobber (reg:CC_Z CC_REG))]
> +  "TARGET_ATOMIC"
> +  "scond %1,%0"
> +  [(set_attr "type" "store")
> +   (set_attr "iscompact" "false")
> +   (set_attr "predicable" "no")
> +   (set_attr "length" "*")])
> +
> +(define_expand "atomic_exchangesi"
> +  [(match_operand:SI 0 "register_operand" "")
> +   (match_operand:SI 1 "mem_noofs_operand" "")
> +   (match_operand:SI 2 "register_operand" "")
> +   (match_operand:SI 3 "const_int_operand" "")]
> +  "TARGET_ATOMIC"
> +{
> +  enum memmodel model = (enum memmodel) INTVAL (operands[3]);
> +
> +  if (model == MEMMODEL_SEQ_CST)
> +    emit_insn (gen_sync (const1_rtx));
> +  emit_insn (gen_exchangesi (operands[0], operands[1], operands[2]));
> +  DONE;
> +})
> +
> +(define_insn "exchangesi"
> +  [(set (match_operand:SI 0 "register_operand" "=r")
> +       (unspec_volatile:SI [(match_operand:SI 1 "mem_noofs_operand" "+ATO")]
> +                           VUNSPEC_ARC_EX))
> +   (set (match_dup 1)
> +       (match_operand:SI 2 "register_operand" "0"))]
> +  ""
> +  "ex %0,%1"
> +  [(set_attr "type" "load")
> +   (set_attr "iscompact" "false")
> +   (set_attr "predicable" "no")
> +   (set_attr "length" "*")])
> +
> +(define_expand "atomic_<atomic_optab>si"
> +  [(match_operand:SI 0 "mem_noofs_operand" "")  ;; memory
> +   (atomicop:SI (match_dup 0)
> +               (match_operand:SI 1 "register_operand" "")) ;; operand
> +   (match_operand:SI 2 "const_int_operand" "")] ;; model
> +  "TARGET_ATOMIC"
> +{
> +  arc_expand_atomic_op (<CODE>, operands[0], operands[1],
> +                               NULL_RTX, NULL_RTX, operands[2]);
> +  DONE;
> +})
> +
> +(define_expand "atomic_nandsi"
> +  [(match_operand:SI 0 "mem_noofs_operand" "") ;; memory
> +   (match_operand:SI 1 "register_operand" "")  ;; operand
> +   (match_operand:SI 2 "const_int_operand" "")]        ;; model
> +  "TARGET_ATOMIC"
> +{
> + arc_expand_atomic_op (NOT, operands[0], operands[1],
> +                           NULL_RTX, NULL_RTX, operands[2]);
> + DONE;
> +})
> +
> +(define_expand "atomic_fetch_<atomic_optab>si"
> +  [(match_operand:SI 0 "register_operand" "")  ;; output
> +   (match_operand:SI 1 "mem_noofs_operand" "") ;; memory
> +   (atomicop:SI (match_dup 1)
> +               (match_operand:SI 2 "register_operand" "")) ;; operand
> +   (match_operand:SI 3 "const_int_operand" "")]        ;; model
> +  "TARGET_ATOMIC"
> +{
> +  arc_expand_atomic_op (<CODE>, operands[1], operands[2],
> +                               operands[0], NULL_RTX, operands[3]);
> +  DONE;
> +})
> +
> +(define_expand "atomic_fetch_nandsi"
> +  [(match_operand:SI 0 "register_operand" "")  ;; output
> +   (match_operand:SI 1 "mem_noofs_operand" "") ;; memory
> +   (match_operand:SI 2 "register_operand" "")  ;; operand
> +   (match_operand:SI 3 "const_int_operand" "")]        ;; model
> +  "TARGET_ATOMIC"
> +{
> +  arc_expand_atomic_op (NOT, operands[1], operands[2],
> +                            operands[0], NULL_RTX, operands[3]);
> +  DONE;
> +})
> +
> +(define_expand "atomic_<atomic_optab>_fetchsi"
> +  [(match_operand:SI 0 "register_operand" "")  ;; output
> +   (match_operand:SI 1 "mem_noofs_operand" "") ;; memory
> +   (atomicop:SI (match_dup 1)
> +               (match_operand:SI 2 "register_operand" "")) ;; operand
> +   (match_operand:SI 3 "const_int_operand" "")]        ;; model
> +  "TARGET_ATOMIC"
> +{
> +  arc_expand_atomic_op (<CODE>, operands[1], operands[2],
> +                               NULL_RTX, operands[0], operands[3]);
> +  DONE;
> +})
> +
> +(define_expand "atomic_nand_fetchsi"
> +  [(match_operand:SI 0 "register_operand" "")  ;; output
> +   (match_operand:SI 1 "mem_noofs_operand" "") ;; memory
> +   (match_operand:SI 2 "register_operand" "")  ;; operand
> +   (match_operand:SI 3 "const_int_operand" "")]        ;; model
> +  "TARGET_ATOMIC"
> +{
> +  arc_expand_atomic_op (NOT, operands[1], operands[2],
> +                            NULL_RTX, operands[0], operands[3]);
> +  DONE;
> +})
> +
> diff --git a/gcc/config/arc/constraints.md b/gcc/config/arc/constraints.md
> index 65ea44a..18309cc 100644
> --- a/gcc/config/arc/constraints.md
> +++ b/gcc/config/arc/constraints.md
> @@ -421,3 +421,9 @@
>     An unsigned 6-bit integer constant, up to 62."
>    (and (match_code "const_int")
>         (match_test "UNSIGNED_INT6 (ival - 1)")))
> +
> +;; Memory constraint used for atomic ops.
> +(define_memory_constraint "ATO"
> +  "A memory with only a base register"
> +  (match_operand 0 "mem_noofs_operand"))
> +
> diff --git a/gcc/config/arc/predicates.md b/gcc/config/arc/predicates.md
> index 43f9474..de0735a 100644
> --- a/gcc/config/arc/predicates.md
> +++ b/gcc/config/arc/predicates.md
> @@ -813,3 +813,7 @@
>  (define_predicate "short_const_int_operand"
>    (and (match_operand 0 "const_int_operand")
>         (match_test "satisfies_constraint_C16 (op)")))
> +
> +(define_predicate "mem_noofs_operand"
> +  (and (match_code "mem")
> +       (match_code "reg" "0")))
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index fb908b3..c0a99d7 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -538,7 +538,7 @@ Objective-C and Objective-C++ Dialects}.
>  @gccoptlist{-mbarrel-shifter @gol
>  -mcpu=@var{cpu} -mA6 -mARC600 -mA7 -mARC700 @gol
>  -mdpfp -mdpfp-compact -mdpfp-fast -mno-dpfp-lrsr @gol
> --mea -mno-mpy -mmul32x16 -mmul64 @gol
> +-mea -mno-mpy -mmul32x16 -mmul64 -matomic @gol
>  -mnorm -mspfp -mspfp-compact -mspfp-fast -msimd -msoft-float -mswap @gol
>  -mcrc -mdsp-packa -mdvbf -mlock -mmac-d16 -mmac-24 -mrtsc -mswape @gol
>  -mtelephony -mxy -misize -mannotate-align -marclinux -marclinux_prof @gol
> @@ -12948,6 +12948,12 @@ can overridden by FPX options; @samp{mspfp}, @samp{mspfp-compact}, or
>  @opindex mswap
>  Generate swap instructions.
>
> +@item -matomic
> +@opindex matomic
> +This enables Locked Load/Store Conditional extension to implement
> +atomic memopry built-in functions.  Not available for ARC 6xx or ARC
> +EM cores.
> +
>  @item -mdiv-rem
>  @opindex mdiv-rem
>  Enable DIV/REM instructions for ARCv2 cores.
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index 75d5068..cc847ee 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -2602,6 +2602,15 @@ proc check_effective_target_aarch64_little_endian { } {
>      }]
>  }
>
> +# Return 1 if this is a compiler supporting ARC atomic operations
> +proc check_effective_target_arc_atomic { } {
> +    return [check_no_compiler_messages arc_atomic assembly {
> +       #if !defined(__ARC_ATOMIC__)
> +       #error FOO
> +       #endif
> +    }]
> +}
> +
>  # Return 1 if this is an arm target using 32-bit instructions
>  proc check_effective_target_arm32 { } {
>      if { ![istarget arm*-*-*] } {
> @@ -5513,6 +5522,7 @@ proc check_effective_target_sync_int_long { } {
>              || [istarget crisv32-*-*] || [istarget cris-*-*]
>              || ([istarget sparc*-*-*] && [check_effective_target_sparc_v9])
>              || [istarget spu-*-*]
> +            || ([istarget arc*-*-*] && [check_effective_target_arc_atomic])
>              || [check_effective_target_mips_llsc] } {
>             set et_sync_int_long_saved 1
>          }
> @@ -5544,6 +5554,7 @@ proc check_effective_target_sync_char_short { } {
>              || [istarget crisv32-*-*] || [istarget cris-*-*]
>              || ([istarget sparc*-*-*] && [check_effective_target_sparc_v9])
>              || [istarget spu-*-*]
> +            || ([istarget arc*-*-*] && [check_effective_target_arc_atomic])
>              || [check_effective_target_mips_llsc] } {
>             set et_sync_char_short_saved 1
>          }
> --
> 1.9.1
>
Joern Wolfgang Rennecke Dec. 4, 2015, 1:09 a.m. UTC | #2
On 16/11/15 10:18, Claudiu Zissulescu wrote:
>   
> +/* Expand code to perform a 8 or 16-bit compare and swap by doing
> +   32-bit compare and swap on the word containing the byte or
> +   half-word.  The difference between a weak and a strong CAS is that
> +   the weak version may simply fail.  The strong version relays on two
>
Typo: relays -> relies

More importantly, your use of barriers makes no sense to me.
Memory models other than MEMMODEL_RELAXED impose two requirements
on the compiler:
- For systems without hardware memory coherency (e.g. multiple caches
  with software synchronisation), emit any instructions necessary to
  achieve coherency for those objects that the access / memory model
  requires.
- Prevent code movement by compiler optimizations.  This is where,
  hardware-independently, the memory model makes / could make a
  difference in how many restrictions are placed on the optimizers.

Because of PR middle-end/59448, we currently promote MEMMODEL_CONSUME
to MEMMODEL_ACQUIRE, which is a shame, really, because otherwise we
could just rely on ordinary dependencies to prevent reordering after a
cache flush/invalidation at the atomic operation.

Now, assuming we have multiple cores with software-synchronized caches:

A MEMMODEL_SEQ_CST / MEMMODEL_RELEASE operation requires a cache flush
(unless you have a write-through cache in the first place), so that all
values that have been written into the local cache become visible in
main memory.  Also, any writes that are delayed due to out-of-order
operation or a write buffer must be flushed to main memory.

A MEMMODEL_SEQ_CST / MEMMODEL_ACQUIRE operation requires a cache
invalidation, preceded by a cache flush to avoid losing data, so that
values written by the releasing thread to main memory will be seen by
the current thread.

The patterns that represent the hardware cache / synchronisation
operations may also double as memory barriers for the compiler.

If you don't need hardware cache / synchronization operations (either
because you have hardware coherency, or you have only a single cache
system for all cores / the only core in the system), you still need
memory barriers for the compiler.

AFAICT, you use the hardware synchronisation instruction for
MEMMODEL_SEQ_CST, and compiler memory barriers for all other memory
models (except MEMMODEL_RELAXED).  That makes no sense; either the
platform needs explicit instructions for memory coherency, or it
doesn't.

On the other hand, your memory barriers are more restrictive than they
need to be.  To tell the compiler that it must not sink a write below
MEMMODEL_SEQ_CST / MEMMODEL_RELEASE operations, it is sufficient to
display a USE of an unspecified memory location.  This is also true
when you have a cache flush: it is sufficient to show the compiler that
this cache flush may read anything.  (Well, actually, for our purposes
it'd be OK to make it so that thread-local variables, spill slots and
variables that satisfy an escape analysis are considered independent.)
The USE of the unspecified memory has to be tied to the atomic
operation, of course.  This could be done by making it part of the
instruction pattern itself, or by having the atomic operation USE
something (e.g. a fake hard register) that is 'set' by the memory
barrier / sync / cache flush pattern.
Claudiu Zissulescu Dec. 7, 2015, 1:25 p.m. UTC | #3
Hi,

> AFAICT, you use the hardware synchronisation instruction for
> MEMMODEL_SEQ_CST, and compiler memory barriers for all other memory
> models (except MEMMODEL_RELAXED).  That makes no sense; either the
> platform needs explicit instructions for memory coherency, or it
> doesn't.

Indeed, we deliberately misused the sync primitive to compensate for the
lack of a data memory barrier (dmb) primitive in the early SMP-HS cores.
I have now checked, and we can safely use the dmb primitive for all HS
cores shipping today (no old HS IP without dmb is out there).  Hence, I
have reworked the patch (attached), removing the old sync/software
memory-barrier combinations and using the newer dmb instruction instead.

Tested with dg.exp (when passing -matomic on the gcc command line, the atomic tests are also successfully executed).

Thanks,
Claudiu
Joern Wolfgang Rennecke Dec. 9, 2015, 5:11 a.m. UTC | #4
On 07/12/15 13:25, Claudiu Zissulescu wrote:
>
> Tested with dg.exp (when passing -matomic on the gcc command line, the atomic tests are also successfully executed).
The comment before "*memory_barrier" could use some elaboration on what 
it does for TARGET_HS.
Otherwise, this is OK.
Claudiu Zissulescu Dec. 9, 2015, 2:31 p.m. UTC | #5
I will add this text before the "*memory_barrier" pattern:

;; For ARCHS, we use a hardware data memory barrier that waits for
;; completion of current data memory operations before initiating
;; similar data memory operations.

Once done, I will commit it.

Thanks,
Claudiu

>
> Tested with dg.exp (when passing -matomic on the gcc command line, the atomic tests are also successfully executed).
The comment before "*memory_barrier" could use some elaboration on what it does for TARGET_HS.
Otherwise, this is OK.
Claudiu Zissulescu Dec. 10, 2015, 1:35 p.m. UTC | #6
Patch applied:  Committed r231509

Thanks,
Claudiu

> -----Original Message-----
> From: Joern Wolfgang Rennecke [mailto:gnu@amylaar.uk]
> Sent: Wednesday, December 09, 2015 6:11 AM
> To: Claudiu Zissulescu; gcc-patches@gcc.gnu.org
> Cc: Francois.Bedard@synopsys.com; jeremy.bennett@embecosm.com
> Subject: Re: [PATCH] [ARC] Add support for atomic memory built-in.
> 
> 
> 
> On 07/12/15 13:25, Claudiu Zissulescu wrote:
> >
> > Tested with dg.exp (when passing -matomic on the gcc command line,
> > the atomic tests are also successfully executed).
> The comment before "*memory_barrier" could use some elaboration on
> what it does for TARGET_HS.
> Otherwise, this is OK.

Patch

diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
index 6e04351..3581bb0 100644
--- a/gcc/config/arc/arc-protos.h
+++ b/gcc/config/arc/arc-protos.h
@@ -41,6 +41,10 @@  extern int arc_output_commutative_cond_exec (rtx *operands, bool);
 extern bool arc_expand_movmem (rtx *operands);
 extern bool prepare_move_operands (rtx *operands, machine_mode mode);
 extern void emit_shift (enum rtx_code, rtx, rtx, rtx);
+extern void arc_expand_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
+extern void arc_split_compare_and_swap (rtx *);
+extern void arc_expand_compare_and_swap (rtx *);
+
 #endif /* RTX_CODE */
 
 #ifdef TREE_CODE
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 8bb0969..d47bbe4 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -61,6 +61,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "context.h"
 #include "builtins.h"
 #include "rtl-iter.h"
+#include "alias.h"
 
 /* Which cpu we're compiling for (ARC600, ARC601, ARC700).  */
 static const char *arc_cpu_string = "";
@@ -884,6 +885,9 @@  arc_init (void)
       flag_pic = 0;
     }
 
+  if (TARGET_ATOMIC && !(TARGET_ARC700 || TARGET_HS))
+    error ("-matomic is only supported for ARC700 or ARC HS cores");
+
   arc_init_reg_tables ();
 
   /* Initialize array for PRINT_OPERAND_PUNCT_VALID_P.  */
@@ -9650,6 +9654,393 @@  arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
   return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
 }
 
+/* Emit a (pre) memory barrier around an atomic sequence according to
+   MODEL.  */
+
+static void
+arc_pre_atomic_barrier (enum memmodel model)
+{
+ switch (model & MEMMODEL_MASK)
+    {
+    case MEMMODEL_RELAXED:
+    case MEMMODEL_CONSUME:
+    case MEMMODEL_ACQUIRE:
+    case MEMMODEL_SYNC_ACQUIRE:
+      break;
+    case MEMMODEL_RELEASE:
+    case MEMMODEL_ACQ_REL:
+    case MEMMODEL_SYNC_RELEASE:
+      emit_insn (gen_membar (const0_rtx));
+      break;
+    case MEMMODEL_SEQ_CST:
+    case MEMMODEL_SYNC_SEQ_CST:
+      emit_insn (gen_sync (const1_rtx));
+      break;
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Emit a (post) memory barrier around an atomic sequence according to
+   MODEL.  */
+
+static void
+arc_post_atomic_barrier (enum memmodel model)
+{
+ switch (model & MEMMODEL_MASK)
+    {
+    case MEMMODEL_RELAXED:
+    case MEMMODEL_CONSUME:
+    case MEMMODEL_RELEASE:
+    case MEMMODEL_SYNC_RELEASE:
+      break;
+    case MEMMODEL_ACQUIRE:
+    case MEMMODEL_ACQ_REL:
+    case MEMMODEL_SYNC_ACQUIRE:
+      emit_insn (gen_membar (const0_rtx));
+      break;
+    case MEMMODEL_SEQ_CST:
+    case MEMMODEL_SYNC_SEQ_CST:
+      emit_insn (gen_sync (const1_rtx));
+      break;
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Expand a compare and swap pattern.  */
+
+static void
+emit_unlikely_jump (rtx insn)
+{
+  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
+
+  insn = emit_jump_insn (insn);
+  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
+}
+
+/* Expand code to perform a 8 or 16-bit compare and swap by doing
+   32-bit compare and swap on the word containing the byte or
+   half-word.  The difference between a weak and a strong CAS is that
+   the weak version may simply fail.  The strong version relays on two
+   loops, one checks if the SCOND op is succsfully or not, the other
+   checks if the 32 bit accessed location which contains the 8 or 16
+   bit datum is not changed by other thread.  The first loop is
+   implemented by the atomic_compare_and_swapsi_1 pattern.  The second
+   loops is implemented by this routine.  */
+
+static void
+arc_expand_compare_and_swap_qh (rtx bool_result, rtx result, rtx mem,
+				rtx oldval, rtx newval, rtx weak,
+				rtx mod_s, rtx mod_f)
+{
+  rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
+  rtx addr = gen_reg_rtx (Pmode);
+  rtx off = gen_reg_rtx (SImode);
+  rtx oldv = gen_reg_rtx (SImode);
+  rtx newv = gen_reg_rtx (SImode);
+  rtx oldvalue = gen_reg_rtx (SImode);
+  rtx newvalue = gen_reg_rtx (SImode);
+  rtx res = gen_reg_rtx (SImode);
+  rtx resv = gen_reg_rtx (SImode);
+  rtx memsi, val, mask, end_label, loop_label, cc, x;
+  machine_mode mode;
+  bool is_weak = (weak != const0_rtx);
+
+  /* Truncate the address.  */
+  emit_insn (gen_rtx_SET (addr,
+			  gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
+
+  /* Compute the datum offset.  */
+  emit_insn (gen_rtx_SET (off,
+			  gen_rtx_AND (SImode, addr1, GEN_INT (3))));
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_rtx_SET (off,
+			    gen_rtx_MINUS (SImode,
+					   (GET_MODE (mem) == QImode) ?
+					   GEN_INT (3) : GEN_INT (2), off)));
+
+  /* Normal read from truncated address.  */
+  memsi = gen_rtx_MEM (SImode, addr);
+  set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
+  MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
+
+  val = copy_to_reg (memsi);
+
+  /* Convert the offset in bits.  */
+  emit_insn (gen_rtx_SET (off,
+			  gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
+
+  /* Get the proper mask.  */
+  if (GET_MODE (mem) == QImode)
+    mask = force_reg (SImode, GEN_INT (0xff));
+  else
+    mask = force_reg (SImode, GEN_INT (0xffff));
+
+  emit_insn (gen_rtx_SET (mask,
+			  gen_rtx_ASHIFT (SImode, mask, off)));
+
+  /* Prepare the old and new values.  */
+  emit_insn (gen_rtx_SET (val,
+			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
+				       val)));
+
+  oldval = gen_lowpart (SImode, oldval);
+  emit_insn (gen_rtx_SET (oldv,
+			  gen_rtx_ASHIFT (SImode, oldval, off)));
+
+  newval = gen_lowpart_common (SImode, newval);
+  emit_insn (gen_rtx_SET (newv,
+			  gen_rtx_ASHIFT (SImode, newval, off)));
+
+  emit_insn (gen_rtx_SET (oldv,
+			  gen_rtx_AND (SImode, oldv, mask)));
+
+  emit_insn (gen_rtx_SET (newv,
+			  gen_rtx_AND (SImode, newv, mask)));
+
+  if (!is_weak)
+    {
+      end_label = gen_label_rtx ();
+      loop_label = gen_label_rtx ();
+      emit_label (loop_label);
+    }
+
+  /* Make the old and new values.  */
+  emit_insn (gen_rtx_SET (oldvalue,
+			  gen_rtx_IOR (SImode, oldv, val)));
+
+  emit_insn (gen_rtx_SET (newvalue,
+			  gen_rtx_IOR (SImode, newv, val)));
+
+  /* Try an 32bit atomic compare and swap.  It clobbers the CC
+     register.  */
+  emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue,
+					      weak, mod_s, mod_f));
+
+  /* Regardless of the weakness of the operation, a proper boolean
+     result needs to be provided.  */
+  x = gen_rtx_REG (CC_Zmode, CC_REG);
+  x = gen_rtx_EQ (SImode, x, const0_rtx);
+  emit_insn (gen_rtx_SET (bool_result, x));
+
+  if (!is_weak)
+    {
+      /* Check the results: if the atomic op is successfully the goto
+	 to end label.  */
+      x = gen_rtx_REG (CC_Zmode, CC_REG);
+      x = gen_rtx_EQ (VOIDmode, x, const0_rtx);
+      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+				gen_rtx_LABEL_REF (Pmode, end_label), pc_rtx);
+      emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+
+      /* Wait for the right moment when the accessed 32-bit location
+	 is stable.  */
+      emit_insn (gen_rtx_SET (resv,
+			      gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
+					   res)));
+      mode = SELECT_CC_MODE (NE, resv, val);
+      cc = gen_rtx_REG (mode, CC_REG);
+      emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, resv, val)));
+
+      /* Set the new value of the 32 bit location, proper masked.  */
+      emit_insn (gen_rtx_SET (val, resv));
+
+      /* Try again if location is unstable.  Fall through if only
+	 scond op failed.  */
+      x = gen_rtx_NE (VOIDmode, cc, const0_rtx);
+      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+				gen_rtx_LABEL_REF (Pmode, loop_label), pc_rtx);
+      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+
+      emit_label (end_label);
+    }
+
+  /* End: proper return the result for the given mode.  */
+  emit_insn (gen_rtx_SET (res,
+			  gen_rtx_AND (SImode, res, mask)));
+
+  emit_insn (gen_rtx_SET (res,
+			  gen_rtx_LSHIFTRT (SImode, res, off)));
+
+  emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
+}
+
+/* Helper function used by "atomic_compare_and_swap" expand
+   pattern.  */
+
+void
+arc_expand_compare_and_swap (rtx operands[])
+{
+  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
+  machine_mode mode;
+
+  bval = operands[0];
+  rval = operands[1];
+  mem = operands[2];
+  oldval = operands[3];
+  newval = operands[4];
+  is_weak = operands[5];
+  mod_s = operands[6];
+  mod_f = operands[7];
+  mode = GET_MODE (mem);
+
+  if (reg_overlap_mentioned_p (rval, oldval))
+    oldval = copy_to_reg (oldval);
+
+  if (mode == SImode)
+    {
+      emit_insn (gen_atomic_compare_and_swapsi_1 (rval, mem, oldval, newval,
+						  is_weak, mod_s, mod_f));
+      x = gen_rtx_REG (CC_Zmode, CC_REG);
+      x = gen_rtx_EQ (SImode, x, const0_rtx);
+      emit_insn (gen_rtx_SET (bval, x));
+    }
+  else
+    {
+      arc_expand_compare_and_swap_qh (bval, rval, mem, oldval, newval,
+				      is_weak, mod_s, mod_f);
+    }
+}
+
+/* Helper function used by the "atomic_compare_and_swapsi_1"
+   pattern.  */
+
+void
+arc_split_compare_and_swap (rtx operands[])
+{
+  rtx rval, mem, oldval, newval;
+  machine_mode mode;
+  enum memmodel mod_s, mod_f;
+  bool is_weak;
+  rtx label1, label2, x, cond;
+
+  rval = operands[0];
+  mem = operands[1];
+  oldval = operands[2];
+  newval = operands[3];
+  is_weak = (operands[4] != const0_rtx);
+  mod_s = (enum memmodel) INTVAL (operands[5]);
+  mod_f = (enum memmodel) INTVAL (operands[6]);
+  mode = GET_MODE (mem);
+
+  /* ARC atomic ops work only with 32-bit aligned memories.  */
+  gcc_assert (mode == SImode);
+
+  arc_pre_atomic_barrier (mod_s);
+
+  label1 = NULL_RTX;
+  if (!is_weak)
+    {
+      label1 = gen_label_rtx ();
+      emit_label (label1);
+    }
+  label2 = gen_label_rtx ();
+
+  /* Load exclusive.  */
+  emit_insn (gen_arc_load_exclusivesi (rval, mem));
+
+  /* Check if it is oldval.  */
+  mode = SELECT_CC_MODE (NE, rval, oldval);
+  cond = gen_rtx_REG (mode, CC_REG);
+  emit_insn (gen_rtx_SET (cond, gen_rtx_COMPARE (mode, rval, oldval)));
+
+  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
+  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+
+  /* Exclusively store new item.  Store clobbers CC reg.  */
+  emit_insn (gen_arc_store_exclusivesi (mem, newval));
+
+  if (!is_weak)
+    {
+      /* Check the result of the store.  */
+      cond = gen_rtx_REG (CC_Zmode, CC_REG);
+      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
+      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+    }
+
+  if (mod_f != MEMMODEL_RELAXED)
+    emit_label (label2);
+
+  arc_post_atomic_barrier (mod_s);
+
+  if (mod_f == MEMMODEL_RELAXED)
+    emit_label (label2);
+}
+
+/* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
+   to perform.  MEM is the memory on which to operate.  VAL is the second
+   operand of the binary operator.  BEFORE and AFTER are optional locations to
+   return the value of MEM either before of after the operation.  MODEL_RTX
+   is a CONST_INT containing the memory model to use.  */
+
+void
+arc_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
+			 rtx orig_before, rtx orig_after, rtx model_rtx)
+{
+  enum memmodel model = (enum memmodel) INTVAL (model_rtx);
+  machine_mode mode = GET_MODE (mem);
+  rtx label, x, cond;
+  rtx before = orig_before, after = orig_after;
+
+  /* ARC atomic ops work only with 32-bit aligned memories.  */
+  gcc_assert (mode == SImode);
+
+  arc_pre_atomic_barrier (model);
+
+  label = gen_label_rtx ();
+  emit_label (label);
+  label = gen_rtx_LABEL_REF (VOIDmode, label);
+
+  if (before == NULL_RTX)
+    before = gen_reg_rtx (mode);
+
+  if (after == NULL_RTX)
+    after = gen_reg_rtx (mode);
+
+  /* Load exclusive.  */
+  emit_insn (gen_arc_load_exclusivesi (before, mem));
+
+  switch (code)
+    {
+    case NOT:
+      x = gen_rtx_AND (mode, before, val);
+      emit_insn (gen_rtx_SET (after, x));
+      x = gen_rtx_NOT (mode, after);
+      emit_insn (gen_rtx_SET (after, x));
+      break;
+
+    case MINUS:
+      if (CONST_INT_P (val))
+	{
+	  val = GEN_INT (-INTVAL (val));
+	  code = PLUS;
+	}
+
+      /* FALLTHRU.  */
+    default:
+      x = gen_rtx_fmt_ee (code, mode, before, val);
+      emit_insn (gen_rtx_SET (after, x));
+      break;
+   }
+
+  /* Exclusively store new item.  Store clobbers CC reg.  */
+  emit_insn (gen_arc_store_exclusivesi (mem, after));
+
+  /* Check the result of the store.  */
+  cond = gen_rtx_REG (CC_Zmode, CC_REG);
+  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    label, pc_rtx);
+  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+
+  arc_post_atomic_barrier (model);
+}
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-arc.h"
diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h
index d312f9f..c895725 100644
--- a/gcc/config/arc/arc.h
+++ b/gcc/config/arc/arc.h
@@ -88,6 +88,10 @@  along with GCC; see the file COPYING3.  If not see
       {					\
 	builtin_define ("__HS__");	\
       }					\
+    if (TARGET_ATOMIC)			\
+      {					\
+	builtin_define ("__ARC_ATOMIC__");	\
+      }					\
     if (TARGET_NORM)			\
       {					\
 	builtin_define ("__ARC_NORM__");\
@@ -153,7 +157,7 @@  along with GCC; see the file COPYING3.  If not see
 %{mcpu=ARC700|!mcpu=*:%{mrtsc}} \
 %{mcpu=ARCHS:-mHS} \
 %{mcpu=ARCEM:-mEM} \
-"
+%{matomic:-mlock}"
 
 #if DEFAULT_LIBC == LIBC_UCLIBC
 /* Note that the default is to link against dynamic libraries, if they are
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 1d070a3..ac181a9 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -128,6 +128,12 @@ 
    (VUNSPEC_UNIMP_S 28) ; blockage insn for unimp_s generation
    (VUNSPEC_NOP 29) ; volatile NOP
 
+   (UNSPEC_ARC_MEMBAR 30)
+   (VUNSPEC_ARC_CAS 31)
+   (VUNSPEC_ARC_LL 32)
+   (VUNSPEC_ARC_SC 33)
+   (VUNSPEC_ARC_EX 34)
+
    (R0_REG 0)
    (R1_REG 1)
    (R2_REG 2)
@@ -5531,3 +5537,6 @@ 
 (include "fpx.md")
 
 (include "simdext.md")
+
+;; include atomic extensions
+(include "atomic.md")
diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt
index 0c10c67..c4d7306 100644
--- a/gcc/config/arc/arc.opt
+++ b/gcc/config/arc/arc.opt
@@ -414,3 +414,6 @@  Target Joined
 mmac_
 Target Joined
 
+matomic
+Target Report Mask(ATOMIC)
+Enable atomic instructions.
diff --git a/gcc/config/arc/atomic.md b/gcc/config/arc/atomic.md
new file mode 100644
index 0000000..13bcb76
--- /dev/null
+++ b/gcc/config/arc/atomic.md
@@ -0,0 +1,235 @@ 
+;; GCC machine description for ARC atomic instructions.
+;; Copyright (C) 2015 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_mode_iterator QHSI [QI HI SI])
+(define_code_iterator atomicop [plus minus ior xor and])
+(define_code_attr atomic_optab
+  [(ior "or") (xor "xor") (and "and") (plus "add") (minus "sub")])
+
+(define_expand "mem_thread_fence"
+  [(match_operand:SI 0 "const_int_operand")]
+  ""
+{
+  enum memmodel model = (enum memmodel) INTVAL (operands[0]);
+  switch (model)
+    {
+    case MEMMODEL_RELAXED:
+      break;
+    case MEMMODEL_CONSUME:
+    case MEMMODEL_ACQUIRE:
+    case MEMMODEL_RELEASE:
+    case MEMMODEL_ACQ_REL:
+    case MEMMODEL_SYNC_ACQUIRE:
+    case MEMMODEL_SYNC_RELEASE:
+      emit_insn (gen_membar (const0_rtx));
+      break;
+    case MEMMODEL_SEQ_CST:
+    case MEMMODEL_SYNC_SEQ_CST:
+      emit_insn (gen_sync (const1_rtx));
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  DONE;
+})
+
+(define_expand "membar"
+  [(set (match_dup 1)
+	(unspec:BLK [(match_dup 1) (match_operand:SI 0 "const_int_operand")]
+		    UNSPEC_ARC_MEMBAR))]
+  ""
+{
+  operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+  MEM_VOLATILE_P (operands[1]) = 1;
+})
+
+;; A compiler-only memory barrier.  Generic code, when checking for the
+;; existence of various named patterns, uses asm("":::"memory") when we
+;; don't need an actual instruction.
+(define_insn "*membar_empty"
+  [(set (match_operand:BLK 0 "" "")
+	(unspec:BLK [(match_dup 0) (const_int 0)]
+		    UNSPEC_ARC_MEMBAR))]
+  ""
+  ""
+  [(set_attr "type" "multi")
+   (set_attr "length" "0")])
+
+(define_expand "atomic_compare_and_swap<mode>"
+  [(match_operand:SI 0 "register_operand" "")	;; bool out
+   (match_operand:QHSI 1 "register_operand" "")	;; val out
+   (match_operand:QHSI 2 "mem_noofs_operand" "");; memory
+   (match_operand:QHSI 3 "register_operand" "")	;; expected
+   (match_operand:QHSI 4 "register_operand" "")	;; desired
+   (match_operand:SI 5 "const_int_operand")	;; is_weak
+   (match_operand:SI 6 "const_int_operand")	;; mod_s
+   (match_operand:SI 7 "const_int_operand")]	;; mod_f
+  "TARGET_ATOMIC"
+{
+  arc_expand_compare_and_swap (operands);
+  DONE;
+})
+
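+;; The core compare-and-swap insn; it is kept whole until after
+;; reload and then split via arc_split_compare_and_swap.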
+(define_insn_and_split "atomic_compare_and_swapsi_1"
+  [(set (reg:CC_Z CC_REG)					;; bool out
+	(unspec_volatile:CC_Z [(const_int 0)] VUNSPEC_ARC_CAS))
+   (set (match_operand:SI 0 "register_operand"      "=&r")	;; val out
+	(match_operand:SI 1 "mem_noofs_operand"      "+ATO"))	;; memory
+   (set (match_dup 1)
+	(unspec_volatile:SI
+	  [(match_operand:SI 2 "register_operand"     "r") ;; expect
+	   (match_operand:SI 3 "register_operand"     "r") ;; desired
+	   (match_operand:SI 4 "const_int_operand")	   ;; is_weak
+	   (match_operand:SI 5 "const_int_operand")	   ;; mod_s
+	   (match_operand:SI 6 "const_int_operand")]	   ;; mod_f
+	  VUNSPEC_ARC_CAS))]
+  "TARGET_ATOMIC"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    arc_split_compare_and_swap (operands);
+    DONE;
+  })
+
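+;; Exclusive-access building blocks: LLOCK (load locked) and
+;; SCOND (store conditional).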
+(define_insn "arc_load_exclusivesi"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec_volatile:SI
+	  [(match_operand:SI 1 "mem_noofs_operand" "ATO")]
+	  VUNSPEC_ARC_LL))]
+  "TARGET_ATOMIC"
+  "llock %0,%1"
+  [(set_attr "type" "load")
+   (set_attr "iscompact" "false")
+   (set_attr "predicable" "no")
+   (set_attr "length" "*")])
+
+(define_insn "arc_store_exclusivesi"
+  [(set (match_operand:SI 0 "mem_noofs_operand"     "=ATO")
+	(unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r")]
+			   VUNSPEC_ARC_SC))
+   (clobber (reg:CC_Z CC_REG))]
+  "TARGET_ATOMIC"
+  "scond %1,%0"
+  [(set_attr "type" "store")
+   (set_attr "iscompact" "false")
+   (set_attr "predicable" "no")
+   (set_attr "length" "*")])
+
+(define_expand "atomic_exchangesi"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "mem_noofs_operand" "")
+   (match_operand:SI 2 "register_operand" "")
+   (match_operand:SI 3 "const_int_operand" "")]
+  "TARGET_ATOMIC"
+{
+  enum memmodel model = (enum memmodel) INTVAL (operands[3]);
+
+  if (model == MEMMODEL_SEQ_CST)
+    emit_insn (gen_sync (const1_rtx));
+  emit_insn (gen_exchangesi (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "exchangesi"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec_volatile:SI [(match_operand:SI 1 "mem_noofs_operand" "+ATO")]
+			    VUNSPEC_ARC_EX))
+   (set (match_dup 1)
+	(match_operand:SI 2 "register_operand" "0"))]
+  ""
+  "ex %0,%1"
+  [(set_attr "type" "load")
+   (set_attr "iscompact" "false")
+   (set_attr "predicable" "no")
+   (set_attr "length" "*")])
+
+(define_expand "atomic_<atomic_optab>si"
+  [(match_operand:SI 0 "mem_noofs_operand" "")  ;; memory
+   (atomicop:SI (match_dup 0)
+		(match_operand:SI 1 "register_operand" "")) ;; operand
+   (match_operand:SI 2 "const_int_operand" "")] ;; model
+  "TARGET_ATOMIC"
+{
+  arc_expand_atomic_op (<CODE>, operands[0], operands[1],
+				NULL_RTX, NULL_RTX, operands[2]);
+  DONE;
+})
+
+(define_expand "atomic_nandsi"
+  [(match_operand:SI 0 "mem_noofs_operand" "")	;; memory
+   (match_operand:SI 1 "register_operand" "")	;; operand
+   (match_operand:SI 2 "const_int_operand" "")]	;; model
+  "TARGET_ATOMIC"
+{
+ arc_expand_atomic_op (NOT, operands[0], operands[1],
+			    NULL_RTX, NULL_RTX, operands[2]);
+ DONE;
+})
+
+(define_expand "atomic_fetch_<atomic_optab>si"
+  [(match_operand:SI 0 "register_operand" "")	;; output
+   (match_operand:SI 1 "mem_noofs_operand" "")	;; memory
+   (atomicop:SI (match_dup 1)
+		(match_operand:SI 2 "register_operand" "")) ;; operand
+   (match_operand:SI 3 "const_int_operand" "")]	;; model
+  "TARGET_ATOMIC"
+{
+  arc_expand_atomic_op (<CODE>, operands[1], operands[2],
+				operands[0], NULL_RTX, operands[3]);
+  DONE;
+})
+
+(define_expand "atomic_fetch_nandsi"
+  [(match_operand:SI 0 "register_operand" "")	;; output
+   (match_operand:SI 1 "mem_noofs_operand" "")	;; memory
+   (match_operand:SI 2 "register_operand" "")	;; operand
+   (match_operand:SI 3 "const_int_operand" "")]	;; model
+  "TARGET_ATOMIC"
+{
+  arc_expand_atomic_op (NOT, operands[1], operands[2],
+			     operands[0], NULL_RTX, operands[3]);
+  DONE;
+})
+
+(define_expand "atomic_<atomic_optab>_fetchsi"
+  [(match_operand:SI 0 "register_operand" "")	;; output
+   (match_operand:SI 1 "mem_noofs_operand" "")	;; memory
+   (atomicop:SI (match_dup 1)
+		(match_operand:SI 2 "register_operand" "")) ;; operand
+   (match_operand:SI 3 "const_int_operand" "")]	;; model
+  "TARGET_ATOMIC"
+{
+  arc_expand_atomic_op (<CODE>, operands[1], operands[2],
+				NULL_RTX, operands[0], operands[3]);
+  DONE;
+})
+
+(define_expand "atomic_nand_fetchsi"
+  [(match_operand:SI 0 "register_operand" "")	;; output
+   (match_operand:SI 1 "mem_noofs_operand" "")	;; memory
+   (match_operand:SI 2 "register_operand" "")	;; operand
+   (match_operand:SI 3 "const_int_operand" "")]	;; model
+  "TARGET_ATOMIC"
+{
+  arc_expand_atomic_op (NOT, operands[1], operands[2],
+			     NULL_RTX, operands[0], operands[3]);
+  DONE;
+})
+
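For context only (not part of the patch), a minimal C sketch of the kind of
user code these expanders serve; the mappings noted in the comments assume
SImode operands and a compiler built with this patch and invoked with
-matomic:

/* Illustrative only; assumes -matomic on ARC700/ARCHS.  */
#include <stdint.h>

static int32_t counter;

int
try_update (int32_t *lock, int32_t expected, int32_t desired)
{
  /* Should expand through the atomic_fetch_addsi pattern above.  */
  __atomic_fetch_add (&counter, 1, __ATOMIC_SEQ_CST);

  /* Should expand through the mem_thread_fence pattern above.  */
  __atomic_thread_fence (__ATOMIC_ACQUIRE);

  /* Should expand through atomic_compare_and_swapsi.  */
  return __atomic_compare_exchange_n (lock, &expected, desired,
                                      0 /* strong */, __ATOMIC_ACQ_REL,
                                      __ATOMIC_RELAXED);
}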
diff --git a/gcc/config/arc/constraints.md b/gcc/config/arc/constraints.md
index 65ea44a..18309cc 100644
--- a/gcc/config/arc/constraints.md
+++ b/gcc/config/arc/constraints.md
@@ -421,3 +421,9 @@ 
    An unsigned 6-bit integer constant, up to 62."
   (and (match_code "const_int")
        (match_test "UNSIGNED_INT6 (ival - 1)")))
+
+;; Memory constraint used for atomic ops.
+(define_memory_constraint "ATO"
+  "A memory with only a base register"
+  (match_operand 0 "mem_noofs_operand"))
+
diff --git a/gcc/config/arc/predicates.md b/gcc/config/arc/predicates.md
index 43f9474..de0735a 100644
--- a/gcc/config/arc/predicates.md
+++ b/gcc/config/arc/predicates.md
@@ -813,3 +813,7 @@ 
 (define_predicate "short_const_int_operand"
   (and (match_operand 0 "const_int_operand")
        (match_test "satisfies_constraint_C16 (op)")))
+
+(define_predicate "mem_noofs_operand"
+  (and (match_code "mem")
+       (match_code "reg" "0")))
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index fb908b3..c0a99d7 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -538,7 +538,7 @@  Objective-C and Objective-C++ Dialects}.
 @gccoptlist{-mbarrel-shifter @gol
 -mcpu=@var{cpu} -mA6 -mARC600 -mA7 -mARC700 @gol
 -mdpfp -mdpfp-compact -mdpfp-fast -mno-dpfp-lrsr @gol
--mea -mno-mpy -mmul32x16 -mmul64 @gol
+-mea -mno-mpy -mmul32x16 -mmul64 -matomic @gol
 -mnorm -mspfp -mspfp-compact -mspfp-fast -msimd -msoft-float -mswap @gol
 -mcrc -mdsp-packa -mdvbf -mlock -mmac-d16 -mmac-24 -mrtsc -mswape @gol
 -mtelephony -mxy -misize -mannotate-align -marclinux -marclinux_prof @gol
@@ -12948,6 +12948,12 @@  can overridden by FPX options; @samp{mspfp}, @samp{mspfp-compact}, or
 @opindex mswap
 Generate swap instructions.
 
+@item -matomic
+@opindex matomic
+This option enables the Locked Load/Store Conditional extension,
+used to implement the atomic memory built-in functions.  It is not
+available for ARC 6xx or ARC EM cores.
+
 @item -mdiv-rem
 @opindex mdiv-rem
 Enable DIV/REM instructions for ARCv2 cores.
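As a usage sketch (not part of the patch): the command line below is
illustrative only, but with -matomic the driver also passes -mlock to the
assembler (see the ASM_SPEC change above), and the legacy __sync built-ins
are covered as well:

/* Compile with something like (triplet and flags are assumptions):
     arc-elf32-gcc -mcpu=ARCHS -matomic -O2 -c sync.c  */
static int flag;

int
add_one (void)
{
  /* Should expand through the atomic_fetch_addsi pattern
     in atomic.md.  */
  return __sync_fetch_and_add (&flag, 1);
}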
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 75d5068..cc847ee 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2602,6 +2602,15 @@  proc check_effective_target_aarch64_little_endian { } {
     }]
 }
 
+# Return 1 if this is a compiler supporting ARC atomic operations
+proc check_effective_target_arc_atomic { } {
+    return [check_no_compiler_messages arc_atomic assembly {
+	#if !defined(__ARC_ATOMIC__)
+	#error FOO
+	#endif
+    }]
+}
+
 # Return 1 if this is an arm target using 32-bit instructions
 proc check_effective_target_arm32 { } {
     if { ![istarget arm*-*-*] } {
@@ -5513,6 +5522,7 @@  proc check_effective_target_sync_int_long { } {
 	     || [istarget crisv32-*-*] || [istarget cris-*-*]
 	     || ([istarget sparc*-*-*] && [check_effective_target_sparc_v9])
 	     || [istarget spu-*-*]
+	     || ([istarget arc*-*-*] && [check_effective_target_arc_atomic])
 	     || [check_effective_target_mips_llsc] } {
            set et_sync_int_long_saved 1
         }
@@ -5544,6 +5554,7 @@  proc check_effective_target_sync_char_short { } {
 	     || [istarget crisv32-*-*] || [istarget cris-*-*]
 	     || ([istarget sparc*-*-*] && [check_effective_target_sparc_v9])
 	     || [istarget spu-*-*]
+	     || ([istarget arc*-*-*] && [check_effective_target_arc_atomic])
 	     || [check_effective_target_mips_llsc] } {
            set et_sync_char_short_saved 1
         }
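For illustration, a test along the following lines would be gated by the new
effective-target check; the file name and options are assumptions and not
part of this patch:

/* Hypothetical gcc.target/arc/atomic-1.c  */
/* { dg-do compile } */
/* { dg-require-effective-target arc_atomic } */
/* { dg-options "-matomic" } */

int v;

void
f (void)
{
  __atomic_add_fetch (&v, 1, __ATOMIC_SEQ_CST);
}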