===================================================================
@@ -535,6 +535,7 @@ Objective-C and Objective-C++ Dialects}.
-mtp=@var{name} -mtls-dialect=@var{dialect} @gol
-mword-relocations @gol
-mfix-cortex-m3-ldrd @gol
+-mfix-cortex-a9-volatile-hazards @gol
-munaligned-access @gol
-mneon-for-64bits @gol
-mslow-flash-data @gol
@@ -12677,6 +12678,16 @@ with overlapping destination and base registers ar
generating these instructions. This option is enabled by default when
@option{-mcpu=cortex-m3} is specified.
+@item -mfix-cortex-a9-volatile-hazards
+@opindex mfix-cortex-a9-volatile-hazards
+Cortex-A9 MPCore processors have an erratum that in rare cases causes
+successive memory loads to appear out of program order if another processor
+is simultaneously writing to the same location. This causes problems if
+volatile variables are used for communication between processors.
+This option enables the ARM recommended workaround, to insert a @code{dmb}
+instruction after each volatile load. Because of the potentially high
+overhead, this workaround is not enabled by default.
+
@item -munaligned-access
@itemx -mno-unaligned-access
@opindex munaligned-access
===================================================================
@@ -264,6 +264,11 @@ Target Report Var(fix_cm3_ldrd) Init(2)
Avoid overlapping destination and address registers on LDRD instructions
that may trigger Cortex-M3 errata.
+mfix-cortex-a9-volatile-hazards
+Target Report Var(fix_a9_volatile_hazards) Init(0)
+Avoid errata causing read-after-read hazards for concurrent volatile
+accesses on Cortex-A9 MPCore processors.
+
munaligned-access
Target Report Var(unaligned_access) Init(2)
Enable unaligned word and halfword accesses to packed data.
===================================================================
@@ -46,7 +46,7 @@
(define_insn "*memory_barrier"
[(set (match_operand:BLK 0 "" "")
(unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
- "TARGET_HAVE_MEMORY_BARRIER"
+ "TARGET_HAVE_MEMORY_BARRIER && !TARGET_THUMB2"
{
if (TARGET_HAVE_DMB)
{
@@ -65,6 +65,29 @@
(set_attr "conds" "unconditional")
(set_attr "predicable" "no")])
+;; Thumb-2 version allows conditional execution
+(define_insn "*memory_barrier_t2"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
+ "TARGET_HAVE_MEMORY_BARRIER && TARGET_THUMB2"
+ {
+ if (TARGET_HAVE_DMB)
+ {
+ /* Note we issue a system level barrier. We should consider issuing
+         an inner shareability zone barrier here instead, i.e. "DMB ISH". */
+ /* ??? Differentiate based on SEQ_CST vs less strict? */
+ return "dmb%?\tsy";
+ }
+
+ if (TARGET_HAVE_DMB_MCR)
+ return "mcr%?\tp15, 0, r0, c7, c10, 5";
+
+ gcc_unreachable ();
+ }
+ [(set_attr "length" "4")
+ (set_attr "conds" "nocond")
+ (set_attr "predicable" "yes")])
+
(define_insn "atomic_load<mode>"
[(set (match_operand:QHSI 0 "register_operand" "=r")
(unspec_volatile:QHSI
===================================================================
@@ -2856,6 +2856,13 @@ arm_option_override (void)
fix_cm3_ldrd = 0;
}
+ if (fix_a9_volatile_hazards == 1 && !TARGET_HAVE_DMB)
+ {
+ warning (0, "target CPU does not support DMB, disabling "
+ "-mfix-cortex-a9-volatile-hazards");
+ fix_a9_volatile_hazards = 0;
+ }
+
/* Enable -munaligned-access by default for
- all ARMv6 architecture-based processors
- ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
@@ -17208,6 +17215,85 @@ thumb2_reorg (void)
CLEAR_REG_SET (&live);
}
+/* Return TRUE if BODY contains any volatile loads, i.e. a SET whose
+   destination is a register (or subreg of one) and whose source reads
+   a volatile memory reference.  Recurses into composite rtxes such as
+   PARALLELs and COND_EXECs.  */
+
+static bool
+any_volatile_loads_p (const_rtx body)
+{
+  int i, j;
+  rtx lhs, rhs;
+  enum rtx_code code;
+  const char *fmt;
+
+  if (body == NULL_RTX)
+    return false;
+
+  code = GET_CODE (body);
+
+  if (code == SET)
+    {
+      lhs = SET_DEST (body);
+      rhs = SET_SRC (body);
+
+      /* Only a register (or subreg) destination can be a load; stores
+	 are not affected by the erratum.  */
+      if (!REG_P (lhs) && GET_CODE (lhs) != SUBREG)
+	return false;
+
+      /* Narrow loads appear as an extension of the memory reference,
+	 e.g. (set (reg:SI) (zero_extend:SI (mem/v:QI ...))); look
+	 through the extension so they get a barrier too.  */
+      if (GET_CODE (rhs) == ZERO_EXTEND || GET_CODE (rhs) == SIGN_EXTEND)
+	rhs = XEXP (rhs, 0);
+
+      /* MEM_VOLATILE_P is only valid on MEM (and ASM) rtxes; applying
+	 it to other codes such as SYMBOL_REF reads an unrelated flag
+	 and trips RTL checking.  A non-MEM source is not a load.  */
+      return MEM_P (rhs) && MEM_VOLATILE_P (rhs);
+    }
+
+  fmt = GET_RTX_FORMAT (code);
+
+  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+    {
+      if (fmt[i] == 'e')
+	{
+	  if (any_volatile_loads_p (XEXP (body, i)))
+	    return true;
+	}
+      else if (fmt[i] == 'E')
+	for (j = 0; j < XVECLEN (body, i); j++)
+	  if (any_volatile_loads_p (XVECEXP (body, i, j)))
+	    return true;
+    }
+
+  return false;
+}
+
+/* Work around read-after-read erratum on Cortex-A9 MPCore processors by
+   inserting DMB instructions after volatile loads. For more information see:
+
+   http://infocenter.arm.com/help/topic/com.arm.doc.uan0004a/\
+   UAN0004A_a9_read_read.pdf
+*/
+
+/* INSN is one instruction from the arm_reorg scan; if its pattern
+   performs a volatile load, emit a memory barrier immediately after it.
+   NOTE(review): assumes INSN satisfies INSN_P at the call site (PATTERN
+   is invalid on notes/barriers) -- confirm in arm_reorg.  */
+
+static void
+arm_cortex_a9_errata_reorg (rtx insn)
+{
+  rtx body = PATTERN (insn);
+
+  if (any_volatile_loads_p (body))
+    {
+      /* NOTE(review): gen_memory_barrier () comes from the
+	 "memory_barrier" define_expand; confirm it yields an insn here,
+	 since the PATTERN () call below requires one rather than a bare
+	 pattern rtx.  */
+      rtx barrier = gen_memory_barrier ();
+
+      /* In Thumb-2 mode, the barrier can be executed with the same condition
+         as the load, which may reduce the number of IT instructions needed for
+         adjacent loads.  In ARM mode barriers are unconditional
+         instructions, so leave them alone.  */
+      if (TARGET_THUMB2 && GET_CODE (body) == COND_EXEC)
+	{
+	  rtx predicate = COND_EXEC_TEST (body);
+	  barrier = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (predicate),
+				       PATTERN (barrier));
+	}
+
+      /* Place the barrier directly after the load so no other processor's
+	 store can be observed between the load and the DMB.  */
+      emit_insn_after (barrier, insn);
+    }
+}
+
/* Gcc puts the pool in the wrong place for ARM, since we can only
load addresses a limited distance around the pc. We do some
special munging to move the constant pool values to the correct
@@ -17248,6 +17334,9 @@ arm_reorg (void)
{
rtx table;
+ if (fix_a9_volatile_hazards)
+ arm_cortex_a9_errata_reorg (insn);
+
note_invalid_constants (insn, address, true);
address += get_attr_length (insn);
===================================================================
@@ -2800,6 +2800,20 @@ proc check_effective_target_arm_cortex_m { } {
} "-mthumb"]
}
+# Return 1 if this is an ARM CPU that supports the DMB instruction.
+# NOTE(review): accepts ARMv7-A/R/M and ARMv6-M.  Confirm this list
+# matches the compiler's TARGET_HAVE_DMB condition, otherwise the
+# -mfix-cortex-a9-volatile-hazards tests run on targets where the
+# option is disabled with a warning and the scans fail.
+
+proc check_effective_target_arm_dmb { } {
+    return [check_no_compiler_messages arm_dmb assembly {
+	#if !defined(__ARM_ARCH_7A__) \
+	    && !defined(__ARM_ARCH_7R__) \
+	    && !defined(__ARM_ARCH_7M__) \
+	    && !defined(__ARM_ARCH_6M__)
+	#error FOO
+	#endif
+	int i;
+    } ""]
+}
+
# Return 1 if the target supports executing NEON instructions, 0
# otherwise. Cache the result.
===================================================================
@@ -0,0 +1,13 @@
+/* { dg-do compile { target arm_dmb } } */
+/* { dg-options "-O2 -mfix-cortex-a9-volatile-hazards" } */
+
+volatile float x;
+volatile double y;
+
+void foo (void)
+{
+ ++x;
+ ++y;
+}
+
+/* { dg-final { scan-assembler-times "dmb" 2 } } */
===================================================================
@@ -0,0 +1,11 @@
+/* { dg-do compile { target arm_dmb } } */
+/* { dg-options "-O2 -mfix-cortex-a9-volatile-hazards" } */
+
+volatile int x;
+
+int foo ()
+{
+ return x;
+}
+
+/* { dg-final { scan-assembler "dmb" } } */
===================================================================
@@ -0,0 +1,14 @@
+/* { dg-do compile { target arm_dmb } } */
+/* { dg-options "-O2 -mthumb -mfix-cortex-a9-volatile-hazards" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+
+volatile int x;
+volatile int y;
+
+int foo (int c)
+{
+ return x < y + c ? x : y;
+}
+
+/* { dg-final { scan-assembler "dmbgt" } } */
+/* { dg-final { scan-assembler "dmble" } } */
===================================================================
@@ -0,0 +1,15 @@
+/* { dg-do compile { target arm_dmb } } */
+/* { dg-options "-O2 -marm -mfix-cortex-a9-volatile-hazards" } */
+
+volatile int x;
+
+int foo (int c)
+{
+ int r = 0;
+ if (c == 10)
+ r = x;
+ return r;
+}
+
+/* { dg-final { scan-assembler "dmb\[\\t \]*sy" { xfail { arm_thumb2 } } } } */
+/* { dg-final { scan-assembler-not "dmbeq" { xfail { arm_thumb2 } } } } */