===================================================================
@@ -99,6 +99,8 @@ extern int arm_no_early_alu_shift_dep (r
extern int arm_no_early_alu_shift_value_dep (rtx, rtx);
extern int arm_no_early_mul_dep (rtx, rtx);
extern int arm_mac_accumulator_is_mul_result (rtx, rtx);
+extern int arm_writeback_dep (rtx, rtx);
+extern int arm_writeback_only_dep (rtx, rtx);
extern int tls_mentioned_p (rtx);
extern int symbol_mentioned_p (rtx);
===================================================================
@@ -22689,6 +22689,50 @@ arm_mac_accumulator_is_mul_result (rtx p
&& !reg_overlap_mentioned_p (mul_result, mac_op1));
}
+/* Return true if CONSUMER's pattern references a register named by one
+   of PRODUCER's REG_INC notes, i.e. if there is an address register
+   writeback dependency between the two instructions.  Return false
+   otherwise.  */
+
+int
+arm_writeback_dep (rtx producer, rtx consumer)
+{
+  rtx link = REG_NOTES (producer);
+
+  /* Walk the chain of register notes attached to PRODUCER.  */
+  while (link)
+    {
+      if (REG_NOTE_KIND (link) == REG_INC)
+        {
+          /* XEXP (link, 0) is the register recorded by the REG_INC note.  */
+          if (reg_referenced_p (XEXP (link, 0), PATTERN (consumer)))
+            return true;
+        }
+      link = XEXP (link, 1);
+    }
+
+  return false;
+}
+
+/* Callback for note_stores.  DATA points to an rtx holding the consumer
+   instruction, or null once a reference has already been found.  When
+   DEST is stored by a SET and the consumer's pattern references DEST,
+   record that fact by clearing the rtx that DATA points to.  */
+
+static void
+arm_writeback_only_dep_1 (rtx dest, const_rtx container, void *data)
+{
+  rtx *insn_slot = (rtx *) data;
+
+  /* Stores whose container is not a SET are not considered.  */
+  if (GET_CODE (container) != SET)
+    return;
+
+  /* A previous invocation already cleared the slot; nothing left to do.  */
+  if (!*insn_slot)
+    return;
+
+  if (reg_referenced_p (dest, PATTERN (*insn_slot)))
+    *insn_slot = 0;
+}
+
+/* Return nonzero if PRODUCER and CONSUMER have an address register
+   writeback dependency and CONSUMER references nothing that PRODUCER
+   stores with a SET, i.e. the writeback is the only true dependence
+   between them.  Return zero otherwise.  */
+
+int
+arm_writeback_only_dep (rtx producer, rtx consumer)
+{
+  rtx remaining = consumer;
+
+  /* Without a writeback dependency there is nothing to check.  */
+  if (!arm_writeback_dep (producer, consumer))
+    return 0;
+
+  /* arm_writeback_only_dep_1 clears REMAINING as soon as it sees a SET
+     destination in PRODUCER that CONSUMER references.  */
+  note_stores (PATTERN (producer), arm_writeback_only_dep_1, &remaining);
+  return remaining != NULL_RTX;
+}
/* The EABI says test the least significant bit of a guard variable. */
===================================================================
@@ -186,8 +186,22 @@ (define_bypass 4 "cortex_a8_mul,cortex_a
"cortex_a8_alu_shift_reg"
"arm_no_early_alu_shift_value_dep")
+
+;; Load address register writeback
+
+;; Address register writeback has a latency of 2 instructions, or 1 if
+;; there is no early dependency.  Don't bother handling early shift
+;; dependencies for address writeback; it's very unlikely that an
+;; address will be used that way in critical code.
+;;
+;; Both bypasses are guarded by arm_writeback_only_dep, so they apply
+;; only when the address register writeback is the only true dependence
+;; between the load and its consumer.  A consumer that also depends on
+;; the load result is left to the load-result latencies.
+
+;; Latency 1: consumers matching cortex_a8_alu* and cortex_a8_mov.
+(define_bypass 1 "cortex_a8_load*"
+ "cortex_a8_alu*,cortex_a8_mov"
+ "arm_writeback_only_dep")
+;; Latency 2: consumers matching the multiply, multiply-accumulate, load,
+;; store, branch and call reservation names listed below.
+(define_bypass 2 "cortex_a8_load*"
+ "cortex_a8_*mul*,cortex_a8_*mla*,cortex_a8_load*,
+ cortex_a8_store*,cortex_a8_branch,cortex_a8_call"
+ "arm_writeback_only_dep")
+
;; Load instructions.
-;; The presence of any register writeback is ignored here.
;; A load result has latency 3 unless the dependent instruction has
;; no early dep, in which case it is only latency two.
@@ -229,8 +243,18 @@ (define_bypass 4 "cortex_a8_load3_4"
"cortex_a8_alu_shift_reg"
"arm_no_early_alu_shift_value_dep")
+;; Store address register writeback
+
+;; See comment for load address writeback above.
+;;
+;; Unlike the load bypasses, these are guarded by arm_writeback_dep, which
+;; only requires that a writeback dependency exists.
+;; NOTE(review): presumably this is because a store produces no register
+;; result other than the written-back address -- confirm.
+
+;; Latency 1: consumers matching cortex_a8_alu* and cortex_a8_mov.
+(define_bypass 1 "cortex_a8_store*"
+ "cortex_a8_alu*,cortex_a8_mov"
+ "arm_writeback_dep")
+;; Latency 2: consumers matching the multiply, multiply-accumulate, load,
+;; store, branch and call reservation names listed below.
+(define_bypass 2 "cortex_a8_store*"
+ "cortex_a8_*mul*,cortex_a8_*mla*,cortex_a8_load*,
+ cortex_a8_store*,cortex_a8_branch,cortex_a8_call"
+ "arm_writeback_dep")
+
;; Store instructions.
-;; Writeback is again ignored.
(define_insn_reservation "cortex_a8_store1_2" 0
(and (eq_attr "tune" "cortexa8")
===================================================================
@@ -163,6 +163,15 @@ (define_bypass 2 "cortex_a9_dp_shift"
cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2,
cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4")
+;; Address register writeback has a latency of 1 instruction.
+;;
+;; The bypasses below apply to every consumer whose reservation name
+;; matches cortex_a9_*.  The load bypass is guarded by
+;; arm_writeback_only_dep, so it applies only when the writeback is the
+;; only true dependence between producer and consumer; the store bypass
+;; is guarded by arm_writeback_dep, which only requires that a writeback
+;; dependency exists.
+
+(define_bypass 1 "cortex_a9_load*"
+ "cortex_a9_*"
+ "arm_writeback_only_dep")
+(define_bypass 1 "cortex_a9_store*"
+ "cortex_a9_*"
+ "arm_writeback_dep")
+
;; An instruction in the load store pipeline can provide
;; read access to a DP instruction in the P0 default pipeline
;; before the writeback stage.