@@ -73,6 +73,8 @@ extern void alpha_end_function (FILE *, const char *, tree);
extern bool alpha_find_lo_sum_using_gp (rtx);
+extern int alpha_store_data_bypass_p (rtx_insn *, rtx_insn *);
+
#ifdef REAL_VALUE_TYPE
extern int check_float_value (machine_mode, REAL_VALUE_TYPE *, int);
#endif
@@ -7564,6 +7564,75 @@ alpha_does_function_need_gp (void)
return 0;
}
+/* Helper function for alpha_store_data_bypass_p: true if OUT_INSN feeds
+   only the stored data, never the address, of the single store IN_SET.  */
+
+static bool
+alpha_store_data_bypass_p_1 (rtx_insn *out_insn, rtx in_set)
+{
+  if (!MEM_P (SET_DEST (in_set)))	/* IN_SET is not a store.  */
+    return false;
+
+  rtx out_set = single_set (out_insn);
+  if (out_set)
+    return !reg_mentioned_p (SET_DEST (out_set), SET_DEST (in_set));
+
+  rtx out_pat = PATTERN (out_insn);
+  if (GET_CODE (out_pat) != PARALLEL)
+    return false;
+
+  for (int i = 0; i < XVECLEN (out_pat, 0); i++)
+    {
+      rtx out_exp = XVECEXP (out_pat, 0, i);
+      /* Unlike generic store_data_bypass_p, skip over TRAP_IF elements.  */
+      if (GET_CODE (out_exp) == CLOBBER || GET_CODE (out_exp) == USE
+	  || GET_CODE (out_exp) == TRAP_IF)
+	continue;
+
+      gcc_assert (GET_CODE (out_exp) == SET);
+
+      if (reg_mentioned_p (SET_DEST (out_exp), SET_DEST (in_set)))
+	return false;
+    }
+
+  return true;
+}
+
+/* True if the dependency between OUT_INSN and IN_INSN is on the store
+   data and not on the address operand(s) of the store.  IN_INSN and
+   OUT_INSN must each be either a single_set or a PARALLEL of SETs.
+
+   This alpha-specific version of store_data_bypass_p ignores TRAP_IF
+   elements that would result in an assertion failure (and internal
+   compiler error) in the generic store_data_bypass_p function.  */
+
+int
+alpha_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
+{
+  if (in_set)
+    return alpha_store_data_bypass_p_1 (out_insn, in_set);
+
+  rtx in_pat = PATTERN (in_insn);
+  if (GET_CODE (in_pat) != PARALLEL)
+    return false;
+
+  for (int i = 0; i < XVECLEN (in_pat, 0); i++)
+    {
+      rtx in_exp = XVECEXP (in_pat, 0, i);
+      /* Tolerate TRAP_IF here rather than asserting, see comment above.  */
+      if (GET_CODE (in_exp) == CLOBBER || GET_CODE (in_exp) == USE
+	  || GET_CODE (in_exp) == TRAP_IF)
+	continue;
+
+      gcc_assert (GET_CODE (in_exp) == SET);
+
+      if (!alpha_store_data_bypass_p_1 (out_insn, in_exp))
+	return false;
+    }
+
+  return true;
+}
/* Helper function to set RTX_FRAME_RELATED_P on instructions, including
sequences. */
@@ -44,14 +44,7 @@ (define_insn_reservation "ev4_ld" 1
; Stores can issue before the data (but not address) is ready.
(define_insn_reservation "ev4_ist" 1
  (and (eq_attr "tune" "ev4")
-       (eq_attr "type" "ist"))
-  "ev4_ib1+ev4_abox")
-
-; ??? Separate from ev4_ist because store_data_bypass_p can't handle
-; the patterns with multiple sets, like store-conditional.
-(define_insn_reservation "ev4_ist_c" 1
-  (and (eq_attr "tune" "ev4")
-    (eq_attr "type" "st_c"))
+       (eq_attr "type" "ist,st_c"))	; st_c merged: multi-set stores now OK
  "ev4_ib1+ev4_abox")
(define_insn_reservation "ev4_fst" 1
@@ -110,7 +103,7 @@ (define_bypass 1 "ev4_icmp" "ev4_ibr")
(define_bypass 0
  "ev4_iaddlog,ev4_shiftcm,ev4_icmp"
  "ev4_ist"
-  "store_data_bypass_p")
+  "alpha_store_data_bypass_p")	; TRAP_IF-tolerant variant
; Multiplies use a non-pipelined imul unit. Also, "no [ebox] insn can
; be issued exactly three cycles before an integer multiply completes".
@@ -121,7 +114,7 @@ (define_insn_reservation "ev4_imulsi" 21
(eq_attr "opsize" "si")))
"ev4_ib0+ev4_imul,ev4_imul*18,ev4_ebox")
-(define_bypass 20 "ev4_imulsi" "ev4_ist" "store_data_bypass_p")
+(define_bypass 20 "ev4_imulsi" "ev4_ist" "alpha_store_data_bypass_p") ; TRAP_IF-tolerant
(define_insn_reservation "ev4_imuldi" 23
(and (eq_attr "tune" "ev4")
@@ -129,7 +122,7 @@ (define_insn_reservation "ev4_imuldi" 23
(eq_attr "opsize" "!si")))
"ev4_ib0+ev4_imul,ev4_imul*20,ev4_ebox")
-(define_bypass 22 "ev4_imuldi" "ev4_ist" "store_data_bypass_p")
+(define_bypass 22 "ev4_imuldi" "ev4_ist" "alpha_store_data_bypass_p") ; TRAP_IF-tolerant
; Most FP insns have a 6 cycle latency, but with a 4 cycle bypass back in.
(define_insn_reservation "ev4_fpop" 6
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftrapv -mcpu=ev4" } */
+/* -ftrapv wraps signed adds in TRAP_IF patterns; EV4 bypass must not ICE.  */
+typedef struct tnode_t {
+  struct tnode_t *tn_left, *tn_right;
+  int v_quad;
+} tnode_t;
+
+int constant_addr(const tnode_t *, long *);
+int constant_addr(const tnode_t *tn, long *offsp)
+{
+  long offs1 = 0, offs2 = 0;
+
+  if (tn->v_quad > 0) {
+    offs1 = tn->v_quad;
+    return 0;
+  } else if (tn->v_quad > -1) {
+    offs2 = tn->tn_right->v_quad;
+    if (!constant_addr(tn->tn_left, &offs1))
+      return 0;
+  } else {
+    return 0;
+  }
+  *offsp = offs1 + offs2;
+  return 1;
+}