diff mbox series

[arm] Fix 88714, Arm LDRD/STRD peepholes

Message ID VI1PR0801MB2014E8C29FEEF5216A76E79DE06E0@VI1PR0801MB2014.eurprd08.prod.outlook.com
State New
Headers show
Series [arm] Fix 88714, Arm LDRD/STRD peepholes | expand

Commit Message

Matthew Malcomson Feb. 5, 2019, 2:44 p.m. UTC
These peepholes match a pair of SImode loads or stores that can be
implemented with a single LDRD or STRD instruction.
When compiling for TARGET_ARM, these peepholes originally created a set
pattern in DI mode to be caught by movdi patterns.

This approach failed to take into account the possibility that the two
matched insns operated on memory with different aliasing information.
The peepholes lost the aliasing information on one of the insns, which
could then cause the scheduler to make an invalid transformation.

This patch changes the peepholes so they generate a PARALLEL expression
of the two relevant loads or stores, which means the aliasing
information of both is kept.  Such a PARALLEL pattern is what the
peepholes currently produce for TARGET_THUMB2.

In order to match these new insn patterns, we add two new define_insn's.  These
define_insn's use the same checks as the peepholes to find valid insns.

Note that the patterns now created by the peepholes for LDRD and STRD
are very similar to those created by the peepholes for LDM and STM.
Many patterns could be matched by the LDM and STM define_insns, which
means we rely on the order the define_insn patterns are defined in the
machine description, with those for LDRD/STRD defined before those for
LDM/STM.

The difference between the peepholes for LDRD/STRD and those for LDM/STM
are mainly that those for LDRD/STRD have some logic to ensure that the
two registers are consecutive and the first one is even.

Bootstrapped and regtested on arm-none-linux-gnu.
Demonstrated fix of bug 88714 by bootstrapping on armv7l.


gcc/ChangeLog:

2019-02-05  Matthew Malcomson  <matthew.malcomson@arm.com>

	PR bootstrap/88714
	* config/arm/arm-protos.h (valid_operands_ldrd_strd,
	arm_count_ldrdstrd_insns): New declarations.
	* config/arm/arm.c (mem_ok_for_ldrd_strd): Remove broken handling of
	MINUS.
	(valid_operands_ldrd_strd): New function.
	(arm_count_ldrdstrd_insns): New function.
	* config/arm/ldrdstrd.md: Change peepholes to generate PARALLEL SImode
	sets instead of single DImode set and define new insns to match this.

gcc/testsuite/ChangeLog:

2019-02-05  Matthew Malcomson  <matthew.malcomson@arm.com>

	* gcc.c-torture/execute/pr88714.c: New test.
	* gcc.dg/rtl/arm/ldrd-peepholes.c: New test.



###############     Attachment also inlined for ease of reply    ###############
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 79ede0db174fcce87abe8b4d18893550d4c7e2f6..485bc68a618d6ae4a1640368ccb025fe2c9e1420 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -125,6 +125,7 @@ extern rtx arm_gen_store_multiple (int *, int, rtx, int, rtx, HOST_WIDE_INT *);
 extern bool offset_ok_for_ldrd_strd (HOST_WIDE_INT);
 extern bool operands_ok_ldrd_strd (rtx, rtx, rtx, HOST_WIDE_INT, bool, bool);
 extern bool gen_operands_ldrd_strd (rtx *, bool, bool, bool);
+extern bool valid_operands_ldrd_strd (rtx *, bool);
 extern int arm_gen_movmemqi (rtx *);
 extern bool gen_movmem_ldrd_strd (rtx *);
 extern machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx);
@@ -146,6 +147,7 @@ extern const char *output_mov_long_double_arm_from_arm (rtx *);
 extern const char *output_move_double (rtx *, bool, int *count);
 extern const char *output_move_quad (rtx *);
 extern int arm_count_output_move_double_insns (rtx *);
+extern int arm_count_ldrdstrd_insns (rtx *, bool);
 extern const char *output_move_vfp (rtx *operands);
 extern const char *output_move_neon (rtx *operands);
 extern int arm_attr_length_move_neon (rtx_insn *);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 73cb8df9af1ec9d680091bb8691bcd925a1be1d3..1c336da9e5b0948ef1058c46966364510dc1ca38 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -15556,7 +15556,7 @@ mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
       *base = addr;
       return true;
     }
-  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
+  else if (GET_CODE (addr) == PLUS)
     {
       *base = XEXP (addr, 0);
       *offset = XEXP (addr, 1);
@@ -15721,7 +15721,7 @@ gen_operands_ldrd_strd (rtx *operands, bool load,
     }
 
   /* Make sure accesses are to consecutive memory locations.  */
-  if (gap != 4)
+  if (gap != GET_MODE_SIZE (SImode))
     return false;
 
   if (!align_ok_ldrd_strd (align[0], offset))
@@ -15802,6 +15802,55 @@ gen_operands_ldrd_strd (rtx *operands, bool load,
 }
 
 
+/* Return true if parallel execution of the two word-size accesses provided
+   could be satisfied with a single LDRD/STRD instruction.  Two word-size
+   accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
+   register operands and OPERANDS[2,3] are the corresponding memory operands.
+   */
+bool
+valid_operands_ldrd_strd (rtx *operands, bool load)
+{
+  int nops = 2;
+  HOST_WIDE_INT offsets[2], offset, align[2];
+  rtx base = NULL_RTX;
+  rtx cur_base, cur_offset;
+  int i, gap;
+
+  /* Check that the memory references are immediate offsets from the
+     same base register.  Extract the base register, the destination
+     registers, and the corresponding memory offsets.  */
+  for (i = 0; i < nops; i++)
+    {
+      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
+				 &align[i]))
+	return false;
+
+      if (i == 0)
+	base = cur_base;
+      else if (REGNO (base) != REGNO (cur_base))
+	return false;
+
+      offsets[i] = INTVAL (cur_offset);
+      if (GET_CODE (operands[i]) == SUBREG)
+	return false;
+    }
+
+  if (offsets[0] > offsets[1])
+    return false;
+
+  gap = offsets[1] - offsets[0];
+  offset = offsets[0];
+
+  /* Make sure accesses are to consecutive memory locations.  */
+  if (gap != GET_MODE_SIZE (SImode))
+    return false;
+
+  if (!align_ok_ldrd_strd (align[0], offset))
+    return false;
+
+  return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
+				false, load);
+}
 
 
 /* Print a symbolic form of X to the debug file, F.  */
@@ -28474,6 +28523,26 @@ arm_count_output_move_double_insns (rtx *operands)
   return count;
 }
 
+/* Same as above, but operands are a register/memory pair in SImode.
+   Assumes operands has the base register in position 0 and memory in position
+   2 (which is the order provided by the arm_{ldrd,strd} patterns).  */
+int
+arm_count_ldrdstrd_insns (rtx *operands, bool load)
+{
+  int count;
+  rtx ops[2];
+  int regnum, memnum;
+  if (load)
+    regnum = 0, memnum = 1;
+  else
+    regnum = 1, memnum = 0;
+  ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
+  ops[memnum] = adjust_address (operands[2], DImode, 0);
+  output_move_double (ops, false, &count);
+  return count;
+}
+
+
 int
 vfp3_const_double_for_fract_bits (rtx operand)
 {
diff --git a/gcc/config/arm/ldrdstrd.md b/gcc/config/arm/ldrdstrd.md
index be53d010fa6dfcf5a6854ae2b17f7cfcea25db9e..cb7a6adebbc8084a2e642ff2dcbef8b3fb16f268 100644
--- a/gcc/config/arm/ldrdstrd.md
+++ b/gcc/config/arm/ldrdstrd.md
@@ -23,37 +23,22 @@
 ;; The following peephole optimizations identify consecutive memory
 ;; accesses, and try to rearrange the operands to enable generation of
 ;; ldrd/strd.
+;;
+;; In many cases they behave in the same way that patterns in ldmstm.md behave,
+;; but there is extra logic in gen_operands_ldrd_strd to try and ensure the
+;; registers used are an (r<N>, r<N + 1>) pair where N is even.
 
 (define_peephole2 ; ldrd
   [(set (match_operand:SI 0 "arm_general_register_operand" "")
-        (match_operand:SI 2 "memory_operand" ""))
+	(match_operand:SI 2 "memory_operand" ""))
    (set (match_operand:SI 1 "arm_general_register_operand" "")
-        (match_operand:SI 3 "memory_operand" ""))]
+	(match_operand:SI 3 "memory_operand" ""))]
   "TARGET_LDRD"
-  [(const_int 0)]
+  [(parallel [(set (match_dup 0) (match_dup 2))
+	      (set (match_dup 1) (match_dup 3))])]
 {
   if (!gen_operands_ldrd_strd (operands, true, false, false))
     FAIL;
-  else if (TARGET_ARM)
-  {
-    /* In ARM state, the destination registers of LDRD/STRD must be
-       consecutive. We emit DImode access.  */
-    operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
-    operands[2] = adjust_address (operands[2], DImode, 0);
-    /* Emit [(set (match_dup 0) (match_dup 2))] */
-    emit_insn (gen_rtx_SET (operands[0], operands[2]));
-    DONE;
-  }
-  else if (TARGET_THUMB2)
-  {
-    /* Emit the pattern:
-       [(parallel [(set (match_dup 0) (match_dup 2))
-                   (set (match_dup 1) (match_dup 3))])] */
-    rtx t1 = gen_rtx_SET (operands[0], operands[2]);
-    rtx t2 = gen_rtx_SET (operands[1], operands[3]);
-    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
-    DONE;
-  }
 })
 
 (define_peephole2 ; strd
@@ -62,117 +47,50 @@ (define_peephole2 ; strd
    (set (match_operand:SI 3 "memory_operand" "")
 	(match_operand:SI 1 "arm_general_register_operand" ""))]
   "TARGET_LDRD"
-  [(const_int 0)]
+  [(parallel [(set (match_dup 2) (match_dup 0))
+	      (set (match_dup 3) (match_dup 1))])]
 {
   if (!gen_operands_ldrd_strd (operands, false, false, false))
     FAIL;
-  else if (TARGET_ARM)
-  {
-    /* In ARM state, the destination registers of LDRD/STRD must be
-       consecutive. We emit DImode access.  */
-    operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
-    operands[2] = adjust_address (operands[2], DImode, 0);
-    /* Emit [(set (match_dup 2) (match_dup 0))]  */
-    emit_insn (gen_rtx_SET (operands[2], operands[0]));
-    DONE;
-  }
-  else if (TARGET_THUMB2)
-  {
-    /* Emit the pattern:
-       [(parallel [(set (match_dup 2) (match_dup 0))
-                   (set (match_dup 3) (match_dup 1))])]  */
-    rtx t1 = gen_rtx_SET (operands[2], operands[0]);
-    rtx t2 = gen_rtx_SET (operands[3], operands[1]);
-    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
-    DONE;
-  }
 })
 
 ;; The following peepholes reorder registers to enable LDRD/STRD.
 (define_peephole2 ; strd of constants
   [(set (match_operand:SI 0 "arm_general_register_operand" "")
-        (match_operand:SI 4 "const_int_operand" ""))
+	(match_operand:SI 4 "const_int_operand" ""))
    (set (match_operand:SI 2 "memory_operand" "")
-        (match_dup 0))
+	(match_dup 0))
    (set (match_operand:SI 1 "arm_general_register_operand" "")
-        (match_operand:SI 5 "const_int_operand" ""))
+	(match_operand:SI 5 "const_int_operand" ""))
    (set (match_operand:SI 3 "memory_operand" "")
-        (match_dup 1))]
+	(match_dup 1))]
   "TARGET_LDRD"
-  [(const_int 0)]
+  [(set (match_dup 0) (match_dup 4))
+   (set (match_dup 1) (match_dup 5))
+   (parallel [(set (match_dup 2) (match_dup 0))
+	      (set (match_dup 3) (match_dup 1))])]
 {
   if (!gen_operands_ldrd_strd (operands, false, true, false))
     FAIL;
-  else if (TARGET_ARM)
-  {
-   rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0]));
-   operands[2] = adjust_address (operands[2], DImode, 0);
-   /* Emit the pattern:
-      [(set (match_dup 0) (match_dup 4))
-      (set (match_dup 1) (match_dup 5))
-      (set (match_dup 2) tmp)]  */
-   emit_insn (gen_rtx_SET (operands[0], operands[4]));
-   emit_insn (gen_rtx_SET (operands[1], operands[5]));
-   emit_insn (gen_rtx_SET (operands[2], tmp));
-   DONE;
-  }
-  else if (TARGET_THUMB2)
-  {
-    /* Emit the pattern:
-       [(set (match_dup 0) (match_dup 4))
-        (set (match_dup 1) (match_dup 5))
-        (parallel [(set (match_dup 2) (match_dup 0))
-                   (set (match_dup 3) (match_dup 1))])]  */
-    emit_insn (gen_rtx_SET (operands[0], operands[4]));
-    emit_insn (gen_rtx_SET (operands[1], operands[5]));
-    rtx t1 = gen_rtx_SET (operands[2], operands[0]);
-    rtx t2 = gen_rtx_SET (operands[3], operands[1]);
-    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
-    DONE;
-  }
 })
 
 (define_peephole2 ; strd of constants
   [(set (match_operand:SI 0 "arm_general_register_operand" "")
-        (match_operand:SI 4 "const_int_operand" ""))
+	(match_operand:SI 4 "const_int_operand" ""))
    (set (match_operand:SI 1 "arm_general_register_operand" "")
-        (match_operand:SI 5 "const_int_operand" ""))
+	(match_operand:SI 5 "const_int_operand" ""))
    (set (match_operand:SI 2 "memory_operand" "")
-        (match_dup 0))
+	(match_dup 0))
    (set (match_operand:SI 3 "memory_operand" "")
-        (match_dup 1))]
+	(match_dup 1))]
   "TARGET_LDRD"
-  [(const_int 0)]
+  [(set (match_dup 0) (match_dup 4))
+   (set (match_dup 1) (match_dup 5))
+   (parallel [(set (match_dup 2) (match_dup 0))
+	      (set (match_dup 3) (match_dup 1))])]
 {
   if (!gen_operands_ldrd_strd (operands, false, true, false))
      FAIL;
-  else if (TARGET_ARM)
-  {
-   rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0]));
-   operands[2] = adjust_address (operands[2], DImode, 0);
-   /* Emit the pattern
-      [(set (match_dup 0) (match_dup 4))
-       (set (match_dup 1) (match_dup 5))
-       (set (match_dup 2) tmp)]  */
-   emit_insn (gen_rtx_SET (operands[0], operands[4]));
-   emit_insn (gen_rtx_SET (operands[1], operands[5]));
-   emit_insn (gen_rtx_SET (operands[2], tmp));
-   DONE;
-  }
-  else if (TARGET_THUMB2)
-  {
-    /*  Emit the pattern:
-        [(set (match_dup 0) (match_dup 4))
-         (set (match_dup 1) (match_dup 5))
-         (parallel [(set (match_dup 2) (match_dup 0))
-                    (set (match_dup 3) (match_dup 1))])]  */
-    emit_insn (gen_rtx_SET (operands[0], operands[4]));
-    emit_insn (gen_rtx_SET (operands[1], operands[5]));
-    rtx t1 = gen_rtx_SET (operands[2], operands[0]);
-    rtx t2 = gen_rtx_SET (operands[3], operands[1]);
-    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
-    DONE;
-  }
 })
 
 ;; The following two peephole optimizations are only relevant for ARM
@@ -181,39 +99,32 @@ (define_peephole2 ; strd of constants
 (define_peephole2 ; swap the destination registers of two loads
 		  ; before a commutative operation.
   [(set (match_operand:SI 0 "arm_general_register_operand" "")
-        (match_operand:SI 2 "memory_operand" ""))
+	(match_operand:SI 2 "memory_operand" ""))
    (set (match_operand:SI 1 "arm_general_register_operand" "")
-        (match_operand:SI 3 "memory_operand" ""))
+	(match_operand:SI 3 "memory_operand" ""))
    (set (match_operand:SI 4 "arm_general_register_operand" "")
-        (match_operator:SI 5 "commutative_binary_operator"
+	(match_operator:SI 5 "commutative_binary_operator"
 			   [(match_operand 6 "arm_general_register_operand" "")
 			    (match_operand 7 "arm_general_register_operand" "") ]))]
   "TARGET_LDRD && TARGET_ARM
    && (  ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7])))
-        ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
+	||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
    && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
    && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))"
-  [(set (match_dup 0) (match_dup 2))
+  [(parallel [(set (match_dup 0) (match_dup 2))
+	      (set (match_dup 1) (match_dup 3))])
    (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))]
-  {
-    if (!gen_operands_ldrd_strd (operands, true, false, true))
-     {
-        FAIL;
-     }
-    else
-     {
-        operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
-        operands[2] = adjust_address (operands[2], DImode, 0);
-     }
-   }
-)
+{
+  if (!gen_operands_ldrd_strd (operands, true, false, true))
+    FAIL;
+})
 
 (define_peephole2 ; swap the destination registers of two loads
 		  ; before a commutative operation that sets the flags.
   [(set (match_operand:SI 0 "arm_general_register_operand" "")
-        (match_operand:SI 2 "memory_operand" ""))
+	(match_operand:SI 2 "memory_operand" ""))
    (set (match_operand:SI 1 "arm_general_register_operand" "")
-        (match_operand:SI 3 "memory_operand" ""))
+	(match_operand:SI 3 "memory_operand" ""))
    (parallel
       [(set (match_operand:SI 4 "arm_general_register_operand" "")
 	    (match_operator:SI 5 "commutative_binary_operator"
@@ -225,24 +136,62 @@ (define_peephole2 ; swap the destination registers of two loads
        ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
    && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
    && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))"
-  [(set (match_dup 0) (match_dup 2))
+  [(parallel [(set (match_dup 0) (match_dup 2))
+	      (set (match_dup 1) (match_dup 3))])
    (parallel
       [(set (match_dup 4)
 	    (match_op_dup 5 [(match_dup 6) (match_dup 7)]))
        (clobber (reg:CC CC_REGNUM))])]
-  {
-    if (!gen_operands_ldrd_strd (operands, true, false, true))
-     {
-        FAIL;
-     }
-    else
-     {
-        operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
-        operands[2] = adjust_address (operands[2], DImode, 0);
-     }
-   }
-)
+{
+  if (!gen_operands_ldrd_strd (operands, true, false, true))
+    FAIL;
+})
 
 ;; TODO: Handle LDRD/STRD with writeback:
 ;; (a) memory operands can be POST_INC, POST_DEC, PRE_MODIFY, POST_MODIFY
 ;; (b) Patterns may be followed by an update of the base address.
+
+
+;; insns matching the LDRD/STRD patterns that will get created by the above
+;; peepholes.
+;; We use gen_operands_ldrd_strd() with a modify argument as false so that the
+;; operands are not changed.
+(define_insn "*arm_ldrd"
+  [(parallel [(set (match_operand:SI 0 "s_register_operand" "=r")
+		   (match_operand:SI 2 "memory_operand" "m"))
+	      (set (match_operand:SI 1 "s_register_operand" "=r")
+		   (match_operand:SI 3 "memory_operand" "m"))])]
+  "TARGET_LDRD && TARGET_ARM && reload_completed
+  && valid_operands_ldrd_strd (operands, true)"
+  {
+    rtx op[2];
+    op[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
+    op[1] = adjust_address (operands[2], DImode, 0);
+    return output_move_double (op, true, NULL);
+  }
+  [(set (attr "length")
+	(symbol_ref "arm_count_ldrdstrd_insns (operands, true) * 4"))
+   (set (attr "ce_count") (symbol_ref "get_attr_length (insn) / 4"))
+   (set_attr "type" "load_8")
+   (set_attr "predicable" "yes")]
+)
+
+(define_insn "*arm_strd"
+  [(parallel [(set (match_operand:SI 2 "memory_operand" "=m")
+		   (match_operand:SI 0 "s_register_operand" "r"))
+	      (set (match_operand:SI 3 "memory_operand" "=m")
+		   (match_operand:SI 1 "s_register_operand" "r"))])]
+  "TARGET_LDRD && TARGET_ARM && reload_completed
+  && valid_operands_ldrd_strd (operands, false)"
+  {
+    rtx op[2];
+    op[0] = adjust_address (operands[2], DImode, 0);
+    op[1] = gen_rtx_REG (DImode, REGNO (operands[0]));
+    return output_move_double (op, true, NULL);
+  }
+  [(set (attr "length")
+	(symbol_ref "arm_count_ldrdstrd_insns (operands, false) * 4"))
+   (set (attr "ce_count") (symbol_ref "get_attr_length (insn) / 4"))
+   (set_attr "type" "store_8")
+   (set_attr "predicable" "yes")]
+)
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr88714.c b/gcc/testsuite/gcc.c-torture/execute/pr88714.c
new file mode 100644
index 0000000000000000000000000000000000000000..614ad9ac4a0662ba752532270e2d687505504d48
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr88714.c
@@ -0,0 +1,43 @@
+/* PR bootstrap/88714 */
+
+struct S { int a, b, c; int *d; };
+struct T { int *e, *f, *g; } *t = 0;
+int *o = 0;
+
+__attribute__((noipa))
+void bar (int *x, int y, int z, int w)
+{
+  if (w == -1)
+    {
+      if (x != 0 || y != 0 || z != 0)
+	__builtin_abort ();
+    }
+  else if (w != 0 || x != t->g || y != 0 || z != 12)
+    __builtin_abort ();
+}
+
+__attribute__((noipa)) void
+foo (struct S *x, struct S *y, int *z, int w)
+{
+  *o = w;
+  if (w)
+    bar (0, 0, 0, -1);
+  x->d = z;
+  if (y->d)
+    y->c = y->c + y->d[0];
+  bar (t->g, 0, y->c, 0);
+}
+
+int
+main ()
+{
+  int a[4] = { 8, 9, 10, 11 };
+  struct S s = { 1, 2, 3, &a[0] };
+  struct T u = { 0, 0, &a[3] };
+  o = &a[2];
+  t = &u;
+  foo (&s, &s, &a[1], 5);
+  if (s.c != 12 || s.d != &a[1])
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c b/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c
new file mode 100644
index 0000000000000000000000000000000000000000..ff209c5df29765441bbe9481ac8caf7bbc6af8f7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c
@@ -0,0 +1,441 @@
+/* { dg-do compile } */
+/* { dg-skip-if "Ensure only targetting arm with TARGET_LDRD" { *-*-* } { "-mthumb" } { "" } } */
+/* { dg-options "-O3 -marm -fdump-rtl-peephole2" } */
+
+/*
+   Test file contains testcases that are there to check.
+      1) Each peephole generates the expected patterns.
+      2) These patterns match the expected define_insns and generate ldrd/strd.
+      2) Memory alias information is not lost in the peephole transformation.
+
+   I don't check the peephole pass on most of the functions here but just check
+   the correct assembly is output.  The ldrd/strd peepholes only generate a
+   different pattern to the ldm/stm peepholes in some specific cases, and those
+   are checked.
+
+   The exceptions are tested by the crafted testcases at the end of this file
+   that are named in the pattern foo_x[[:digit:]].
+
+   The first testcase (foo_mem_11) demonstrates bug 88714 is fixed by checking
+   that both alias sets in the RTL are preserved.
+
+   All other testcases are only checked to see that they generate a LDRD or
+   STRD instruction accordingly.
+ */
+
+
+/* Example of bugzilla 88714 -- memory aliasing info needs to be retained.  */
+int __RTL (startwith ("peephole2")) foo_mem_11 (int *a, int *b)
+{
+(function "foo_mem_11"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 101 (set (reg:SI r2)
+		      (mem/c:SI (reg:SI r0) [1 S4 A64])) "/home/matmal01/test.c":18)
+      (cinsn 102 (set (reg:SI r3)
+		      (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [2 S4 A32])) "/home/matmal01/test.c":18)
+      (cinsn 103 (set (reg:SI r0)
+		      (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18)
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+/* { dg-final { scan-rtl-dump {Function foo_mem_11.*\(mem/c:SI[^\n]*\[1.*\(mem/c:SI[^\n]*\n[^\n]*\[2.*Function foo11} "peephole2" } } */
+
+/* ldrd plain peephole2.  */
+int __RTL (startwith ("peephole2")) foo11 (int *a)
+{
+(function "foo11"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 101 (set (reg:SI r2)
+		      (mem/c:SI (reg:SI r0) [0 S4 A64])) "/home/matmal01/test.c":18)
+      (cinsn 102 (set (reg:SI r3)
+		      (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])) "/home/matmal01/test.c":18)
+      (cinsn 103 (set (reg:SI r0)
+		      (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18)
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+
+/* ldrd plain peephole2, which accepts insns initially out of order.  */
+int __RTL (startwith ("peephole2")) foo11_alt (int *a)
+{
+(function "foo11_alt"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 102 (set (reg:SI r3)
+		      (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])) "/home/matmal01/test.c":18)
+      (cinsn 101 (set (reg:SI r2)
+		      (mem/c:SI (reg:SI r0) [0 S4 A64])) "/home/matmal01/test.c":18)
+      (cinsn 103 (set (reg:SI r0)
+		      (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18)
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+
+/* strd plain peephole2.  */
+int __RTL (startwith ("peephole2")) foo12 (int *a)
+{
+(function "foo12"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
+		      (reg:SI r2)) "/home/matmal01/test.c":18)
+      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
+		      (reg:SI r3)) "/home/matmal01/test.c":18)
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+
+/* strd of constants -- store interleaved with constant move into register.
+   Use same register twice to ensure we use the relevant pattern.  */
+int __RTL (startwith ("peephole2")) foo13 (int *a)
+{
+(function "foo13"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 99  (set (reg:SI r2)
+		      (const_int 1)) "/home/matmal01/test.c":18)
+      (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
+		      (reg:SI r2)) "/home/matmal01/test.c":18)
+      (cinsn 100 (set (reg:SI r2)
+		      (const_int 0)) "/home/matmal01/test.c":18)
+      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
+		      (reg:SI r2)) "/home/matmal01/test.c":18)
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+
+/* strd of constants -- stores after constant moves into registers.
+   Use registers out of order, is only way to avoid plain strd while hitting
+   this pattern.  */
+int __RTL (startwith ("peephole2")) foo14 (int *a)
+{
+(function "foo14"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 99  (set (reg:SI r3)
+		      (const_int 1)) "/home/matmal01/test.c":18)
+      (cinsn 100 (set (reg:SI r2)
+		      (const_int 0)) "/home/matmal01/test.c":18)
+      (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
+		      (reg:SI r3)) "/home/matmal01/test.c":18)
+      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
+		      (reg:SI r2)) "/home/matmal01/test.c":18)
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+
+/* swap the destination registers of two loads before a commutative operation.
+   Here the commutative operation is what the peephole uses to know it can
+   swap the register loads around.  */
+int __RTL (startwith ("peephole2")) foo15 (int *a)
+{
+(function "foo15"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 100 (set (reg:SI r3)
+		      (mem/c:SI (reg:SI r0) [0 S4 A64])) "/home/matmal01/test.c":18)
+      (cinsn 101 (set (reg:SI r2)
+		      (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])) "/home/matmal01/test.c":18)
+      (cinsn 102 (set (reg:SI r0)
+		      (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18
+       (expr_list:REG_DEAD (reg:SI r2)
+	(expr_list:REG_DEAD (reg:SI r3)
+	 (nil))))
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+
+
+/* swap the destination registers of two loads before a commutative operation
+   that sets the flags.  */
+/*
+   NOTE Can't make a testcase for this pattern since there are no insn patterns
+   matching the parallel insn in the peephole.
+
+   i.e. until some define_insn is defined matching that insn that peephole can
+   never match in real code, and in artificial RTL code any pattern that can
+   match it will cause an ICE.
+
+int __RTL (startwith ("peephole2")) foo16 (int *a)
+{
+(function "foo16"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 100 (set (reg:SI r3)
+		      (mem/c:SI (reg:SI r0) [0 S4 A64])) "/home/matmal01/test.c":18)
+      (cinsn 101 (set (reg:SI r2)
+		      (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])) "/home/matmal01/test.c":18)
+      (cinsn 103 (parallel
+		  [(set (reg:SI r0)
+			(and:SI (reg:SI r3) (reg:SI r2)))
+		  (clobber (reg:CC cc))]) "/home/matmal01/test.c":18
+       (expr_list:REG_DEAD (reg:SI r2)
+	(expr_list:REG_DEAD (reg:SI r3)
+	 (nil))))
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+*/
+
+
+/* Making patterns that will behave differently between the LDM/STM peepholes
+   and LDRD/STRD peepholes.
+   gen_operands_ldrd_strd() uses peep2_find_free_register() to find spare
+   registers to use.
+   peep2_find_free_register() only ever returns registers marked in
+   call_used_regs, hence we make sure to leave register 2 and 3 available (as
+   they are always on in the defaults marked by CALL_USED_REGISTERS).  */
+
+/* gen_operands_ldrd_strd() purposefully finds an even register to look at
+   which would treat the following pattern differently to the stm peepholes.
+ */
+int __RTL (startwith ("peephole2")) foo_x1 (int *a)
+{
+(function "foo_x1"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 99  (set (reg:SI r5)
+		      (const_int 1)) "/home/matmal01/test.c":18)
+      (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
+		      (reg:SI r5)) "/home/matmal01/test.c":18)
+      (cinsn 100 (set (reg:SI r5)
+		      (const_int 0)) "/home/matmal01/test.c":18)
+      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
+		      (reg:SI r5)) "/home/matmal01/test.c":18
+       (expr_list:REG_DEAD (reg:SI r5)
+	 (nil)))
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+/* Ensure we generated a parallel that started with a set from an even register.
+   i.e.
+   (parallel [
+     (set (mem
+	  (reg:SI <even>
+   */
+/* { dg-final { scan-rtl-dump {Function foo_x1.*\(parallel \[\n[^\n]*\(set \(mem[^\n]*\n[^\n]*\(reg:SI (?:[12])?[2468] r(?:[12])?[2468]\).*Function foo_x2} "peephole2" } } */
+
+/* Like above gen_operands_ldrd_strd() would look to start with an even
+   register while gen_const_stm_seq() doesn't care.  */
+int __RTL (startwith ("peephole2")) foo_x2 (int *a)
+{
+(function "foo_x2"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 99  (set (reg:SI r5)
+		      (const_int 1)) "/home/matmal01/test.c":18)
+      (cinsn 100 (set (reg:SI r6)
+		      (const_int 0)) "/home/matmal01/test.c":18)
+      (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
+		      (reg:SI r5)) "/home/matmal01/test.c":18)
+      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
+		      (reg:SI r6)) "/home/matmal01/test.c":18)
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+/* Ensure generated parallel starts with a set from an even register (as foo_x1).  */
+/* { dg-final { scan-rtl-dump {Function foo_x2.*\(parallel \[\n[^\n]*\(set \(mem[^\n]*\n[^\n]*\(reg:SI (?:[12])?[2468] r(?:[12])?[2468]\).*Function foo_x3} "peephole2" } } */
+
+/* When storing multiple values into a register that will be used later, ldrd
+   searches for another register to use instead of just giving up.  */
+int __RTL (startwith ("peephole2")) foo_x3 (int *a)
+{
+(function "foo_x3"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 99  (set (reg:SI r3)
+		      (const_int 1)) "/home/matmal01/test.c":18)
+      (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
+		      (reg:SI r3)) "/home/matmal01/test.c":18)
+      (cinsn 100 (set (reg:SI r3)
+		      (const_int 0)) "/home/matmal01/test.c":18)
+      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
+		      (reg:SI r3)) "/home/matmal01/test.c":18)
+      (cinsn 103 (set (reg:SI r0)
+		      (plus:SI (reg:SI r0) (reg:SI r3))) "/home/matmal01/test.c":18)
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+/* Ensure generated parallel starts with a set from an even register (as foo_x1).  */
+/* { dg-final { scan-rtl-dump {Function foo_x3.*\(parallel \[\n[^\n]*\(set \(mem[^\n]*\n[^\n]*\(reg:SI (?:[12])?[2468] r(?:[12])?[2468]\).*Function foo_x4} "peephole2" } } */
+
+/* ldrd gen_peephole2_11 but using plus 8 and plus 12 in the offsets.  */
+int __RTL (startwith ("peephole2")) foo_x4 (int *a)
+{
+(function "foo_x4"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 101 (set (reg:SI r2)
+		      (mem/c:SI (plus:SI (reg:SI r0) (const_int 8)) [0 S4 A64])) "/home/matmal01/test.c":18)
+      (cinsn 102 (set (reg:SI r3)
+		      (mem/c:SI (plus:SI (reg:SI r0) (const_int 12)) [0 S4 A32])) "/home/matmal01/test.c":18)
+      (cinsn 103 (set (reg:SI r0)
+		      (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18)
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+/* Ensure generated parallel starts with a set from the appropriate offset from
+   register 0.
+(parallel [
+	    (set (reg:SI ...
+		(mem/c:SI (plus:SI (reg:SI 0 r0)
+			(const_int 8 .*
+*/
+/* { dg-final { scan-rtl-dump {Function foo_x4.*\(parallel \[\n[^\n]*\(set \(reg:SI[^\n]*\n *\(mem/c:SI \(plus:SI \(reg:SI 0 r0\)\n *\(const_int 8.*Function foo_x5} "peephole2" } } */
+
+/* strd gen_peephole2_12 but using plus 8 and plus 12 in the offsets.  */
+int __RTL (startwith ("peephole2")) foo_x5 (int *a)
+{
+(function "foo12"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 101 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 8)) [0 S4 A64])
+		      (reg:SI r2)) "/home/matmal01/test.c":18)
+      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 12)) [0 S4 A32])
+		      (reg:SI r3)) "/home/matmal01/test.c":18)
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+/* Ensure generated parallel starts with a set to the appropriate offset from
+   register 0.  */
+/* { dg-final { scan-rtl-dump {Function foo_x5.*\(parallel \[\n[^\n]*\(set \(mem/c:SI \(plus:SI \(reg:SI 0 r0\)\n *\(const_int 8.*$} "peephole2" } } */
+
+
+/* { dg-final { scan-assembler-not "ldm" } } */
+/* { dg-final { scan-assembler-not "stm" } } */
+/* { dg-final { scan-assembler-times {ldrd\tr[2468], \[r0\]} 4 } } */
+/* { dg-final { scan-assembler-times {ldrd\tr[2468], \[r0, #8\]} 1 } } */
+/* { dg-final { scan-assembler-times {strd\tr[2468], \[r0\]} 6 } } */
+/* { dg-final { scan-assembler-times {strd\tr[2468], \[r0, #8\]} 1 } } */

Comments

Kyrill Tkachov Feb. 6, 2019, 6:45 p.m. UTC | #1
Hi Matthew,

On 05/02/19 14:44, Matthew Malcomson wrote:
> These peepholes match a pair of SImode loads or stores that can be
> implemented with a single LDRD or STRD instruction.
> When compiling for TARGET_ARM, these peepholes originally created a set
> pattern in DI mode to be caught by movdi patterns.
>
> This approach failed to take into account the possibility that the two
> matched insns operated on memory with different aliasing information.
> The peepholes lost the aliasing information on one of the insns, which
> could then cause the scheduler to make an invalid transformation.
>
> This patch changes the peepholes so they generate a PARALLEL expression
> of the two relevant loads or stores, which means the aliasing
> information of both is kept.  Such a PARALLEL pattern is what the
> peepholes currently produce for TARGET_THUMB2.
>
> In order to match these new insn patterns, we add two new define_insn's.  These
> define_insn's use the same checks as the peepholes to find valid insns.
>
> Note that the patterns now created by the peepholes for LDRD and STRD
> are very similar to those created by the peepholes for LDM and STM.
> Many patterns could be matched by the LDM and STM define_insns, which
> means we rely on the order the define_insn patterns are defined in the
> machine description, with those for LDRD/STRD defined before those for
> LDM/STM.
>
> The difference between the peepholes for LDRD/STRD and those for LDM/STM
> are mainly that those for LDRD/STRD have some logic to ensure that the
> two registers are consecutive and the first one is even.
>
> Bootstrapped and regtested on arm-none-linux-gnu.
> Demonstrated fix of bug 88714 by bootstrapping on armv7l.
>
>
> gcc/ChangeLog:
>
> 2019-02-05  Matthew Malcomson <matthew.malcomson@arm.com>
>
>         PR bootstrap/88714
>         * config/arm/arm-protos.h (valid_operands_ldrd_strd,
>         arm_count_ldrdstrd_insns): New declarations.
>         * config/arm/arm.c (mem_ok_for_ldrd_strd): Remove broken handling of
>         MINUS.
>         (valid_operands_ldrd_strd): New function.
>         (arm_count_ldrdstrd_insns): New function.
>         * config/arm/ldrdstrd.md: Change peepholes to generate PARALLEL SImode
>         sets instead of single DImode set and define new insns to match this.
>
> gcc/testsuite/ChangeLog:
>
> 2019-02-05  Matthew Malcomson <matthew.malcomson@arm.com>
>
>         * gcc.c-torture/execute/pr88714.c: New test.
>         * gcc.dg/rtl/arm/ldrd-peepholes.c: New test.
>

Please add the PR marker to the testsuite ChangeLog as well.
I've been following this PR a bit from the sidelines, I believe a substantial amount of code
(and one of the testcases) was written by Jakub, so please add him to the ChangeLog entries as well.

This looks ok to me.
Thanks for fixing this and thanks Jakub for the analysis and fixes too!

Kyrill

>
>
> ###############     Attachment also inlined for ease of reply    ###############
>
>
> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
> index 79ede0db174fcce87abe8b4d18893550d4c7e2f6..485bc68a618d6ae4a1640368ccb025fe2c9e1420 100644
> --- a/gcc/config/arm/arm-protos.h
> +++ b/gcc/config/arm/arm-protos.h
> @@ -125,6 +125,7 @@ extern rtx arm_gen_store_multiple (int *, int, rtx, int, rtx, HOST_WIDE_INT *);
>  extern bool offset_ok_for_ldrd_strd (HOST_WIDE_INT);
>  extern bool operands_ok_ldrd_strd (rtx, rtx, rtx, HOST_WIDE_INT, bool, bool);
>  extern bool gen_operands_ldrd_strd (rtx *, bool, bool, bool);
> +extern bool valid_operands_ldrd_strd (rtx *, bool);
>  extern int arm_gen_movmemqi (rtx *);
>  extern bool gen_movmem_ldrd_strd (rtx *);
>  extern machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx);
> @@ -146,6 +147,7 @@ extern const char *output_mov_long_double_arm_from_arm (rtx *);
>  extern const char *output_move_double (rtx *, bool, int *count);
>  extern const char *output_move_quad (rtx *);
>  extern int arm_count_output_move_double_insns (rtx *);
> +extern int arm_count_ldrdstrd_insns (rtx *, bool);
>  extern const char *output_move_vfp (rtx *operands);
>  extern const char *output_move_neon (rtx *operands);
>  extern int arm_attr_length_move_neon (rtx_insn *);
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index 73cb8df9af1ec9d680091bb8691bcd925a1be1d3..1c336da9e5b0948ef1058c46966364510dc1ca38 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -15556,7 +15556,7 @@ mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
>        *base = addr;
>        return true;
>      }
> -  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
> +  else if (GET_CODE (addr) == PLUS)
>      {
>        *base = XEXP (addr, 0);
>        *offset = XEXP (addr, 1);
> @@ -15721,7 +15721,7 @@ gen_operands_ldrd_strd (rtx *operands, bool load,
>      }
>
>    /* Make sure accesses are to consecutive memory locations.  */
> -  if (gap != 4)
> +  if (gap != GET_MODE_SIZE (SImode))
>      return false;
>
>    if (!align_ok_ldrd_strd (align[0], offset))
> @@ -15802,6 +15802,55 @@ gen_operands_ldrd_strd (rtx *operands, bool load,
>  }
>
>
> +/* Return true if parallel execution of the two word-size accesses provided
> +   could be satisfied with a single LDRD/STRD instruction. Two word-size
> +   accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
> +   register operands and OPERANDS[2,3] are the corresponding memory operands.
> +   */
> +bool
> +valid_operands_ldrd_strd (rtx *operands, bool load)
> +{
> +  int nops = 2;
> +  HOST_WIDE_INT offsets[2], offset, align[2];
> +  rtx base = NULL_RTX;
> +  rtx cur_base, cur_offset;
> +  int i, gap;
> +
> +  /* Check that the memory references are immediate offsets from the
> +     same base register.  Extract the base register, the destination
> +     registers, and the corresponding memory offsets.  */
> +  for (i = 0; i < nops; i++)
> +    {
> +      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
> +                                &align[i]))
> +       return false;
> +
> +      if (i == 0)
> +       base = cur_base;
> +      else if (REGNO (base) != REGNO (cur_base))
> +       return false;
> +
> +      offsets[i] = INTVAL (cur_offset);
> +      if (GET_CODE (operands[i]) == SUBREG)
> +       return false;
> +    }
> +
> +  if (offsets[0] > offsets[1])
> +    return false;
> +
> +  gap = offsets[1] - offsets[0];
> +  offset = offsets[0];
> +
> +  /* Make sure accesses are to consecutive memory locations.  */
> +  if (gap != GET_MODE_SIZE (SImode))
> +    return false;
> +
> +  if (!align_ok_ldrd_strd (align[0], offset))
> +    return false;
> +
> +  return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
> +                               false, load);
> +}
>
>
>
>
>  /* Print a symbolic form of X to the debug file, F.  */
> @@ -28474,6 +28523,26 @@ arm_count_output_move_double_insns (rtx *operands)
>    return count;
>  }
>
> +/* Same as above, but operands are a register/memory pair in SImode.
> +   Assumes operands has the base register in position 0 and memory in position
> +   2 (which is the order provided by the arm_{ldrd,strd} patterns).  */
> +int
> +arm_count_ldrdstrd_insns (rtx *operands, bool load)
> +{
> +  int count;
> +  rtx ops[2];
> +  int regnum, memnum;
> +  if (load)
> +    regnum = 0, memnum = 1;
> +  else
> +    regnum = 1, memnum = 0;
> +  ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
> +  ops[memnum] = adjust_address (operands[2], DImode, 0);
> +  output_move_double (ops, false, &count);
> +  return count;
> +}
> +
> +
>  int
>  vfp3_const_double_for_fract_bits (rtx operand)
>  {
> diff --git a/gcc/config/arm/ldrdstrd.md b/gcc/config/arm/ldrdstrd.md
> index be53d010fa6dfcf5a6854ae2b17f7cfcea25db9e..cb7a6adebbc8084a2e642ff2dcbef8b3fb16f268 100644
> --- a/gcc/config/arm/ldrdstrd.md
> +++ b/gcc/config/arm/ldrdstrd.md
> @@ -23,37 +23,22 @@
>  ;; The following peephole optimizations identify consecutive memory
>  ;; accesses, and try to rearrange the operands to enable generation of
>  ;; ldrd/strd.
> +;;
> +;; In many cases they behave in the same way that patterns in ldmstm.md behave,
> +;; but there is extra logic in gen_operands_ldrd_strd to try and ensure the
> +;; registers used are an (r<N>, r<N + 1>) pair where N is even.
>
>  (define_peephole2 ; ldrd
>    [(set (match_operand:SI 0 "arm_general_register_operand" "")
> -        (match_operand:SI 2 "memory_operand" ""))
> +       (match_operand:SI 2 "memory_operand" ""))
>     (set (match_operand:SI 1 "arm_general_register_operand" "")
> -        (match_operand:SI 3 "memory_operand" ""))]
> +       (match_operand:SI 3 "memory_operand" ""))]
>    "TARGET_LDRD"
> -  [(const_int 0)]
> +  [(parallel [(set (match_dup 0) (match_dup 2))
> +             (set (match_dup 1) (match_dup 3))])]
>  {
>    if (!gen_operands_ldrd_strd (operands, true, false, false))
>      FAIL;
> -  else if (TARGET_ARM)
> -  {
> -    /* In ARM state, the destination registers of LDRD/STRD must be
> -       consecutive. We emit DImode access.  */
> -    operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
> -    operands[2] = adjust_address (operands[2], DImode, 0);
> -    /* Emit [(set (match_dup 0) (match_dup 2))] */
> -    emit_insn (gen_rtx_SET (operands[0], operands[2]));
> -    DONE;
> -  }
> -  else if (TARGET_THUMB2)
> -  {
> -    /* Emit the pattern:
> -       [(parallel [(set (match_dup 0) (match_dup 2))
> -                   (set (match_dup 1) (match_dup 3))])] */
> -    rtx t1 = gen_rtx_SET (operands[0], operands[2]);
> -    rtx t2 = gen_rtx_SET (operands[1], operands[3]);
> -    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
> -    DONE;
> -  }
>  })
>
>  (define_peephole2 ; strd
> @@ -62,117 +47,50 @@ (define_peephole2 ; strd
>     (set (match_operand:SI 3 "memory_operand" "")
>          (match_operand:SI 1 "arm_general_register_operand" ""))]
>    "TARGET_LDRD"
> -  [(const_int 0)]
> +  [(parallel [(set (match_dup 2) (match_dup 0))
> +             (set (match_dup 3) (match_dup 1))])]
>  {
>    if (!gen_operands_ldrd_strd (operands, false, false, false))
>      FAIL;
> -  else if (TARGET_ARM)
> -  {
> -    /* In ARM state, the destination registers of LDRD/STRD must be
> -       consecutive. We emit DImode access.  */
> -    operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
> -    operands[2] = adjust_address (operands[2], DImode, 0);
> -    /* Emit [(set (match_dup 2) (match_dup 0))]  */
> -    emit_insn (gen_rtx_SET (operands[2], operands[0]));
> -    DONE;
> -  }
> -  else if (TARGET_THUMB2)
> -  {
> -    /* Emit the pattern:
> -       [(parallel [(set (match_dup 2) (match_dup 0))
> -                   (set (match_dup 3) (match_dup 1))])]  */
> -    rtx t1 = gen_rtx_SET (operands[2], operands[0]);
> -    rtx t2 = gen_rtx_SET (operands[3], operands[1]);
> -    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
> -    DONE;
> -  }
>  })
>
>  ;; The following peepholes reorder registers to enable LDRD/STRD.
>  (define_peephole2 ; strd of constants
>    [(set (match_operand:SI 0 "arm_general_register_operand" "")
> -        (match_operand:SI 4 "const_int_operand" ""))
> +       (match_operand:SI 4 "const_int_operand" ""))
>     (set (match_operand:SI 2 "memory_operand" "")
> -        (match_dup 0))
> +       (match_dup 0))
>     (set (match_operand:SI 1 "arm_general_register_operand" "")
> -        (match_operand:SI 5 "const_int_operand" ""))
> +       (match_operand:SI 5 "const_int_operand" ""))
>     (set (match_operand:SI 3 "memory_operand" "")
> -        (match_dup 1))]
> +       (match_dup 1))]
>    "TARGET_LDRD"
> -  [(const_int 0)]
> +  [(set (match_dup 0) (match_dup 4))
> +   (set (match_dup 1) (match_dup 5))
> +   (parallel [(set (match_dup 2) (match_dup 0))
> +             (set (match_dup 3) (match_dup 1))])]
>  {
>    if (!gen_operands_ldrd_strd (operands, false, true, false))
>      FAIL;
> -  else if (TARGET_ARM)
> -  {
> -   rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0]));
> -   operands[2] = adjust_address (operands[2], DImode, 0);
> -   /* Emit the pattern:
> -      [(set (match_dup 0) (match_dup 4))
> -      (set (match_dup 1) (match_dup 5))
> -      (set (match_dup 2) tmp)]  */
> -   emit_insn (gen_rtx_SET (operands[0], operands[4]));
> -   emit_insn (gen_rtx_SET (operands[1], operands[5]));
> -   emit_insn (gen_rtx_SET (operands[2], tmp));
> -   DONE;
> -  }
> -  else if (TARGET_THUMB2)
> -  {
> -    /* Emit the pattern:
> -       [(set (match_dup 0) (match_dup 4))
> -        (set (match_dup 1) (match_dup 5))
> -        (parallel [(set (match_dup 2) (match_dup 0))
> -                   (set (match_dup 3) (match_dup 1))])]  */
> -    emit_insn (gen_rtx_SET (operands[0], operands[4]));
> -    emit_insn (gen_rtx_SET (operands[1], operands[5]));
> -    rtx t1 = gen_rtx_SET (operands[2], operands[0]);
> -    rtx t2 = gen_rtx_SET (operands[3], operands[1]);
> -    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
> -    DONE;
> -  }
>  })
>
>  (define_peephole2 ; strd of constants
>    [(set (match_operand:SI 0 "arm_general_register_operand" "")
> -        (match_operand:SI 4 "const_int_operand" ""))
> +       (match_operand:SI 4 "const_int_operand" ""))
>     (set (match_operand:SI 1 "arm_general_register_operand" "")
> -        (match_operand:SI 5 "const_int_operand" ""))
> +       (match_operand:SI 5 "const_int_operand" ""))
>     (set (match_operand:SI 2 "memory_operand" "")
> -        (match_dup 0))
> +       (match_dup 0))
>     (set (match_operand:SI 3 "memory_operand" "")
> -        (match_dup 1))]
> +       (match_dup 1))]
>    "TARGET_LDRD"
> -  [(const_int 0)]
> +  [(set (match_dup 0) (match_dup 4))
> +   (set (match_dup 1) (match_dup 5))
> +   (parallel [(set (match_dup 2) (match_dup 0))
> +             (set (match_dup 3) (match_dup 1))])]
>  {
>    if (!gen_operands_ldrd_strd (operands, false, true, false))
>       FAIL;
> -  else if (TARGET_ARM)
> -  {
> -   rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0]));
> -   operands[2] = adjust_address (operands[2], DImode, 0);
> -   /* Emit the pattern
> -      [(set (match_dup 0) (match_dup 4))
> -       (set (match_dup 1) (match_dup 5))
> -       (set (match_dup 2) tmp)]  */
> -   emit_insn (gen_rtx_SET (operands[0], operands[4]));
> -   emit_insn (gen_rtx_SET (operands[1], operands[5]));
> -   emit_insn (gen_rtx_SET (operands[2], tmp));
> -   DONE;
> -  }
> -  else if (TARGET_THUMB2)
> -  {
> -    /*  Emit the pattern:
> -        [(set (match_dup 0) (match_dup 4))
> -         (set (match_dup 1) (match_dup 5))
> -         (parallel [(set (match_dup 2) (match_dup 0))
> -                    (set (match_dup 3) (match_dup 1))])] */
> -    emit_insn (gen_rtx_SET (operands[0], operands[4]));
> -    emit_insn (gen_rtx_SET (operands[1], operands[5]));
> -    rtx t1 = gen_rtx_SET (operands[2], operands[0]);
> -    rtx t2 = gen_rtx_SET (operands[3], operands[1]);
> -    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
> -    DONE;
> -  }
>  })
>
>  ;; The following two peephole optimizations are only relevant for ARM
> @@ -181,39 +99,32 @@ (define_peephole2 ; strd of constants
>  (define_peephole2 ; swap the destination registers of two loads
>                    ; before a commutative operation.
>    [(set (match_operand:SI 0 "arm_general_register_operand" "")
> -        (match_operand:SI 2 "memory_operand" ""))
> +       (match_operand:SI 2 "memory_operand" ""))
>     (set (match_operand:SI 1 "arm_general_register_operand" "")
> -        (match_operand:SI 3 "memory_operand" ""))
> +       (match_operand:SI 3 "memory_operand" ""))
>     (set (match_operand:SI 4 "arm_general_register_operand" "")
> -        (match_operator:SI 5 "commutative_binary_operator"
> +       (match_operator:SI 5 "commutative_binary_operator"
>                             [(match_operand 6 "arm_general_register_operand" "")
>                              (match_operand 7 "arm_general_register_operand" "") ]))]
>    "TARGET_LDRD && TARGET_ARM
>     && (  ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7])))
> -        ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
> +       ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
>     && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
>     && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))"
> -  [(set (match_dup 0) (match_dup 2))
> +  [(parallel [(set (match_dup 0) (match_dup 2))
> +             (set (match_dup 1) (match_dup 3))])
>     (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))]
> -  {
> -    if (!gen_operands_ldrd_strd (operands, true, false, true))
> -     {
> -        FAIL;
> -     }
> -    else
> -     {
> -        operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
> -        operands[2] = adjust_address (operands[2], DImode, 0);
> -     }
> -   }
> -)
> +{
> +  if (!gen_operands_ldrd_strd (operands, true, false, true))
> +    FAIL;
> +})
>
>  (define_peephole2 ; swap the destination registers of two loads
>                    ; before a commutative operation that sets the flags.
>    [(set (match_operand:SI 0 "arm_general_register_operand" "")
> -        (match_operand:SI 2 "memory_operand" ""))
> +       (match_operand:SI 2 "memory_operand" ""))
>     (set (match_operand:SI 1 "arm_general_register_operand" "")
> -        (match_operand:SI 3 "memory_operand" ""))
> +       (match_operand:SI 3 "memory_operand" ""))
>     (parallel
>        [(set (match_operand:SI 4 "arm_general_register_operand" "")
>              (match_operator:SI 5 "commutative_binary_operator"
> @@ -225,24 +136,62 @@ (define_peephole2 ; swap the destination registers of two loads
>         ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
>     && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
>     && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))"
> -  [(set (match_dup 0) (match_dup 2))
> +  [(parallel [(set (match_dup 0) (match_dup 2))
> +             (set (match_dup 1) (match_dup 3))])
>     (parallel
>        [(set (match_dup 4)
>              (match_op_dup 5 [(match_dup 6) (match_dup 7)]))
>         (clobber (reg:CC CC_REGNUM))])]
> -  {
> -    if (!gen_operands_ldrd_strd (operands, true, false, true))
> -     {
> -        FAIL;
> -     }
> -    else
> -     {
> -        operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
> -        operands[2] = adjust_address (operands[2], DImode, 0);
> -     }
> -   }
> -)
> +{
> +  if (!gen_operands_ldrd_strd (operands, true, false, true))
> +    FAIL;
> +})
>
>  ;; TODO: Handle LDRD/STRD with writeback:
>  ;; (a) memory operands can be POST_INC, POST_DEC, PRE_MODIFY, POST_MODIFY
>  ;; (b) Patterns may be followed by an update of the base address.
> +
> +
> +;; insns matching the LDRD/STRD patterns that will get created by the above
> +;; peepholes.
> +;; We use gen_operands_ldrd_strd() with a modify argument as false so that the
> +;; operands are not changed.
> +(define_insn "*arm_ldrd"
> +  [(parallel [(set (match_operand:SI 0 "s_register_operand" "=r")
> +                  (match_operand:SI 2 "memory_operand" "m"))
> +             (set (match_operand:SI 1 "s_register_operand" "=r")
> +                  (match_operand:SI 3 "memory_operand" "m"))])]
> +  "TARGET_LDRD && TARGET_ARM && reload_completed
> +  && valid_operands_ldrd_strd (operands, true)"
> +  {
> +    rtx op[2];
> +    op[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
> +    op[1] = adjust_address (operands[2], DImode, 0);
> +    return output_move_double (op, true, NULL);
> +  }
> +  [(set (attr "length")
> +       (symbol_ref "arm_count_ldrdstrd_insns (operands, true) * 4"))
> +   (set (attr "ce_count") (symbol_ref "get_attr_length (insn) / 4"))
> +   (set_attr "type" "load_8")
> +   (set_attr "predicable" "yes")]
> +)
> +
> +(define_insn "*arm_strd"
> +  [(parallel [(set (match_operand:SI 2 "memory_operand" "=m")
> +                  (match_operand:SI 0 "s_register_operand" "r"))
> +             (set (match_operand:SI 3 "memory_operand" "=m")
> +                  (match_operand:SI 1 "s_register_operand" "r"))])]
> +  "TARGET_LDRD && TARGET_ARM && reload_completed
> +  && valid_operands_ldrd_strd (operands, false)"
> +  {
> +    rtx op[2];
> +    op[0] = adjust_address (operands[2], DImode, 0);
> +    op[1] = gen_rtx_REG (DImode, REGNO (operands[0]));
> +    return output_move_double (op, true, NULL);
> +  }
> +  [(set (attr "length")
> +       (symbol_ref "arm_count_ldrdstrd_insns (operands, false) * 4"))
> +   (set (attr "ce_count") (symbol_ref "get_attr_length (insn) / 4"))
> +   (set_attr "type" "store_8")
> +   (set_attr "predicable" "yes")]
> +)
> diff --git a/gcc/testsuite/gcc.c-torture/execute/pr88714.c b/gcc/testsuite/gcc.c-torture/execute/pr88714.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..614ad9ac4a0662ba752532270e2d687505504d48
> --- /dev/null
> +++ b/gcc/testsuite/gcc.c-torture/execute/pr88714.c
> @@ -0,0 +1,43 @@
> +/* PR bootstrap/88714 */
> +
> +struct S { int a, b, c; int *d; };
> +struct T { int *e, *f, *g; } *t = 0;
> +int *o = 0;
> +
> +__attribute__((noipa))
> +void bar (int *x, int y, int z, int w)
> +{
> +  if (w == -1)
> +    {
> +      if (x != 0 || y != 0 || z != 0)
> +       __builtin_abort ();
> +    }
> +  else if (w != 0 || x != t->g || y != 0 || z != 12)
> +    __builtin_abort ();
> +}
> +
> +__attribute__((noipa)) void
> +foo (struct S *x, struct S *y, int *z, int w)
> +{
> +  *o = w;
> +  if (w)
> +    bar (0, 0, 0, -1);
> +  x->d = z;
> +  if (y->d)
> +    y->c = y->c + y->d[0];
> +  bar (t->g, 0, y->c, 0);
> +}
> +
> +int
> +main ()
> +{
> +  int a[4] = { 8, 9, 10, 11 };
> +  struct S s = { 1, 2, 3, &a[0] };
> +  struct T u = { 0, 0, &a[3] };
> +  o = &a[2];
> +  t = &u;
> +  foo (&s, &s, &a[1], 5);
> +  if (s.c != 12 || s.d != &a[1])
> +    __builtin_abort ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c b/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..ff209c5df29765441bbe9481ac8caf7bbc6af8f7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c
> @@ -0,0 +1,441 @@
> +/* { dg-do compile } */
> +/* { dg-skip-if "Ensure only targetting arm with TARGET_LDRD" { *-*-* } { "-mthumb" } { "" } } */
> +/* { dg-options "-O3 -marm -fdump-rtl-peephole2" } */
> +
> +/*
> +   Test file contains testcases that are there to check.
> +      1) Each peephole generates the expected patterns.
> +      2) These patterns match the expected define_insns and generate ldrd/strd.
> +      2) Memory alias information is not lost in the peephole transformation.
> +
> +   I don't check the peephole pass on most of the functions here but just check
> +   the correct assembly is output.  The ldrd/strd peepholes only generate a
> +   different pattern to the ldm/stm peepholes in some specific cases, and those
> +   are checked.
> +
> +   The exceptions are tested by the crafted testcases at the end of this file
> +   that are named in the pattern foo_x[[:digit:]].
> +
> +   The first testcase (foo_mem_11) demonstrates bug 88714 is fixed by checking
> +   that both alias sets in the RTL are preserved.
> +
> +   All other testcases are only checked to see that they generate a LDRD or
> +   STRD instruction accordingly.
> + */
> +
> +
> +/* Example of bugzilla 88714 -- memory aliasing info needs to be retained.  */
> +int __RTL (startwith ("peephole2")) foo_mem_11 (int *a, int *b)
> +{
> +(function "foo_mem_11"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 101 (set (reg:SI r2)
> +                     (mem/c:SI (reg:SI r0) [1 S4 A64])) "/home/matmal01/test.c":18)
> +      (cinsn 102 (set (reg:SI r3)
> +                     (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [2 S4 A32])) "/home/matmal01/test.c":18)
> +      (cinsn 103 (set (reg:SI r0)
> +                     (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18)
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +/* { dg-final { scan-rtl-dump {Function foo_mem_11.*\(mem/c:SI[^\n]*\[1.*\(mem/c:SI[^\n]*\n[^\n]*\[2.*Function foo11} "peephole2" } } */
> +
> +/* ldrd plain peephole2.  */
> +int __RTL (startwith ("peephole2")) foo11 (int *a)
> +{
> +(function "foo11"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 101 (set (reg:SI r2)
> +                     (mem/c:SI (reg:SI r0) [0 S4 A64])) "/home/matmal01/test.c":18)
> +      (cinsn 102 (set (reg:SI r3)
> +                     (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])) "/home/matmal01/test.c":18)
> +      (cinsn 103 (set (reg:SI r0)
> +                     (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18)
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +
> +/* ldrd plain peephole2, which accepts insns initially out of order.  */
> +int __RTL (startwith ("peephole2")) foo11_alt (int *a)
> +{
> +(function "foo11_alt"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 102 (set (reg:SI r3)
> +                     (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])) "/home/matmal01/test.c":18)
> +      (cinsn 101 (set (reg:SI r2)
> +                     (mem/c:SI (reg:SI r0) [0 S4 A64])) "/home/matmal01/test.c":18)
> +      (cinsn 103 (set (reg:SI r0)
> +                     (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18)
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +
> +/* strd plain peephole2.  */
> +int __RTL (startwith ("peephole2")) foo12 (int *a)
> +{
> +(function "foo12"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
> +                     (reg:SI r2)) "/home/matmal01/test.c":18)
> +      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
> +                     (reg:SI r3)) "/home/matmal01/test.c":18)
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +
> +/* strd of constants -- store interleaved with constant move into register.
> +   Use same register twice to ensure we use the relevant pattern.  */
> +int __RTL (startwith ("peephole2")) foo13 (int *a)
> +{
> +(function "foo13"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 99  (set (reg:SI r2)
> +                     (const_int 1)) "/home/matmal01/test.c":18)
> +      (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
> +                     (reg:SI r2)) "/home/matmal01/test.c":18)
> +      (cinsn 100 (set (reg:SI r2)
> +                     (const_int 0)) "/home/matmal01/test.c":18)
> +      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
> +                     (reg:SI r2)) "/home/matmal01/test.c":18)
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +
> +/* strd of constants -- stores after constant moves into registers.
> +   Use registers out of order, is only way to avoid plain strd while hitting
> +   this pattern.  */
> +int __RTL (startwith ("peephole2")) foo14 (int *a)
> +{
> +(function "foo14"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 99  (set (reg:SI r3)
> +                     (const_int 1)) "/home/matmal01/test.c":18)
> +      (cinsn 100 (set (reg:SI r2)
> +                     (const_int 0)) "/home/matmal01/test.c":18)
> +      (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
> +                     (reg:SI r3)) "/home/matmal01/test.c":18)
> +      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
> +                     (reg:SI r2)) "/home/matmal01/test.c":18)
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +
> +/* swap the destination registers of two loads before a commutative operation.
> +   Here the commutative operation is what the peephole uses to know it can
> +   swap the register loads around.  */
> +int __RTL (startwith ("peephole2")) foo15 (int *a)
> +{
> +(function "foo15"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 100 (set (reg:SI r3)
> +                     (mem/c:SI (reg:SI r0) [0 S4 A64])) "/home/matmal01/test.c":18)
> +      (cinsn 101 (set (reg:SI r2)
> +                     (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])) "/home/matmal01/test.c":18)
> +      (cinsn 102 (set (reg:SI r0)
> +                     (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18
> +       (expr_list:REG_DEAD (reg:SI r2)
> +       (expr_list:REG_DEAD (reg:SI r3)
> +        (nil))))
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +
> +
> +/* swap the destination registers of two loads before a commutative operation
> +   that sets the flags.  */
> +/*
> +   NOTE Can't make a testcase for this pattern since there are no insn patterns
> +   matching the parallel insn in the peephole.
> +
> +   i.e. until some define_insn is defined matching that insn that peephole can
> +   never match in real code, and in artificial RTL code any pattern that can
> +   match it will cause an ICE.
> +
> +int __RTL (startwith ("peephole2")) foo16 (int *a)
> +{
> +(function "foo16"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 100 (set (reg:SI r3)
> +                     (mem/c:SI (reg:SI r0) [0 S4 A64])) "/home/matmal01/test.c":18)
> +      (cinsn 101 (set (reg:SI r2)
> +                     (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])) "/home/matmal01/test.c":18)
> +      (cinsn 103 (parallel
> +                 [(set (reg:SI r0)
> +                       (and:SI (reg:SI r3) (reg:SI r2)))
> +                 (clobber (reg:CC cc))]) "/home/matmal01/test.c":18
> +       (expr_list:REG_DEAD (reg:SI r2)
> +       (expr_list:REG_DEAD (reg:SI r3)
> +        (nil))))
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +*/
> +
> +
> +/* Making patterns that will behave differently between the LDM/STM peepholes
> +   and LDRD/STRD peepholes.
> +   gen_operands_ldrd_strd() uses peep2_find_free_register() to find spare
> +   registers to use.
> +   peep2_find_free_register() only ever returns registers marked in
> +   call_used_regs, hence we make sure to leave register 2 and 3 available (as
> +   they are always on in the defaults marked by CALL_USED_REGISTERS).  */
> +
> +/* gen_operands_ldrd_strd() purposefully finds an even register to look at
> +   which would treat the following pattern differently to the stm peepholes.
> + */
> +int __RTL (startwith ("peephole2")) foo_x1 (int *a)
> +{
> +(function "foo_x1"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 99  (set (reg:SI r5)
> +                     (const_int 1)) "/home/matmal01/test.c":18)
> +      (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
> +                     (reg:SI r5)) "/home/matmal01/test.c":18)
> +      (cinsn 100 (set (reg:SI r5)
> +                     (const_int 0)) "/home/matmal01/test.c":18)
> +      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
> +                     (reg:SI r5)) "/home/matmal01/test.c":18
> +       (expr_list:REG_DEAD (reg:SI r5)
> +        (nil)))
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +/* Ensure we generated a parallel that started with a set from an even register.
> +   i.e.
> +   (parallel [
> +     (set (mem
> +         (reg:SI <even>
> +   */
> +/* { dg-final { scan-rtl-dump {Function foo_x1.*\(parallel \[\n[^\n]*\(set \(mem[^\n]*\n[^\n]*\(reg:SI (?:[12])?[2468] r(?:[12])?[2468]\).*Function foo_x2} "peephole2" } } */
> +
> +/* Like above gen_operands_ldrd_strd() would look to start with an even
> +   register while gen_const_stm_seq() doesn't care.  */
> +int __RTL (startwith ("peephole2")) foo_x2 (int *a)
> +{
> +(function "foo_x2"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 99  (set (reg:SI r5)
> +                     (const_int 1)) "/home/matmal01/test.c":18)
> +      (cinsn 100 (set (reg:SI r6)
> +                     (const_int 0)) "/home/matmal01/test.c":18)
> +      (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
> +                     (reg:SI r5)) "/home/matmal01/test.c":18)
> +      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
> +                     (reg:SI r6)) "/home/matmal01/test.c":18)
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +/* Ensure generated parallel starts with a set from an even register (as foo_x1).  */
> +/* { dg-final { scan-rtl-dump {Function foo_x2.*\(parallel \[\n[^\n]*\(set \(mem[^\n]*\n[^\n]*\(reg:SI (?:[12])?[2468] r(?:[12])?[2468]\).*Function foo_x3} "peephole2" } } */
> +
> +/* When storing multiple values into a register that will be used later, ldrd
> +   searches for another register to use instead of just giving up.  */
> +int __RTL (startwith ("peephole2")) foo_x3 (int *a)
> +{
> +(function "foo_x3"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 99  (set (reg:SI r3)
> +                     (const_int 1)) "/home/matmal01/test.c":18)
> +      (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
> +                     (reg:SI r3)) "/home/matmal01/test.c":18)
> +      (cinsn 100 (set (reg:SI r3)
> +                     (const_int 0)) "/home/matmal01/test.c":18)
> +      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
> +                     (reg:SI r3)) "/home/matmal01/test.c":18)
> +      (cinsn 103 (set (reg:SI r0)
> +                     (plus:SI (reg:SI r0) (reg:SI r3))) "/home/matmal01/test.c":18)
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +/* Ensure generated parallel starts with a set from an even register (as foo_x1).  */
> +/* { dg-final { scan-rtl-dump {Function foo_x3.*\(parallel \[\n[^\n]*\(set \(mem[^\n]*\n[^\n]*\(reg:SI (?:[12])?[2468] r(?:[12])?[2468]\).*Function foo_x4} "peephole2" } } */
> +
> +/* ldrd gen_peephole2_11 but using plus 8 and plus 12 in the offsets.  */
> +int __RTL (startwith ("peephole2")) foo_x4 (int *a)
> +{
> +(function "foo_x4"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 101 (set (reg:SI r2)
> +                     (mem/c:SI (plus:SI (reg:SI r0) (const_int 8)) [0 S4 A64])) "/home/matmal01/test.c":18)
> +      (cinsn 102 (set (reg:SI r3)
> +                     (mem/c:SI (plus:SI (reg:SI r0) (const_int 12)) [0 S4 A32])) "/home/matmal01/test.c":18)
> +      (cinsn 103 (set (reg:SI r0)
> +                     (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18)
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +/* Ensure generated parallel starts with a set from the appropriate offset from
> +   register 0.
> +(parallel [
> +           (set (reg:SI ...
> +               (mem/c:SI (plus:SI (reg:SI 0 r0)
> +                       (const_int 8 .*
> +*/
> +/* { dg-final { scan-rtl-dump {Function foo_x4.*\(parallel \[\n[^\n]*\(set \(reg:SI[^\n]*\n *\(mem/c:SI \(plus:SI \(reg:SI 0 r0\)\n *\(const_int 8.*Function foo_x5} "peephole2" } } */
> +
> +/* strd gen_peephole2_12 but using plus 8 and plus 12 in the offsets.  */
> +int __RTL (startwith ("peephole2")) foo_x5 (int *a)
> +{
> +(function "foo12"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 101 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 8)) [0 S4 A64])
> +                     (reg:SI r2)) "/home/matmal01/test.c":18)
> +      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 12)) [0 S4 A32])
> +                     (reg:SI r3)) "/home/matmal01/test.c":18)
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +/* Ensure generated parallel starts with a set to the appropriate offset from
> +   register 0.  */
> +/* { dg-final { scan-rtl-dump {Function foo_x5.*\(parallel \[\n[^\n]*\(set \(mem/c:SI \(plus:SI \(reg:SI 0 r0\)\n *\(const_int 8.*$} "peephole2" } } */
> +
> +
> +/* { dg-final { scan-assembler-not "ldm" } } */
> +/* { dg-final { scan-assembler-not "stm" } } */
> +/* { dg-final { scan-assembler-times {ldrd\tr[2468], \[r0\]} 4 } } */
> +/* { dg-final { scan-assembler-times {ldrd\tr[2468], \[r0, #8\]} 1 } } */
> +/* { dg-final { scan-assembler-times {strd\tr[2468], \[r0\]} 6 } } */
> +/* { dg-final { scan-assembler-times {strd\tr[2468], \[r0, #8\]} 1 } } */
>
Matthew Malcomson Feb. 7, 2019, 2:57 p.m. UTC | #2
>>
> 
> Please add the PR marker to the testsuite ChangeLog as well.
> I've been following this PR a bit from the sidelines, I believe a 
> substantial amount of code
> (and one of the testcases) was written by Jakub, so please add him to 
> the ChangeLog entries as well.
> 
> This looks ok to me.
> Thanks for fixing this and thanks Jakub for the analysis and fixes too!
> 
> Kyrill
> 

Thanks Kyrill,
I've committed with the changelog below.


gcc/ChangeLog:

2019-02-07  Matthew Malcomson  <matthew.malcomson@arm.com>
	    Jakub Jelinek  <jakub@redhat.com>

	PR bootstrap/88714
	* config/arm/arm-protos.h (valid_operands_ldrd_strd,
	arm_count_ldrdstrd_insns): New declarations.
	* config/arm/arm.c (mem_ok_for_ldrd_strd): Remove broken handling of
	MINUS.
	(valid_operands_ldrd_strd): New function.
	(arm_count_ldrdstrd_insns): New function.
	* config/arm/ldrdstrd.md: Change peepholes to generate PARALLEL SImode
	sets instead of single DImode set and define new insns to match this.


gcc/testsuite/ChangeLog:

2019-02-07  Matthew Malcomson  <matthew.malcomson@arm.com>
	    Jakub Jelinek  <jakub@redhat.com>

	PR bootstrap/88714
	* gcc.c-torture/execute/pr88714.c: New test.
	* gcc.dg/rtl/arm/ldrd-peepholes.c: New test.
Christophe Lyon Feb. 8, 2019, 9:18 a.m. UTC | #3
On Tue, 5 Feb 2019 at 15:44, Matthew Malcomson
<Matthew.Malcomson@arm.com> wrote:
>
> These peepholes match a pair of SImode loads or stores that can be
> implemented with a single LDRD or STRD instruction.
> When compiling for TARGET_ARM, these peepholes originally created a set
> pattern in DI mode to be caught by movdi patterns.
>
> This approach failed to take into account the possibility that the two
> matched insns operated on memory with different aliasing information.
> The peepholes lost the aliasing information on one of the insns, which
> could then cause the scheduler to make an invalid transformation.
>
> This patch changes the peepholes so they generate a PARALLEL expression
> of the two relevant loads or stores, which means the aliasing
> information of both is kept.  Such a PARALLEL pattern is what the
> peepholes currently produce for TARGET_THUMB2.
>
> In order to match these new insn patterns, we add two new define_insn's.  These
> define_insn's use the same checks as the peepholes to find valid insns.
>
> Note that the patterns now created by the peepholes for LDRD and STRD
> are very similar to those created by the peepholes for LDM and STM.
> Many patterns could be matched by the LDM and STM define_insns, which
> means we rely on the order the define_insn patterns are defined in the
> machine description, with those for LDRD/STRD defined before those for
> LDM/STM.
>
> The difference between the peepholes for LDRD/STRD and those for LDM/STM
> are mainly that those for LDRD/STRD have some logic to ensure that the
> two registers are consecutive and the first one is even.
>
> Bootstrapped and regtested on arm-none-linux-gnu.
> Demonstrated fix of bug 88714 by bootstrapping on armv7l.
>
>
> gcc/ChangeLog:
>
> 2019-02-05  Matthew Malcomson  <matthew.malcomson@arm.com>
>
>         PR bootstrap/88714
>         * config/arm/arm-protos.h (valid_operands_ldrd_strd,
>         arm_count_ldrdstrd_insns): New declarations.
>         * config/arm/arm.c (mem_ok_for_ldrd_strd): Remove broken handling of
>         MINUS.
>         (valid_operands_ldrd_strd): New function.
>         (arm_count_ldrdstrd_insns): New function.
>         * config/arm/ldrdstrd.md: Change peepholes to generate PARALLEL SImode
>         sets instead of single DImode set and define new insns to match this.
>
> gcc/testsuite/ChangeLog:
>
> 2019-02-05  Matthew Malcomson  <matthew.malcomson@arm.com>
>
>         * gcc.c-torture/execute/pr88714.c: New test.
>         * gcc.dg/rtl/arm/ldrd-peepholes.c: New test.
>

Hi!

I'm afaid this patch causes several regressions. Maybe they have
already been fixed post-commit (I have several validations for later
commits still running)?

For the whole picture, see:
http://people.linaro.org/~christophe.lyon/cross-validation/gcc/trunk/268644/report-build-info.html

Namely there are some ICEs:
--target arm-none-linux-gnueabi
--with-mode arm
--with-cpu cortex-a9
--with-fpu default
    gcc.c-torture/execute/builtins/memcpy-chk.c compilation,  -O2
-flto -fuse-linker-plugin -fno-fat-lto-objects  (internal compiler
error)
    gcc.c-torture/execute/builtins/memmove-chk.c compilation,  -O2
-flto -fuse-linker-plugin -fno-fat-lto-objects  (internal compiler
error)
    gcc.c-torture/execute/builtins/mempcpy-chk.c compilation,  -O2
-flto -fuse-linker-plugin -fno-fat-lto-objects  (internal compiler
error)
    gcc.c-torture/execute/builtins/memset-chk.c compilation,  -O2
-flto -fuse-linker-plugin -fno-fat-lto-objects  (internal compiler
error)
    gcc.c-torture/execute/builtins/sprintf-chk.c compilation,  -O2
(internal compiler error)
    gcc.c-torture/execute/builtins/sprintf-chk.c compilation,  -O2
-flto -fno-use-linker-plugin -flto-partition=none  (internal compiler
error)
    gcc.c-torture/execute/builtins/sprintf-chk.c compilation,  -O3
-fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer
-finline-functions  (internal compiler error)
    gcc.c-torture/execute/builtins/sprintf-chk.c compilation,  -O3 -g
(internal compiler error)
    gcc.c-torture/execute/builtins/stpcpy-chk.c compilation,  -O2
(internal compiler error)
    gcc.c-torture/execute/builtins/stpcpy-chk.c compilation,  -O2
-flto -fno-use-linker-plugin -flto-partition=none  (internal compiler
error)
    gcc.c-torture/execute/builtins/stpcpy-chk.c compilation,  -O2
-flto -fuse-linker-plugin -fno-fat-lto-objects  (internal compiler
error)
    gcc.c-torture/execute/builtins/stpcpy-chk.c compilation,  -O3
-fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer
-finline-functions  (internal compiler error)
    gcc.c-torture/execute/builtins/stpcpy-chk.c compilation,  -O3 -g
(internal compiler error)
    gcc.c-torture/execute/builtins/strcat-chk.c compilation,  -O2
(internal compiler error)
    gcc.c-torture/execute/builtins/strcat-chk.c compilation,  -O2
-flto -fno-use-linker-plugin -flto-partition=none  (internal compiler
error)
    gcc.c-torture/execute/builtins/strcat-chk.c compilation,  -O2
-flto -fuse-linker-plugin -fno-fat-lto-objects  (internal compiler
error)
    gcc.c-torture/execute/builtins/strcat-chk.c compilation,  -O3
-fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer
-finline-functions  (internal compiler error)
    gcc.c-torture/execute/builtins/strcat-chk.c compilation,  -O3 -g
(internal compiler error)
    gcc.c-torture/execute/builtins/strncat-chk.c compilation,  -O2
-flto -fuse-linker-plugin -fno-fat-lto-objects  (internal compiler
error)

Failing assembler scans:
--target arm-none-linux-gnueabi
--with-mode arm
--with-cpu cortex-a9
--with-fpu default
Dejagnu flags: -march=armv5t
(ie. same config as above, but forcing -march=armv5t when running the
tests: this avoids the ICE, but the scan-assembler fails)
    gcc.dg/rtl/arm/ldrd-peepholes.c scan-assembler-not ldm
    gcc.dg/rtl/arm/ldrd-peepholes.c scan-assembler-not stm
    gcc.dg/rtl/arm/ldrd-peepholes.c scan-assembler-times
ldrd\\tr[2468], \\[r0, #8\\] 1
    gcc.dg/rtl/arm/ldrd-peepholes.c scan-assembler-times
ldrd\\tr[2468], \\[r0\\] 4
    gcc.dg/rtl/arm/ldrd-peepholes.c scan-assembler-times
strd\\tr[2468], \\[r0, #8\\] 1
    gcc.dg/rtl/arm/ldrd-peepholes.c scan-assembler-times
strd\\tr[2468], \\[r0\\] 6
    gcc.dg/rtl/arm/ldrd-peepholes.c scan-rtl-dump peephole2 "Function
foo_x1.*\\(parallel \\[\\n[^\\n]*\\(set
\\(mem[^\\n]*\\n[^\\n]*\\(reg:SI (?:[12])?[2468]
r(?:[12])?[2468]\\).*Function foo_x2"
    gcc.dg/rtl/arm/ldrd-peepholes.c scan-rtl-dump peephole2 "Function
foo_x2.*\\(parallel \\[\\n[^\\n]*\\(set
\\(mem[^\\n]*\\n[^\\n]*\\(reg:SI (?:[12])?[2468]
r(?:[12])?[2468]\\).*Function foo_x3"
    gcc.dg/rtl/arm/ldrd-peepholes.c scan-rtl-dump peephole2 "Function
foo_x3.*\\(parallel \\[\\n[^\\n]*\\(set
\\(mem[^\\n]*\\n[^\\n]*\\(reg:SI (?:[12])?[2468]
r(?:[12])?[2468]\\).*Function foo_x4"
    gcc.dg/rtl/arm/ldrd-peepholes.c scan-rtl-dump peephole2 "Function
foo_x4.*\\(parallel \\[\\n[^\\n]*\\(set \\(reg:SI[^\\n]*\\n
*\\(mem/c:SI \\(plus:SI \\(reg:SI 0 r0\\)\\n *\\(const_int 8.*Function
foo_x5"
    gcc.dg/rtl/arm/ldrd-peepholes.c scan-rtl-dump peephole2 "Function
foo_x5.*\\(parallel \\[\\n[^\\n]*\\(set \\(mem/c:SI \\(plus:SI
\\(reg:SI 0 r0\\)\\n *\\(const_int 8.*$"

A few more ICEs:
--target arm-none-linux-gnueabihf
--with-mode arm
--with-cpu cortex-a57
--with-fpu crypto-neon-fp-armv8
    gcc.dg/torture/stackalign/nested-6.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none -fpic (internal compiler
error)
    gcc.dg/torture/stackalign/nested-6.c   -O2 -flto
-fuse-linker-plugin -fno-fat-lto-objects -fpic (internal compiler
error)
    gcc.dg/torture/stackalign/nested-6.c   -O2 -fpic (internal compiler error)
    gcc.dg/torture/stackalign/nested-6.c   -O3 -g -fpic (internal
compiler error)

And GCC fails to build:
--target arm-none-linux-gnueabihf
--with-mode arm
--with-cpu cortex-a5
--with-fpu vfpv3-d16-fp16
when compiling libsanitizer/libacktrace:
Makefile:564: recipe for target 'cp-demangle.lo' failed
make[4]: *** [cp-demangle.lo] Error 1

/home/christophe.lyon/src/GCC/sources/gcc-fsf/trunk/libsanitizer/libbacktrace/../../libiberty/cp-demangle.c:
In function 'is_ctor_or_dtor':
/home/christophe.lyon/src/GCC/sources/gcc-fsf/trunk/libsanitizer/libbacktrace/../../libiberty/cp-demangle.c:6615:1:
error: insn does not satisfy its constraints:
 6615 | }
      | ^
(insn 236 107 218 2 (parallel [
            (set (mem/c:SI (plus:SI (reg/f:SI 11 fp)
                        (const_int -56 [0xffffffffffffffc8])) [1
di.num_comps+0 S4 A32])
                (reg:SI 12 ip [131]))
            (set (mem/f/c:SI (plus:SI (reg/f:SI 11 fp)
                        (const_int -52 [0xffffffffffffffcc])) [26
di.subs+0 S4 A32])
                (reg/f:SI 13 sp))
        ]) "/home/christophe.lyon/src/GCC/sources/gcc-fsf/trunk/libsanitizer/libbacktrace/../../libiberty/cp-demangle.c":6567:13
347 {*arm_strd}
     (nil))
during RTL pass: cprop_hardreg
/home/christophe.lyon/src/GCC/sources/gcc-fsf/trunk/libsanitizer/libbacktrace/../../libiberty/cp-demangle.c:6615:1:
internal compiler error: in extract_constrain_insn, at recog.c:2211
0x5a8795 _fatal_insn(char const*, rtx_def const*, char const*, int, char const*)
        /home/christophe.lyon/src/GCC/sources/gcc-fsf/trunk/gcc/rtl-error.c:108
0x5a87bb _fatal_insn_not_found(rtx_def const*, char const*, int, char const*)
        /home/christophe.lyon/src/GCC/sources/gcc-fsf/trunk/gcc/rtl-error.c:119
0xb7c15d extract_constrain_insn(rtx_insn*)
        /home/christophe.lyon/src/GCC/sources/gcc-fsf/trunk/gcc/recog.c:2211
0xb7fd26 copyprop_hardreg_forward_1
        /home/christophe.lyon/src/GCC/sources/gcc-fsf/trunk/gcc/regcprop.c:801
0xb80b22 execute
        /home/christophe.lyon/src/GCC/sources/gcc-fsf/trunk/gcc/regcprop.c:1307

Thanks,

Christophe

>
>
> ###############     Attachment also inlined for ease of reply    ###############
>
>
> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
> index 79ede0db174fcce87abe8b4d18893550d4c7e2f6..485bc68a618d6ae4a1640368ccb025fe2c9e1420 100644
> --- a/gcc/config/arm/arm-protos.h
> +++ b/gcc/config/arm/arm-protos.h
> @@ -125,6 +125,7 @@ extern rtx arm_gen_store_multiple (int *, int, rtx, int, rtx, HOST_WIDE_INT *);
>  extern bool offset_ok_for_ldrd_strd (HOST_WIDE_INT);
>  extern bool operands_ok_ldrd_strd (rtx, rtx, rtx, HOST_WIDE_INT, bool, bool);
>  extern bool gen_operands_ldrd_strd (rtx *, bool, bool, bool);
> +extern bool valid_operands_ldrd_strd (rtx *, bool);
>  extern int arm_gen_movmemqi (rtx *);
>  extern bool gen_movmem_ldrd_strd (rtx *);
>  extern machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx);
> @@ -146,6 +147,7 @@ extern const char *output_mov_long_double_arm_from_arm (rtx *);
>  extern const char *output_move_double (rtx *, bool, int *count);
>  extern const char *output_move_quad (rtx *);
>  extern int arm_count_output_move_double_insns (rtx *);
> +extern int arm_count_ldrdstrd_insns (rtx *, bool);
>  extern const char *output_move_vfp (rtx *operands);
>  extern const char *output_move_neon (rtx *operands);
>  extern int arm_attr_length_move_neon (rtx_insn *);
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index 73cb8df9af1ec9d680091bb8691bcd925a1be1d3..1c336da9e5b0948ef1058c46966364510dc1ca38 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -15556,7 +15556,7 @@ mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
>        *base = addr;
>        return true;
>      }
> -  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
> +  else if (GET_CODE (addr) == PLUS)
>      {
>        *base = XEXP (addr, 0);
>        *offset = XEXP (addr, 1);
> @@ -15721,7 +15721,7 @@ gen_operands_ldrd_strd (rtx *operands, bool load,
>      }
>
>    /* Make sure accesses are to consecutive memory locations.  */
> -  if (gap != 4)
> +  if (gap != GET_MODE_SIZE (SImode))
>      return false;
>
>    if (!align_ok_ldrd_strd (align[0], offset))
> @@ -15802,6 +15802,55 @@ gen_operands_ldrd_strd (rtx *operands, bool load,
>  }
>
>
> +/* Return true if parallel execution of the two word-size accesses provided
> +   could be satisfied with a single LDRD/STRD instruction.  Two word-size
> +   accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
> +   register operands and OPERANDS[2,3] are the corresponding memory operands.
> +   */
> +bool
> +valid_operands_ldrd_strd (rtx *operands, bool load)
> +{
> +  int nops = 2;
> +  HOST_WIDE_INT offsets[2], offset, align[2];
> +  rtx base = NULL_RTX;
> +  rtx cur_base, cur_offset;
> +  int i, gap;
> +
> +  /* Check that the memory references are immediate offsets from the
> +     same base register.  Extract the base register, the destination
> +     registers, and the corresponding memory offsets.  */
> +  for (i = 0; i < nops; i++)
> +    {
> +      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
> +                                &align[i]))
> +       return false;
> +
> +      if (i == 0)
> +       base = cur_base;
> +      else if (REGNO (base) != REGNO (cur_base))
> +       return false;
> +
> +      offsets[i] = INTVAL (cur_offset);
> +      if (GET_CODE (operands[i]) == SUBREG)
> +       return false;
> +    }
> +
> +  if (offsets[0] > offsets[1])
> +    return false;
> +
> +  gap = offsets[1] - offsets[0];
> +  offset = offsets[0];
> +
> +  /* Make sure accesses are to consecutive memory locations.  */
> +  if (gap != GET_MODE_SIZE (SImode))
> +    return false;
> +
> +  if (!align_ok_ldrd_strd (align[0], offset))
> +    return false;
> +
> +  return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
> +                               false, load);
> +}
>
>
>
>
>  /* Print a symbolic form of X to the debug file, F.  */
> @@ -28474,6 +28523,26 @@ arm_count_output_move_double_insns (rtx *operands)
>    return count;
>  }
>
> +/* Same as above, but operands are a register/memory pair in SImode.
> +   Assumes operands has the base register in position 0 and memory in position
> +   2 (which is the order provided by the arm_{ldrd,strd} patterns).  */
> +int
> +arm_count_ldrdstrd_insns (rtx *operands, bool load)
> +{
> +  int count;
> +  rtx ops[2];
> +  int regnum, memnum;
> +  if (load)
> +    regnum = 0, memnum = 1;
> +  else
> +    regnum = 1, memnum = 0;
> +  ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
> +  ops[memnum] = adjust_address (operands[2], DImode, 0);
> +  output_move_double (ops, false, &count);
> +  return count;
> +}
> +
> +
>  int
>  vfp3_const_double_for_fract_bits (rtx operand)
>  {
> diff --git a/gcc/config/arm/ldrdstrd.md b/gcc/config/arm/ldrdstrd.md
> index be53d010fa6dfcf5a6854ae2b17f7cfcea25db9e..cb7a6adebbc8084a2e642ff2dcbef8b3fb16f268 100644
> --- a/gcc/config/arm/ldrdstrd.md
> +++ b/gcc/config/arm/ldrdstrd.md
> @@ -23,37 +23,22 @@
>  ;; The following peephole optimizations identify consecutive memory
>  ;; accesses, and try to rearrange the operands to enable generation of
>  ;; ldrd/strd.
> +;;
> +;; In many cases they behave in the same way that patterns in ldmstm.md behave,
> +;; but there is extra logic in gen_operands_ldrd_strd to try and ensure the
> +;; registers used are an (r<N>, r<N + 1>) pair where N is even.
>
>  (define_peephole2 ; ldrd
>    [(set (match_operand:SI 0 "arm_general_register_operand" "")
> -        (match_operand:SI 2 "memory_operand" ""))
> +       (match_operand:SI 2 "memory_operand" ""))
>     (set (match_operand:SI 1 "arm_general_register_operand" "")
> -        (match_operand:SI 3 "memory_operand" ""))]
> +       (match_operand:SI 3 "memory_operand" ""))]
>    "TARGET_LDRD"
> -  [(const_int 0)]
> +  [(parallel [(set (match_dup 0) (match_dup 2))
> +             (set (match_dup 1) (match_dup 3))])]
>  {
>    if (!gen_operands_ldrd_strd (operands, true, false, false))
>      FAIL;
> -  else if (TARGET_ARM)
> -  {
> -    /* In ARM state, the destination registers of LDRD/STRD must be
> -       consecutive. We emit DImode access.  */
> -    operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
> -    operands[2] = adjust_address (operands[2], DImode, 0);
> -    /* Emit [(set (match_dup 0) (match_dup 2))] */
> -    emit_insn (gen_rtx_SET (operands[0], operands[2]));
> -    DONE;
> -  }
> -  else if (TARGET_THUMB2)
> -  {
> -    /* Emit the pattern:
> -       [(parallel [(set (match_dup 0) (match_dup 2))
> -                   (set (match_dup 1) (match_dup 3))])] */
> -    rtx t1 = gen_rtx_SET (operands[0], operands[2]);
> -    rtx t2 = gen_rtx_SET (operands[1], operands[3]);
> -    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
> -    DONE;
> -  }
>  })
>
>  (define_peephole2 ; strd
> @@ -62,117 +47,50 @@ (define_peephole2 ; strd
>     (set (match_operand:SI 3 "memory_operand" "")
>         (match_operand:SI 1 "arm_general_register_operand" ""))]
>    "TARGET_LDRD"
> -  [(const_int 0)]
> +  [(parallel [(set (match_dup 2) (match_dup 0))
> +             (set (match_dup 3) (match_dup 1))])]
>  {
>    if (!gen_operands_ldrd_strd (operands, false, false, false))
>      FAIL;
> -  else if (TARGET_ARM)
> -  {
> -    /* In ARM state, the destination registers of LDRD/STRD must be
> -       consecutive. We emit DImode access.  */
> -    operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
> -    operands[2] = adjust_address (operands[2], DImode, 0);
> -    /* Emit [(set (match_dup 2) (match_dup 0))]  */
> -    emit_insn (gen_rtx_SET (operands[2], operands[0]));
> -    DONE;
> -  }
> -  else if (TARGET_THUMB2)
> -  {
> -    /* Emit the pattern:
> -       [(parallel [(set (match_dup 2) (match_dup 0))
> -                   (set (match_dup 3) (match_dup 1))])]  */
> -    rtx t1 = gen_rtx_SET (operands[2], operands[0]);
> -    rtx t2 = gen_rtx_SET (operands[3], operands[1]);
> -    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
> -    DONE;
> -  }
>  })
>
>  ;; The following peepholes reorder registers to enable LDRD/STRD.
>  (define_peephole2 ; strd of constants
>    [(set (match_operand:SI 0 "arm_general_register_operand" "")
> -        (match_operand:SI 4 "const_int_operand" ""))
> +       (match_operand:SI 4 "const_int_operand" ""))
>     (set (match_operand:SI 2 "memory_operand" "")
> -        (match_dup 0))
> +       (match_dup 0))
>     (set (match_operand:SI 1 "arm_general_register_operand" "")
> -        (match_operand:SI 5 "const_int_operand" ""))
> +       (match_operand:SI 5 "const_int_operand" ""))
>     (set (match_operand:SI 3 "memory_operand" "")
> -        (match_dup 1))]
> +       (match_dup 1))]
>    "TARGET_LDRD"
> -  [(const_int 0)]
> +  [(set (match_dup 0) (match_dup 4))
> +   (set (match_dup 1) (match_dup 5))
> +   (parallel [(set (match_dup 2) (match_dup 0))
> +             (set (match_dup 3) (match_dup 1))])]
>  {
>    if (!gen_operands_ldrd_strd (operands, false, true, false))
>      FAIL;
> -  else if (TARGET_ARM)
> -  {
> -   rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0]));
> -   operands[2] = adjust_address (operands[2], DImode, 0);
> -   /* Emit the pattern:
> -      [(set (match_dup 0) (match_dup 4))
> -      (set (match_dup 1) (match_dup 5))
> -      (set (match_dup 2) tmp)]  */
> -   emit_insn (gen_rtx_SET (operands[0], operands[4]));
> -   emit_insn (gen_rtx_SET (operands[1], operands[5]));
> -   emit_insn (gen_rtx_SET (operands[2], tmp));
> -   DONE;
> -  }
> -  else if (TARGET_THUMB2)
> -  {
> -    /* Emit the pattern:
> -       [(set (match_dup 0) (match_dup 4))
> -        (set (match_dup 1) (match_dup 5))
> -        (parallel [(set (match_dup 2) (match_dup 0))
> -                   (set (match_dup 3) (match_dup 1))])]  */
> -    emit_insn (gen_rtx_SET (operands[0], operands[4]));
> -    emit_insn (gen_rtx_SET (operands[1], operands[5]));
> -    rtx t1 = gen_rtx_SET (operands[2], operands[0]);
> -    rtx t2 = gen_rtx_SET (operands[3], operands[1]);
> -    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
> -    DONE;
> -  }
>  })
>
>  (define_peephole2 ; strd of constants
>    [(set (match_operand:SI 0 "arm_general_register_operand" "")
> -        (match_operand:SI 4 "const_int_operand" ""))
> +       (match_operand:SI 4 "const_int_operand" ""))
>     (set (match_operand:SI 1 "arm_general_register_operand" "")
> -        (match_operand:SI 5 "const_int_operand" ""))
> +       (match_operand:SI 5 "const_int_operand" ""))
>     (set (match_operand:SI 2 "memory_operand" "")
> -        (match_dup 0))
> +       (match_dup 0))
>     (set (match_operand:SI 3 "memory_operand" "")
> -        (match_dup 1))]
> +       (match_dup 1))]
>    "TARGET_LDRD"
> -  [(const_int 0)]
> +  [(set (match_dup 0) (match_dup 4))
> +   (set (match_dup 1) (match_dup 5))
> +   (parallel [(set (match_dup 2) (match_dup 0))
> +             (set (match_dup 3) (match_dup 1))])]
>  {
>    if (!gen_operands_ldrd_strd (operands, false, true, false))
>       FAIL;
> -  else if (TARGET_ARM)
> -  {
> -   rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0]));
> -   operands[2] = adjust_address (operands[2], DImode, 0);
> -   /* Emit the pattern
> -      [(set (match_dup 0) (match_dup 4))
> -       (set (match_dup 1) (match_dup 5))
> -       (set (match_dup 2) tmp)]  */
> -   emit_insn (gen_rtx_SET (operands[0], operands[4]));
> -   emit_insn (gen_rtx_SET (operands[1], operands[5]));
> -   emit_insn (gen_rtx_SET (operands[2], tmp));
> -   DONE;
> -  }
> -  else if (TARGET_THUMB2)
> -  {
> -    /*  Emit the pattern:
> -        [(set (match_dup 0) (match_dup 4))
> -         (set (match_dup 1) (match_dup 5))
> -         (parallel [(set (match_dup 2) (match_dup 0))
> -                    (set (match_dup 3) (match_dup 1))])]  */
> -    emit_insn (gen_rtx_SET (operands[0], operands[4]));
> -    emit_insn (gen_rtx_SET (operands[1], operands[5]));
> -    rtx t1 = gen_rtx_SET (operands[2], operands[0]);
> -    rtx t2 = gen_rtx_SET (operands[3], operands[1]);
> -    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
> -    DONE;
> -  }
>  })
>
>  ;; The following two peephole optimizations are only relevant for ARM
> @@ -181,39 +99,32 @@ (define_peephole2 ; strd of constants
>  (define_peephole2 ; swap the destination registers of two loads
>                   ; before a commutative operation.
>    [(set (match_operand:SI 0 "arm_general_register_operand" "")
> -        (match_operand:SI 2 "memory_operand" ""))
> +       (match_operand:SI 2 "memory_operand" ""))
>     (set (match_operand:SI 1 "arm_general_register_operand" "")
> -        (match_operand:SI 3 "memory_operand" ""))
> +       (match_operand:SI 3 "memory_operand" ""))
>     (set (match_operand:SI 4 "arm_general_register_operand" "")
> -        (match_operator:SI 5 "commutative_binary_operator"
> +       (match_operator:SI 5 "commutative_binary_operator"
>                            [(match_operand 6 "arm_general_register_operand" "")
>                             (match_operand 7 "arm_general_register_operand" "") ]))]
>    "TARGET_LDRD && TARGET_ARM
>     && (  ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7])))
> -        ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
> +       ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
>     && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
>     && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))"
> -  [(set (match_dup 0) (match_dup 2))
> +  [(parallel [(set (match_dup 0) (match_dup 2))
> +             (set (match_dup 1) (match_dup 3))])
>     (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))]
> -  {
> -    if (!gen_operands_ldrd_strd (operands, true, false, true))
> -     {
> -        FAIL;
> -     }
> -    else
> -     {
> -        operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
> -        operands[2] = adjust_address (operands[2], DImode, 0);
> -     }
> -   }
> -)
> +{
> +  if (!gen_operands_ldrd_strd (operands, true, false, true))
> +    FAIL;
> +})
>
>  (define_peephole2 ; swap the destination registers of two loads
>                   ; before a commutative operation that sets the flags.
>    [(set (match_operand:SI 0 "arm_general_register_operand" "")
> -        (match_operand:SI 2 "memory_operand" ""))
> +       (match_operand:SI 2 "memory_operand" ""))
>     (set (match_operand:SI 1 "arm_general_register_operand" "")
> -        (match_operand:SI 3 "memory_operand" ""))
> +       (match_operand:SI 3 "memory_operand" ""))
>     (parallel
>        [(set (match_operand:SI 4 "arm_general_register_operand" "")
>             (match_operator:SI 5 "commutative_binary_operator"
> @@ -225,24 +136,62 @@ (define_peephole2 ; swap the destination registers of two loads
>         ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
>     && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
>     && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))"
> -  [(set (match_dup 0) (match_dup 2))
> +  [(parallel [(set (match_dup 0) (match_dup 2))
> +             (set (match_dup 1) (match_dup 3))])
>     (parallel
>        [(set (match_dup 4)
>             (match_op_dup 5 [(match_dup 6) (match_dup 7)]))
>         (clobber (reg:CC CC_REGNUM))])]
> -  {
> -    if (!gen_operands_ldrd_strd (operands, true, false, true))
> -     {
> -        FAIL;
> -     }
> -    else
> -     {
> -        operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
> -        operands[2] = adjust_address (operands[2], DImode, 0);
> -     }
> -   }
> -)
> +{
> +  if (!gen_operands_ldrd_strd (operands, true, false, true))
> +    FAIL;
> +})
>
>  ;; TODO: Handle LDRD/STRD with writeback:
>  ;; (a) memory operands can be POST_INC, POST_DEC, PRE_MODIFY, POST_MODIFY
>  ;; (b) Patterns may be followed by an update of the base address.
> +
> +
> +;; insns matching the LDRD/STRD patterns that will get created by the above
> +;; peepholes.
> +;; We use gen_operands_ldrd_strd() with a modify argument as false so that the
> +;; operands are not changed.
> +(define_insn "*arm_ldrd"
> +  [(parallel [(set (match_operand:SI 0 "s_register_operand" "=r")
> +                  (match_operand:SI 2 "memory_operand" "m"))
> +             (set (match_operand:SI 1 "s_register_operand" "=r")
> +                  (match_operand:SI 3 "memory_operand" "m"))])]
> +  "TARGET_LDRD && TARGET_ARM && reload_completed
> +  && valid_operands_ldrd_strd (operands, true)"
> +  {
> +    rtx op[2];
> +    op[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
> +    op[1] = adjust_address (operands[2], DImode, 0);
> +    return output_move_double (op, true, NULL);
> +  }
> +  [(set (attr "length")
> +       (symbol_ref "arm_count_ldrdstrd_insns (operands, true) * 4"))
> +   (set (attr "ce_count") (symbol_ref "get_attr_length (insn) / 4"))
> +   (set_attr "type" "load_8")
> +   (set_attr "predicable" "yes")]
> +)
> +
> +(define_insn "*arm_strd"
> +  [(parallel [(set (match_operand:SI 2 "memory_operand" "=m")
> +                  (match_operand:SI 0 "s_register_operand" "r"))
> +             (set (match_operand:SI 3 "memory_operand" "=m")
> +                  (match_operand:SI 1 "s_register_operand" "r"))])]
> +  "TARGET_LDRD && TARGET_ARM && reload_completed
> +  && valid_operands_ldrd_strd (operands, false)"
> +  {
> +    rtx op[2];
> +    op[0] = adjust_address (operands[2], DImode, 0);
> +    op[1] = gen_rtx_REG (DImode, REGNO (operands[0]));
> +    return output_move_double (op, true, NULL);
> +  }
> +  [(set (attr "length")
> +       (symbol_ref "arm_count_ldrdstrd_insns (operands, false) * 4"))
> +   (set (attr "ce_count") (symbol_ref "get_attr_length (insn) / 4"))
> +   (set_attr "type" "store_8")
> +   (set_attr "predicable" "yes")]
> +)
> diff --git a/gcc/testsuite/gcc.c-torture/execute/pr88714.c b/gcc/testsuite/gcc.c-torture/execute/pr88714.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..614ad9ac4a0662ba752532270e2d687505504d48
> --- /dev/null
> +++ b/gcc/testsuite/gcc.c-torture/execute/pr88714.c
> @@ -0,0 +1,43 @@
> +/* PR bootstrap/88714 */
> +
> +struct S { int a, b, c; int *d; };
> +struct T { int *e, *f, *g; } *t = 0;
> +int *o = 0;
> +
> +__attribute__((noipa))
> +void bar (int *x, int y, int z, int w)
> +{
> +  if (w == -1)
> +    {
> +      if (x != 0 || y != 0 || z != 0)
> +       __builtin_abort ();
> +    }
> +  else if (w != 0 || x != t->g || y != 0 || z != 12)
> +    __builtin_abort ();
> +}
> +
> +__attribute__((noipa)) void
> +foo (struct S *x, struct S *y, int *z, int w)
> +{
> +  *o = w;
> +  if (w)
> +    bar (0, 0, 0, -1);
> +  x->d = z;
> +  if (y->d)
> +    y->c = y->c + y->d[0];
> +  bar (t->g, 0, y->c, 0);
> +}
> +
> +int
> +main ()
> +{
> +  int a[4] = { 8, 9, 10, 11 };
> +  struct S s = { 1, 2, 3, &a[0] };
> +  struct T u = { 0, 0, &a[3] };
> +  o = &a[2];
> +  t = &u;
> +  foo (&s, &s, &a[1], 5);
> +  if (s.c != 12 || s.d != &a[1])
> +    __builtin_abort ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c b/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..ff209c5df29765441bbe9481ac8caf7bbc6af8f7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c
> @@ -0,0 +1,441 @@
> +/* { dg-do compile } */
> +/* { dg-skip-if "Ensure only targetting arm with TARGET_LDRD" { *-*-* } { "-mthumb" } { "" } } */
> +/* { dg-options "-O3 -marm -fdump-rtl-peephole2" } */
> +
> +/*
> +   Test file contains testcases that are there to check.
> +      1) Each peephole generates the expected patterns.
> +      2) These patterns match the expected define_insns and generate ldrd/strd.
> +      2) Memory alias information is not lost in the peephole transformation.
> +
> +   I don't check the peephole pass on most of the functions here but just check
> +   the correct assembly is output.  The ldrd/strd peepholes only generate a
> +   different pattern to the ldm/stm peepholes in some specific cases, and those
> +   are checked.
> +
> +   The exceptions are tested by the crafted testcases at the end of this file
> +   that are named in the pattern foo_x[[:digit:]].
> +
> +   The first testcase (foo_mem_11) demonstrates bug 88714 is fixed by checking
> +   that both alias sets in the RTL are preserved.
> +
> +   All other testcases are only checked to see that they generate a LDRD or
> +   STRD instruction accordingly.
> + */
> +
> +
> +/* Example of bugzilla 88714 -- memory aliasing info needs to be retained.  */
> +int __RTL (startwith ("peephole2")) foo_mem_11 (int *a, int *b)
> +{
> +(function "foo_mem_11"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 101 (set (reg:SI r2)
> +                     (mem/c:SI (reg:SI r0) [1 S4 A64])) "/home/matmal01/test.c":18)
> +      (cinsn 102 (set (reg:SI r3)
> +                     (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [2 S4 A32])) "/home/matmal01/test.c":18)
> +      (cinsn 103 (set (reg:SI r0)
> +                     (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18)
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +/* { dg-final { scan-rtl-dump {Function foo_mem_11.*\(mem/c:SI[^\n]*\[1.*\(mem/c:SI[^\n]*\n[^\n]*\[2.*Function foo11} "peephole2" } } */
> +
> +/* ldrd plain peephole2.  */
> +int __RTL (startwith ("peephole2")) foo11 (int *a)
> +{
> +(function "foo11"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 101 (set (reg:SI r2)
> +                     (mem/c:SI (reg:SI r0) [0 S4 A64])) "/home/matmal01/test.c":18)
> +      (cinsn 102 (set (reg:SI r3)
> +                     (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])) "/home/matmal01/test.c":18)
> +      (cinsn 103 (set (reg:SI r0)
> +                     (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18)
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +
> +/* ldrd plain peephole2, which accepts insns initially out of order.  */
> +int __RTL (startwith ("peephole2")) foo11_alt (int *a)
> +{
> +(function "foo11_alt"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 102 (set (reg:SI r3)
> +                     (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])) "/home/matmal01/test.c":18)
> +      (cinsn 101 (set (reg:SI r2)
> +                     (mem/c:SI (reg:SI r0) [0 S4 A64])) "/home/matmal01/test.c":18)
> +      (cinsn 103 (set (reg:SI r0)
> +                     (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18)
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +
> +/* strd plain peephole2.  */
> +int __RTL (startwith ("peephole2")) foo12 (int *a)
> +{
> +(function "foo12"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
> +                     (reg:SI r2)) "/home/matmal01/test.c":18)
> +      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
> +                     (reg:SI r3)) "/home/matmal01/test.c":18)
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +
> +/* strd of constants -- store interleaved with constant move into register.
> +   Use same register twice to ensure we use the relevant pattern.  */
> +int __RTL (startwith ("peephole2")) foo13 (int *a)
> +{
> +(function "foo13"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 99  (set (reg:SI r2)
> +                     (const_int 1)) "/home/matmal01/test.c":18)
> +      (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
> +                     (reg:SI r2)) "/home/matmal01/test.c":18)
> +      (cinsn 100 (set (reg:SI r2)
> +                     (const_int 0)) "/home/matmal01/test.c":18)
> +      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
> +                     (reg:SI r2)) "/home/matmal01/test.c":18)
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +
> +/* strd of constants -- stores after constant moves into registers.
> +   Use registers out of order, is only way to avoid plain strd while hitting
> +   this pattern.  */
> +int __RTL (startwith ("peephole2")) foo14 (int *a)
> +{
> +(function "foo14"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 99  (set (reg:SI r3)
> +                     (const_int 1)) "/home/matmal01/test.c":18)
> +      (cinsn 100 (set (reg:SI r2)
> +                     (const_int 0)) "/home/matmal01/test.c":18)
> +      (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
> +                     (reg:SI r3)) "/home/matmal01/test.c":18)
> +      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
> +                     (reg:SI r2)) "/home/matmal01/test.c":18)
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +
> +/* swap the destination registers of two loads before a commutative operation.
> +   Here the commutative operation is what the peephole uses to know it can
> +   swap the register loads around.  */
> +int __RTL (startwith ("peephole2")) foo15 (int *a)
> +{
> +(function "foo15"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 100 (set (reg:SI r3)
> +                     (mem/c:SI (reg:SI r0) [0 S4 A64])) "/home/matmal01/test.c":18)
> +      (cinsn 101 (set (reg:SI r2)
> +                     (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])) "/home/matmal01/test.c":18)
> +      (cinsn 102 (set (reg:SI r0)
> +                     (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18
> +       (expr_list:REG_DEAD (reg:SI r2)
> +       (expr_list:REG_DEAD (reg:SI r3)
> +        (nil))))
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +
> +
> +/* swap the destination registers of two loads before a commutative operation
> +   that sets the flags.  */
> +/*
> +   NOTE Can't make a testcase for this pattern since there are no insn patterns
> +   matching the parallel insn in the peephole.
> +
> +   i.e. until some define_insn is defined matching that insn that peephole can
> +   never match in real code, and in artificial RTL code any pattern that can
> +   match it will cause an ICE.
> +
> +int __RTL (startwith ("peephole2")) foo16 (int *a)
> +{
> +(function "foo16"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 100 (set (reg:SI r3)
> +                     (mem/c:SI (reg:SI r0) [0 S4 A64])) "/home/matmal01/test.c":18)
> +      (cinsn 101 (set (reg:SI r2)
> +                     (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])) "/home/matmal01/test.c":18)
> +      (cinsn 103 (parallel
> +                 [(set (reg:SI r0)
> +                       (and:SI (reg:SI r3) (reg:SI r2)))
> +                 (clobber (reg:CC cc))]) "/home/matmal01/test.c":18
> +       (expr_list:REG_DEAD (reg:SI r2)
> +       (expr_list:REG_DEAD (reg:SI r3)
> +        (nil))))
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +*/
> +
> +
> +/* Making patterns that will behave differently between the LDM/STM peepholes
> +   and LDRD/STRD peepholes.
> +   gen_operands_ldrd_strd() uses peep2_find_free_register() to find spare
> +   registers to use.
> +   peep2_find_free_register() only ever returns registers marked in
> +   call_used_regs, hence we make sure to leave register 2 and 3 available (as
> +   they are always on in the defaults marked by CALL_USED_REGISTERS).  */
> +
> +/* gen_operands_ldrd_strd() purposefully finds an even register to look at
> +   which would treat the following pattern differently to the stm peepholes.
> + */
> +int __RTL (startwith ("peephole2")) foo_x1 (int *a)
> +{
> +(function "foo_x1"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 99  (set (reg:SI r5)
> +                     (const_int 1)) "/home/matmal01/test.c":18)
> +      (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
> +                     (reg:SI r5)) "/home/matmal01/test.c":18)
> +      (cinsn 100 (set (reg:SI r5)
> +                     (const_int 0)) "/home/matmal01/test.c":18)
> +      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
> +                     (reg:SI r5)) "/home/matmal01/test.c":18
> +       (expr_list:REG_DEAD (reg:SI r5)
> +        (nil)))
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +/* Ensure we generated a parallel that started with a set from an even register.
> +   i.e.
> +   (parallel [
> +     (set (mem
> +         (reg:SI <even>
> +   */
> +/* { dg-final { scan-rtl-dump {Function foo_x1.*\(parallel \[\n[^\n]*\(set \(mem[^\n]*\n[^\n]*\(reg:SI (?:[12])?[2468] r(?:[12])?[2468]\).*Function foo_x2} "peephole2" } } */
> +
> +/* Like above gen_operands_ldrd_strd() would look to start with an even
> +   register while gen_const_stm_seq() doesn't care.  */
> +int __RTL (startwith ("peephole2")) foo_x2 (int *a)
> +{
> +(function "foo_x2"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 99  (set (reg:SI r5)
> +                     (const_int 1)) "/home/matmal01/test.c":18)
> +      (cinsn 100 (set (reg:SI r6)
> +                     (const_int 0)) "/home/matmal01/test.c":18)
> +      (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
> +                     (reg:SI r5)) "/home/matmal01/test.c":18)
> +      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
> +                     (reg:SI r6)) "/home/matmal01/test.c":18)
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +/* Ensure generated parallel starts with a set from an even register (as foo_x1).  */
> +/* { dg-final { scan-rtl-dump {Function foo_x2.*\(parallel \[\n[^\n]*\(set \(mem[^\n]*\n[^\n]*\(reg:SI (?:[12])?[2468] r(?:[12])?[2468]\).*Function foo_x3} "peephole2" } } */
> +
> +/* When storing multiple values into a register that will be used later, ldrd
> +   searches for another register to use instead of just giving up.  */
> +int __RTL (startwith ("peephole2")) foo_x3 (int *a)
> +{
> +(function "foo_x3"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 99  (set (reg:SI r3)
> +                     (const_int 1)) "/home/matmal01/test.c":18)
> +      (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
> +                     (reg:SI r3)) "/home/matmal01/test.c":18)
> +      (cinsn 100 (set (reg:SI r3)
> +                     (const_int 0)) "/home/matmal01/test.c":18)
> +      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
> +                     (reg:SI r3)) "/home/matmal01/test.c":18)
> +      (cinsn 103 (set (reg:SI r0)
> +                     (plus:SI (reg:SI r0) (reg:SI r3))) "/home/matmal01/test.c":18)
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +/* Ensure generated parallel starts with a set from an even register (as foo_x1).  */
> +/* { dg-final { scan-rtl-dump {Function foo_x3.*\(parallel \[\n[^\n]*\(set \(mem[^\n]*\n[^\n]*\(reg:SI (?:[12])?[2468] r(?:[12])?[2468]\).*Function foo_x4} "peephole2" } } */
> +
> +/* ldrd gen_peephole2_11 but using plus 8 and plus 12 in the offsets.  */
> +int __RTL (startwith ("peephole2")) foo_x4 (int *a)
> +{
> +(function "foo_x4"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 101 (set (reg:SI r2)
> +                     (mem/c:SI (plus:SI (reg:SI r0) (const_int 8)) [0 S4 A64])) "/home/matmal01/test.c":18)
> +      (cinsn 102 (set (reg:SI r3)
> +                     (mem/c:SI (plus:SI (reg:SI r0) (const_int 12)) [0 S4 A32])) "/home/matmal01/test.c":18)
> +      (cinsn 103 (set (reg:SI r0)
> +                     (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18)
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +/* Ensure generated parallel starts with a set from the appropriate offset from
> +   register 0.
> +(parallel [
> +           (set (reg:SI ...
> +               (mem/c:SI (plus:SI (reg:SI 0 r0)
> +                       (const_int 8 .*
> +*/
> +/* { dg-final { scan-rtl-dump {Function foo_x4.*\(parallel \[\n[^\n]*\(set \(reg:SI[^\n]*\n *\(mem/c:SI \(plus:SI \(reg:SI 0 r0\)\n *\(const_int 8.*Function foo_x5} "peephole2" } } */
> +
> +/* strd gen_peephole2_12 but using plus 8 and plus 12 in the offsets.  */
> +int __RTL (startwith ("peephole2")) foo_x5 (int *a)
> +{
> +(function "foo12"
> +  (insn-chain
> +    (cnote 1 NOTE_INSN_DELETED)
> +    (block 2
> +      (edge-from entry (flags "FALLTHRU"))
> +      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +      (cinsn 101 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 8)) [0 S4 A64])
> +                     (reg:SI r2)) "/home/matmal01/test.c":18)
> +      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 12)) [0 S4 A32])
> +                     (reg:SI r3)) "/home/matmal01/test.c":18)
> +      (edge-to exit (flags "FALLTHRU"))
> +    ) ;; block 2
> +  ) ;; insn-chain
> +  (crtl
> +    (return_rtx
> +      (reg/i:SI r0)
> +    ) ;; return_rtx
> +  ) ;; crtl
> +) ;; function "main"
> +}
> +/* Ensure generated parallel starts with a set to the appropriate offset from
> +   register 0.  */
> +/* { dg-final { scan-rtl-dump {Function foo_x5.*\(parallel \[\n[^\n]*\(set \(mem/c:SI \(plus:SI \(reg:SI 0 r0\)\n *\(const_int 8.*$} "peephole2" } } */
> +
> +
> +/* { dg-final { scan-assembler-not "ldm" } } */
> +/* { dg-final { scan-assembler-not "stm" } } */
> +/* { dg-final { scan-assembler-times {ldrd\tr[2468], \[r0\]} 4 } } */
> +/* { dg-final { scan-assembler-times {ldrd\tr[2468], \[r0, #8\]} 1 } } */
> +/* { dg-final { scan-assembler-times {strd\tr[2468], \[r0\]} 6 } } */
> +/* { dg-final { scan-assembler-times {strd\tr[2468], \[r0, #8\]} 1 } } */
>
Jakub Jelinek Feb. 8, 2019, 9:51 a.m. UTC | #4
On Fri, Feb 08, 2019 at 10:18:03AM +0100, Christophe Lyon wrote:
> I'm afaid this patch causes several regressions. Maybe they have
> already been fixed post-commit (I have several validations for later
> commits still running)?

The following patch fixes the single ICE I've tried to reproduce.
While iwmmxt.md movdi pattern uses =m, r and =r, m constraints, both
arm.md and vfp.md movdi patterns use =m, q and =q, m constraints and thus
allow also ip register.  The following patch is an attempt to do the same
thing, just in the same patterns through arch_enabled attribute.

Completely untested.

2019-02-08  Jakub Jelinek  <jakub@redhat.com>

	PR bootstrap/88714
	* config/arm/ldrdstrd.md (*arm_ldrd, *arm_strd): Add alternative with
	q constraint instead of r, enable it only if not TARGET_REALLY_IWMMXT.

--- gcc/config/arm/ldrdstrd.md.jj	2019-02-07 17:33:38.841669141 +0100
+++ gcc/config/arm/ldrdstrd.md	2019-02-08 10:42:56.515325579 +0100
@@ -157,10 +157,10 @@ (define_peephole2 ; swap the destination
 ;; We use gen_operands_ldrd_strd() with a modify argument as false so that the
 ;; operands are not changed.
 (define_insn "*arm_ldrd"
-  [(parallel [(set (match_operand:SI 0 "s_register_operand" "=r")
-		   (match_operand:SI 2 "memory_operand" "m"))
-	      (set (match_operand:SI 1 "s_register_operand" "=r")
-		   (match_operand:SI 3 "memory_operand" "m"))])]
+  [(parallel [(set (match_operand:SI 0 "s_register_operand" "=q,r")
+		   (match_operand:SI 2 "memory_operand" "m,m"))
+	      (set (match_operand:SI 1 "s_register_operand" "=q,r")
+		   (match_operand:SI 3 "memory_operand" "m,m"))])]
   "TARGET_LDRD && TARGET_ARM && reload_completed
   && valid_operands_ldrd_strd (operands, true)"
   {
@@ -173,14 +173,17 @@ (define_insn "*arm_ldrd"
 	(symbol_ref "arm_count_ldrdstrd_insns (operands, true) * 4"))
    (set (attr "ce_count") (symbol_ref "get_attr_length (insn) / 4"))
    (set_attr "type" "load_8")
-   (set_attr "predicable" "yes")]
-)
+   (set_attr "predicable" "yes")
+   (set (attr "arch_enabled")
+   	(if_then_else (and (match_test "TARGET_REALLY_IWMMXT")
+			   (eq_attr "alternative" "0"))
+		      (const_string "no") (const_string "yes")))])
 
 (define_insn "*arm_strd"
-  [(parallel [(set (match_operand:SI 2 "memory_operand" "=m")
-		   (match_operand:SI 0 "s_register_operand" "r"))
-	      (set (match_operand:SI 3 "memory_operand" "=m")
-		   (match_operand:SI 1 "s_register_operand" "r"))])]
+  [(parallel [(set (match_operand:SI 2 "memory_operand" "=m,m")
+		   (match_operand:SI 0 "s_register_operand" "q,r"))
+	      (set (match_operand:SI 3 "memory_operand" "=m,m")
+		   (match_operand:SI 1 "s_register_operand" "q,r"))])]
   "TARGET_LDRD && TARGET_ARM && reload_completed
   && valid_operands_ldrd_strd (operands, false)"
   {
@@ -193,5 +196,8 @@ (define_insn "*arm_strd"
 	(symbol_ref "arm_count_ldrdstrd_insns (operands, false) * 4"))
    (set (attr "ce_count") (symbol_ref "get_attr_length (insn) / 4"))
    (set_attr "type" "store_8")
-   (set_attr "predicable" "yes")]
-)
+   (set_attr "predicable" "yes")
+   (set (attr "arch_enabled")
+   	(if_then_else (and (match_test "TARGET_REALLY_IWMMXT")
+			   (eq_attr "alternative" "0"))
+		      (const_string "no") (const_string "yes")))])

	Jakub
Christophe Lyon Feb. 8, 2019, 10:06 a.m. UTC | #5
On Fri, 8 Feb 2019 at 10:51, Jakub Jelinek <jakub@redhat.com> wrote:
>
> On Fri, Feb 08, 2019 at 10:18:03AM +0100, Christophe Lyon wrote:
> > I'm afaid this patch causes several regressions. Maybe they have
> > already been fixed post-commit (I have several validations for later
> > commits still running)?
>
> The following patch fixes the single ICE I've tried to reproduce.
> While iwmmxt.md movdi pattern uses =m, r and =r, m constraints, both
> arm.md and vfp.md movdi patterns use =m, q and =q, m constraints and thus
> allow also ip register.  The following patch is an attempt to do the same
> thing, just in the same patterns through arch_enabled attribute.
>
> Completely untested.
>
> 2019-02-08  Jakub Jelinek  <jakub@redhat.com>
>
>         PR bootstrap/88714
>         * config/arm/ldrdstrd.md (*arm_ldrd, *arm_strd): Add alternative with
>         q constraint instead of r, enable it only if not TARGET_REALLY_IWMMXT.

I've started validations with this patch, I expect the results later today.
Thanks

>
> --- gcc/config/arm/ldrdstrd.md.jj       2019-02-07 17:33:38.841669141 +0100
> +++ gcc/config/arm/ldrdstrd.md  2019-02-08 10:42:56.515325579 +0100
> @@ -157,10 +157,10 @@ (define_peephole2 ; swap the destination
>  ;; We use gen_operands_ldrd_strd() with a modify argument as false so that the
>  ;; operands are not changed.
>  (define_insn "*arm_ldrd"
> -  [(parallel [(set (match_operand:SI 0 "s_register_operand" "=r")
> -                  (match_operand:SI 2 "memory_operand" "m"))
> -             (set (match_operand:SI 1 "s_register_operand" "=r")
> -                  (match_operand:SI 3 "memory_operand" "m"))])]
> +  [(parallel [(set (match_operand:SI 0 "s_register_operand" "=q,r")
> +                  (match_operand:SI 2 "memory_operand" "m,m"))
> +             (set (match_operand:SI 1 "s_register_operand" "=q,r")
> +                  (match_operand:SI 3 "memory_operand" "m,m"))])]
>    "TARGET_LDRD && TARGET_ARM && reload_completed
>    && valid_operands_ldrd_strd (operands, true)"
>    {
> @@ -173,14 +173,17 @@ (define_insn "*arm_ldrd"
>         (symbol_ref "arm_count_ldrdstrd_insns (operands, true) * 4"))
>     (set (attr "ce_count") (symbol_ref "get_attr_length (insn) / 4"))
>     (set_attr "type" "load_8")
> -   (set_attr "predicable" "yes")]
> -)
> +   (set_attr "predicable" "yes")
> +   (set (attr "arch_enabled")
> +       (if_then_else (and (match_test "TARGET_REALLY_IWMMXT")
> +                          (eq_attr "alternative" "0"))
> +                     (const_string "no") (const_string "yes")))])
>
>  (define_insn "*arm_strd"
> -  [(parallel [(set (match_operand:SI 2 "memory_operand" "=m")
> -                  (match_operand:SI 0 "s_register_operand" "r"))
> -             (set (match_operand:SI 3 "memory_operand" "=m")
> -                  (match_operand:SI 1 "s_register_operand" "r"))])]
> +  [(parallel [(set (match_operand:SI 2 "memory_operand" "=m,m")
> +                  (match_operand:SI 0 "s_register_operand" "q,r"))
> +             (set (match_operand:SI 3 "memory_operand" "=m,m")
> +                  (match_operand:SI 1 "s_register_operand" "q,r"))])]
>    "TARGET_LDRD && TARGET_ARM && reload_completed
>    && valid_operands_ldrd_strd (operands, false)"
>    {
> @@ -193,5 +196,8 @@ (define_insn "*arm_strd"
>         (symbol_ref "arm_count_ldrdstrd_insns (operands, false) * 4"))
>     (set (attr "ce_count") (symbol_ref "get_attr_length (insn) / 4"))
>     (set_attr "type" "store_8")
> -   (set_attr "predicable" "yes")]
> -)
> +   (set_attr "predicable" "yes")
> +   (set (attr "arch_enabled")
> +       (if_then_else (and (match_test "TARGET_REALLY_IWMMXT")
> +                          (eq_attr "alternative" "0"))
> +                     (const_string "no") (const_string "yes")))])
>
>         Jakub
Jakub Jelinek Feb. 8, 2019, 10:23 a.m. UTC | #6
On Fri, Feb 08, 2019 at 11:06:02AM +0100, Christophe Lyon wrote:
> On Fri, 8 Feb 2019 at 10:51, Jakub Jelinek <jakub@redhat.com> wrote:
> >
> > On Fri, Feb 08, 2019 at 10:18:03AM +0100, Christophe Lyon wrote:
> > > I'm afaid this patch causes several regressions. Maybe they have
> > > already been fixed post-commit (I have several validations for later
> > > commits still running)?
> >
> > The following patch fixes the single ICE I've tried to reproduce.
> > While iwmmxt.md movdi pattern uses =m, r and =r, m constraints, both
> > arm.md and vfp.md movdi patterns use =m, q and =q, m constraints and thus
> > allow also ip register.  The following patch is an attempt to do the same
> > thing, just in the same patterns through arch_enabled attribute.
> >
> > Completely untested.
> >
> > 2019-02-08  Jakub Jelinek  <jakub@redhat.com>
> >
> >         PR bootstrap/88714
> >         * config/arm/ldrdstrd.md (*arm_ldrd, *arm_strd): Add alternative with
> >         q constraint instead of r, enable it only if not TARGET_REALLY_IWMMXT.
> 
> I've started validations with this patch, I expect the results later today.

Thanks.  Note, I don't understand why iwmmxt.md doesn't use q constraint, if
it is only some omission, or some hw requirement.  So, the patch just
follows what iwmmxt.md does.  I guess because iwmmxt.md movsi also uses r
constraint, ip really shouldn't appear on that target.  But if just changing
all r constraints to q without any arch_enabled works, that would be even
simpler.

	Jakub
Matthew Malcomson Feb. 8, 2019, 11:29 a.m. UTC | #7
On 08/02/19 10:23, Jakub Jelinek wrote:
> On Fri, Feb 08, 2019 at 11:06:02AM +0100, Christophe Lyon wrote:
>> On Fri, 8 Feb 2019 at 10:51, Jakub Jelinek <jakub@redhat.com> wrote:
>>>
>>> On Fri, Feb 08, 2019 at 10:18:03AM +0100, Christophe Lyon wrote:
>>>> I'm afaid this patch causes several regressions. Maybe they have
>>>> already been fixed post-commit (I have several validations for later
>>>> commits still running)?
>>>
>>> The following patch fixes the single ICE I've tried to reproduce.
>>> While iwmmxt.md movdi pattern uses =m, r and =r, m constraints, both
>>> arm.md and vfp.md movdi patterns use =m, q and =q, m constraints and thus
>>> allow also ip register.  The following patch is an attempt to do the same
>>> thing, just in the same patterns through arch_enabled attribute.
>>>
>>> Completely untested.
>>>
>>> 2019-02-08  Jakub Jelinek  <jakub@redhat.com>
>>>
>>>          PR bootstrap/88714
>>>          * config/arm/ldrdstrd.md (*arm_ldrd, *arm_strd): Add alternative with
>>>          q constraint instead of r, enable it only if not TARGET_REALLY_IWMMXT.
>>
>> I've started validations with this patch, I expect the results later today.
> 
> Thanks.  Note, I don't understand why iwmmxt.md doesn't use q constraint, if
> it is only some omission, or some hw requirement.  So, the patch just
> follows what iwmmxt.md does.  I guess because iwmmxt.md movsi also uses r
> constraint, ip really shouldn't appear on that target.  But if just changing
> all r constraints to q without any arch_enabled works, that would be even
> simpler.
> 
> 	Jakub
> 

Apologies for the break to everyone, and thanks Jakub for the fix.

I believe my mistake was that I didn't match the behaviour of the 
function `operands_ok_ldrd_strd` (that the peepholes use) and the 
constraints: so the peepholes allow using ip while the insn patterns don't.


I'm pretty sure there's no difference between the iwmmxt target and 
others so believe your simpler fix of just using 'q' is a good idea.
(there's no difference in gas and no documentation I have found mentions 
a difference).


I had decided to use 'r' instead of 'q' based on the phrase "ARM 
strongly recommends that you do not use R12 for Rt." under the 
"doubleword register restrictions" heading in the link below
http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0802a/CIHGJHED.html

But don't think the statement is something that *must* be followed, 
since it's a recommendation in the same list as hard requirements. 
There's no mention of this in any ARM ARM that I can find.

Matthew
Jakub Jelinek Feb. 8, 2019, 11:40 a.m. UTC | #8
On Fri, Feb 08, 2019 at 11:29:10AM +0000, Matthew Malcomson wrote:
> I'm pretty sure there's no difference between the iwmmxt target and 
> others so believe your simpler fix of just using 'q' is a good idea.
> (there's no difference in gas and no documentation I have found mentions 
> a difference).

The simpler patch would be then (but of course in that case the question is
why iwmmxt.md doesn't use those q constraints for the output_move_double
alternatives).

2019-02-08  Jakub Jelinek  <jakub@redhat.com>

	PR bootstrap/88714
	* config/arm/ldrdstrd.md (*arm_ldrd, *arm_strd): Use q constraint
	instead of r.

--- gcc/config/arm/ldrdstrd.md.jj	2019-02-08 11:25:42.368916124 +0100
+++ gcc/config/arm/ldrdstrd.md	2019-02-08 12:38:33.647585108 +0100
@@ -157,9 +157,9 @@ (define_peephole2 ; swap the destination
 ;; We use gen_operands_ldrd_strd() with a modify argument as false so that the
 ;; operands are not changed.
 (define_insn "*arm_ldrd"
-  [(parallel [(set (match_operand:SI 0 "s_register_operand" "=r")
+  [(parallel [(set (match_operand:SI 0 "s_register_operand" "=q")
 		   (match_operand:SI 2 "memory_operand" "m"))
-	      (set (match_operand:SI 1 "s_register_operand" "=r")
+	      (set (match_operand:SI 1 "s_register_operand" "=q")
 		   (match_operand:SI 3 "memory_operand" "m"))])]
   "TARGET_LDRD && TARGET_ARM && reload_completed
   && valid_operands_ldrd_strd (operands, true)"
@@ -178,9 +178,9 @@ (define_insn "*arm_ldrd"
 
 (define_insn "*arm_strd"
   [(parallel [(set (match_operand:SI 2 "memory_operand" "=m")
-		   (match_operand:SI 0 "s_register_operand" "r"))
+		   (match_operand:SI 0 "s_register_operand" "q"))
 	      (set (match_operand:SI 3 "memory_operand" "=m")
-		   (match_operand:SI 1 "s_register_operand" "r"))])]
+		   (match_operand:SI 1 "s_register_operand" "q"))])]
   "TARGET_LDRD && TARGET_ARM && reload_completed
   && valid_operands_ldrd_strd (operands, false)"
   {


	Jakub
Christophe Lyon Feb. 10, 2019, 9:42 a.m. UTC | #9
On Fri, 8 Feb 2019 at 12:40, Jakub Jelinek <jakub@redhat.com> wrote:
>
> On Fri, Feb 08, 2019 at 11:29:10AM +0000, Matthew Malcomson wrote:
> > I'm pretty sure there's no difference between the iwmmxt target and
> > others so believe your simpler fix of just using 'q' is a good idea.
> > (there's no difference in gas and no documentation I have found mentions
> > a difference).
>
> The simpler patch would be then (but of course in that case the question is
> why iwmmxt.md doesn't use those q constraints for the output_move_double
> alternatives).
>
> 2019-02-08  Jakub Jelinek  <jakub@redhat.com>
>
>         PR bootstrap/88714
>         * config/arm/ldrdstrd.md (*arm_ldrd, *arm_strd): Use q constraint
>         instead of r.
>

Both this simple patch or the previous fix all the ICEs I reported, thanks.

Of course, the scan-assembler failures remain to be fixed.

> --- gcc/config/arm/ldrdstrd.md.jj       2019-02-08 11:25:42.368916124 +0100
> +++ gcc/config/arm/ldrdstrd.md  2019-02-08 12:38:33.647585108 +0100
> @@ -157,9 +157,9 @@ (define_peephole2 ; swap the destination
>  ;; We use gen_operands_ldrd_strd() with a modify argument as false so that the
>  ;; operands are not changed.
>  (define_insn "*arm_ldrd"
> -  [(parallel [(set (match_operand:SI 0 "s_register_operand" "=r")
> +  [(parallel [(set (match_operand:SI 0 "s_register_operand" "=q")
>                    (match_operand:SI 2 "memory_operand" "m"))
> -             (set (match_operand:SI 1 "s_register_operand" "=r")
> +             (set (match_operand:SI 1 "s_register_operand" "=q")
>                    (match_operand:SI 3 "memory_operand" "m"))])]
>    "TARGET_LDRD && TARGET_ARM && reload_completed
>    && valid_operands_ldrd_strd (operands, true)"
> @@ -178,9 +178,9 @@ (define_insn "*arm_ldrd"
>
>  (define_insn "*arm_strd"
>    [(parallel [(set (match_operand:SI 2 "memory_operand" "=m")
> -                  (match_operand:SI 0 "s_register_operand" "r"))
> +                  (match_operand:SI 0 "s_register_operand" "q"))
>               (set (match_operand:SI 3 "memory_operand" "=m")
> -                  (match_operand:SI 1 "s_register_operand" "r"))])]
> +                  (match_operand:SI 1 "s_register_operand" "q"))])]
>    "TARGET_LDRD && TARGET_ARM && reload_completed
>    && valid_operands_ldrd_strd (operands, false)"
>    {
>
>
>         Jakub
Jakub Jelinek Feb. 10, 2019, 11:11 a.m. UTC | #10
On Sun, Feb 10, 2019 at 10:42:55AM +0100, Christophe Lyon wrote:
> > 2019-02-08  Jakub Jelinek  <jakub@redhat.com>
> >
> >         PR bootstrap/88714
> >         * config/arm/ldrdstrd.md (*arm_ldrd, *arm_strd): Use q constraint
> >         instead of r.
> >
> 
> Both this simple patch or the previous fix all the ICEs I reported, thanks.
> 
> Of course, the scan-assembler failures remain to be fixed.

Thanks.  Is the patch ok for trunk then (which one)?

There is yet another variant I guess, using =q constraint just on the
operand 0, because valid_operands_ldrd_strd requires that the first reg
is even and second one higher and as the only difference between q and r
(CORE_REGS vs. GENERAL_REGS) is the ip register which has regno 12, the
second operand must not be ip anyway.

> > --- gcc/config/arm/ldrdstrd.md.jj       2019-02-08 11:25:42.368916124 +0100
> > +++ gcc/config/arm/ldrdstrd.md  2019-02-08 12:38:33.647585108 +0100
> > @@ -157,9 +157,9 @@ (define_peephole2 ; swap the destination
> >  ;; We use gen_operands_ldrd_strd() with a modify argument as false so that the
> >  ;; operands are not changed.
> >  (define_insn "*arm_ldrd"
> > -  [(parallel [(set (match_operand:SI 0 "s_register_operand" "=r")
> > +  [(parallel [(set (match_operand:SI 0 "s_register_operand" "=q")
> >                    (match_operand:SI 2 "memory_operand" "m"))
> > -             (set (match_operand:SI 1 "s_register_operand" "=r")
> > +             (set (match_operand:SI 1 "s_register_operand" "=q")
> >                    (match_operand:SI 3 "memory_operand" "m"))])]
> >    "TARGET_LDRD && TARGET_ARM && reload_completed
> >    && valid_operands_ldrd_strd (operands, true)"
> > @@ -178,9 +178,9 @@ (define_insn "*arm_ldrd"
> >
> >  (define_insn "*arm_strd"
> >    [(parallel [(set (match_operand:SI 2 "memory_operand" "=m")
> > -                  (match_operand:SI 0 "s_register_operand" "r"))
> > +                  (match_operand:SI 0 "s_register_operand" "q"))
> >               (set (match_operand:SI 3 "memory_operand" "=m")
> > -                  (match_operand:SI 1 "s_register_operand" "r"))])]
> > +                  (match_operand:SI 1 "s_register_operand" "q"))])]
> >    "TARGET_LDRD && TARGET_ARM && reload_completed
> >    && valid_operands_ldrd_strd (operands, false)"
> >    {

	Jakub
Kyrill Tkachov Feb. 11, 2019, 10:32 a.m. UTC | #11
Hi Jakub,


On 08/02/19 11:40, Jakub Jelinek wrote:
> On Fri, Feb 08, 2019 at 11:29:10AM +0000, Matthew Malcomson wrote:
> > I'm pretty sure there's no difference between the iwmmxt target and
> > others so believe your simpler fix of just using 'q' is a good idea.
> > (there's no difference in gas and no documentation I have found mentions
> > a difference).
>
> The simpler patch would be then (but of course in that case the question is
> why iwmmxt.md doesn't use those q constraints for the output_move_double
> alternatives).
>

I think this is ok.

The "q" constraint was introduced after the iwmmxt.md patterns were written and it seems
that they were just never updated to use it.
It's hard for anyone to get a hold of the relevant hardware to test iwmmxt these days,
so I suspect that path hasn't been thoroughly tested.
In my opinion the ldrd/strd-related logic there should follow the same approach as the rest of
the arm backend, that is, using 'q'.

A patch to update the iwmmxt.md constraints in that way is pre-approved.

Thanks,
Kyrill

> 2019-02-08  Jakub Jelinek  <jakub@redhat.com>
>
>         PR bootstrap/88714
>         * config/arm/ldrdstrd.md (*arm_ldrd, *arm_strd): Use q constraint
>         instead of r.
>
> --- gcc/config/arm/ldrdstrd.md.jj       2019-02-08 11:25:42.368916124 +0100
> +++ gcc/config/arm/ldrdstrd.md  2019-02-08 12:38:33.647585108 +0100
> @@ -157,9 +157,9 @@ (define_peephole2 ; swap the destination
>  ;; We use gen_operands_ldrd_strd() with a modify argument as false so that the
>  ;; operands are not changed.
>  (define_insn "*arm_ldrd"
> -  [(parallel [(set (match_operand:SI 0 "s_register_operand" "=r")
> +  [(parallel [(set (match_operand:SI 0 "s_register_operand" "=q")
>                     (match_operand:SI 2 "memory_operand" "m"))
> -             (set (match_operand:SI 1 "s_register_operand" "=r")
> +             (set (match_operand:SI 1 "s_register_operand" "=q")
>                     (match_operand:SI 3 "memory_operand" "m"))])]
>    "TARGET_LDRD && TARGET_ARM && reload_completed
>    && valid_operands_ldrd_strd (operands, true)"
> @@ -178,9 +178,9 @@ (define_insn "*arm_ldrd"
>
>  (define_insn "*arm_strd"
>    [(parallel [(set (match_operand:SI 2 "memory_operand" "=m")
> -                  (match_operand:SI 0 "s_register_operand" "r"))
> +                  (match_operand:SI 0 "s_register_operand" "q"))
>                (set (match_operand:SI 3 "memory_operand" "=m")
> -                  (match_operand:SI 1 "s_register_operand" "r"))])]
> +                  (match_operand:SI 1 "s_register_operand" "q"))])]
>    "TARGET_LDRD && TARGET_ARM && reload_completed
>    && valid_operands_ldrd_strd (operands, false)"
>    {
>
>
>         Jakub
Jakub Jelinek Feb. 11, 2019, 11:08 a.m. UTC | #12
On Mon, Feb 11, 2019 at 10:32:23AM +0000, Kyrill Tkachov wrote:
> I think this is ok.

Ok, committed the simpler version.

> The "q" constraint was introduced after the iwmmxt.md patterns were written and it seems
> that they were just never updated to use it.
> It's hard for anyone to get a hold of the relevant hardware to test iwmmxt these days,
> so I suspect that path hasn't been thoroughly tested.
> In my opinion the ldrd/strd-related logic there should follow the same approach as the rest of
> the arm backend, that is, using 'q'.
> 
> A patch to update the iwmmxt.md constraints in that way is pre-approved.

Thinking about it some more, given that the "q" constraint has been
introduced exactly for the ldrdstrd.md created movdi patterns
(https://gcc.gnu.org/ml/gcc-patches/2013-02/msg00604.html),
we don't generate those anymore, and r13 aka sp is a FIXED_REGISTERS, I
wonder if instead of that we shouldn't change *arm_movdi and *movdi_vfp
patterns (what about *movdf_soft_insn ?) to use r constraint again - the RA
shouldn't be putting DImode regs at ip, because only half of that register
is non-fixed and previously the only way to get there was ldrdstrd
peephole2s, but those use *arm_ldrd/*arm_strd patterns now.

So like the patch below (though, I have only limited possibilities to test
this, can throw it in armv7hl-linux-gnueabi distro build).

2019-02-11  Jakub Jelinek  <jakub@redhat.com>

	PR bootstrap/88714
	* config/arm/arm.md (*arm_movdi, *movdf_soft_insn): Use "r" instead of
	"q" constraint.
	* config/arm/vfp.md (*movdi_vfp): Likewise.
	* config/arm/ldrdstrd.md (*arm_ldrd, *arm_strd): Use "r" instead of
	"q" constraint for operands[1].

--- gcc/config/arm/arm.md.jj	2019-01-31 00:26:04.417738975 +0100
+++ gcc/config/arm/arm.md	2019-02-11 12:02:32.778707056 +0100
@@ -5817,8 +5817,8 @@ (define_expand "movdi"
 )
 
 (define_insn "*arm_movdi"
-  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, q, m")
-	(match_operand:DI 1 "di_operand"              "rDa,Db,Dc,mi,q"))]
+  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m")
+	(match_operand:DI 1 "di_operand"              "rDa,Db,Dc,mi,r"))]
   "TARGET_32BIT
    && !(TARGET_HARD_FLOAT)
    && !TARGET_IWMMXT
@@ -7102,8 +7102,8 @@ (define_expand "reload_outdf"
 )
 
 (define_insn "*movdf_soft_insn"
-  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,q,m")
-	(match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,q"))]
+  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,r,m")
+	(match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,r"))]
   "TARGET_32BIT && TARGET_SOFT_FLOAT
    && (   register_operand (operands[0], DFmode)
        || register_operand (operands[1], DFmode))"
--- gcc/config/arm/vfp.md.jj	2019-01-31 00:26:04.312740661 +0100
+++ gcc/config/arm/vfp.md	2019-02-11 12:03:13.232045976 +0100
@@ -307,8 +307,8 @@ (define_insn "*thumb2_movsi_vfp"
 ;; DImode moves
 
 (define_insn "*movdi_vfp"
-  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,!r,w,w, Uv")
-	(match_operand:DI 1 "di_operand"	      "r,rDa,Db,Dc,mi,mi,q,r,w,w,UvTu,w"))]
+  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,r,r,m,w,!r,w,w, Uv")
+	(match_operand:DI 1 "di_operand"	      "r,rDa,Db,Dc,mi,mi,r,r,w,w,UvTu,w"))]
   "TARGET_32BIT && TARGET_HARD_FLOAT
    && (   register_operand (operands[0], DImode)
        || register_operand (operands[1], DImode))
--- gcc/config/arm/ldrdstrd.md.jj	2019-02-11 11:39:39.977125795 +0100
+++ gcc/config/arm/ldrdstrd.md	2019-02-11 12:03:57.978314745 +0100
@@ -159,7 +159,7 @@ (define_peephole2 ; swap the destination
 (define_insn "*arm_ldrd"
   [(parallel [(set (match_operand:SI 0 "s_register_operand" "=q")
 		   (match_operand:SI 2 "memory_operand" "m"))
-	      (set (match_operand:SI 1 "s_register_operand" "=q")
+	      (set (match_operand:SI 1 "s_register_operand" "=r")
 		   (match_operand:SI 3 "memory_operand" "m"))])]
   "TARGET_LDRD && TARGET_ARM && reload_completed
   && valid_operands_ldrd_strd (operands, true)"
@@ -180,7 +180,7 @@ (define_insn "*arm_strd"
   [(parallel [(set (match_operand:SI 2 "memory_operand" "=m")
 		   (match_operand:SI 0 "s_register_operand" "q"))
 	      (set (match_operand:SI 3 "memory_operand" "=m")
-		   (match_operand:SI 1 "s_register_operand" "q"))])]
+		   (match_operand:SI 1 "s_register_operand" "r"))])]
   "TARGET_LDRD && TARGET_ARM && reload_completed
   && valid_operands_ldrd_strd (operands, false)"
   {


	Jakub
Matthew Malcomson Feb. 11, 2019, 2:35 p.m. UTC | #13
On 10/02/19 09:42, Christophe Lyon wrote:
> 
> Both this simple patch or the previous fix all the ICEs I reported, thanks.
> 
> Of course, the scan-assembler failures remain to be fixed.
> 

In the testcase I failed to account for targets that don't support arm 
mode or
targets that do not support the ldrd/strd instructions.

This patch accounts for both of these by adding some
dg-require-effective-target lines to the testcase.

This patch also adds a new effective-target procedure to check a target
supports arm ldrd/strd.
This check uses the 'r' constraint to ensure SP is not used so that it will
work for thumb mode code generation as well as arm mode.

Tested by running this testcase with cross compilers using "-march=armv5t",
"-mcpu=cortex-m3", "-mcpu-arm7tdmi", "-mcpu=cortex-a9 -march=armv5t" for 
both
arm-none-eabi and arm-none-linux-gnueabihf.
Also ran this testcase with `make check` natively.

Ok for trunk?

gcc/testsuite/ChangeLog:

2019-02-11  Matthew Malcomson  <matthew.malcomson@arm.com>

	* gcc.dg/rtl/arm/ldrd-peepholes.c: Restrict testcase.
	* lib/target-supports.exp: Add procedure to check for ldrd.



diff --git a/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c 
b/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c
index 
4c3949c0963b8482545df670c31db2d9ec0f26b3..cbb64a770f5d796250601cafe481d7c2ea13f2eb 
100644
--- a/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c
+++ b/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c
@@ -1,4 +1,6 @@
  /* { dg-do compile { target arm*-*-* } } */
+/* { dg-require-effective-target arm_arm_ok } */
+/* { dg-require-effective-target arm_ldrd_strd_ok } */
  /* { dg-skip-if "Ensure only targetting arm with TARGET_LDRD" { *-*-* 
} { "-mthumb" } { "" } } */
  /* { dg-options "-O3 -marm -fdump-rtl-peephole2" } */

diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 
a0b4b99067f9ae225bde3b6bc719e89e1ea8e0e1..16dd018e8020fdf8e104690fed6a4e8919aa4aa1 
100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4918,6 +4918,27 @@ proc check_effective_target_arm_prefer_ldrd_strd 
{ } {
      }  "-O2 -mthumb" ]
  }

+# Return true if LDRD/STRD instructions are available on this target.
+proc check_effective_target_arm_ldrd_strd_ok { } {
+    if { ![check_effective_target_arm32] } {
+      return 0;
+    }
+
+    return [check_no_compiler_messages arm_ldrd_strd_ok object {
+      int main(void)
+      {
+        __UINT64_TYPE__ a = 1, b = 10;
+        __UINT64_TYPE__ *c = &b;
+        // `a` will be in a valid register since it's a DImode quantity.
+        asm ("ldrd %0, %1"
+             : "=r" (a)
+             : "m" (c));
+        return a == 10;
+      }
+    }]
+}
+
  # Return 1 if this is a PowerPC target supporting -meabi.

  proc check_effective_target_powerpc_eabi_ok { } {
Kyrill Tkachov Feb. 14, 2019, 2:24 p.m. UTC | #14
On 2/11/19 2:35 PM, Matthew Malcomson wrote:
> On 10/02/19 09:42, Christophe Lyon wrote:
> >
> > Both this simple patch or the previous fix all the ICEs I reported, 
> thanks.
> >
> > Of course, the scan-assembler failures remain to be fixed.
> >
>
> In the testcase I failed to account for targets that don't support arm
> mode or
> targets that do not support the ldrd/strd instructions.
>
> This patch accounts for both of these by adding some
> dg-require-effective-target lines to the testcase.
>
> This patch also adds a new effective-target procedure to check a target
> supports arm ldrd/strd.
> This check uses the 'r' constraint to ensure SP is not used so that it 
> will
> work for thumb mode code generation as well as arm mode.
>
> Tested by running this testcase with cross compilers using 
> "-march=armv5t",
> "-mcpu=cortex-m3", "-mcpu-arm7tdmi", "-mcpu=cortex-a9 -march=armv5t" for
> both
> arm-none-eabi and arm-none-linux-gnueabihf.
> Also ran this testcase with `make check` natively.
>
> Ok for trunk?
>
Ok.

Thanks,

Kyrill


> gcc/testsuite/ChangeLog:
>
> 2019-02-11  Matthew Malcomson <matthew.malcomson@arm.com>
>
>         * gcc.dg/rtl/arm/ldrd-peepholes.c: Restrict testcase.
>         * lib/target-supports.exp: Add procedure to check for ldrd.
>
>
>
> diff --git a/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c
> b/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c
> index
> 4c3949c0963b8482545df670c31db2d9ec0f26b3..cbb64a770f5d796250601cafe481d7c2ea13f2eb 
>
> 100644
> --- a/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c
> +++ b/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c
> @@ -1,4 +1,6 @@
>   /* { dg-do compile { target arm*-*-* } } */
> +/* { dg-require-effective-target arm_arm_ok } */
> +/* { dg-require-effective-target arm_ldrd_strd_ok } */
>   /* { dg-skip-if "Ensure only targetting arm with TARGET_LDRD" { *-*-*
> } { "-mthumb" } { "" } } */
>   /* { dg-options "-O3 -marm -fdump-rtl-peephole2" } */
>
> diff --git a/gcc/testsuite/lib/target-supports.exp
> b/gcc/testsuite/lib/target-supports.exp
> index
> a0b4b99067f9ae225bde3b6bc719e89e1ea8e0e1..16dd018e8020fdf8e104690fed6a4e8919aa4aa1 
>
> 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -4918,6 +4918,27 @@ proc check_effective_target_arm_prefer_ldrd_strd
> { } {
>       }  "-O2 -mthumb" ]
>   }
>
> +# Return true if LDRD/STRD instructions are available on this target.
> +proc check_effective_target_arm_ldrd_strd_ok { } {
> +    if { ![check_effective_target_arm32] } {
> +      return 0;
> +    }
> +
> +    return [check_no_compiler_messages arm_ldrd_strd_ok object {
> +      int main(void)
> +      {
> +        __UINT64_TYPE__ a = 1, b = 10;
> +        __UINT64_TYPE__ *c = &b;
> +        // `a` will be in a valid register since it's a DImode quantity.
> +        asm ("ldrd %0, %1"
> +             : "=r" (a)
> +             : "m" (c));
> +        return a == 10;
> +      }
> +    }]
> +}
> +
>   # Return 1 if this is a PowerPC target supporting -meabi.
>
>   proc check_effective_target_powerpc_eabi_ok { } {
>
Jakub Jelinek Feb. 17, 2019, 7:29 a.m. UTC | #15
On Mon, Feb 11, 2019 at 12:08:32PM +0100, Jakub Jelinek wrote:
> So like the patch below (though, I have only limited possibilities to test
> this, can throw it in armv7hl-linux-gnueabi distro build).

Actually, that patch was bad, I misread the CORE_REGS vs. GENERAL_REGS
hardregset difference, it is actually sp that is not GENERAL_REGS but is
CORE_REGS, not ip.  So here is an updated patch, same except that in
ldrdstrd.md the q constraints are kept in the right spot.
To repeat, I don't think the q constraints on movdi are now needed, because
ldrdstrd doesn't use those DImode patterns and RA will not allocate a DImode hard
reg starting at ip because sp is a fixed register.

Bootstrapped/regtested on armv7hl-linux-gnueabi (distro build), ok for
trunk?

2019-02-17  Jakub Jelinek  <jakub@redhat.com>

	PR bootstrap/88714
	* config/arm/arm.md (*arm_movdi, *movdf_soft_insn): Use "r" instead of
	"q" constraint.
	* config/arm/vfp.md (*movdi_vfp): Likewise.
	* config/arm/ldrdstrd.md (*arm_ldrd, *arm_strd): Use "r" instead of
	"q" constraint for operands[0].

--- gcc/config/arm/arm.md.jj	2019-01-31 00:26:04.417738975 +0100
+++ gcc/config/arm/arm.md	2019-02-11 12:02:32.778707056 +0100
@@ -5817,8 +5817,8 @@ (define_expand "movdi"
 )
 
 (define_insn "*arm_movdi"
-  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, q, m")
-	(match_operand:DI 1 "di_operand"              "rDa,Db,Dc,mi,q"))]
+  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m")
+	(match_operand:DI 1 "di_operand"              "rDa,Db,Dc,mi,r"))]
   "TARGET_32BIT
    && !(TARGET_HARD_FLOAT)
    && !TARGET_IWMMXT
@@ -7102,8 +7102,8 @@ (define_expand "reload_outdf"
 )
 
 (define_insn "*movdf_soft_insn"
-  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,q,m")
-	(match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,q"))]
+  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,r,m")
+	(match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,r"))]
   "TARGET_32BIT && TARGET_SOFT_FLOAT
    && (   register_operand (operands[0], DFmode)
        || register_operand (operands[1], DFmode))"
--- gcc/config/arm/vfp.md.jj	2019-01-31 00:26:04.312740661 +0100
+++ gcc/config/arm/vfp.md	2019-02-11 12:03:13.232045976 +0100
@@ -307,8 +307,8 @@ (define_insn "*thumb2_movsi_vfp"
 ;; DImode moves
 
 (define_insn "*movdi_vfp"
-  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,!r,w,w, Uv")
-	(match_operand:DI 1 "di_operand"	      "r,rDa,Db,Dc,mi,mi,q,r,w,w,UvTu,w"))]
+  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,r,r,m,w,!r,w,w, Uv")
+	(match_operand:DI 1 "di_operand"	      "r,rDa,Db,Dc,mi,mi,r,r,w,w,UvTu,w"))]
   "TARGET_32BIT && TARGET_HARD_FLOAT
    && (   register_operand (operands[0], DImode)
        || register_operand (operands[1], DImode))
--- gcc/config/arm/ldrdstrd.md.jj	2019-02-11 11:39:39.977125795 +0100
+++ gcc/config/arm/ldrdstrd.md	2019-02-11 12:03:57.978314745 +0100
@@ -157,7 +157,7 @@
 ;; We use gen_operands_ldrd_strd() with a modify argument as false so that the
 ;; operands are not changed.
 (define_insn "*arm_ldrd"
-  [(parallel [(set (match_operand:SI 0 "s_register_operand" "=q")
+  [(parallel [(set (match_operand:SI 0 "s_register_operand" "=r")
 		   (match_operand:SI 2 "memory_operand" "m"))
 	      (set (match_operand:SI 1 "s_register_operand" "=q")
 		   (match_operand:SI 3 "memory_operand" "m"))])]
@@ -178,7 +178,7 @@
 
 (define_insn "*arm_strd"
   [(parallel [(set (match_operand:SI 2 "memory_operand" "=m")
-		   (match_operand:SI 0 "s_register_operand" "q"))
+		   (match_operand:SI 0 "s_register_operand" "r"))
 	      (set (match_operand:SI 3 "memory_operand" "=m")
 		   (match_operand:SI 1 "s_register_operand" "q"))])]
   "TARGET_LDRD && TARGET_ARM && reload_completed


	Jakub
Kyrill Tkachov Feb. 18, 2019, 12:47 p.m. UTC | #16
On 2/17/19 7:29 AM, Jakub Jelinek wrote:
> On Mon, Feb 11, 2019 at 12:08:32PM +0100, Jakub Jelinek wrote:
>> So like the patch below (though, I have only limited possibilities to test
>> this, can throw it in armv7hl-linux-gnueabi distro build).
> Actually, that patch was bad, I misread the CORE_REGS vs. GENERAL_REGS
> hardregset difference, it is actually sp that is not GENERAL_REGS but is
> CORE_REGS, not ip.  So here is an updated patch, same except that in
> ldrdstrd.md the q constraints are kept in the right spot.
> To repeat, I don't think the q constraints on movdi are now needed, because
> ldrdstrd doesn't use those DImode patterns and RA will not allocate a DImode hard
> reg starting at ip because sp is a fixed register.
>
> Bootstrapped/regtested on armv7hl-linux-gnueabi (distro build), ok for
> trunk?

Ok.

Thanks for working on this.

Kyrill

> 2019-02-17  Jakub Jelinek  <jakub@redhat.com>
>
> 	PR bootstrap/88714
> 	* config/arm/arm.md (*arm_movdi, *movdf_soft_insn): Use "r" instead of
> 	"q" constraint.
> 	* config/arm/vfp.md (*movdi_vfp): Likewise.
> 	* config/arm/ldrdstrd.md (*arm_ldrd, *arm_strd): Use "r" instead of
> 	"q" constraint for operands[0].
>
> --- gcc/config/arm/arm.md.jj	2019-01-31 00:26:04.417738975 +0100
> +++ gcc/config/arm/arm.md	2019-02-11 12:02:32.778707056 +0100
> @@ -5817,8 +5817,8 @@ (define_expand "movdi"
>   )
>   
>   (define_insn "*arm_movdi"
> -  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, q, m")
> -	(match_operand:DI 1 "di_operand"              "rDa,Db,Dc,mi,q"))]
> +  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m")
> +	(match_operand:DI 1 "di_operand"              "rDa,Db,Dc,mi,r"))]
>     "TARGET_32BIT
>      && !(TARGET_HARD_FLOAT)
>      && !TARGET_IWMMXT
> @@ -7102,8 +7102,8 @@ (define_expand "reload_outdf"
>   )
>   
>   (define_insn "*movdf_soft_insn"
> -  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,q,m")
> -	(match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,q"))]
> +  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,r,m")
> +	(match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,r"))]
>     "TARGET_32BIT && TARGET_SOFT_FLOAT
>      && (   register_operand (operands[0], DFmode)
>          || register_operand (operands[1], DFmode))"
> --- gcc/config/arm/vfp.md.jj	2019-01-31 00:26:04.312740661 +0100
> +++ gcc/config/arm/vfp.md	2019-02-11 12:03:13.232045976 +0100
> @@ -307,8 +307,8 @@ (define_insn "*thumb2_movsi_vfp"
>   ;; DImode moves
>   
>   (define_insn "*movdi_vfp"
> -  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,!r,w,w, Uv")
> -	(match_operand:DI 1 "di_operand"	      "r,rDa,Db,Dc,mi,mi,q,r,w,w,UvTu,w"))]
> +  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,r,r,m,w,!r,w,w, Uv")
> +	(match_operand:DI 1 "di_operand"	      "r,rDa,Db,Dc,mi,mi,r,r,w,w,UvTu,w"))]
>     "TARGET_32BIT && TARGET_HARD_FLOAT
>      && (   register_operand (operands[0], DImode)
>          || register_operand (operands[1], DImode))
> --- gcc/config/arm/ldrdstrd.md.jj	2019-02-11 11:39:39.977125795 +0100
> +++ gcc/config/arm/ldrdstrd.md	2019-02-11 12:03:57.978314745 +0100
> @@ -157,7 +157,7 @@
>   ;; We use gen_operands_ldrd_strd() with a modify argument as false so that the
>   ;; operands are not changed.
>   (define_insn "*arm_ldrd"
> -  [(parallel [(set (match_operand:SI 0 "s_register_operand" "=q")
> +  [(parallel [(set (match_operand:SI 0 "s_register_operand" "=r")
>   		   (match_operand:SI 2 "memory_operand" "m"))
>   	      (set (match_operand:SI 1 "s_register_operand" "=q")
>   		   (match_operand:SI 3 "memory_operand" "m"))])]
> @@ -178,7 +178,7 @@
>   
>   (define_insn "*arm_strd"
>     [(parallel [(set (match_operand:SI 2 "memory_operand" "=m")
> -		   (match_operand:SI 0 "s_register_operand" "q"))
> +		   (match_operand:SI 0 "s_register_operand" "r"))
>   	      (set (match_operand:SI 3 "memory_operand" "=m")
>   		   (match_operand:SI 1 "s_register_operand" "q"))])]
>     "TARGET_LDRD && TARGET_ARM && reload_completed
>
>
> 	Jakub
diff mbox series

Patch

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 79ede0db174fcce87abe8b4d18893550d4c7e2f6..485bc68a618d6ae4a1640368ccb025fe2c9e1420 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -125,6 +125,7 @@  extern rtx arm_gen_store_multiple (int *, int, rtx, int, rtx, HOST_WIDE_INT *);
 extern bool offset_ok_for_ldrd_strd (HOST_WIDE_INT);
 extern bool operands_ok_ldrd_strd (rtx, rtx, rtx, HOST_WIDE_INT, bool, bool);
 extern bool gen_operands_ldrd_strd (rtx *, bool, bool, bool);
+extern bool valid_operands_ldrd_strd (rtx *, bool);
 extern int arm_gen_movmemqi (rtx *);
 extern bool gen_movmem_ldrd_strd (rtx *);
 extern machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx);
@@ -146,6 +147,7 @@  extern const char *output_mov_long_double_arm_from_arm (rtx *);
 extern const char *output_move_double (rtx *, bool, int *count);
 extern const char *output_move_quad (rtx *);
 extern int arm_count_output_move_double_insns (rtx *);
+extern int arm_count_ldrdstrd_insns (rtx *, bool);
 extern const char *output_move_vfp (rtx *operands);
 extern const char *output_move_neon (rtx *operands);
 extern int arm_attr_length_move_neon (rtx_insn *);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 73cb8df9af1ec9d680091bb8691bcd925a1be1d3..1c336da9e5b0948ef1058c46966364510dc1ca38 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -15556,7 +15556,7 @@  mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
       *base = addr;
       return true;
     }
-  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
+  else if (GET_CODE (addr) == PLUS)
     {
       *base = XEXP (addr, 0);
       *offset = XEXP (addr, 1);
@@ -15721,7 +15721,7 @@  gen_operands_ldrd_strd (rtx *operands, bool load,
     }
 
   /* Make sure accesses are to consecutive memory locations.  */
-  if (gap != 4)
+  if (gap != GET_MODE_SIZE (SImode))
     return false;
 
   if (!align_ok_ldrd_strd (align[0], offset))
@@ -15802,6 +15802,55 @@  gen_operands_ldrd_strd (rtx *operands, bool load,
 }
 
 
+/* Return true if parallel execution of the two word-size accesses provided
+   could be satisfied with a single LDRD/STRD instruction.  Two word-size
+   accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
+   register operands and OPERANDS[2,3] are the corresponding memory operands.
+   */
+bool
+valid_operands_ldrd_strd (rtx *operands, bool load)
+{
+  int nops = 2;
+  HOST_WIDE_INT offsets[2], offset, align[2];
+  rtx base = NULL_RTX;
+  rtx cur_base, cur_offset;
+  int i, gap;
+
+  /* Check that the memory references are immediate offsets from the
+     same base register.  Extract the base register, the destination
+     registers, and the corresponding memory offsets.  */
+  for (i = 0; i < nops; i++)
+    {
+      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
+				 &align[i]))
+	return false;
+
+      if (i == 0)
+	base = cur_base;
+      else if (REGNO (base) != REGNO (cur_base))
+	return false;
+
+      offsets[i] = INTVAL (cur_offset);
+      if (GET_CODE (operands[i]) == SUBREG)
+	return false;
+    }
+
+  if (offsets[0] > offsets[1])
+    return false;
+
+  gap = offsets[1] - offsets[0];
+  offset = offsets[0];
+
+  /* Make sure accesses are to consecutive memory locations.  */
+  if (gap != GET_MODE_SIZE (SImode))
+    return false;
+
+  if (!align_ok_ldrd_strd (align[0], offset))
+    return false;
+
+  return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
+				false, load);
+}
 
 


 /* Print a symbolic form of X to the debug file, F.  */
@@ -28474,6 +28523,26 @@  arm_count_output_move_double_insns (rtx *operands)
   return count;
 }
 
+/* Same as above, but operands are a register/memory pair in SImode.
+   Assumes operands has the base register in position 0 and memory in position
+   2 (which is the order provided by the arm_{ldrd,strd} patterns).  */
+int
+arm_count_ldrdstrd_insns (rtx *operands, bool load)
+{
+  int count;
+  rtx ops[2];
+  int regnum, memnum;
+  if (load)
+    regnum = 0, memnum = 1;
+  else
+    regnum = 1, memnum = 0;
+  ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
+  ops[memnum] = adjust_address (operands[2], DImode, 0);
+  output_move_double (ops, false, &count);
+  return count;
+}
+
+
 int
 vfp3_const_double_for_fract_bits (rtx operand)
 {
diff --git a/gcc/config/arm/ldrdstrd.md b/gcc/config/arm/ldrdstrd.md
index be53d010fa6dfcf5a6854ae2b17f7cfcea25db9e..cb7a6adebbc8084a2e642ff2dcbef8b3fb16f268 100644
--- a/gcc/config/arm/ldrdstrd.md
+++ b/gcc/config/arm/ldrdstrd.md
@@ -23,37 +23,22 @@ 
 ;; The following peephole optimizations identify consecutive memory
 ;; accesses, and try to rearrange the operands to enable generation of
 ;; ldrd/strd.
+;;
+;; In many cases they behave in the same way that patterns in ldmstm.md behave,
+;; but there is extra logic in gen_operands_ldrd_strd to try and ensure the
+;; registers used are an (r<N>, r<N + 1>) pair where N is even.
 
 (define_peephole2 ; ldrd
   [(set (match_operand:SI 0 "arm_general_register_operand" "")
-        (match_operand:SI 2 "memory_operand" ""))
+	(match_operand:SI 2 "memory_operand" ""))
    (set (match_operand:SI 1 "arm_general_register_operand" "")
-        (match_operand:SI 3 "memory_operand" ""))]
+	(match_operand:SI 3 "memory_operand" ""))]
   "TARGET_LDRD"
-  [(const_int 0)]
+  [(parallel [(set (match_dup 0) (match_dup 2))
+	      (set (match_dup 1) (match_dup 3))])]
 {
   if (!gen_operands_ldrd_strd (operands, true, false, false))
     FAIL;
-  else if (TARGET_ARM)
-  {
-    /* In ARM state, the destination registers of LDRD/STRD must be
-       consecutive. We emit DImode access.  */
-    operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
-    operands[2] = adjust_address (operands[2], DImode, 0);
-    /* Emit [(set (match_dup 0) (match_dup 2))] */
-    emit_insn (gen_rtx_SET (operands[0], operands[2]));
-    DONE;
-  }
-  else if (TARGET_THUMB2)
-  {
-    /* Emit the pattern:
-       [(parallel [(set (match_dup 0) (match_dup 2))
-                   (set (match_dup 1) (match_dup 3))])] */
-    rtx t1 = gen_rtx_SET (operands[0], operands[2]);
-    rtx t2 = gen_rtx_SET (operands[1], operands[3]);
-    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
-    DONE;
-  }
 })
 
 (define_peephole2 ; strd
@@ -62,117 +47,50 @@  (define_peephole2 ; strd
    (set (match_operand:SI 3 "memory_operand" "")
 	(match_operand:SI 1 "arm_general_register_operand" ""))]
   "TARGET_LDRD"
-  [(const_int 0)]
+  [(parallel [(set (match_dup 2) (match_dup 0))
+	      (set (match_dup 3) (match_dup 1))])]
 {
   if (!gen_operands_ldrd_strd (operands, false, false, false))
     FAIL;
-  else if (TARGET_ARM)
-  {
-    /* In ARM state, the destination registers of LDRD/STRD must be
-       consecutive. We emit DImode access.  */
-    operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
-    operands[2] = adjust_address (operands[2], DImode, 0);
-    /* Emit [(set (match_dup 2) (match_dup 0))]  */
-    emit_insn (gen_rtx_SET (operands[2], operands[0]));
-    DONE;
-  }
-  else if (TARGET_THUMB2)
-  {
-    /* Emit the pattern:
-       [(parallel [(set (match_dup 2) (match_dup 0))
-                   (set (match_dup 3) (match_dup 1))])]  */
-    rtx t1 = gen_rtx_SET (operands[2], operands[0]);
-    rtx t2 = gen_rtx_SET (operands[3], operands[1]);
-    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
-    DONE;
-  }
 })
 
 ;; The following peepholes reorder registers to enable LDRD/STRD.
 (define_peephole2 ; strd of constants
   [(set (match_operand:SI 0 "arm_general_register_operand" "")
-        (match_operand:SI 4 "const_int_operand" ""))
+	(match_operand:SI 4 "const_int_operand" ""))
    (set (match_operand:SI 2 "memory_operand" "")
-        (match_dup 0))
+	(match_dup 0))
    (set (match_operand:SI 1 "arm_general_register_operand" "")
-        (match_operand:SI 5 "const_int_operand" ""))
+	(match_operand:SI 5 "const_int_operand" ""))
    (set (match_operand:SI 3 "memory_operand" "")
-        (match_dup 1))]
+	(match_dup 1))]
   "TARGET_LDRD"
-  [(const_int 0)]
+  [(set (match_dup 0) (match_dup 4))
+   (set (match_dup 1) (match_dup 5))
+   (parallel [(set (match_dup 2) (match_dup 0))
+	      (set (match_dup 3) (match_dup 1))])]
 {
   if (!gen_operands_ldrd_strd (operands, false, true, false))
     FAIL;
-  else if (TARGET_ARM)
-  {
-   rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0]));
-   operands[2] = adjust_address (operands[2], DImode, 0);
-   /* Emit the pattern:
-      [(set (match_dup 0) (match_dup 4))
-      (set (match_dup 1) (match_dup 5))
-      (set (match_dup 2) tmp)]  */
-   emit_insn (gen_rtx_SET (operands[0], operands[4]));
-   emit_insn (gen_rtx_SET (operands[1], operands[5]));
-   emit_insn (gen_rtx_SET (operands[2], tmp));
-   DONE;
-  }
-  else if (TARGET_THUMB2)
-  {
-    /* Emit the pattern:
-       [(set (match_dup 0) (match_dup 4))
-        (set (match_dup 1) (match_dup 5))
-        (parallel [(set (match_dup 2) (match_dup 0))
-                   (set (match_dup 3) (match_dup 1))])]  */
-    emit_insn (gen_rtx_SET (operands[0], operands[4]));
-    emit_insn (gen_rtx_SET (operands[1], operands[5]));
-    rtx t1 = gen_rtx_SET (operands[2], operands[0]);
-    rtx t2 = gen_rtx_SET (operands[3], operands[1]);
-    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
-    DONE;
-  }
 })
 
 (define_peephole2 ; strd of constants
   [(set (match_operand:SI 0 "arm_general_register_operand" "")
-        (match_operand:SI 4 "const_int_operand" ""))
+	(match_operand:SI 4 "const_int_operand" ""))
    (set (match_operand:SI 1 "arm_general_register_operand" "")
-        (match_operand:SI 5 "const_int_operand" ""))
+	(match_operand:SI 5 "const_int_operand" ""))
    (set (match_operand:SI 2 "memory_operand" "")
-        (match_dup 0))
+	(match_dup 0))
    (set (match_operand:SI 3 "memory_operand" "")
-        (match_dup 1))]
+	(match_dup 1))]
   "TARGET_LDRD"
-  [(const_int 0)]
+  [(set (match_dup 0) (match_dup 4))
+   (set (match_dup 1) (match_dup 5))
+   (parallel [(set (match_dup 2) (match_dup 0))
+	      (set (match_dup 3) (match_dup 1))])]
 {
   if (!gen_operands_ldrd_strd (operands, false, true, false))
      FAIL;
-  else if (TARGET_ARM)
-  {
-   rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0]));
-   operands[2] = adjust_address (operands[2], DImode, 0);
-   /* Emit the pattern
-      [(set (match_dup 0) (match_dup 4))
-       (set (match_dup 1) (match_dup 5))
-       (set (match_dup 2) tmp)]  */
-   emit_insn (gen_rtx_SET (operands[0], operands[4]));
-   emit_insn (gen_rtx_SET (operands[1], operands[5]));
-   emit_insn (gen_rtx_SET (operands[2], tmp));
-   DONE;
-  }
-  else if (TARGET_THUMB2)
-  {
-    /*  Emit the pattern:
-        [(set (match_dup 0) (match_dup 4))
-         (set (match_dup 1) (match_dup 5))
-         (parallel [(set (match_dup 2) (match_dup 0))
-                    (set (match_dup 3) (match_dup 1))])]  */
-    emit_insn (gen_rtx_SET (operands[0], operands[4]));
-    emit_insn (gen_rtx_SET (operands[1], operands[5]));
-    rtx t1 = gen_rtx_SET (operands[2], operands[0]);
-    rtx t2 = gen_rtx_SET (operands[3], operands[1]);
-    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
-    DONE;
-  }
 })
 
 ;; The following two peephole optimizations are only relevant for ARM
@@ -181,39 +99,32 @@  (define_peephole2 ; strd of constants
 (define_peephole2 ; swap the destination registers of two loads
 		  ; before a commutative operation.
   [(set (match_operand:SI 0 "arm_general_register_operand" "")
-        (match_operand:SI 2 "memory_operand" ""))
+	(match_operand:SI 2 "memory_operand" ""))
    (set (match_operand:SI 1 "arm_general_register_operand" "")
-        (match_operand:SI 3 "memory_operand" ""))
+	(match_operand:SI 3 "memory_operand" ""))
    (set (match_operand:SI 4 "arm_general_register_operand" "")
-        (match_operator:SI 5 "commutative_binary_operator"
+	(match_operator:SI 5 "commutative_binary_operator"
 			   [(match_operand 6 "arm_general_register_operand" "")
 			    (match_operand 7 "arm_general_register_operand" "") ]))]
   "TARGET_LDRD && TARGET_ARM
    && (  ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7])))
-        ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
+	||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
    && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
    && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))"
-  [(set (match_dup 0) (match_dup 2))
+  [(parallel [(set (match_dup 0) (match_dup 2))
+	      (set (match_dup 1) (match_dup 3))])
    (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))]
-  {
-    if (!gen_operands_ldrd_strd (operands, true, false, true))
-     {
-        FAIL;
-     }
-    else
-     {
-        operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
-        operands[2] = adjust_address (operands[2], DImode, 0);
-     }
-   }
-)
+{
+  if (!gen_operands_ldrd_strd (operands, true, false, true))
+    FAIL;
+})
 
 (define_peephole2 ; swap the destination registers of two loads
 		  ; before a commutative operation that sets the flags.
   [(set (match_operand:SI 0 "arm_general_register_operand" "")
-        (match_operand:SI 2 "memory_operand" ""))
+	(match_operand:SI 2 "memory_operand" ""))
    (set (match_operand:SI 1 "arm_general_register_operand" "")
-        (match_operand:SI 3 "memory_operand" ""))
+	(match_operand:SI 3 "memory_operand" ""))
    (parallel
       [(set (match_operand:SI 4 "arm_general_register_operand" "")
 	    (match_operator:SI 5 "commutative_binary_operator"
@@ -225,24 +136,62 @@  (define_peephole2 ; swap the destination registers of two loads
        ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
    && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
    && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))"
-  [(set (match_dup 0) (match_dup 2))
+  [(parallel [(set (match_dup 0) (match_dup 2))
+	      (set (match_dup 1) (match_dup 3))])
    (parallel
       [(set (match_dup 4)
 	    (match_op_dup 5 [(match_dup 6) (match_dup 7)]))
        (clobber (reg:CC CC_REGNUM))])]
-  {
-    if (!gen_operands_ldrd_strd (operands, true, false, true))
-     {
-        FAIL;
-     }
-    else
-     {
-        operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
-        operands[2] = adjust_address (operands[2], DImode, 0);
-     }
-   }
-)
+{
+  if (!gen_operands_ldrd_strd (operands, true, false, true))
+    FAIL;
+})
 
 ;; TODO: Handle LDRD/STRD with writeback:
 ;; (a) memory operands can be POST_INC, POST_DEC, PRE_MODIFY, POST_MODIFY
 ;; (b) Patterns may be followed by an update of the base address.
+
+
+;; insns matching the LDRD/STRD patterns that will get created by the above
+;; peepholes.
+;; We use gen_operands_ldrd_strd() with a modify argument as false so that the
+;; operands are not changed.
+(define_insn "*arm_ldrd"
+  [(parallel [(set (match_operand:SI 0 "s_register_operand" "=r")
+		   (match_operand:SI 2 "memory_operand" "m"))
+	      (set (match_operand:SI 1 "s_register_operand" "=r")
+		   (match_operand:SI 3 "memory_operand" "m"))])]
+  "TARGET_LDRD && TARGET_ARM && reload_completed
+  && valid_operands_ldrd_strd (operands, true)"
+  {
+    rtx op[2];
+    op[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
+    op[1] = adjust_address (operands[2], DImode, 0);
+    return output_move_double (op, true, NULL);
+  }
+  [(set (attr "length")
+	(symbol_ref "arm_count_ldrdstrd_insns (operands, true) * 4"))
+   (set (attr "ce_count") (symbol_ref "get_attr_length (insn) / 4"))
+   (set_attr "type" "load_8")
+   (set_attr "predicable" "yes")]
+)
+
+(define_insn "*arm_strd"
+  [(parallel [(set (match_operand:SI 2 "memory_operand" "=m")
+		   (match_operand:SI 0 "s_register_operand" "r"))
+	      (set (match_operand:SI 3 "memory_operand" "=m")
+		   (match_operand:SI 1 "s_register_operand" "r"))])]
+  "TARGET_LDRD && TARGET_ARM && reload_completed
+  && valid_operands_ldrd_strd (operands, false)"
+  {
+    rtx op[2];
+    op[0] = adjust_address (operands[2], DImode, 0);
+    op[1] = gen_rtx_REG (DImode, REGNO (operands[0]));
+    return output_move_double (op, true, NULL);
+  }
+  [(set (attr "length")
+	(symbol_ref "arm_count_ldrdstrd_insns (operands, false) * 4"))
+   (set (attr "ce_count") (symbol_ref "get_attr_length (insn) / 4"))
+   (set_attr "type" "store_8")
+   (set_attr "predicable" "yes")]
+)
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr88714.c b/gcc/testsuite/gcc.c-torture/execute/pr88714.c
new file mode 100644
index 0000000000000000000000000000000000000000..614ad9ac4a0662ba752532270e2d687505504d48
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr88714.c
@@ -0,0 +1,43 @@ 
+/* PR bootstrap/88714 */
+
+struct S { int a, b, c; int *d; };
+struct T { int *e, *f, *g; } *t = 0;
+int *o = 0;
+
+__attribute__((noipa))
+void bar (int *x, int y, int z, int w)
+{
+  if (w == -1)
+    {
+      if (x != 0 || y != 0 || z != 0)
+	__builtin_abort ();
+    }
+  else if (w != 0 || x != t->g || y != 0 || z != 12)
+    __builtin_abort ();
+}
+
+__attribute__((noipa)) void
+foo (struct S *x, struct S *y, int *z, int w)
+{
+  *o = w;
+  if (w)
+    bar (0, 0, 0, -1);
+  x->d = z;
+  if (y->d)
+    y->c = y->c + y->d[0];
+  bar (t->g, 0, y->c, 0);
+}
+
+int
+main ()
+{
+  int a[4] = { 8, 9, 10, 11 };
+  struct S s = { 1, 2, 3, &a[0] };
+  struct T u = { 0, 0, &a[3] };
+  o = &a[2];
+  t = &u;
+  foo (&s, &s, &a[1], 5);
+  if (s.c != 12 || s.d != &a[1])
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c b/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c
new file mode 100644
index 0000000000000000000000000000000000000000..ff209c5df29765441bbe9481ac8caf7bbc6af8f7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c
@@ -0,0 +1,441 @@ 
+/* { dg-do compile } */
+/* { dg-skip-if "Ensure only targetting arm with TARGET_LDRD" { *-*-* } { "-mthumb" } { "" } } */
+/* { dg-options "-O3 -marm -fdump-rtl-peephole2" } */
+
+/*
+   Test file contains testcases that are there to check.
+      1) Each peephole generates the expected patterns.
+      2) These patterns match the expected define_insns and generate ldrd/strd.
+      2) Memory alias information is not lost in the peephole transformation.
+
+   I don't check the peephole pass on most of the functions here but just check
+   the correct assembly is output.  The ldrd/strd peepholes only generate a
+   different pattern to the ldm/stm peepholes in some specific cases, and those
+   are checked.
+
+   The exceptions are tested by the crafted testcases at the end of this file
+   that are named in the pattern foo_x[[:digit:]].
+
+   The first testcase (foo_mem_11) demonstrates bug 88714 is fixed by checking
+   that both alias sets in the RTL are preserved.
+
+   All other testcases are only checked to see that they generate a LDRD or
+   STRD instruction accordingly.
+ */
+
+
+/* Example of bugzilla 88714 -- memory aliasing info needs to be retained.  */
+int __RTL (startwith ("peephole2")) foo_mem_11 (int *a, int *b)
+{
+(function "foo_mem_11"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 101 (set (reg:SI r2)
+		      (mem/c:SI (reg:SI r0) [1 S4 A64])) "/home/matmal01/test.c":18)
+      (cinsn 102 (set (reg:SI r3)
+		      (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [2 S4 A32])) "/home/matmal01/test.c":18)
+      (cinsn 103 (set (reg:SI r0)
+		      (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18)
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+/* { dg-final { scan-rtl-dump {Function foo_mem_11.*\(mem/c:SI[^\n]*\[1.*\(mem/c:SI[^\n]*\n[^\n]*\[2.*Function foo11} "peephole2" } } */
+
+/* ldrd plain peephole2.  */
+int __RTL (startwith ("peephole2")) foo11 (int *a)
+{
+(function "foo11"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 101 (set (reg:SI r2)
+		      (mem/c:SI (reg:SI r0) [0 S4 A64])) "/home/matmal01/test.c":18)
+      (cinsn 102 (set (reg:SI r3)
+		      (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])) "/home/matmal01/test.c":18)
+      (cinsn 103 (set (reg:SI r0)
+		      (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18)
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+
+/* ldrd plain peephole2, which accepts insns initially out of order.  */
+int __RTL (startwith ("peephole2")) foo11_alt (int *a)
+{
+(function "foo11_alt"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 102 (set (reg:SI r3)
+		      (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])) "/home/matmal01/test.c":18)
+      (cinsn 101 (set (reg:SI r2)
+		      (mem/c:SI (reg:SI r0) [0 S4 A64])) "/home/matmal01/test.c":18)
+      (cinsn 103 (set (reg:SI r0)
+		      (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18)
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+
+/* strd plain peephole2.  */
+int __RTL (startwith ("peephole2")) foo12 (int *a)
+{
+(function "foo12"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
+		      (reg:SI r2)) "/home/matmal01/test.c":18)
+      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
+		      (reg:SI r3)) "/home/matmal01/test.c":18)
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+
+/* strd of constants -- store interleaved with constant move into register.
+   Use same register twice to ensure we use the relevant pattern.  */
+int __RTL (startwith ("peephole2")) foo13 (int *a)
+{
+(function "foo13"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 99  (set (reg:SI r2)
+		      (const_int 1)) "/home/matmal01/test.c":18)
+      (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
+		      (reg:SI r2)) "/home/matmal01/test.c":18)
+      (cinsn 100 (set (reg:SI r2)
+		      (const_int 0)) "/home/matmal01/test.c":18)
+      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
+		      (reg:SI r2)) "/home/matmal01/test.c":18)
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+
+/* strd of constants -- stores after constant moves into registers.
+   Use registers out of order, is only way to avoid plain strd while hitting
+   this pattern.  */
+int __RTL (startwith ("peephole2")) foo14 (int *a)
+{
+(function "foo14"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 99  (set (reg:SI r3)
+		      (const_int 1)) "/home/matmal01/test.c":18)
+      (cinsn 100 (set (reg:SI r2)
+		      (const_int 0)) "/home/matmal01/test.c":18)
+      (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
+		      (reg:SI r3)) "/home/matmal01/test.c":18)
+      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
+		      (reg:SI r2)) "/home/matmal01/test.c":18)
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+
+/* swap the destination registers of two loads before a commutative operation.
+   Here the commutative operation is what the peephole uses to know it can
+   swap the register loads around.  */
+int __RTL (startwith ("peephole2")) foo15 (int *a)
+{
+(function "foo15"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 100 (set (reg:SI r3)
+		      (mem/c:SI (reg:SI r0) [0 S4 A64])) "/home/matmal01/test.c":18)
+      (cinsn 101 (set (reg:SI r2)
+		      (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])) "/home/matmal01/test.c":18)
+      (cinsn 102 (set (reg:SI r0)
+		      (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18
+       (expr_list:REG_DEAD (reg:SI r2)
+	(expr_list:REG_DEAD (reg:SI r3)
+	 (nil))))
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+
+
+/* swap the destination registers of two loads before a commutative operation
+   that sets the flags.  */
+/*
+   NOTE Can't make a testcase for this pattern since there are no insn patterns
+   matching the parallel insn in the peephole.
+
+   i.e. until some define_insn is defined matching that insn that peephole can
+   never match in real code, and in artificial RTL code any pattern that can
+   match it will cause an ICE.
+
+int __RTL (startwith ("peephole2")) foo16 (int *a)
+{
+(function "foo16"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 100 (set (reg:SI r3)
+		      (mem/c:SI (reg:SI r0) [0 S4 A64])) "/home/matmal01/test.c":18)
+      (cinsn 101 (set (reg:SI r2)
+		      (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])) "/home/matmal01/test.c":18)
+      (cinsn 103 (parallel
+		  [(set (reg:SI r0)
+			(and:SI (reg:SI r3) (reg:SI r2)))
+		  (clobber (reg:CC cc))]) "/home/matmal01/test.c":18
+       (expr_list:REG_DEAD (reg:SI r2)
+	(expr_list:REG_DEAD (reg:SI r3)
+	 (nil))))
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+*/
+
+
+/* Making patterns that will behave differently between the LDM/STM peepholes
+   and LDRD/STRD peepholes.
+   gen_operands_ldrd_strd() uses peep2_find_free_register() to find spare
+   registers to use.
+   peep2_find_free_register() only ever returns registers marked in
+   call_used_regs, hence we make sure to leave register 2 and 3 available (as
+   they are always on in the defaults marked by CALL_USED_REGISTERS).  */
+
+/* gen_operands_ldrd_strd() purposefully finds an even register to look at
+   which would treat the following pattern differently to the stm peepholes.
+ */
+int __RTL (startwith ("peephole2")) foo_x1 (int *a)
+{
+(function "foo_x1"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 99  (set (reg:SI r5)
+		      (const_int 1)) "/home/matmal01/test.c":18)
+      (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
+		      (reg:SI r5)) "/home/matmal01/test.c":18)
+      (cinsn 100 (set (reg:SI r5)
+		      (const_int 0)) "/home/matmal01/test.c":18)
+      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
+		      (reg:SI r5)) "/home/matmal01/test.c":18
+       (expr_list:REG_DEAD (reg:SI r5)
+	 (nil)))
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+/* Ensure we generated a parallel that started with a set from an even register.
+   i.e.
+   (parallel [
+     (set (mem
+	  (reg:SI <even>
+   */
+/* { dg-final { scan-rtl-dump {Function foo_x1.*\(parallel \[\n[^\n]*\(set \(mem[^\n]*\n[^\n]*\(reg:SI (?:[12])?[2468] r(?:[12])?[2468]\).*Function foo_x2} "peephole2" } } */
+
+/* Like above gen_operands_ldrd_strd() would look to start with an even
+   register while gen_const_stm_seq() doesn't care.  */
+int __RTL (startwith ("peephole2")) foo_x2 (int *a)
+{
+(function "foo_x2"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 99  (set (reg:SI r5)
+		      (const_int 1)) "/home/matmal01/test.c":18)
+      (cinsn 100 (set (reg:SI r6)
+		      (const_int 0)) "/home/matmal01/test.c":18)
+      (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
+		      (reg:SI r5)) "/home/matmal01/test.c":18)
+      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
+		      (reg:SI r6)) "/home/matmal01/test.c":18)
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+/* Ensure generated parallel starts with a set from an even register (as foo_x1).  */
+/* { dg-final { scan-rtl-dump {Function foo_x2.*\(parallel \[\n[^\n]*\(set \(mem[^\n]*\n[^\n]*\(reg:SI (?:[12])?[2468] r(?:[12])?[2468]\).*Function foo_x3} "peephole2" } } */
+
+/* When storing multiple values into a register that will be used later, ldrd
+   searches for another register to use instead of just giving up.  */
+int __RTL (startwith ("peephole2")) foo_x3 (int *a)
+{
+(function "foo_x3"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 99  (set (reg:SI r3)
+		      (const_int 1)) "/home/matmal01/test.c":18)
+      (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
+		      (reg:SI r3)) "/home/matmal01/test.c":18)
+      (cinsn 100 (set (reg:SI r3)
+		      (const_int 0)) "/home/matmal01/test.c":18)
+      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
+		      (reg:SI r3)) "/home/matmal01/test.c":18)
+      (cinsn 103 (set (reg:SI r0)
+		      (plus:SI (reg:SI r0) (reg:SI r3))) "/home/matmal01/test.c":18)
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+/* Ensure generated parallel starts with a set from an even register (as foo_x1).  */
+/* { dg-final { scan-rtl-dump {Function foo_x3.*\(parallel \[\n[^\n]*\(set \(mem[^\n]*\n[^\n]*\(reg:SI (?:[12])?[2468] r(?:[12])?[2468]\).*Function foo_x4} "peephole2" } } */
+
+/* ldrd gen_peephole2_11 but using plus 8 and plus 12 in the offsets.  */
+int __RTL (startwith ("peephole2")) foo_x4 (int *a)
+{
+(function "foo_x4"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 101 (set (reg:SI r2)
+		      (mem/c:SI (plus:SI (reg:SI r0) (const_int 8)) [0 S4 A64])) "/home/matmal01/test.c":18)
+      (cinsn 102 (set (reg:SI r3)
+		      (mem/c:SI (plus:SI (reg:SI r0) (const_int 12)) [0 S4 A32])) "/home/matmal01/test.c":18)
+      (cinsn 103 (set (reg:SI r0)
+		      (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18)
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+/* Ensure generated parallel starts with a set from the appropriate offset from
+   register 0.
+(parallel [
+	    (set (reg:SI ...
+		(mem/c:SI (plus:SI (reg:SI 0 r0)
+			(const_int 8 .*
+*/
+/* { dg-final { scan-rtl-dump {Function foo_x4.*\(parallel \[\n[^\n]*\(set \(reg:SI[^\n]*\n *\(mem/c:SI \(plus:SI \(reg:SI 0 r0\)\n *\(const_int 8.*Function foo_x5} "peephole2" } } */
+
+/* strd gen_peephole2_12 but using plus 8 and plus 12 in the offsets.  */
+int __RTL (startwith ("peephole2")) foo_x5 (int *a)
+{
+(function "foo12"
+  (insn-chain
+    (cnote 1 NOTE_INSN_DELETED)
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cinsn 101 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 8)) [0 S4 A64])
+		      (reg:SI r2)) "/home/matmal01/test.c":18)
+      (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 12)) [0 S4 A32])
+		      (reg:SI r3)) "/home/matmal01/test.c":18)
+      (edge-to exit (flags "FALLTHRU"))
+    ) ;; block 2
+  ) ;; insn-chain
+  (crtl
+    (return_rtx
+      (reg/i:SI r0)
+    ) ;; return_rtx
+  ) ;; crtl
+) ;; function "main"
+}
+/* Ensure generated parallel starts with a set to the appropriate offset from
+   register 0.  */
+/* { dg-final { scan-rtl-dump {Function foo_x5.*\(parallel \[\n[^\n]*\(set \(mem/c:SI \(plus:SI \(reg:SI 0 r0\)\n *\(const_int 8.*$} "peephole2" } } */
+
+
+/* { dg-final { scan-assembler-not "ldm" } } */
+/* { dg-final { scan-assembler-not "stm" } } */
+/* { dg-final { scan-assembler-times {ldrd\tr[2468], \[r0\]} 4 } } */
+/* { dg-final { scan-assembler-times {ldrd\tr[2468], \[r0, #8\]} 1 } } */
+/* { dg-final { scan-assembler-times {strd\tr[2468], \[r0\]} 6 } } */
+/* { dg-final { scan-assembler-times {strd\tr[2468], \[r0, #8\]} 1 } } */