diff mbox series

[pushed] aarch64: Stop +mops clobbering variable values

Message ID mptilrnsd3e.fsf@arm.com
State New
Headers show
Series [pushed] aarch64: Stop +mops clobbering variable values | expand

Commit Message

Richard Sandiford April 5, 2022, 4:35 p.m. UTC
The mops cpy* patterns take three registers: a destination address,
a source address, and a size.  The patterns clobber all three registers
as part of the operation.  The set* patterns take a destination address,
a size, and a store value, and they clobber the first two registers as
part of the operation.

However, the associated expanders would reuse the existing source,
destination and size registers where possible.  Any variables that
live in those registers could therefore change unexpectedly.

For example:

    void
    copy1 (int *x, int *y, long z, int **res)
    {
      __builtin_memcpy (x, y, z);
      *res = x;
    }

generated:

        cpyfp   [x0]!, [x1]!, x2!
        cpyfm   [x0]!, [x1]!, x2!
        cpyfe   [x0]!, [x1]!, x2!
        str     x0, [x3]
        ret

which stores the incremented x at *res, rather than the original x.

Tested on aarch64-linux-gnu & pushed.

Richard


gcc/
	* config/aarch64/aarch64.md (aarch64_cpymemdi): Turn into a
	define_expand and turn operands 0 and 1 from REGs to MEMs.
	(*aarch64_cpymemdi): New pattern.
	(aarch64_setmemdi): Turn into a define_expand and turn operand 0
	from a REG to a MEM.
	(*aarch64_setmemdi): New pattern.
	* config/aarch64/aarch64.cc (aarch64_expand_cpymem_mops): Use
	copy_to_mode_reg on all three registers.  Replace the original
	MEM addresses rather than creating wild reads and writes.
	(aarch64_expand_setmem_mops): Likewise for the size and for the
	destination memory and address.

gcc/testsuite/
	* gcc.target/aarch64/mops_4.c: New test.
---
 gcc/config/aarch64/aarch64.cc             |  38 ++++---
 gcc/config/aarch64/aarch64.md             |  53 +++++++---
 gcc/testsuite/gcc.target/aarch64/mops_4.c | 115 ++++++++++++++++++++++
 3 files changed, 171 insertions(+), 35 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/mops_4.c
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 18f80499079..3e2a6fb6472 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -24531,17 +24531,15 @@  aarch64_expand_cpymem_mops (rtx *operands)
 {
   if (!TARGET_MOPS)
     return false;
-  rtx addr_dst = XEXP (operands[0], 0);
-  rtx addr_src = XEXP (operands[1], 0);
-  rtx sz_reg = operands[2];
-
-  if (!REG_P (sz_reg))
-    sz_reg = force_reg (DImode, sz_reg);
-  if (!REG_P (addr_dst))
-    addr_dst = force_reg (DImode, addr_dst);
-  if (!REG_P (addr_src))
-    addr_src = force_reg (DImode, addr_src);
-  emit_insn (gen_aarch64_cpymemdi (addr_dst, addr_src, sz_reg));
+
+  /* All three registers are changed by the instruction, so each one
+     must be a fresh pseudo.  */
+  rtx dst_addr = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
+  rtx src_addr = copy_to_mode_reg (Pmode, XEXP (operands[1], 0));
+  rtx dst_mem = replace_equiv_address (operands[0], dst_addr);
+  rtx src_mem = replace_equiv_address (operands[1], src_addr);
+  rtx sz_reg = copy_to_mode_reg (DImode, operands[2]);
+  emit_insn (gen_aarch64_cpymemdi (dst_mem, src_mem, sz_reg));
 
   return true;
 }
@@ -24718,17 +24716,15 @@  aarch64_expand_setmem_mops (rtx *operands)
   if (!TARGET_MOPS)
     return false;
 
-  rtx addr_dst = XEXP (operands[0], 0);
-  rtx sz_reg = operands[1];
+  /* The first two registers are changed by the instruction, so both
+     of them must be a fresh pseudo.  */
+  rtx dst_addr = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
+  rtx dst_mem = replace_equiv_address (operands[0], dst_addr);
+  rtx sz_reg = copy_to_mode_reg (DImode, operands[1]);
   rtx val = operands[2];
-
-  if (!REG_P (sz_reg))
-   sz_reg = force_reg (DImode, sz_reg);
-  if (!REG_P (addr_dst))
-   addr_dst = force_reg (DImode, addr_dst);
-  if (!REG_P (val) && val != CONST0_RTX (QImode))
-   val = force_reg (QImode, val);
-  emit_insn (gen_aarch64_setmemdi (addr_dst, val, sz_reg));
+  if (val != CONST0_RTX (QImode))
+    val = force_reg (QImode, val);
+  emit_insn (gen_aarch64_setmemdi (dst_mem, val, sz_reg));
   return true;
 }
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c98525075a0..f5c635938ad 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1581,16 +1581,29 @@  (define_split
   }
 )
 
-(define_insn "aarch64_cpymemdi"
-  [(parallel [
-   (set (match_operand:DI 2 "register_operand" "+&r") (const_int 0))
+(define_expand "aarch64_cpymemdi"
+  [(parallel
+     [(set (match_operand 2) (const_int 0))
+      (clobber (match_dup 3))
+      (clobber (match_dup 4))
+      (set (match_operand 0)
+	   (unspec:BLK [(match_operand 1) (match_dup 2)] UNSPEC_CPYMEM))])]
+  "TARGET_MOPS"
+  {
+    operands[3] = XEXP (operands[0], 0);
+    operands[4] = XEXP (operands[1], 0);
+  }
+)
+
+(define_insn "*aarch64_cpymemdi"
+  [(set (match_operand:DI 2 "register_operand" "+&r") (const_int 0))
    (clobber (match_operand:DI 0 "register_operand" "+&r"))
    (clobber (match_operand:DI 1 "register_operand" "+&r"))
    (set (mem:BLK (match_dup 0))
-        (unspec:BLK [(mem:BLK (match_dup 1)) (match_dup 2)] UNSPEC_CPYMEM))])]
- "TARGET_MOPS"
- "cpyfp\t[%x0]!, [%x1]!, %x2!\;cpyfm\t[%x0]!, [%x1]!, %x2!\;cpyfe\t[%x0]!, [%x1]!, %x2!"
- [(set_attr "length" "12")]
+        (unspec:BLK [(mem:BLK (match_dup 1)) (match_dup 2)] UNSPEC_CPYMEM))]
+  "TARGET_MOPS"
+  "cpyfp\t[%x0]!, [%x1]!, %x2!\;cpyfm\t[%x0]!, [%x1]!, %x2!\;cpyfe\t[%x0]!, [%x1]!, %x2!"
+  [(set_attr "length" "12")]
 )
 
 ;; 0 is dst
@@ -1657,16 +1670,28 @@  (define_expand "movmemdi"
 }
 )
 
-(define_insn "aarch64_setmemdi"
-  [(parallel [
-   (set (match_operand:DI 2 "register_operand" "+&r") (const_int 0))
+(define_expand "aarch64_setmemdi"
+  [(parallel
+     [(set (match_operand 2) (const_int 0))
+      (clobber (match_dup 3))
+      (set (match_operand 0)
+	   (unspec:BLK [(match_operand 1)
+			(match_dup 2)] UNSPEC_SETMEM))])]
+  "TARGET_MOPS"
+  {
+    operands[3] = XEXP (operands[0], 0);
+  }
+)
+
+(define_insn "*aarch64_setmemdi"
+  [(set (match_operand:DI 2 "register_operand" "+&r") (const_int 0))
    (clobber (match_operand:DI 0 "register_operand" "+&r"))
    (set (mem:BLK (match_dup 0))
         (unspec:BLK [(match_operand:QI 1 "aarch64_reg_or_zero" "rZ")
-                    (match_dup 2)] UNSPEC_SETMEM))])]
- "TARGET_MOPS"
- "setp\t[%x0]!, %x2!, %x1\;setm\t[%x0]!, %x2!, %x1\;sete\t[%x0]!, %x2!, %x1"
- [(set_attr "length" "12")]
+		     (match_dup 2)] UNSPEC_SETMEM))]
+  "TARGET_MOPS"
+  "setp\t[%x0]!, %x2!, %x1\;setm\t[%x0]!, %x2!, %x1\;sete\t[%x0]!, %x2!, %x1"
+  [(set_attr "length" "12")]
 )
 
 ;; 0 is dst
diff --git a/gcc/testsuite/gcc.target/aarch64/mops_4.c b/gcc/testsuite/gcc.target/aarch64/mops_4.c
new file mode 100644
index 00000000000..1b87759cb5e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mops_4.c
@@ -0,0 +1,115 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8.6-a+mops" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+/*
+** copy1:
+**	mov	(x[0-9]+), x0
+**	cpyfp	\[\1\]!, \[x1\]!, x2!
+**	cpyfm	\[\1\]!, \[x1\]!, x2!
+**	cpyfe	\[\1\]!, \[x1\]!, x2!
+**	str	x0, \[x3\]
+**	ret
+*/
+void
+copy1 (int *x, int *y, long z, int **res)
+{
+  __builtin_memcpy (x, y, z);
+  *res = x;
+}
+
+/*
+** copy2:
+**	mov	(x[0-9]+), x1
+**	cpyfp	\[x0\]!, \[\1\]!, x2!
+**	cpyfm	\[x0\]!, \[\1\]!, x2!
+**	cpyfe	\[x0\]!, \[\1\]!, x2!
+**	str	x1, \[x3\]
+**	ret
+*/
+void
+copy2 (int *x, int *y, long z, int **res)
+{
+  __builtin_memcpy (x, y, z);
+  *res = y;
+}
+
+/*
+** copy3:
+**	mov	(x[0-9]+), x2
+**	cpyfp	\[x0\]!, \[x1\]!, \1!
+**	cpyfm	\[x0\]!, \[x1\]!, \1!
+**	cpyfe	\[x0\]!, \[x1\]!, \1!
+**	str	x2, \[x3\]
+**	ret
+*/
+void
+copy3 (int *x, int *y, long z, long *res)
+{
+  __builtin_memcpy (x, y, z);
+  *res = z;
+}
+
+/*
+** set1:
+**	mov	(x[0-9]+), x0
+**	setp	\[\1\]!, x2!, x1
+**	setm	\[\1\]!, x2!, x1
+**	sete	\[\1\]!, x2!, x1
+**	str	x0, \[x3\]
+**	ret
+*/
+void
+set1 (char *x, char y, long z, char **res)
+{
+  __builtin_memset (x, y, z);
+  *res = x;
+}
+
+/*
+** set2:
+**	ldrb	w([0-9]+), \[x1\]
+**	setp	\[x0\]!, x2!, x\1
+**	setm	\[x0\]!, x2!, x\1
+**	sete	\[x0\]!, x2!, x\1
+**	strb	w\1, \[x3\]
+**	ret
+*/
+void
+set2 (char *x, char *yptr, long z, char *res)
+{
+  char y = *yptr;
+  __builtin_memset (x, y, z);
+  *res = y;
+}
+
+/*
+** set3:
+**	mov	(x[0-9]+), x2
+**	setp	\[x0\]!, \1!, x1
+**	setm	\[x0\]!, \1!, x1
+**	sete	\[x0\]!, \1!, x1
+**	str	x2, \[x3\]
+**	ret
+*/
+void
+set3 (char *x, char y, long z, long *res)
+{
+  __builtin_memset (x, y, z);
+  *res = z;
+}
+
+/*
+** set4:
+**	setp	\[x0\]!, x1!, xzr
+**	setm	\[x0\]!, x1!, xzr
+**	sete	\[x0\]!, x1!, xzr
+**	strb	wzr, \[x2\]
+**	ret
+*/
+void
+set4 (char *x, long z, char *res)
+{
+  __builtin_memset (x, 0, z);
+  *res = 0;
+}