diff mbox series

rs6000: Fix predicate for const vector in sldoi_to_mov [PR109069]

Message ID 61d8cd78-e20f-e545-5a22-188794168e7f@linux.ibm.com
State New
Headers show
Series rs6000: Fix predicate for const vector in sldoi_to_mov [PR109069] | expand

Commit Message

Kewen.Lin March 27, 2023, 8:09 a.m. UTC
Hi,

As PR109069 shows, commit r12-6537-g080a06fcb076b3 which
introduces define_insn_and_split sldoi_to_mov adopts
easy_vector_constant for const vector of interest, but it's
wrong since predicate easy_vector_constant doesn't guarantee
each byte in the const vector is the same.  One counter
example is the const vector in pr109069-1.c.  This patch is
to introduce new predicate const_vector_each_byte_same to
ensure all bytes in the given const vector are the same by
considering both int and float, meanwhile for the constants
which don't meet easy_vector_constant we need to gen a move
instead of just a set, and uses VECTOR_MEM_ALTIVEC_OR_VSX_P
rather than VECTOR_UNIT_ALTIVEC_OR_VSX_P for V2DImode support
under VSX since vector long long type of vec_sld is guarded
under stanza vsx.

Bootstrapped and regtested on powerpc64-linux-gnu P7/P8/P9
and powerpc64le-linux-gnu P9 and P10.

Is it ok for trunk?

BR,
Kewen
-----
	PR target/109069

gcc/ChangeLog:

	* config/rs6000/altivec.md (sldoi_to_mov<mode>): Replace predicate
	easy_vector_constant with const_vector_each_byte_same, add
	handlings in preparation for !easy_vector_constant, and update
	VECTOR_UNIT_ALTIVEC_OR_VSX_P with VECTOR_MEM_ALTIVEC_OR_VSX_P.
	* config/rs6000/predicates.md (const_vector_each_byte_same): New
	predicate.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/pr109069-1.c: New test.
	* gcc.target/powerpc/pr109069-2-run.c: New test.
	* gcc.target/powerpc/pr109069-2.c: New test.
	* gcc.target/powerpc/pr109069-2.h: New test.
---
 gcc/config/rs6000/altivec.md                  | 14 +++-
 gcc/config/rs6000/predicates.md               | 37 +++++++++
 gcc/testsuite/gcc.target/powerpc/pr109069-1.c | 25 ++++++
 .../gcc.target/powerpc/pr109069-2-run.c       | 50 +++++++++++
 gcc/testsuite/gcc.target/powerpc/pr109069-2.c | 12 +++
 gcc/testsuite/gcc.target/powerpc/pr109069-2.h | 83 +++++++++++++++++++
 6 files changed, 218 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr109069-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr109069-2-run.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr109069-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr109069-2.h

--
2.31.1

Comments

Segher Boessenkool April 24, 2023, 11:40 a.m. UTC | #1
Hi!

On Mon, Mar 27, 2023 at 04:09:39PM +0800, Kewen.Lin wrote:
> As PR109069 shows, commit r12-6537-g080a06fcb076b3 which
> introduces define_insn_and_split sldoi_to_mov adopts
> easy_vector_constant for const vector of interest, but it's
> wrong since predicate easy_vector_constant doesn't guarantee
> each byte in the const vector is the same.

> 	* config/rs6000/altivec.md (sldoi_to_mov<mode>): Replace predicate
> 	easy_vector_constant with const_vector_each_byte_same, add
> 	handlings in preparation for !easy_vector_constant, and update
> 	VECTOR_UNIT_ALTIVEC_OR_VSX_P with VECTOR_MEM_ALTIVEC_OR_VSX_P.
> 	* config/rs6000/predicates.md (const_vector_each_byte_same): New
> 	predicate.

Okay for trunk.  Let's backport this to 13 once 13.1 has been released,
this patch is not very trivial so there is some risk.

Thanks!


Segher
diff mbox series

Patch

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 30606b8ab21..183c3005694 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -385,14 +385,22 @@  (define_split

 (define_insn_and_split "sldoi_to_mov<mode>"
   [(set (match_operand:VM 0 "altivec_register_operand")
-	(unspec:VM [(match_operand:VM 1 "easy_vector_constant")
+	(unspec:VM [(match_operand:VM 1 "const_vector_each_byte_same")
 	            (match_dup 1)
 		    (match_operand:QI 2 "u5bit_cint_operand")]
 		    UNSPEC_VSLDOI))]
-  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode) && can_create_pseudo_p ()"
+  "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode) && can_create_pseudo_p ()"
   "#"
   "&& 1"
-  [(set (match_dup 0) (match_dup 1))])
+  [(set (match_dup 0) (match_dup 1))]
+  "{
+     if (!easy_vector_constant (operands[1], <MODE>mode))
+       {
+	 rtx dest = gen_reg_rtx (<MODE>mode);
+	 emit_move_insn (dest, operands[1]);
+	 operands[1] = dest;
+       }
+  }")

 (define_insn "get_vrsave_internal"
   [(set (match_operand:SI 0 "register_operand" "=r")
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 52c65534e51..a16ee30f0c0 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -798,6 +798,43 @@  (define_predicate "easy_vector_constant_vsldoi"
 	    (and (match_test "easy_altivec_constant (op, mode)")
 		 (match_test "vspltis_shifted (op) != 0")))))

+;; Return true if this is a vector constant and each byte in
+;; it is the same.
+(define_predicate "const_vector_each_byte_same"
+  (match_code "const_vector")
+{
+  rtx elt;
+  if (!const_vec_duplicate_p (op, &elt))
+    return false;
+
+  machine_mode emode = GET_MODE_INNER (mode);
+  unsigned HOST_WIDE_INT eval;
+  if (CONST_INT_P (elt))
+    eval = INTVAL (elt);
+  else if (CONST_DOUBLE_AS_FLOAT_P (elt))
+    {
+      gcc_assert (emode == SFmode || emode == DFmode);
+      long l[2];
+      real_to_target (l, CONST_DOUBLE_REAL_VALUE (elt), emode);
+      /* real_to_target puts 32-bit pieces in each long.  */
+      eval = zext_hwi (l[0], 32);
+      eval |= zext_hwi (l[1], 32) << 32;
+    }
+  else
+    return false;
+
+  unsigned int esize = GET_MODE_SIZE (emode);
+  unsigned char byte0 = eval & 0xff;
+  for (unsigned int i = 1; i < esize; i++)
+    {
+      eval >>= BITS_PER_UNIT;
+      if (byte0 != (eval & 0xff))
+	return false;
+    }
+
+  return true;
+})
+
 ;; Return 1 if operand is a vector int register or is either a vector constant
 ;; of all 0 bits of a vector constant of all 1 bits.
 (define_predicate "vector_int_reg_or_same_bit"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr109069-1.c b/gcc/testsuite/gcc.target/powerpc/pr109069-1.c
new file mode 100644
index 00000000000..eb4d73a1e66
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr109069-1.c
@@ -0,0 +1,25 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target vmx_hw } */
+/* { dg-options "-O2 -maltivec" } */
+
+/* Verify it run successfully.  */
+
+#include <altivec.h>
+
+__attribute__ ((noipa))
+vector signed int
+test ()
+{
+  vector signed int v = {-16, -16, -16, -16};
+  vector signed int res = vec_sld (v, v, 3);
+  return res;
+}
+
+int
+main ()
+{
+  vector signed int res = test ();
+  if (res[0] != 0xf0ffffff)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/pr109069-2-run.c b/gcc/testsuite/gcc.target/powerpc/pr109069-2-run.c
new file mode 100644
index 00000000000..fad9fa5df73
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr109069-2-run.c
@@ -0,0 +1,50 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target vsx_hw } */
+/* { dg-options "-O2 -mvsx" } */
+
+/* Verify it doesn't generate wrong code.  */
+
+#include "pr109069-2.h"
+
+int
+main ()
+{
+  vector unsigned char res1 = test1 ();
+  for (int i = 0; i < 16; i++)
+    if (res1[i] != 0xd)
+      __builtin_abort ();
+
+  vector signed short res2 = test2 ();
+  for (int i = 0; i < 8; i++)
+    if (res2[i] != 0x7777)
+      __builtin_abort ();
+
+  vector signed int res3 = test3 ();
+  vector unsigned int res4 = test4 ();
+  vector float res6 = test6 ();
+  for (int i = 0; i < 4; i++)
+    {
+      if (res3[i] != 0xbbbbbbbb)
+	__builtin_abort ();
+      if (res4[i] != 0x7070707)
+	__builtin_abort ();
+      U32b u;
+      u.f = res6[i];
+      if (u.i != 0x17171717)
+	__builtin_abort ();
+    }
+
+  vector unsigned long long res5 = test5 ();
+  vector double res7 = test7 ();
+  for (int i = 0; i < 2; i++)
+    {
+      if (res5[i] != 0x4545454545454545ll)
+	__builtin_abort ();
+      U64b u;
+      u.f = res7[i];
+      if (u.i != 0x5454545454545454ll)
+	__builtin_abort ();
+    }
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/pr109069-2.c b/gcc/testsuite/gcc.target/powerpc/pr109069-2.c
new file mode 100644
index 00000000000..e71bbeb5df5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr109069-2.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* Disable rs6000 optimize_swaps as it drops some REG_EQUAL
+   notes on const vector and affects test point here.  */
+/* { dg-options "-O2 -mvsx -mno-optimize-swaps" } */
+
+/* Verify we can optimize away vector shifting if every byte
+   of vector is the same.  */
+
+#include "pr109069-2.h"
+
+/* { dg-final { scan-assembler-not {\mvsldoi\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr109069-2.h b/gcc/testsuite/gcc.target/powerpc/pr109069-2.h
new file mode 100644
index 00000000000..8b03bfb65fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr109069-2.h
@@ -0,0 +1,83 @@ 
+#include <altivec.h>
+
+typedef union
+{
+  unsigned int i;
+  float f;
+} U32b;
+
+typedef union
+{
+  unsigned long long i;
+  double f;
+} U64b;
+
+__attribute__ ((noipa))
+vector unsigned char
+test1 ()
+{
+  vector unsigned char v = {0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd,
+			    0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd};
+  vector unsigned char res = vec_sld (v, v, 3);
+  return res;
+}
+
+__attribute__ ((noipa))
+vector signed short
+test2 ()
+{
+  vector signed short v
+    = {0x7777, 0x7777, 0x7777, 0x7777, 0x7777, 0x7777, 0x7777, 0x7777};
+  vector signed short res = vec_sld (v, v, 5);
+  return res;
+}
+
+__attribute__ ((noipa))
+vector signed int
+test3 ()
+{
+  vector signed int v = {0xbbbbbbbb, 0xbbbbbbbb, 0xbbbbbbbb, 0xbbbbbbbb};
+  vector signed int res = vec_sld (v, v, 7);
+  return res;
+}
+
+__attribute__ ((noipa))
+vector unsigned int
+test4 ()
+{
+  vector unsigned int v = {0x07070707, 0x07070707, 0x07070707, 0x07070707};
+  vector unsigned int res = vec_sld (v, v, 9);
+  return res;
+}
+
+__attribute__ ((noipa))
+vector unsigned long long
+test5 ()
+{
+  vector unsigned long long v = {0x4545454545454545ll, 0x4545454545454545ll};
+  vector unsigned long long res = vec_sld (v, v, 10);
+  return res;
+}
+
+__attribute__ ((noipa))
+vector float
+test6 ()
+{
+  U32b u;
+  u.i = 0x17171717;
+  vector float vf = {u.f, u.f, u.f, u.f};
+  vector float res = vec_sld (vf, vf, 11);
+  return res;
+}
+
+__attribute__ ((noipa))
+vector double
+test7 ()
+{
+  U64b u;
+  u.i = 0x5454545454545454ll;
+  vector double vf = {u.f, u.f};
+  vector double res = vec_sld (vf, vf, 13);
+  return res;
+}
+