diff mbox series

Force broadcast constant to mem for vec_dup{v4di, v8si, v4df, v8df} when TARGET_AVX2 is not available.

Message ID 20231213073940.3120920-1-hongtao.liu@intel.com
State New
Headers show
Series Force broadcast constant to mem for vec_dup{v4di, v8si, v4df, v8df} when TARGET_AVX2 is not available. | expand

Commit Message

Liu, Hongtao Dec. 13, 2023, 7:39 a.m. UTC
vpbroadcastd/vpbroadcastq are available under TARGET_AVX2, but the
vec_dup{v4di,v8si} patterns are available under AVX with a memory operand.
Putting the constant in a register therefore causes LRA/Reload to
generate a spill and reload.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk.

gcc/ChangeLog:

	PR target/112992
	* config/i386/i386-expand.cc
	(ix86_convert_const_wide_int_to_broadcast): Don't convert to
	broadcast for vec_dup{v4di,v8si} when TARGET_AVX2 is not
	available.
	(ix86_broadcast_from_constant): Allow broadcast for V4DI/V8SI
	when !TARGET_AVX2 since it will be forced to memory later.
	(ix86_expand_vector_move): Force constant to mem for
	vec_dup{v8si,v4di} when TARGET_AVX2 is not available.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr100865-7a.c: Adjust testcase.
	* gcc.target/i386/pr100865-7c.c: Ditto.
	* gcc.target/i386/pr112992.c: New test.
---
 gcc/config/i386/i386-expand.cc              | 48 +++++++++++++--------
 gcc/testsuite/gcc.target/i386/pr100865-7a.c |  3 +-
 gcc/testsuite/gcc.target/i386/pr100865-7c.c |  3 +-
 gcc/testsuite/gcc.target/i386/pr112992.c    | 30 +++++++++++++
 4 files changed, 62 insertions(+), 22 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr112992.c
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index a53d69d5400..fad4f34f905 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -297,6 +297,12 @@  ix86_convert_const_wide_int_to_broadcast (machine_mode mode, rtx op)
   if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
     return nullptr;
 
+  unsigned int msize = GET_MODE_SIZE (mode);
+
+  /* Only optimized for vpbroadcast[bwsd]/vbroadcastss with xmm/ymm/zmm.  */
+  if (msize != 16 && msize != 32 && msize != 64)
+    return nullptr;
+
   /* Convert CONST_WIDE_INT to a non-standard SSE constant integer
      broadcast only if vector broadcast is available.  */
   if (!TARGET_AVX
@@ -309,18 +315,23 @@  ix86_convert_const_wide_int_to_broadcast (machine_mode mode, rtx op)
   HOST_WIDE_INT val = CONST_WIDE_INT_ELT (op, 0);
   HOST_WIDE_INT val_broadcast;
   scalar_int_mode broadcast_mode;
-  if (TARGET_AVX2
+  /* vpbroadcastb zmm requires TARGET_AVX512BW.  */
+  if ((msize == 64 ? TARGET_AVX512BW : TARGET_AVX2)
       && ix86_broadcast (val, GET_MODE_BITSIZE (QImode),
 			 val_broadcast))
     broadcast_mode = QImode;
-  else if (TARGET_AVX2
+  else if ((msize == 64 ? TARGET_AVX512BW : TARGET_AVX2)
 	   && ix86_broadcast (val, GET_MODE_BITSIZE (HImode),
 			      val_broadcast))
     broadcast_mode = HImode;
-  else if (ix86_broadcast (val, GET_MODE_BITSIZE (SImode),
+  /* vbroadcasts[sd] only support memory operand w/o AVX2.
+     When msize == 16, pshufs is used for vec_duplicate.
+     When msize == 64, vpbroadcastd is used, which requires TARGET_AVX512F.  */
+  else if ((msize != 32 || TARGET_AVX2)
+	   && ix86_broadcast (val, GET_MODE_BITSIZE (SImode),
 			   val_broadcast))
     broadcast_mode = SImode;
-  else if (TARGET_64BIT
+  else if (TARGET_64BIT && (msize != 32 || TARGET_AVX2)
 	   && ix86_broadcast (val, GET_MODE_BITSIZE (DImode),
 			      val_broadcast))
     broadcast_mode = DImode;
@@ -596,23 +607,17 @@  ix86_broadcast_from_constant (machine_mode mode, rtx op)
       && INTEGRAL_MODE_P (mode))
     return nullptr;
 
+  unsigned int msize = GET_MODE_SIZE (mode);
+  unsigned int inner_size = GET_MODE_SIZE (GET_MODE_INNER ((mode)));
+
   /* Convert CONST_VECTOR to a non-standard SSE constant integer
      broadcast only if vector broadcast is available.  */
-  if (!(TARGET_AVX2
-	|| (TARGET_AVX
-	    && (GET_MODE_INNER (mode) == SImode
-		|| GET_MODE_INNER (mode) == DImode))
-	|| FLOAT_MODE_P (mode))
-      || standard_sse_constant_p (op, mode))
+  if (standard_sse_constant_p (op, mode))
     return nullptr;
 
-  /* Don't broadcast from a 64-bit integer constant in 32-bit mode.
-     We can still put 64-bit integer constant in memory when
-     avx512 embed broadcast is available.  */
-  if (GET_MODE_INNER (mode) == DImode && !TARGET_64BIT
-      && (!TARGET_AVX512F
-	  || (GET_MODE_SIZE (mode) == 64 && !TARGET_EVEX512)
-	  || (GET_MODE_SIZE (mode) < 64 && !TARGET_AVX512VL)))
+  /* vpbroadcast[b,w] is available under TARGET_AVX2.
+     or TARGET_AVX512BW for zmm.  */
+  if (inner_size < 4 && !(msize == 64 ? TARGET_AVX512BW : TARGET_AVX2))
     return nullptr;
 
   if (GET_MODE_INNER (mode) == TImode)
@@ -710,7 +715,14 @@  ix86_expand_vector_move (machine_mode mode, rtx operands[])
 	     constant or scalar mem.  */
 	  op1 = gen_reg_rtx (mode);
 	  if (FLOAT_MODE_P (mode)
-	      || (!TARGET_64BIT && GET_MODE_INNER (mode) == DImode))
+	      || (!TARGET_64BIT && GET_MODE_INNER (mode) == DImode)
+	      /* vbroadcastss/vbroadcastsd only supports memory operand
+		 w/o AVX2, force them into memory to avoid spill to
+		 memory.  */
+	      || (GET_MODE_SIZE (mode) == 32
+		  && (GET_MODE_INNER (mode) == DImode
+		      || GET_MODE_INNER (mode) == SImode)
+		  && !TARGET_AVX2))
 	    first = force_const_mem (GET_MODE_INNER (mode), first);
 	  bool ok = ix86_expand_vector_init_duplicate (false, mode,
 						       op1, first);
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-7a.c b/gcc/testsuite/gcc.target/i386/pr100865-7a.c
index f6f2be91120..7de7d4a3ce3 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-7a.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-7a.c
@@ -11,7 +11,6 @@  foo (void)
     array[i] = -45;
 }
 
-/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+\[^\n\]*, %ymm\[0-9\]+" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+\[^\n\]*, %ymm\[0-9\]+" 1  } } */
 /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 16 } } */
-/* { dg-final { scan-assembler-not "vpbroadcastq" { target ia32 } } } */
 /* { dg-final { scan-assembler-not "vmovdqa" { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-7c.c b/gcc/testsuite/gcc.target/i386/pr100865-7c.c
index 4d50bb7e2f6..edbfd5b09ed 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-7c.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-7c.c
@@ -11,7 +11,6 @@  foo (void)
     array[i] = -45;
 }
 
-/* { dg-final { scan-assembler-times "vbroadcastsd" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vbroadcastsd" 1  } } */
 /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 16 } } */
-/* { dg-final { scan-assembler-not "vbroadcastsd" { target ia32 } } } */
 /* { dg-final { scan-assembler-not "vmovdqa" { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr112992.c b/gcc/testsuite/gcc.target/i386/pr112992.c
new file mode 100644
index 00000000000..743e64dccba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112992.c
@@ -0,0 +1,30 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx -mno-avx2 -O2 " } */
+/* { dg-final { scan-assembler-not {(?n)(%rsp)} } } */
+
+typedef unsigned long long v4di __attribute((vector_size(32)));
+typedef unsigned int v8si __attribute((vector_size(32)));
+typedef unsigned short v16hi __attribute((vector_size(32)));
+typedef unsigned char v32qi __attribute((vector_size(32)));
+
+#define MASK  0x01010101
+#define MASKL 0x0101010101010101ULL
+#define MASKS 0x0101
+
+v4di fooq() {
+  return (v4di){MASKL,MASKL,MASKL,MASKL};
+}
+
+v8si food() {
+  return (v8si){MASK,MASK,MASK,MASK,MASK,MASK,MASK,MASK};
+}
+
+v16hi foow() {
+  return (v16hi){MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,
+    MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS};
+}
+
+v32qi foob() {
+  return (v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
+}