diff mbox series

i386: Autovectorize 4-byte vectors

Message ID CAFULd4Ync9T9AVNp+Dj+CvM0m+9A66zT1_n8O06zBHMQb+JtiQ@mail.gmail.com
State New
Headers show
Series i386: Autovectorize 4-byte vectors | expand

Commit Message

Uros Bizjak May 26, 2021, 6:49 p.m. UTC
2021-05-26  Uroš Bizjak  <ubizjak@gmail.com>

gcc/
    * config/i386/i386.c (ix86_autovectorize_vector_modes):
    Add V4QImode and V16QImode for TARGET_SSE2.
    * doc/sourcebuild.texi (Vector-specific attributes):
    Add vect64 and vect32 description.

gcc/testsuite/

    * lib/target-supports.exp (check_effective_target_vect32): New.
    (available_vector_sizes): Append 32 for x86 targets.
    * gcc.dg/vect/pr71264.c (dg-final): Xfail scan dump for vect32 targets.
    * gcc.dg/vect/slp-28.c (dg-final): Adjust dumps for vect32 targets.
    * gcc.dg/vect/slp-3.c (dg-final): Ditto.

    * gcc.target/i386/pr100637-3b.c: New test.
    * gcc.target/i386/pr100637-3w.c: Ditto.
    * gcc.target/i386/pr100637-4b.c: Ditto.
    * gcc.target/i386/pr100637-4w.c: Ditto.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.
diff mbox series

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 28e6113a609..04649b42122 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -22190,12 +22190,15 @@  ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
       modes->safe_push (V16QImode);
       modes->safe_push (V32QImode);
     }
-  else if (TARGET_MMX_WITH_SSE)
+  else if (TARGET_SSE2)
     modes->safe_push (V16QImode);
 
   if (TARGET_MMX_WITH_SSE)
     modes->safe_push (V8QImode);
 
+  if (TARGET_SSE2)
+    modes->safe_push (V4QImode);
+
   return 0;
 }
 
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index cf3098749c0..16c6a3b8e99 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -1740,6 +1740,12 @@  circumstances.
 @item vect_variable_length
 Target has variable-length vectors.
 
+@item vect64
+Target supports vectors of 64 bits.
+
+@item vect32
+Target supports vectors of 32 bits.
+
 @item vect_widen_sum_hi_to_si
 Target supports a vector widening summation of @code{short} operands
 into @code{int} results, or can promote (unpack) from @code{short}
diff --git a/gcc/testsuite/gcc.dg/vect/pr71264.c b/gcc/testsuite/gcc.dg/vect/pr71264.c
index dc849bf2797..1381e0ed132 100644
--- a/gcc/testsuite/gcc.dg/vect/pr71264.c
+++ b/gcc/testsuite/gcc.dg/vect/pr71264.c
@@ -19,5 +19,4 @@  void test(uint8_t *ptr, uint8_t *mask)
     }
 }
 
-/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { xfail s390*-*-* sparc*-*-* } } } */
-
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { xfail { { s390*-*-* sparc*-*-* } || vect32 } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-28.c b/gcc/testsuite/gcc.dg/vect/slp-28.c
index 7778bad4465..0bb5f0eb0e4 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-28.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-28.c
@@ -88,6 +88,7 @@  int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! vect32 } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target vect32 } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { ! vect32 } } } } */
   
diff --git a/gcc/testsuite/gcc.dg/vect/slp-3.c b/gcc/testsuite/gcc.dg/vect/slp-3.c
index 46ab584419a..80ded1840ad 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-3.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-3.c
@@ -141,8 +141,8 @@  int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target { ! vect_partial_vectors } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" { target vect_partial_vectors } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { target { ! vect_partial_vectors } } } }*/
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { target vect_partial_vectors } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target { ! { vect_partial_vectors || vect32 } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" { target { vect_partial_vectors || vect32 } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { target { ! { vect_partial_vectors || vect32 } } } } }*/
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { target { vect_partial_vectors || vect32 } } } } */
   
diff --git a/gcc/testsuite/gcc.target/i386/pr100637-3b.c b/gcc/testsuite/gcc.target/i386/pr100637-3b.c
new file mode 100644
index 00000000000..16df70059a9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr100637-3b.c
@@ -0,0 +1,56 @@ 
+/* PR target/100637 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse4" } */
+
+char r[4], a[4], b[4];
+unsigned char ur[4], ua[4], ub[4];
+
+void maxs (void)
+{
+  int i;
+
+  for (i = 0; i < 4; i++)
+    r[i] = a[i] > b[i] ? a[i] : b[i];
+}
+
+/* { dg-final { scan-assembler "pmaxsb" } } */
+
+void maxu (void)
+{
+  int i;
+
+  for (i = 0; i < 4; i++)
+    ur[i] = ua[i] > ub[i] ? ua[i] : ub[i];
+}
+
+/* { dg-final { scan-assembler "pmaxub" } } */
+
+void mins (void)
+{
+  int i;
+
+  for (i = 0; i < 4; i++)
+    r[i] = a[i] < b[i] ? a[i] : b[i];
+}
+
+/* { dg-final { scan-assembler "pminsb" } } */
+
+void minu (void)
+{
+  int i;
+
+  for (i = 0; i < 4; i++)
+    ur[i] = ua[i] < ub[i] ? ua[i] : ub[i];
+}
+
+/* { dg-final { scan-assembler "pminub" } } */
+
+void _abs (void)
+{
+  int i;
+
+  for (i = 0; i < 4; i++)
+    r[i] = a[i] < 0 ? -a[i] : a[i];
+}
+
+/* { dg-final { scan-assembler "pabsb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100637-3w.c b/gcc/testsuite/gcc.target/i386/pr100637-3w.c
new file mode 100644
index 00000000000..7f1882e7a56
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr100637-3w.c
@@ -0,0 +1,86 @@ 
+/* PR target/100637 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse4" } */
+
+short r[2], a[2], b[2];
+unsigned short ur[2], ua[2], ub[2];
+
+void mulh (void)
+{
+  int i;
+
+  for (i = 0; i < 2; i++)
+    r[i] = ((int) a[i] * b[i]) >> 16;
+}
+
+/* { dg-final { scan-assembler "pmulhw" { xfail *-*-* } } } */
+
+void mulhu (void)
+{
+  int i;
+
+  for (i = 0; i < 2; i++)
+    ur[i] = ((unsigned int) ua[i] * ub[i]) >> 16;
+}
+
+/* { dg-final { scan-assembler "pmulhuw" { xfail *-*-* } } } */
+
+void mulhrs (void)
+{
+  int i;
+
+  for (i = 0; i < 2; i++)
+    r[i] = ((((int) a[i] * b[i]) >> 14) + 1) >> 1;
+}
+
+/* { dg-final { scan-assembler "pmulhrsw" } } */
+
+void maxs (void)
+{
+  int i;
+
+  for (i = 0; i < 2; i++)
+    r[i] = a[i] > b[i] ? a[i] : b[i];
+}
+
+/* { dg-final { scan-assembler "pmaxsw" } } */
+
+void maxu (void)
+{
+  int i;
+
+  for (i = 0; i < 2; i++)
+    ur[i] = ua[i] > ub[i] ? ua[i] : ub[i];
+}
+
+/* { dg-final { scan-assembler "pmaxuw" } } */
+
+void mins (void)
+{
+  int i;
+
+  for (i = 0; i < 2; i++)
+    r[i] = a[i] < b[i] ? a[i] : b[i];
+}
+
+/* { dg-final { scan-assembler "pminsw" } } */
+
+void minu (void)
+{
+  int i;
+
+  for (i = 0; i < 2; i++)
+    ur[i] = ua[i] < ub[i] ? ua[i] : ub[i];
+}
+
+/* { dg-final { scan-assembler "pminuw" } } */
+
+void _abs (void)
+{
+  int i;
+
+  for (i = 0; i < 2; i++)
+    r[i] = a[i] < 0 ? -a[i] : a[i];
+}
+
+/* { dg-final { scan-assembler "pabsw" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100637-4b.c b/gcc/testsuite/gcc.target/i386/pr100637-4b.c
new file mode 100644
index 00000000000..198e3dd3352
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr100637-4b.c
@@ -0,0 +1,19 @@ 
+/* PR target/100637 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+
+typedef char T;
+
+#define M 4
+
+extern T a[M], b[M], s1[M], s2[M], r[M];
+
+void foo (void)
+{
+  int j;
+
+  for (j = 0; j < M; j++)
+    r[j] = (a[j] < b[j]) ? s1[j] : s2[j];
+}
+
+/* { dg-final { scan-assembler "pcmpgtb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100637-4w.c b/gcc/testsuite/gcc.target/i386/pr100637-4w.c
new file mode 100644
index 00000000000..0f5dacce906
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr100637-4w.c
@@ -0,0 +1,19 @@ 
+/* PR target/100637 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+
+typedef short T;
+
+#define M 2
+
+extern T a[M], b[M], s1[M], s2[M], r[M];
+
+void foo (void)
+{
+  int j;
+
+  for (j = 0; j < M; j++)
+    r[j] = (a[j] < b[j]) ? s1[j] : s2[j];
+}
+
+/* { dg-final { scan-assembler "pcmpgtw" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 849f1bbeda5..7f78c5593ac 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -7626,6 +7626,7 @@  proc available_vector_sizes { } {
 	if { ![is-effective-target ia32] } {
 	    lappend result 64
 	}
+	lappend result 32
     } elseif { [istarget sparc*-*-*] } {
 	lappend result 64
     } elseif { [istarget amdgcn*-*-*] } {
@@ -7655,6 +7656,12 @@  proc check_effective_target_vect64 { } {
     return [expr { [lsearch -exact [available_vector_sizes] 64] >= 0 }]
 }
 
+# Return 1 if the target supports vectors of 32 bits.
+
+proc check_effective_target_vect32 { } {
+    return [expr { [lsearch -exact [available_vector_sizes] 32] >= 0 }]
+}
+
 # Return 1 if the target supports vector copysignf calls.
 
 proc check_effective_target_vect_call_copysignf { } {