diff mbox

Allow XMM16-XMM31 in vpbroadcast*

Message ID 20160513172034.GT28550@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek May 13, 2016, 5:20 p.m. UTC
Hi!

These insns are either AVX512VL or AVX512VL & BW, this patch allows using
XMM16+ where possible.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-05-13  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/sse.md (pbroadcast_evex_isa): New mode attr.
	(avx2_pbroadcast<mode>): Add another alternative with v instead
	of x constraints in it, using <pbroadcast_evex_isa> isa.
	(avx2_pbroadcast<mode>_1): Similarly, add two such alternatives.

	* gcc.target/i386/avx512bw-vpbroadcast-1.c: New test.
	* gcc.target/i386/avx512bw-vpbroadcast-2.c: New test.
	* gcc.target/i386/avx512bw-vpbroadcast-3.c: New test.
	* gcc.target/i386/avx512vl-vpbroadcast-1.c: New test.
	* gcc.target/i386/avx512vl-vpbroadcast-2.c: New test.
	* gcc.target/i386/avx512vl-vpbroadcast-3.c: New test.


	Jakub

Comments

Kirill Yukhin May 18, 2016, 8:52 a.m. UTC | #1
On 13 May 19:20, Jakub Jelinek wrote:
> Hi!
> 
> These insns are either AVX512VL or AVX512VL & BW, this patch allows using
> XMM16+ where possible.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2016-05-13  Jakub Jelinek  <jakub@redhat.com>
> 
> 	* config/i386/sse.md (pbroadcast_evex_isa): New mode attr.
> 	(avx2_pbroadcast<mode>): Add another alternative with v instead
> 	of x constraints in it, using <pbroadcast_evex_isa> isa.
> 	(avx2_pbroadcast<mode>_1): Similarly, add two such alternatives.
> 
> 	* gcc.target/i386/avx512bw-vpbroadcast-1.c: New test.
> 	* gcc.target/i386/avx512bw-vpbroadcast-2.c: New test.
> 	* gcc.target/i386/avx512bw-vpbroadcast-3.c: New test.
> 	* gcc.target/i386/avx512vl-vpbroadcast-1.c: New test.
> 	* gcc.target/i386/avx512vl-vpbroadcast-2.c: New test.
> 	* gcc.target/i386/avx512vl-vpbroadcast-3.c: New test.
OK.

--
Thanks, K
diff mbox

Patch

--- gcc/config/i386/sse.md.jj	2016-05-13 16:12:24.631965207 +0200
+++ gcc/config/i386/sse.md	2016-05-13 17:33:32.429909899 +0200
@@ -16725,30 +16725,40 @@  (define_insn "avx_vzeroupper"
    (set_attr "btver2_decode" "vector")
    (set_attr "mode" "OI")])
 
+(define_mode_attr pbroadcast_evex_isa
+  [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
+   (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
+   (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
+   (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")])
+
 (define_insn "avx2_pbroadcast<mode>"
-  [(set (match_operand:VI 0 "register_operand" "=x")
+  [(set (match_operand:VI 0 "register_operand" "=x,v")
 	(vec_duplicate:VI
 	  (vec_select:<ssescalarmode>
-	    (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
+	    (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm")
 	    (parallel [(const_int 0)]))))]
   "TARGET_AVX2"
   "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
-  [(set_attr "type" "ssemov")
+  [(set_attr "isa" "*,<pbroadcast_evex_isa>")
+   (set_attr "type" "ssemov")
    (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "vex")
+   (set_attr "prefix" "vex,evex")
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "avx2_pbroadcast<mode>_1"
-  [(set (match_operand:VI_256 0 "register_operand" "=x,x")
+  [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
 	(vec_duplicate:VI_256
 	  (vec_select:<ssescalarmode>
-	    (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
+	    (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
 	    (parallel [(const_int 0)]))))]
   "TARGET_AVX2"
   "@
    vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
+   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
+   vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
    vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
-  [(set_attr "type" "ssemov")
+  [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
+   (set_attr "type" "ssemov")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
--- gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcast-1.c.jj	2016-05-13 16:58:07.491988435 +0200
+++ gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcast-1.c	2016-05-13 17:31:29.830534782 +0200
@@ -0,0 +1,104 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mavx512bw" } */
+
+#include <x86intrin.h>
+
+void
+f1 (__m128i x)
+{
+  register __m128i a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = _mm_broadcastb_epi8 (a);
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastb\[^\n\r]*xmm16\[^\n\r]*xmm16" } } */
+
+void
+f2 (__m128i x)
+{
+  register __m128i a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = _mm_broadcastw_epi16 (a);
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastw\[^\n\r]*xmm16\[^\n\r]*xmm16" } } */
+
+void
+f3 (__m128i x)
+{
+  register __m128i a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = _mm_broadcastd_epi32 (a);
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastd\[^\n\r]*xmm16\[^\n\r]*xmm16" } } */
+
+void
+f4 (__m128i x)
+{
+  register __m128i a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = _mm_broadcastq_epi64 (a);
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastq\[^\n\r]*xmm16\[^\n\r]*xmm16" } } */
+
+void
+f5 (__m128i x)
+{
+  register __m128i a __asm ("xmm16");
+  register __m256i b __asm ("xmm17");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  b = _mm256_broadcastb_epi8 (a);
+  asm volatile ("" : "+v" (b));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastb\[^\n\r]*(xmm1\[67]\[^\n\r]*ymm1\[67]|ymm1\[67]\[^\n\r]*xmm1\[67])" } } */
+
+void
+f6 (__m128i x)
+{
+  register __m128i a __asm ("xmm16");
+  register __m256i b __asm ("xmm17");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  b = _mm256_broadcastw_epi16 (a);
+  asm volatile ("" : "+v" (b));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastw\[^\n\r]*(xmm1\[67]\[^\n\r]*ymm1\[67]|ymm1\[67]\[^\n\r]*xmm1\[67])" } } */
+
+void
+f7 (__m128i x)
+{
+  register __m128i a __asm ("xmm16");
+  register __m256i b __asm ("xmm17");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  b = _mm256_broadcastd_epi32 (a);
+  asm volatile ("" : "+v" (b));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastd\[^\n\r]*(xmm1\[67]\[^\n\r]*ymm1\[67]|ymm1\[67]\[^\n\r]*xmm1\[67])" } } */
+
+void
+f8 (__m128i x)
+{
+  register __m128i a __asm ("xmm16");
+  register __m256i b __asm ("xmm17");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  b = _mm256_broadcastq_epi64 (a);
+  asm volatile ("" : "+v" (b));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastq\[^\n\r]*(xmm1\[67]\[^\n\r]*ymm1\[67]|ymm1\[67]\[^\n\r]*xmm1\[67])" } } */
--- gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcast-2.c.jj	2016-05-13 17:23:57.412954445 +0200
+++ gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcast-2.c	2016-05-13 17:32:21.203853901 +0200
@@ -0,0 +1,68 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mavx512bw" } */
+
+typedef char V1 __attribute__((vector_size (16)));
+typedef short V2 __attribute__((vector_size (16)));
+typedef char V5 __attribute__((vector_size (32)));
+typedef short V6 __attribute__((vector_size (32)));
+typedef int V7 __attribute__((vector_size (32)));
+
+void
+f1 (V1 x)
+{
+  register V1 a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = __builtin_shuffle (a, (V1) { 0 });
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastb\[^\n\r]*xmm16\[^\n\r]*xmm16" } } */
+
+void
+f2 (V2 x)
+{
+  register V2 a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = __builtin_shuffle (a, (V2) { 0 });
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastw\[^\n\r]*xmm16\[^\n\r]*xmm16" } } */
+
+void
+f5 (V5 x)
+{
+  register V5 a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = __builtin_shuffle (a, (V5) { 0 });
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastb\[^\n\r]*(xmm16\[^\n\r]*ymm16|ymm16\[^\n\r]*xmm16)" } } */
+
+void
+f6 (V6 x)
+{
+  register V6 a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = __builtin_shuffle (a, (V6) { 0 });
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastw\[^\n\r]*(xmm16\[^\n\r]*ymm16|ymm16\[^\n\r]*xmm16)" } } */
+
+void
+f7 (V7 x)
+{
+  register V7 a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = __builtin_shuffle (a, (V7) { 0 });
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastd\[^\n\r]*(xmm16\[^\n\r]*ymm16|ymm16\[^\n\r]*xmm16)" } } */
--- gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcast-3.c.jj	2016-05-13 17:24:01.236896680 +0200
+++ gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcast-3.c	2016-05-13 17:19:57.000000000 +0200
@@ -0,0 +1,58 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mavx512bw" } */
+
+typedef char V1 __attribute__((vector_size (16)));
+typedef short V2 __attribute__((vector_size (16)));
+typedef char V5 __attribute__((vector_size (32)));
+typedef short V6 __attribute__((vector_size (32)));
+typedef int V7 __attribute__((vector_size (32)));
+
+void
+f1 (V1 *x)
+{
+  register V1 a __asm ("xmm16");
+  a = __builtin_shuffle (*x, (V1) { 0 });
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastb\[^\n\r]*xmm16" } } */
+
+void
+f2 (V2 *x)
+{
+  register V2 a __asm ("xmm16");
+  a = __builtin_shuffle (*x, (V2) { 0 });
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastw\[^\n\r]*xmm16" } } */
+
+void
+f5 (V5 *x)
+{
+  register V5 a __asm ("xmm16");
+  a = __builtin_shuffle (*x, (V5) { 0 });
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastb\[^\n\r]*ymm16" } } */
+
+void
+f6 (V6 *x)
+{
+  register V6 a __asm ("xmm16");
+  a = __builtin_shuffle (*x, (V6) { 0 });
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastw\[^\n\r]*ymm16" } } */
+
+void
+f7 (V7 *x)
+{
+  register V7 a __asm ("xmm16");
+  a = __builtin_shuffle (*x, (V7) { 0 });
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastd\[^\n\r]*ymm16" } } */
--- gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcast-1.c.jj	2016-05-13 16:58:38.167685897 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcast-1.c	2016-05-13 17:29:45.144162649 +0200
@@ -0,0 +1,104 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mno-avx512bw" } */
+
+#include <x86intrin.h>
+
+void
+f1 (__m128i x)
+{
+  register __m128i a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = _mm_broadcastb_epi8 (a);
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler-not "vpbroadcastb\[^\n\r]*xmm16" } } */
+
+void
+f2 (__m128i x)
+{
+  register __m128i a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = _mm_broadcastw_epi16 (a);
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler-not "vpbroadcastw\[^\n\r]*xmm16" } } */
+
+void
+f3 (__m128i x)
+{
+  register __m128i a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = _mm_broadcastd_epi32 (a);
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastd\[^\n\r]*xmm16\[^\n\r]*xmm16" } } */
+
+void
+f4 (__m128i x)
+{
+  register __m128i a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = _mm_broadcastq_epi64 (a);
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastq\[^\n\r]*xmm16\[^\n\r]*xmm16" } } */
+
+void
+f5 (__m128i x)
+{
+  register __m128i a __asm ("xmm16");
+  register __m256i b __asm ("xmm17");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  b = _mm256_broadcastb_epi8 (a);
+  asm volatile ("" : "+v" (b));
+}
+
+/* { dg-final { scan-assembler-not "vpbroadcastb\[^\n\r]*\[xy]mm1\[67]" } } */
+
+void
+f6 (__m128i x)
+{
+  register __m128i a __asm ("xmm16");
+  register __m256i b __asm ("xmm17");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  b = _mm256_broadcastw_epi16 (a);
+  asm volatile ("" : "+v" (b));
+}
+
+/* { dg-final { scan-assembler-not "vpbroadcastw\[^\n\r]*\[xy]mm1\[67]" } } */
+
+void
+f7 (__m128i x)
+{
+  register __m128i a __asm ("xmm16");
+  register __m256i b __asm ("xmm17");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  b = _mm256_broadcastd_epi32 (a);
+  asm volatile ("" : "+v" (b));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastd\[^\n\r]*(xmm1\[67]\[^\n\r]*ymm1\[67]|ymm1\[67]\[^\n\r]*xmm1\[67])" } } */
+
+void
+f8 (__m128i x)
+{
+  register __m128i a __asm ("xmm16");
+  register __m256i b __asm ("xmm17");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  b = _mm256_broadcastq_epi64 (a);
+  asm volatile ("" : "+v" (b));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastq\[^\n\r]*(xmm1\[67]\[^\n\r]*ymm1\[67]|ymm1\[67]\[^\n\r]*xmm1\[67])" } } */
--- gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcast-2.c.jj	2016-05-13 17:24:27.606562792 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcast-2.c	2016-05-13 17:28:19.176240587 +0200
@@ -0,0 +1,68 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mno-avx512bw" } */
+
+typedef char V1 __attribute__((vector_size (16)));
+typedef short V2 __attribute__((vector_size (16)));
+typedef char V5 __attribute__((vector_size (32)));
+typedef short V6 __attribute__((vector_size (32)));
+typedef int V7 __attribute__((vector_size (32)));
+
+void
+f1 (V1 x)
+{
+  register V1 a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = __builtin_shuffle (a, (V1) { 0 });
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler-not "vpbroadcastb\[^\n\r]*xmm16" } } */
+
+void
+f2 (V2 x)
+{
+  register V2 a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = __builtin_shuffle (a, (V2) { 0 });
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler-not "vpbroadcastw\[^\n\r]*xmm16" } } */
+
+void
+f5 (V5 x)
+{
+  register V5 a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = __builtin_shuffle (a, (V5) { 0 });
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler-not "vpbroadcastb\[^\n\r]*\[xy]mm16" } } */
+
+void
+f6 (V6 x)
+{
+  register V6 a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = __builtin_shuffle (a, (V6) { 0 });
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler-not "vpbroadcastw\[^\n\r]*\[xy]mm16" } } */
+
+void
+f7 (V7 x)
+{
+  register V7 a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = __builtin_shuffle (a, (V7) { 0 });
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastd\[^\n\r]*(xmm16\[^\n\r]*ymm16|ymm16\[^\n\r]*xmm16)" } } */
--- gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcast-3.c.jj	2016-05-13 17:24:32.113506886 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcast-3.c	2016-05-13 17:26:31.587970561 +0200
@@ -0,0 +1,58 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mno-avx512bw" } */
+
+typedef char V1 __attribute__((vector_size (16)));
+typedef short V2 __attribute__((vector_size (16)));
+typedef char V5 __attribute__((vector_size (32)));
+typedef short V6 __attribute__((vector_size (32)));
+typedef int V7 __attribute__((vector_size (32)));
+
+void
+f1 (V1 *x)
+{
+  register V1 a __asm ("xmm16");
+  a = __builtin_shuffle (*x, (V1) { 0 });
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler-not "vpbroadcastb\[^\n\r]*xmm16" } } */
+
+void
+f2 (V2 *x)
+{
+  register V2 a __asm ("xmm16");
+  a = __builtin_shuffle (*x, (V2) { 0 });
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler-not "vpbroadcastw\[^\n\r]*xmm16" } } */
+
+void
+f5 (V5 *x)
+{
+  register V5 a __asm ("xmm16");
+  a = __builtin_shuffle (*x, (V5) { 0 });
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler-not "vpbroadcastb\[^\n\r]*ymm16" } } */
+
+void
+f6 (V6 *x)
+{
+  register V6 a __asm ("xmm16");
+  a = __builtin_shuffle (*x, (V6) { 0 });
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler-not "vpbroadcastw\[^\n\r]*ymm16" } } */
+
+void
+f7 (V7 *x)
+{
+  register V7 a __asm ("xmm16");
+  a = __builtin_shuffle (*x, (V7) { 0 });
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vpbroadcastd\[^\n\r]*ymm16" } } */