diff mbox

Improve whole vector right shift

Message ID 20160504195100.GS26501@tucnak.zalov.cz
State New
Headers show

Commit Message

Jakub Jelinek May 4, 2016, 7:51 p.m. UTC
Hi!

In this case the situation is more complicated, because for
V*HI we need both avx512bw and avx512vl, while for V*SI only avx512vl
is needed, and both sets of modes are handled by the same pattern.
We already have a pattern that does the right thing, placed right
after "ashr<mode>3" - but because it comes later, "ashr<mode>3" wins
during recog and limits RA decisions.

The new testcases show that moving the pattern before "ashr<mode>3"
improves the situation.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-05-04  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/sse.md (<mask_codefor>ashr<mode>3<mask_name>): Move
	before the ashr<mode>3 pattern.

	* gcc.target/i386/avx512bw-vpsraw-3.c: New test.
	* gcc.target/i386/avx512vl-vpsrad-3.c: New test.


	Jakub

Comments

Kirill Yukhin May 6, 2016, 11:49 a.m. UTC | #1
On 04 May 21:51, Jakub Jelinek wrote:
> Hi!
> 
> In this case the situation is more complicated, because for
> V*HI we need avx512bw and avx512vl, while for V*SI only avx512vl
> is needed and both are in the same pattern.  But we already have
> a pattern that does the right thing right after the "ashr<mode>3"
> - but as it is after it, the "ashr<mode>3" will win during recog
> and will limit RA decisions.
> 
> The testcase shows that moving the pattern improves it.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK for trunk.

--
Thanks, K
> 
> 2016-05-04  Jakub Jelinek  <jakub@redhat.com>
> 
> 	* config/i386/sse.md (<mask_codefor>ashr<mode>3<mask_name>): Move
> 	before the ashr<mode>3 pattern.
> 
> 	* gcc.target/i386/avx512bw-vpsraw-3.c: New test.
> 	* gcc.target/i386/avx512vl-vpsrad-3.c: New test.
> 
> --- gcc/config/i386/sse.md.jj	2016-05-04 16:54:31.000000000 +0200
> +++ gcc/config/i386/sse.md	2016-05-04 16:55:31.155848054 +0200
> @@ -10088,6 +10088,20 @@ (define_expand "usadv32qi"
>    DONE;
>  })
>  
> +(define_insn "<mask_codefor>ashr<mode>3<mask_name>"
> +  [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
> +	(ashiftrt:VI24_AVX512BW_1
> +	  (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
> +	  (match_operand:SI 2 "nonmemory_operand" "v,N")))]
> +  "TARGET_AVX512VL"
> +  "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
> +  [(set_attr "type" "sseishft")
> +   (set (attr "length_immediate")
> +     (if_then_else (match_operand 2 "const_int_operand")
> +       (const_string "1")
> +       (const_string "0")))
> +   (set_attr "mode" "<sseinsnmode>")])
> +
>  (define_insn "ashr<mode>3"
>    [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
>  	(ashiftrt:VI24_AVX2
> @@ -10107,20 +10121,6 @@ (define_insn "ashr<mode>3"
>     (set_attr "prefix" "orig,vex")
>     (set_attr "mode" "<sseinsnmode>")])
>  
> -(define_insn "<mask_codefor>ashr<mode>3<mask_name>"
> -  [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
> -	(ashiftrt:VI24_AVX512BW_1
> -	  (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
> -	  (match_operand:SI 2 "nonmemory_operand" "v,N")))]
> -  "TARGET_AVX512VL"
> -  "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
> -  [(set_attr "type" "sseishft")
> -   (set (attr "length_immediate")
> -     (if_then_else (match_operand 2 "const_int_operand")
> -       (const_string "1")
> -       (const_string "0")))
> -   (set_attr "mode" "<sseinsnmode>")])
> -
>  (define_insn "<mask_codefor>ashrv2di3<mask_name>"
>    [(set (match_operand:V2DI 0 "register_operand" "=v,v")
>  	(ashiftrt:V2DI
> --- gcc/testsuite/gcc.target/i386/avx512bw-vpsraw-3.c.jj	2016-05-04 17:01:52.332810541 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512bw-vpsraw-3.c	2016-05-04 17:02:56.104966537 +0200
> @@ -0,0 +1,44 @@
> +/* { dg-do assemble { target { avx512bw && { avx512vl && { ! ia32 } } } } } */
> +/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
> +
> +#include <x86intrin.h>
> +
> +void
> +f1 (__m128i x, int y)
> +{
> +  register __m128i a __asm ("xmm16");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  a = _mm_srai_epi16 (a, y);
> +  asm volatile ("" : "+v" (a));
> +}
> +
> +void
> +f2 (__m128i x)
> +{
> +  register __m128i a __asm ("xmm16");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  a = _mm_srai_epi16 (a, 16);
> +  asm volatile ("" : "+v" (a));
> +}
> +
> +void
> +f3 (__m256i x, int y)
> +{
> +  register __m256i a __asm ("xmm16");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  a = _mm256_srai_epi16 (a, y);
> +  asm volatile ("" : "+v" (a));
> +}
> +
> +void
> +f4 (__m256i x)
> +{
> +  register __m256i a __asm ("xmm16");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  a = _mm256_srai_epi16 (a, 16);
> +  asm volatile ("" : "+v" (a));
> +}
> --- gcc/testsuite/gcc.target/i386/avx512vl-vpsrad-3.c.jj	2016-05-04 17:01:58.770725338 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512vl-vpsrad-3.c	2016-05-04 17:00:16.000000000 +0200
> @@ -0,0 +1,44 @@
> +/* { dg-do assemble { target { avx512vl && { ! ia32 } } } } */
> +/* { dg-options "-O2 -mavx512vl" } */
> +
> +#include <x86intrin.h>
> +
> +void
> +f1 (__m128i x, int y)
> +{
> +  register __m128i a __asm ("xmm16");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  a = _mm_srai_epi32 (a, y);
> +  asm volatile ("" : "+v" (a));
> +}
> +
> +void
> +f2 (__m128i x)
> +{
> +  register __m128i a __asm ("xmm16");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  a = _mm_srai_epi32 (a, 16);
> +  asm volatile ("" : "+v" (a));
> +}
> +
> +void
> +f3 (__m256i x, int y)
> +{
> +  register __m256i a __asm ("xmm16");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  a = _mm256_srai_epi32 (a, y);
> +  asm volatile ("" : "+v" (a));
> +}
> +
> +void
> +f4 (__m256i x)
> +{
> +  register __m256i a __asm ("xmm16");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  a = _mm256_srai_epi32 (a, 16);
> +  asm volatile ("" : "+v" (a));
> +}
> 
> 	Jakub
diff mbox

Patch

--- gcc/config/i386/sse.md.jj	2016-05-04 16:54:31.000000000 +0200
+++ gcc/config/i386/sse.md	2016-05-04 16:55:31.155848054 +0200
@@ -10088,6 +10088,20 @@  (define_expand "usadv32qi"
   DONE;
 })
 
+(define_insn "<mask_codefor>ashr<mode>3<mask_name>"
+  [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
+	(ashiftrt:VI24_AVX512BW_1
+	  (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
+	  (match_operand:SI 2 "nonmemory_operand" "v,N")))]
+  "TARGET_AVX512VL"
+  "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+  [(set_attr "type" "sseishft")
+   (set (attr "length_immediate")
+     (if_then_else (match_operand 2 "const_int_operand")
+       (const_string "1")
+       (const_string "0")))
+   (set_attr "mode" "<sseinsnmode>")])
+
 (define_insn "ashr<mode>3"
   [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
 	(ashiftrt:VI24_AVX2
@@ -10107,20 +10121,6 @@  (define_insn "ashr<mode>3"
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "<mask_codefor>ashr<mode>3<mask_name>"
-  [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
-	(ashiftrt:VI24_AVX512BW_1
-	  (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
-	  (match_operand:SI 2 "nonmemory_operand" "v,N")))]
-  "TARGET_AVX512VL"
-  "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
-  [(set_attr "type" "sseishft")
-   (set (attr "length_immediate")
-     (if_then_else (match_operand 2 "const_int_operand")
-       (const_string "1")
-       (const_string "0")))
-   (set_attr "mode" "<sseinsnmode>")])
-
 (define_insn "<mask_codefor>ashrv2di3<mask_name>"
   [(set (match_operand:V2DI 0 "register_operand" "=v,v")
 	(ashiftrt:V2DI
--- gcc/testsuite/gcc.target/i386/avx512bw-vpsraw-3.c.jj	2016-05-04 17:01:52.332810541 +0200
+++ gcc/testsuite/gcc.target/i386/avx512bw-vpsraw-3.c	2016-05-04 17:02:56.104966537 +0200
@@ -0,0 +1,44 @@ 
+/* { dg-do assemble { target { avx512bw && { avx512vl && { ! ia32 } } } } } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+
+#include <x86intrin.h>
+
+void
+f1 (__m128i x, int y)
+{
+  register __m128i a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = _mm_srai_epi16 (a, y);
+  asm volatile ("" : "+v" (a));
+}
+
+void
+f2 (__m128i x)
+{
+  register __m128i a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = _mm_srai_epi16 (a, 16);
+  asm volatile ("" : "+v" (a));
+}
+
+void
+f3 (__m256i x, int y)
+{
+  register __m256i a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = _mm256_srai_epi16 (a, y);
+  asm volatile ("" : "+v" (a));
+}
+
+void
+f4 (__m256i x)
+{
+  register __m256i a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = _mm256_srai_epi16 (a, 16);
+  asm volatile ("" : "+v" (a));
+}
--- gcc/testsuite/gcc.target/i386/avx512vl-vpsrad-3.c.jj	2016-05-04 17:01:58.770725338 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vpsrad-3.c	2016-05-04 17:00:16.000000000 +0200
@@ -0,0 +1,44 @@ 
+/* { dg-do assemble { target { avx512vl && { ! ia32 } } } } */
+/* { dg-options "-O2 -mavx512vl" } */
+
+#include <x86intrin.h>
+
+void
+f1 (__m128i x, int y)
+{
+  register __m128i a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = _mm_srai_epi32 (a, y);
+  asm volatile ("" : "+v" (a));
+}
+
+void
+f2 (__m128i x)
+{
+  register __m128i a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = _mm_srai_epi32 (a, 16);
+  asm volatile ("" : "+v" (a));
+}
+
+void
+f3 (__m256i x, int y)
+{
+  register __m256i a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = _mm256_srai_epi32 (a, y);
+  asm volatile ("" : "+v" (a));
+}
+
+void
+f4 (__m256i x)
+{
+  register __m256i a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a = _mm256_srai_epi32 (a, 16);
+  asm volatile ("" : "+v" (a));
+}