diff mbox

Improve *vec_concatv2si_sse4_1

Message ID 20160526192442.GC28550@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek May 26, 2016, 7:24 p.m. UTC
On Thu, May 26, 2016 at 07:39:01PM +0200, Uros Bizjak wrote:
> On Thu, May 26, 2016 at 7:05 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> > Hi!
> >
> > This patch adds an avx512dq alternative (EVEX vpinsrd requires that) and
> > enables EVEX vmovd and vpunpckldq.
> >
> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> >
> > 2016-05-26  Jakub Jelinek  <jakub@redhat.com>
> >
> >         * config/i386/sse.md (*vec_concatv2si_sse4_1): Add avx512dq v=Yv,rm
> >         alternative.  Change x=x,x alternative to v=Yv,Yv and x=rm,C
> >         alternative to v=rm,C.
> >
> >         * gcc.target/i386/avx512dq-concatv2si-1.c: New test.
> >         * gcc.target/i386/avx512vl-concatv2si-1.c: New test.
> 
> Ouch, I have just changed these mega strings in attribute definitions
> to something more readable. Can you please redo the attribute part? It
> should be a much more pleasant experience than counting all the
> commas...

Here is an updated version of this patch (the other two pending sse.md patches
from me still apply cleanly):

2016-05-26  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/sse.md (*vec_concatv2si_sse4_1): Add avx512dq v=Yv,rm
	alternative.  Change x=x,x alternative to v=Yv,Yv and x=rm,C
	alternative to v=rm,C.

	* gcc.target/i386/avx512dq-concatv2si-1.c: New test.
	* gcc.target/i386/avx512vl-concatv2si-1.c: New test.



	Jakub

Comments

Uros Bizjak June 2, 2016, 10:49 a.m. UTC | #1
On Thu, May 26, 2016 at 9:24 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Thu, May 26, 2016 at 07:39:01PM +0200, Uros Bizjak wrote:
>> On Thu, May 26, 2016 at 7:05 PM, Jakub Jelinek <jakub@redhat.com> wrote:
>> > Hi!
>> >
>> > This patch adds an avx512dq alternative (EVEX vpinsrd requires that) and
>> > enables EVEX vmovd and vpunpckldq.
>> >
>> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>> >
>> > 2016-05-26  Jakub Jelinek  <jakub@redhat.com>
>> >
>> >         * config/i386/sse.md (*vec_concatv2si_sse4_1): Add avx512dq v=Yv,rm
>> >         alternative.  Change x=x,x alternative to v=Yv,Yv and x=rm,C
>> >         alternative to v=rm,C.
>> >
>> >         * gcc.target/i386/avx512dq-concatv2si-1.c: New test.
>> >         * gcc.target/i386/avx512vl-concatv2si-1.c: New test.
>>
>> Ouch, I have just changed these mega strings in attribute definitions
>> to something more readable. Can you please redo the attribute part? It
>> should be a much more pleasant experience than counting all the
>> commas...
>
> Here is an updated version of this patch (the other two pending sse.md patches
> from me still apply cleanly):
>
> 2016-05-26  Jakub Jelinek  <jakub@redhat.com>
>
>         * config/i386/sse.md (*vec_concatv2si_sse4_1): Add avx512dq v=Yv,rm
>         alternative.  Change x=x,x alternative to v=Yv,Yv and x=rm,C
>         alternative to v=rm,C.
>
>         * gcc.target/i386/avx512dq-concatv2si-1.c: New test.
>         * gcc.target/i386/avx512vl-concatv2si-1.c: New test.

OK.

Thanks,
Uros.

> --- gcc/config/i386/sse.md.jj   2016-05-26 10:44:25.000000000 +0200
> +++ gcc/config/i386/sse.md      2016-05-26 14:22:26.819313220 +0200
> @@ -13488,43 +13488,44 @@
>
>  (define_insn "*vec_concatv2si_sse4_1"
>    [(set (match_operand:V2SI 0 "register_operand"
> -         "=Yr,*x,x, Yr,*x,x, x, *y,*y")
> +         "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
>         (vec_concat:V2SI
>           (match_operand:SI 1 "nonimmediate_operand"
> -         "  0, 0,x,  0,0, x,rm,  0,rm")
> +         "  0, 0, x,Yv, 0, 0,Yv,rm,  0,rm")
>           (match_operand:SI 2 "vector_move_operand"
> -         " rm,rm,rm,Yr,*x,x, C,*ym, C")))]
> +         " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
>    "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
>    "@
>     pinsrd\t{$1, %2, %0|%0, %2, 1}
>     pinsrd\t{$1, %2, %0|%0, %2, 1}
>     vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
> +   vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
>     punpckldq\t{%2, %0|%0, %2}
>     punpckldq\t{%2, %0|%0, %2}
>     vpunpckldq\t{%2, %1, %0|%0, %1, %2}
>     %vmovd\t{%1, %0|%0, %1}
>     punpckldq\t{%2, %0|%0, %2}
>     movd\t{%1, %0|%0, %1}"
> -  [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
> +  [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
>     (set (attr "type")
> -     (cond [(eq_attr "alternative" "6")
> +     (cond [(eq_attr "alternative" "7")
>               (const_string "ssemov")
> -           (eq_attr "alternative" "7")
> -             (const_string "mmxcvt")
>             (eq_attr "alternative" "8")
> +             (const_string "mmxcvt")
> +           (eq_attr "alternative" "9")
>               (const_string "mmxmov")
>            ]
>            (const_string "sselog")))
>     (set (attr "prefix_extra")
> -     (if_then_else (eq_attr "alternative" "0,1,2")
> +     (if_then_else (eq_attr "alternative" "0,1,2,3")
>                    (const_string "1")
>                    (const_string "*")))
>     (set (attr "length_immediate")
> -     (if_then_else (eq_attr "alternative" "0,1,2")
> +     (if_then_else (eq_attr "alternative" "0,1,2,3")
>                    (const_string "1")
>                    (const_string "*")))
> -   (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
> -   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,DI,DI")])
> +   (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
> +   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
>
>  ;; ??? In theory we can match memory for the MMX alternative, but allowing
>  ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
> --- gcc/testsuite/gcc.target/i386/avx512dq-concatv2si-1.c.jj    2016-05-26 15:14:55.853786550 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512dq-concatv2si-1.c       2016-05-26 15:13:57.000000000 +0200
> @@ -0,0 +1,43 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512vl -mavx512dq -masm=att" } */
> +
> +typedef int V __attribute__((vector_size (8)));
> +
> +void
> +f1 (int x, int y)
> +{
> +  register int a __asm ("xmm16");
> +  register int b __asm ("xmm17");
> +  register V c __asm ("xmm3");
> +  a = x;
> +  b = y;
> +  asm volatile ("" : "+v" (a), "+v" (b));
> +  c = (V) { a, b };
> +  asm volatile ("" : "+v" (c));
> +}
> +
> +/* { dg-final { scan-assembler "vpunpckldq\[^\n\r]*%xmm17\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */
> +
> +void
> +f2 (int x, int y)
> +{
> +  register int a __asm ("xmm16");
> +  register V c __asm ("xmm3");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  c = (V) { a, y };
> +  asm volatile ("" : "+v" (c));
> +}
> +
> +void
> +f3 (int x, int *y)
> +{
> +  register int a __asm ("xmm16");
> +  register V c __asm ("xmm3");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  c = (V) { a, *y };
> +  asm volatile ("" : "+v" (c));
> +}
> +
> +/* { dg-final { scan-assembler-times "vpinsrd\[^\n\r]*\\\$1\[^\n\r]*%xmm16\[^\n\r]*%xmm3" 2 } } */
> --- gcc/testsuite/gcc.target/i386/avx512vl-concatv2si-1.c.jj    2016-05-26 15:15:11.921574803 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512vl-concatv2si-1.c       2016-05-26 15:16:24.936612585 +0200
> @@ -0,0 +1,43 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512vl -mno-avx512dq -masm=att" } */
> +
> +typedef int V __attribute__((vector_size (8)));
> +
> +void
> +f1 (int x, int y)
> +{
> +  register int a __asm ("xmm16");
> +  register int b __asm ("xmm17");
> +  register V c __asm ("xmm3");
> +  a = x;
> +  b = y;
> +  asm volatile ("" : "+v" (a), "+v" (b));
> +  c = (V) { a, b };
> +  asm volatile ("" : "+v" (c));
> +}
> +
> +/* { dg-final { scan-assembler "vpunpckldq\[^\n\r]*%xmm17\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */
> +
> +void
> +f2 (int x, int y)
> +{
> +  register int a __asm ("xmm16");
> +  register V c __asm ("xmm3");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  c = (V) { a, y };
> +  asm volatile ("" : "+v" (c));
> +}
> +
> +void
> +f3 (int x, int *y)
> +{
> +  register int a __asm ("xmm16");
> +  register V c __asm ("xmm3");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  c = (V) { a, *y };
> +  asm volatile ("" : "+v" (c));
> +}
> +
> +/* { dg-final { scan-assembler-not "vpinsrd\[^\n\r]*\\\$1\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */
>
>
>         Jakub
diff mbox

Patch

--- gcc/config/i386/sse.md.jj	2016-05-26 10:44:25.000000000 +0200
+++ gcc/config/i386/sse.md	2016-05-26 14:22:26.819313220 +0200
@@ -13488,43 +13488,44 @@ 
 
 (define_insn "*vec_concatv2si_sse4_1"
   [(set (match_operand:V2SI 0 "register_operand"
-	  "=Yr,*x,x, Yr,*x,x, x, *y,*y")
+	  "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
 	(vec_concat:V2SI
 	  (match_operand:SI 1 "nonimmediate_operand"
-	  "  0, 0,x,  0,0, x,rm,  0,rm")
+	  "  0, 0, x,Yv, 0, 0,Yv,rm,  0,rm")
 	  (match_operand:SI 2 "vector_move_operand"
-	  " rm,rm,rm,Yr,*x,x, C,*ym, C")))]
+	  " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
   "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "@
    pinsrd\t{$1, %2, %0|%0, %2, 1}
    pinsrd\t{$1, %2, %0|%0, %2, 1}
    vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
+   vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
    punpckldq\t{%2, %0|%0, %2}
    punpckldq\t{%2, %0|%0, %2}
    vpunpckldq\t{%2, %1, %0|%0, %1, %2}
    %vmovd\t{%1, %0|%0, %1}
    punpckldq\t{%2, %0|%0, %2}
    movd\t{%1, %0|%0, %1}"
-  [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
+  [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
    (set (attr "type")
-     (cond [(eq_attr "alternative" "6")
+     (cond [(eq_attr "alternative" "7")
 	      (const_string "ssemov")
-	    (eq_attr "alternative" "7")
-	      (const_string "mmxcvt")
 	    (eq_attr "alternative" "8")
+	      (const_string "mmxcvt")
+	    (eq_attr "alternative" "9")
 	      (const_string "mmxmov")
 	   ]
 	   (const_string "sselog")))
    (set (attr "prefix_extra")
-     (if_then_else (eq_attr "alternative" "0,1,2")
+     (if_then_else (eq_attr "alternative" "0,1,2,3")
 		   (const_string "1")
 		   (const_string "*")))
    (set (attr "length_immediate")
-     (if_then_else (eq_attr "alternative" "0,1,2")
+     (if_then_else (eq_attr "alternative" "0,1,2,3")
 		   (const_string "1")
 		   (const_string "*")))
-   (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
-   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,DI,DI")])
+   (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
+   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
 
 ;; ??? In theory we can match memory for the MMX alternative, but allowing
 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
--- gcc/testsuite/gcc.target/i386/avx512dq-concatv2si-1.c.jj	2016-05-26 15:14:55.853786550 +0200
+++ gcc/testsuite/gcc.target/i386/avx512dq-concatv2si-1.c	2016-05-26 15:13:57.000000000 +0200
@@ -0,0 +1,43 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mavx512dq -masm=att" } */
+
+typedef int V __attribute__((vector_size (8)));
+
+void
+f1 (int x, int y)
+{
+  register int a __asm ("xmm16");
+  register int b __asm ("xmm17");
+  register V c __asm ("xmm3");
+  a = x;
+  b = y;
+  asm volatile ("" : "+v" (a), "+v" (b));
+  c = (V) { a, b };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vpunpckldq\[^\n\r]*%xmm17\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */
+
+void
+f2 (int x, int y)
+{
+  register int a __asm ("xmm16");
+  register V c __asm ("xmm3");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, y };
+  asm volatile ("" : "+v" (c));
+}
+
+void
+f3 (int x, int *y)
+{
+  register int a __asm ("xmm16");
+  register V c __asm ("xmm3");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, *y };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler-times "vpinsrd\[^\n\r]*\\\$1\[^\n\r]*%xmm16\[^\n\r]*%xmm3" 2 } } */
--- gcc/testsuite/gcc.target/i386/avx512vl-concatv2si-1.c.jj	2016-05-26 15:15:11.921574803 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-concatv2si-1.c	2016-05-26 15:16:24.936612585 +0200
@@ -0,0 +1,43 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mno-avx512dq -masm=att" } */
+
+typedef int V __attribute__((vector_size (8)));
+
+void
+f1 (int x, int y)
+{
+  register int a __asm ("xmm16");
+  register int b __asm ("xmm17");
+  register V c __asm ("xmm3");
+  a = x;
+  b = y;
+  asm volatile ("" : "+v" (a), "+v" (b));
+  c = (V) { a, b };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vpunpckldq\[^\n\r]*%xmm17\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */
+
+void
+f2 (int x, int y)
+{
+  register int a __asm ("xmm16");
+  register V c __asm ("xmm3");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, y };
+  asm volatile ("" : "+v" (c));
+}
+
+void
+f3 (int x, int *y)
+{
+  register int a __asm ("xmm16");
+  register V c __asm ("xmm3");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, *y };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler-not "vpinsrd\[^\n\r]*\\\$1\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */