diff mbox

Improve *vec_concatv2si_sse4_1

Message ID 20160526170545.GZ28550@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek May 26, 2016, 5:05 p.m. UTC
Hi!

This patch adds an avx512dq alternative (EVEX vpinsrd requires that) and
enables EVEX vmovd and vpunpckldq.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-05-26  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/sse.md (*vec_concatv2si_sse4_1): Add avx512dq v=Yv,rm
	alternative.  Change x=x,x alternative to v=Yv,Yv and x=rm,C
	alternative to v=rm,C.

	* gcc.target/i386/avx512dq-concatv2si-1.c: New test.
	* gcc.target/i386/avx512vl-concatv2si-1.c: New test.


	Jakub

Comments

Uros Bizjak May 26, 2016, 5:39 p.m. UTC | #1
On Thu, May 26, 2016 at 7:05 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> This patch adds an avx512dq alternative (EVEX vpinsrd requires that) and
> enables EVEX vmovd and vpunpckldq.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2016-05-26  Jakub Jelinek  <jakub@redhat.com>
>
>         * config/i386/sse.md (*vec_concatv2si_sse4_1): Add avx512dq v=Yv,rm
>         alternative.  Change x=x,x alternative to v=Yv,Yv and x=rm,C
>         alternative to v=rm,C.
>
>         * gcc.target/i386/avx512dq-concatv2si-1.c: New test.
>         * gcc.target/i386/avx512vl-concatv2si-1.c: New test.

Ouch, I have just changed these mega strings in attribute definitions
to something more readable. Can you please redo the attribute part? It
should be a much more pleasant experience than counting all the
commas...

Uros.

> --- gcc/config/i386/sse.md.jj   2016-05-26 10:44:25.000000000 +0200
> +++ gcc/config/i386/sse.md      2016-05-26 14:22:26.819313220 +0200
> @@ -13339,29 +13339,30 @@ (define_split
>
>  (define_insn "*vec_concatv2si_sse4_1"
>    [(set (match_operand:V2SI 0 "register_operand"
> -         "=Yr,*x,x, Yr,*x,x, x, *y,*y")
> +         "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
>         (vec_concat:V2SI
>           (match_operand:SI 1 "nonimmediate_operand"
> -         "  0, 0,x,  0,0, x,rm,  0,rm")
> +         "  0, 0, x,Yv, 0, 0,Yv,rm,  0,rm")
>           (match_operand:SI 2 "vector_move_operand"
> -         " rm,rm,rm,Yr,*x,x, C,*ym, C")))]
> +         " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
>    "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
>    "@
>     pinsrd\t{$1, %2, %0|%0, %2, 1}
>     pinsrd\t{$1, %2, %0|%0, %2, 1}
>     vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
> +   vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
>     punpckldq\t{%2, %0|%0, %2}
>     punpckldq\t{%2, %0|%0, %2}
>     vpunpckldq\t{%2, %1, %0|%0, %1, %2}
>     %vmovd\t{%1, %0|%0, %1}
>     punpckldq\t{%2, %0|%0, %2}
>     movd\t{%1, %0|%0, %1}"
> -  [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
> -   (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
> -   (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*")
> -   (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*")
> -   (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
> -   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,DI,DI")])
> +  [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
> +   (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
> +   (set_attr "prefix_extra" "1,1,1,1,*,*,*,*,*,*")
> +   (set_attr "length_immediate" "1,1,1,1,*,*,*,*,*,*")
> +   (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
> +   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
>
>  ;; ??? In theory we can match memory for the MMX alternative, but allowing
>  ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
> --- gcc/testsuite/gcc.target/i386/avx512dq-concatv2si-1.c.jj    2016-05-26 15:14:55.853786550 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512dq-concatv2si-1.c       2016-05-26 15:13:57.000000000 +0200
> @@ -0,0 +1,43 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512vl -mavx512dq -masm=att" } */
> +
> +typedef int V __attribute__((vector_size (8)));
> +
> +void
> +f1 (int x, int y)
> +{
> +  register int a __asm ("xmm16");
> +  register int b __asm ("xmm17");
> +  register V c __asm ("xmm3");
> +  a = x;
> +  b = y;
> +  asm volatile ("" : "+v" (a), "+v" (b));
> +  c = (V) { a, b };
> +  asm volatile ("" : "+v" (c));
> +}
> +
> +/* { dg-final { scan-assembler "vpunpckldq\[^\n\r]*%xmm17\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */
> +
> +void
> +f2 (int x, int y)
> +{
> +  register int a __asm ("xmm16");
> +  register V c __asm ("xmm3");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  c = (V) { a, y };
> +  asm volatile ("" : "+v" (c));
> +}
> +
> +void
> +f3 (int x, int *y)
> +{
> +  register int a __asm ("xmm16");
> +  register V c __asm ("xmm3");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  c = (V) { a, *y };
> +  asm volatile ("" : "+v" (c));
> +}
> +
> +/* { dg-final { scan-assembler-times "vpinsrd\[^\n\r]*\\\$1\[^\n\r]*%xmm16\[^\n\r]*%xmm3" 2 } } */
> --- gcc/testsuite/gcc.target/i386/avx512vl-concatv2si-1.c.jj    2016-05-26 15:15:11.921574803 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512vl-concatv2si-1.c       2016-05-26 15:16:24.936612585 +0200
> @@ -0,0 +1,43 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512vl -mno-avx512dq -masm=att" } */
> +
> +typedef int V __attribute__((vector_size (8)));
> +
> +void
> +f1 (int x, int y)
> +{
> +  register int a __asm ("xmm16");
> +  register int b __asm ("xmm17");
> +  register V c __asm ("xmm3");
> +  a = x;
> +  b = y;
> +  asm volatile ("" : "+v" (a), "+v" (b));
> +  c = (V) { a, b };
> +  asm volatile ("" : "+v" (c));
> +}
> +
> +/* { dg-final { scan-assembler "vpunpckldq\[^\n\r]*%xmm17\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */
> +
> +void
> +f2 (int x, int y)
> +{
> +  register int a __asm ("xmm16");
> +  register V c __asm ("xmm3");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  c = (V) { a, y };
> +  asm volatile ("" : "+v" (c));
> +}
> +
> +void
> +f3 (int x, int *y)
> +{
> +  register int a __asm ("xmm16");
> +  register V c __asm ("xmm3");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  c = (V) { a, *y };
> +  asm volatile ("" : "+v" (c));
> +}
> +
> +/* { dg-final { scan-assembler-not "vpinsrd\[^\n\r]*\\\$1\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */
>
>         Jakub
diff mbox

Patch

--- gcc/config/i386/sse.md.jj	2016-05-26 10:44:25.000000000 +0200
+++ gcc/config/i386/sse.md	2016-05-26 14:22:26.819313220 +0200
@@ -13339,29 +13339,30 @@  (define_split
 
 (define_insn "*vec_concatv2si_sse4_1"
   [(set (match_operand:V2SI 0 "register_operand"
-	  "=Yr,*x,x, Yr,*x,x, x, *y,*y")
+	  "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
 	(vec_concat:V2SI
 	  (match_operand:SI 1 "nonimmediate_operand"
-	  "  0, 0,x,  0,0, x,rm,  0,rm")
+	  "  0, 0, x,Yv, 0, 0,Yv,rm,  0,rm")
 	  (match_operand:SI 2 "vector_move_operand"
-	  " rm,rm,rm,Yr,*x,x, C,*ym, C")))]
+	  " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
   "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "@
    pinsrd\t{$1, %2, %0|%0, %2, 1}
    pinsrd\t{$1, %2, %0|%0, %2, 1}
    vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
+   vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
    punpckldq\t{%2, %0|%0, %2}
    punpckldq\t{%2, %0|%0, %2}
    vpunpckldq\t{%2, %1, %0|%0, %1, %2}
    %vmovd\t{%1, %0|%0, %1}
    punpckldq\t{%2, %0|%0, %2}
    movd\t{%1, %0|%0, %1}"
-  [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
-   (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
-   (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*")
-   (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*")
-   (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
-   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,DI,DI")])
+  [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
+   (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
+   (set_attr "prefix_extra" "1,1,1,1,*,*,*,*,*,*")
+   (set_attr "length_immediate" "1,1,1,1,*,*,*,*,*,*")
+   (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
+   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
 
 ;; ??? In theory we can match memory for the MMX alternative, but allowing
 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
--- gcc/testsuite/gcc.target/i386/avx512dq-concatv2si-1.c.jj	2016-05-26 15:14:55.853786550 +0200
+++ gcc/testsuite/gcc.target/i386/avx512dq-concatv2si-1.c	2016-05-26 15:13:57.000000000 +0200
@@ -0,0 +1,43 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mavx512dq -masm=att" } */
+
+typedef int V __attribute__((vector_size (8)));
+
+void
+f1 (int x, int y)
+{
+  register int a __asm ("xmm16");
+  register int b __asm ("xmm17");
+  register V c __asm ("xmm3");
+  a = x;
+  b = y;
+  asm volatile ("" : "+v" (a), "+v" (b));
+  c = (V) { a, b };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vpunpckldq\[^\n\r]*%xmm17\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */
+
+void
+f2 (int x, int y)
+{
+  register int a __asm ("xmm16");
+  register V c __asm ("xmm3");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, y };
+  asm volatile ("" : "+v" (c));
+}
+
+void
+f3 (int x, int *y)
+{
+  register int a __asm ("xmm16");
+  register V c __asm ("xmm3");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, *y };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler-times "vpinsrd\[^\n\r]*\\\$1\[^\n\r]*%xmm16\[^\n\r]*%xmm3" 2 } } */
--- gcc/testsuite/gcc.target/i386/avx512vl-concatv2si-1.c.jj	2016-05-26 15:15:11.921574803 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-concatv2si-1.c	2016-05-26 15:16:24.936612585 +0200
@@ -0,0 +1,43 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -mno-avx512dq -masm=att" } */
+
+typedef int V __attribute__((vector_size (8)));
+
+void
+f1 (int x, int y)
+{
+  register int a __asm ("xmm16");
+  register int b __asm ("xmm17");
+  register V c __asm ("xmm3");
+  a = x;
+  b = y;
+  asm volatile ("" : "+v" (a), "+v" (b));
+  c = (V) { a, b };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler "vpunpckldq\[^\n\r]*%xmm17\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */
+
+void
+f2 (int x, int y)
+{
+  register int a __asm ("xmm16");
+  register V c __asm ("xmm3");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, y };
+  asm volatile ("" : "+v" (c));
+}
+
+void
+f3 (int x, int *y)
+{
+  register int a __asm ("xmm16");
+  register V c __asm ("xmm3");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  c = (V) { a, *y };
+  asm volatile ("" : "+v" (c));
+}
+
+/* { dg-final { scan-assembler-not "vpinsrd\[^\n\r]*\\\$1\[^\n\r]*%xmm16\[^\n\r]*%xmm3" } } */