diff mbox

Improve XMM16+ handling in vec_set*

Message ID 20160518210139.GW28550@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek May 18, 2016, 9:01 p.m. UTC
Hi!

vinserti32x4 is in AVX512VL.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-05-18  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/sse.md (vec_set_lo_v16hi, vec_set_hi_v16hi,
	vec_set_lo_v32qi, vec_set_hi_v32qi): Add alternative with
	v constraint instead of x and vinserti32x4 insn.

	* gcc.target/i386/avx512vl-vinserti32x4-3.c: New test.


	Jakub

Comments

Kirill Yukhin May 22, 2016, 7:41 a.m. UTC | #1
Hello,
On 18 May 23:01, Jakub Jelinek wrote:
> Hi!
> 
> vinserti32x4 is in AVX512VL.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK.

> 
> 2016-05-18  Jakub Jelinek  <jakub@redhat.com>
> 
> 	* config/i386/sse.md (vec_set_lo_v16hi, vec_set_hi_v16hi,
> 	vec_set_lo_v32qi, vec_set_hi_v32qi): Add alternative with
> 	v constraint instead of x and vinserti32x4 insn.
> 
> 	* gcc.target/i386/avx512vl-vinserti32x4-3.c: New test.

--
Thanks, K
diff mbox

Patch

--- gcc/config/i386/sse.md.jj	2016-05-18 13:21:35.000000000 +0200
+++ gcc/config/i386/sse.md	2016-05-18 15:02:54.574685438 +0200
@@ -17899,47 +17899,50 @@  (define_insn "vec_set_hi_<mode><mask_nam
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "vec_set_lo_v16hi"
-  [(set (match_operand:V16HI 0 "register_operand" "=x")
+  [(set (match_operand:V16HI 0 "register_operand" "=x,v")
 	(vec_concat:V16HI
-	  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+	  (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")
 	  (vec_select:V8HI
-	    (match_operand:V16HI 1 "register_operand" "x")
+	    (match_operand:V16HI 1 "register_operand" "x,v")
 	    (parallel [(const_int 8) (const_int 9)
 		       (const_int 10) (const_int 11)
 		       (const_int 12) (const_int 13)
 		       (const_int 14) (const_int 15)]))))]
   "TARGET_AVX"
-  "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+  "@vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
+   vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
   [(set_attr "type" "sselog")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
-   (set_attr "prefix" "vex")
+   (set_attr "prefix" "vex,evex")
    (set_attr "mode" "OI")])
 
 (define_insn "vec_set_hi_v16hi"
-  [(set (match_operand:V16HI 0 "register_operand" "=x")
+  [(set (match_operand:V16HI 0 "register_operand" "=x,v")
 	(vec_concat:V16HI
 	  (vec_select:V8HI
-	    (match_operand:V16HI 1 "register_operand" "x")
+	    (match_operand:V16HI 1 "register_operand" "x,v")
 	    (parallel [(const_int 0) (const_int 1)
 		       (const_int 2) (const_int 3)
 		       (const_int 4) (const_int 5)
 		       (const_int 6) (const_int 7)]))
-	  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+	  (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")))]
   "TARGET_AVX"
-  "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+  "@
+   vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
+   vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
   [(set_attr "type" "sselog")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
-   (set_attr "prefix" "vex")
+   (set_attr "prefix" "vex,evex")
    (set_attr "mode" "OI")])
 
 (define_insn "vec_set_lo_v32qi"
-  [(set (match_operand:V32QI 0 "register_operand" "=x")
+  [(set (match_operand:V32QI 0 "register_operand" "=x,v")
 	(vec_concat:V32QI
-	  (match_operand:V16QI 2 "nonimmediate_operand" "xm")
+	  (match_operand:V16QI 2 "nonimmediate_operand" "xm,v")
 	  (vec_select:V16QI
-	    (match_operand:V32QI 1 "register_operand" "x")
+	    (match_operand:V32QI 1 "register_operand" "x,v")
 	    (parallel [(const_int 16) (const_int 17)
 		       (const_int 18) (const_int 19)
 		       (const_int 20) (const_int 21)
@@ -17949,18 +17952,20 @@  (define_insn "vec_set_lo_v32qi"
 		       (const_int 28) (const_int 29)
 		       (const_int 30) (const_int 31)]))))]
   "TARGET_AVX"
-  "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+  "@
+   vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
+   vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
   [(set_attr "type" "sselog")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
-   (set_attr "prefix" "vex")
+   (set_attr "prefix" "vex,evex")
    (set_attr "mode" "OI")])
 
 (define_insn "vec_set_hi_v32qi"
-  [(set (match_operand:V32QI 0 "register_operand" "=x")
+  [(set (match_operand:V32QI 0 "register_operand" "=x,v")
 	(vec_concat:V32QI
 	  (vec_select:V16QI
-	    (match_operand:V32QI 1 "register_operand" "x")
+	    (match_operand:V32QI 1 "register_operand" "x,v")
 	    (parallel [(const_int 0) (const_int 1)
 		       (const_int 2) (const_int 3)
 		       (const_int 4) (const_int 5)
@@ -17969,13 +17974,15 @@  (define_insn "vec_set_hi_v32qi"
 		       (const_int 10) (const_int 11)
 		       (const_int 12) (const_int 13)
 		       (const_int 14) (const_int 15)]))
-	  (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+	  (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")))]
   "TARGET_AVX"
-  "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+  "@
+   vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
+   vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
   [(set_attr "type" "sselog")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
-   (set_attr "prefix" "vex")
+   (set_attr "prefix" "vex,evex")
    (set_attr "mode" "OI")])
 
 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
--- gcc/testsuite/gcc.target/i386/avx512vl-vinserti32x4-3.c.jj	2016-05-18 15:06:44.517541398 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vl-vinserti32x4-3.c	2016-05-18 15:31:00.918492975 +0200
@@ -0,0 +1,49 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512vl -masm=att" } */
+
+typedef char V1 __attribute__((vector_size (32)));
+typedef short V2 __attribute__((vector_size (32)));
+
+void
+f1 (V1 x, char y)
+{
+  register V1 a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a[7] = y;
+  asm volatile ("" : "+v" (a));
+}
+
+void
+f2 (V1 x, char y)
+{
+  register V1 a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a[28] = y;
+  asm volatile ("" : "+v" (a));
+}
+
+void
+f3 (V2 x, short y)
+{
+  register V2 a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a[3] = y;
+  asm volatile ("" : "+v" (a));
+}
+
+void
+f4 (V2 x, short y)
+{
+  register V2 a __asm ("xmm16");
+  a = x;
+  asm volatile ("" : "+v" (a));
+  a[14] = y;
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler-times "vinserti32x4\[^\n\r]*0x0\[^\n\r]*%ymm16" 2 } } */
+/* { dg-final { scan-assembler-times "vinserti32x4\[^\n\r]*0x1\[^\n\r]*%ymm16" 2 } } */
+/* { dg-final { scan-assembler-times "vextracti32x4\[^\n\r]*0x1\[^\n\r]*%\[yz]mm16" 2 } } */