x86: don't use AVX512BW vmovdqu variants without -mavx512bw
diff mbox series

Message ID 5A3101F40200007800197102@prv-mh.provo.novell.com
State New
Headers show
Series
  • x86: don't use AVX512BW vmovdqu variants without -mavx512bw
Related show

Commit Message

Jan Beulich Dec. 13, 2017, 9:33 a.m. UTC
Simply mirror the MODE_XI logic of handling unaligned operands in
mov<mode>_internal into MODE_TI / MODE_OI handling.

gcc/
2017-12-13  Jan Beulich  <jbeulich@suse.com>

	* sse.md (mov<mode>_internal): Tighten condition for when to use
	vmovdqu<ssescalarsize> for TI and OI modes.

gcc/testsuite/
2017-12-13  Jan Beulich  <jbeulich@suse.com>

	* gcc.target/i386/avx512vl-no-vmovdqu8.c,
	gcc.target/i386/avx512vl-no-vmovdqu16.c: New.

---
I'm also being puzzled by the code being generated for the 256-bit cases
(which shouldn't differ much from the 128-bit ones).

Comments

Jan Beulich Jan. 2, 2018, 9:48 a.m. UTC | #1
>>> On 13.12.17 at 10:33,  wrote:
> Simply mirror the MODE_XI logic of handling unaligned operands in
> mov<mode>_internal into MODE_TI / MODE_OI handling.
> 
> gcc/
> 2017-12-13  Jan Beulich  <jbeulich@suse.com>
> 
> 	* sse.md (mov<mode>_internal): Tighten condition for when to use
> 	vmovdqu<ssescalarsize> for TI and OI modes.
> 
> gcc/testsuite/
> 2017-12-13  Jan Beulich  <jbeulich@suse.com>
> 
> 	* gcc.target/i386/avx512vl-no-vmovdqu8.c,
> 	gcc.target/i386/avx512vl-no-vmovdqu16.c: New.
> 
> ---
> I'm also being puzzled by the code being generated for the 256-bit cases
> (which shouldn't differ much from the 128-bit ones).
> 
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -1005,8 +1005,14 @@
>  	case MODE_TI:
>  	  if (misaligned_operand (operands[0], <MODE>mode)
>  	      || misaligned_operand (operands[1], <MODE>mode))
> -	    return TARGET_AVX512VL ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
> -				   : "%vmovdqu\t{%1, %0|%0, %1}";
> +	    return TARGET_AVX512VL
> +		   && (<MODE>mode == V4SImode
> +		       || <MODE>mode == V2DImode
> +		       || <MODE>mode == V8SImode
> +		       || <MODE>mode == V4DImode
> +		       || TARGET_AVX512BW)
> +		   ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
> +		   : "%vmovdqu\t{%1, %0|%0, %1}";
>  	  else
>  	    return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
>  				   : "%vmovdqa\t{%1, %0|%0, %1}";
> --- a/gcc/testsuite/gcc.target/i386/avx512vl-no-vmovdqu16.c
> +++ b/gcc/testsuite/gcc.target/i386/avx512vl-no-vmovdqu16.c
> @@ -0,0 +1,24 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512vl -mno-avx512bw" } */
> +
> +typedef unsigned int __attribute__((mode(HI), vector_size(16))) v8hi_t;
> +typedef unsigned int __attribute__((mode(HI), vector_size(32))) v16hi_t;
> +
> +struct s8hi {
> +	int i;
> +	v8hi_t __attribute__((packed)) v;
> +};
> +struct s16hi {
> +	int i;
> +	v16hi_t __attribute__((packed)) v;
> +};
> +
> +void f8hi(struct s8hi*p1, const struct s8hi*p2) {
> +	p1->v += p2->v;
> +}
> +
> +void f16hi(struct s16hi*p1, const struct s16hi*p2) {
> +	p1->v += p2->v;
> +}
> +
> +/* { dg-final { scan-assembler-not "^\[ \t\]*vmovdq\[au\](8|16)" } } */
> --- a/gcc/testsuite/gcc.target/i386/avx512vl-no-vmovdqu8.c
> +++ b/gcc/testsuite/gcc.target/i386/avx512vl-no-vmovdqu8.c
> @@ -0,0 +1,24 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512vl -mno-avx512bw" } */
> +
> +typedef unsigned int __attribute__((mode(QI), vector_size(16))) v16qi_t;
> +typedef unsigned int __attribute__((mode(QI), vector_size(32))) v32qi_t;
> +
> +struct s16qi {
> +	int i;
> +	v16qi_t __attribute__((packed)) v;
> +};
> +struct s32qi {
> +	int i;
> +	v32qi_t __attribute__((packed)) v;
> +};
> +
> +void f16qi(struct s16qi*p1, const struct s16qi*p2) {
> +	p1->v += p2->v;
> +}
> +
> +void f32qi(struct s32qi*p1, const struct s32qi*p2) {
> +	p1->v += p2->v;
> +}
> +
> +/* { dg-final { scan-assembler-not "^\[ \t\]*vmovdq\[au\](8|16)" } } */
> 
> 
>
Jan Hubicka Jan. 2, 2018, 10:05 a.m. UTC | #2
> >>> On 13.12.17 at 10:33,  wrote:
> > Simply mirror the MODE_XI logic of handling unaligned operands in
> > mov<mode>_internal into MODE_TI / MODE_OI handling.
> > 
> > gcc/
> > 2017-12-13  Jan Beulich  <jbeulich@suse.com>
> > 
> > 	* sse.md (mov<mode>_internal): Tighten condition for when to use
> > 	vmovdqu<ssescalarsize> for TI and OI modes.
> > 
> > gcc/testsuite/
> > 2017-12-13  Jan Beulich  <jbeulich@suse.com>
> > 
> > 	* gcc.target/i386/avx512vl-no-vmovdqu8.c,
> > 	gcc.target/i386/avx512vl-no-vmovdqu16.c: New.

Looks OK. We do not need to update instruction attribute becuase we make no
difference between those instructions and both ends up being ssemov?

Honza
> > 
> > ---
> > I'm also being puzzled by the code being generated for the 256-bit cases
> > (which shouldn't differ much from the 128-bit ones).
> > 
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -1005,8 +1005,14 @@
> >  	case MODE_TI:
> >  	  if (misaligned_operand (operands[0], <MODE>mode)
> >  	      || misaligned_operand (operands[1], <MODE>mode))
> > -	    return TARGET_AVX512VL ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
> > -				   : "%vmovdqu\t{%1, %0|%0, %1}";
> > +	    return TARGET_AVX512VL
> > +		   && (<MODE>mode == V4SImode
> > +		       || <MODE>mode == V2DImode
> > +		       || <MODE>mode == V8SImode
> > +		       || <MODE>mode == V4DImode
> > +		       || TARGET_AVX512BW)
> > +		   ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
> > +		   : "%vmovdqu\t{%1, %0|%0, %1}";
> >  	  else
> >  	    return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
> >  				   : "%vmovdqa\t{%1, %0|%0, %1}";
> > --- a/gcc/testsuite/gcc.target/i386/avx512vl-no-vmovdqu16.c
> > +++ b/gcc/testsuite/gcc.target/i386/avx512vl-no-vmovdqu16.c
> > @@ -0,0 +1,24 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -mavx512vl -mno-avx512bw" } */
> > +
> > +typedef unsigned int __attribute__((mode(HI), vector_size(16))) v8hi_t;
> > +typedef unsigned int __attribute__((mode(HI), vector_size(32))) v16hi_t;
> > +
> > +struct s8hi {
> > +	int i;
> > +	v8hi_t __attribute__((packed)) v;
> > +};
> > +struct s16hi {
> > +	int i;
> > +	v16hi_t __attribute__((packed)) v;
> > +};
> > +
> > +void f8hi(struct s8hi*p1, const struct s8hi*p2) {
> > +	p1->v += p2->v;
> > +}
> > +
> > +void f16hi(struct s16hi*p1, const struct s16hi*p2) {
> > +	p1->v += p2->v;
> > +}
> > +
> > +/* { dg-final { scan-assembler-not "^\[ \t\]*vmovdq\[au\](8|16)" } } */
> > --- a/gcc/testsuite/gcc.target/i386/avx512vl-no-vmovdqu8.c
> > +++ b/gcc/testsuite/gcc.target/i386/avx512vl-no-vmovdqu8.c
> > @@ -0,0 +1,24 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -mavx512vl -mno-avx512bw" } */
> > +
> > +typedef unsigned int __attribute__((mode(QI), vector_size(16))) v16qi_t;
> > +typedef unsigned int __attribute__((mode(QI), vector_size(32))) v32qi_t;
> > +
> > +struct s16qi {
> > +	int i;
> > +	v16qi_t __attribute__((packed)) v;
> > +};
> > +struct s32qi {
> > +	int i;
> > +	v32qi_t __attribute__((packed)) v;
> > +};
> > +
> > +void f16qi(struct s16qi*p1, const struct s16qi*p2) {
> > +	p1->v += p2->v;
> > +}
> > +
> > +void f32qi(struct s32qi*p1, const struct s32qi*p2) {
> > +	p1->v += p2->v;
> > +}
> > +
> > +/* { dg-final { scan-assembler-not "^\[ \t\]*vmovdq\[au\](8|16)" } } */
> > 
> > 
> > 
> 
>
Jan Beulich Jan. 2, 2018, 10:19 a.m. UTC | #3
>>> On 02.01.18 at 11:05, <hubicka@ucw.cz> wrote:
>> >>> On 13.12.17 at 10:33,  wrote:
>> > Simply mirror the MODE_XI logic of handling unaligned operands in
>> > mov<mode>_internal into MODE_TI / MODE_OI handling.
>> > 
>> > gcc/
>> > 2017-12-13  Jan Beulich  <jbeulich@suse.com>
>> > 
>> > 	* sse.md (mov<mode>_internal): Tighten condition for when to use
>> > 	vmovdqu<ssescalarsize> for TI and OI modes.
>> > 
>> > gcc/testsuite/
>> > 2017-12-13  Jan Beulich  <jbeulich@suse.com>
>> > 
>> > 	* gcc.target/i386/avx512vl-no-vmovdqu8.c,
>> > 	gcc.target/i386/avx512vl-no-vmovdqu16.c: New.
> 
> Looks OK. We do not need to update instruction attribute becuase we make no
> difference between those instructions and both ends up being ssemov?

I think so, yes, but my opinion here is certainly not meaning much.

Jan
Jan Hubicka Jan. 2, 2018, 1:30 p.m. UTC | #4
> >>> On 02.01.18 at 11:05, <hubicka@ucw.cz> wrote:
> >> >>> On 13.12.17 at 10:33,  wrote:
> >> > Simply mirror the MODE_XI logic of handling unaligned operands in
> >> > mov<mode>_internal into MODE_TI / MODE_OI handling.
> >> > 
> >> > gcc/
> >> > 2017-12-13  Jan Beulich  <jbeulich@suse.com>
> >> > 
> >> > 	* sse.md (mov<mode>_internal): Tighten condition for when to use
> >> > 	vmovdqu<ssescalarsize> for TI and OI modes.
> >> > 
> >> > gcc/testsuite/
> >> > 2017-12-13  Jan Beulich  <jbeulich@suse.com>
> >> > 
> >> > 	* gcc.target/i386/avx512vl-no-vmovdqu8.c,
> >> > 	gcc.target/i386/avx512vl-no-vmovdqu16.c: New.
> > 
> > Looks OK. We do not need to update instruction attribute becuase we make no
> > difference between those instructions and both ends up being ssemov?
> 
> I think so, yes, but my opinion here is certainly not meaning much.

Seems to be that case. So patch is OK.
(we do not model avx512 on core or avx256 on Zen for scheduling very well. I guess
we should eventually)

Honza
> 
> Jan

Patch
diff mbox series

--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1005,8 +1005,14 @@ 
 	case MODE_TI:
 	  if (misaligned_operand (operands[0], <MODE>mode)
 	      || misaligned_operand (operands[1], <MODE>mode))
-	    return TARGET_AVX512VL ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
-				   : "%vmovdqu\t{%1, %0|%0, %1}";
+	    return TARGET_AVX512VL
+		   && (<MODE>mode == V4SImode
+		       || <MODE>mode == V2DImode
+		       || <MODE>mode == V8SImode
+		       || <MODE>mode == V4DImode
+		       || TARGET_AVX512BW)
+		   ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
+		   : "%vmovdqu\t{%1, %0|%0, %1}";
 	  else
 	    return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
 				   : "%vmovdqa\t{%1, %0|%0, %1}";
--- a/gcc/testsuite/gcc.target/i386/avx512vl-no-vmovdqu16.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-no-vmovdqu16.c
@@ -0,0 +1,24 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl -mno-avx512bw" } */
+
+typedef unsigned int __attribute__((mode(HI), vector_size(16))) v8hi_t;
+typedef unsigned int __attribute__((mode(HI), vector_size(32))) v16hi_t;
+
+struct s8hi {
+	int i;
+	v8hi_t __attribute__((packed)) v;
+};
+struct s16hi {
+	int i;
+	v16hi_t __attribute__((packed)) v;
+};
+
+void f8hi(struct s8hi*p1, const struct s8hi*p2) {
+	p1->v += p2->v;
+}
+
+void f16hi(struct s16hi*p1, const struct s16hi*p2) {
+	p1->v += p2->v;
+}
+
+/* { dg-final { scan-assembler-not "^\[ \t\]*vmovdq\[au\](8|16)" } } */
--- a/gcc/testsuite/gcc.target/i386/avx512vl-no-vmovdqu8.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-no-vmovdqu8.c
@@ -0,0 +1,24 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl -mno-avx512bw" } */
+
+typedef unsigned int __attribute__((mode(QI), vector_size(16))) v16qi_t;
+typedef unsigned int __attribute__((mode(QI), vector_size(32))) v32qi_t;
+
+struct s16qi {
+	int i;
+	v16qi_t __attribute__((packed)) v;
+};
+struct s32qi {
+	int i;
+	v32qi_t __attribute__((packed)) v;
+};
+
+void f16qi(struct s16qi*p1, const struct s16qi*p2) {
+	p1->v += p2->v;
+}
+
+void f32qi(struct s32qi*p1, const struct s32qi*p2) {
+	p1->v += p2->v;
+}
+
+/* { dg-final { scan-assembler-not "^\[ \t\]*vmovdq\[au\](8|16)" } } */