diff mbox

i?86 unaligned/aligned load improvement for AVX512F

Message ID 20140113210551.GF892@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Jan. 13, 2014, 9:05 p.m. UTC
On Mon, Jan 13, 2014 at 07:35:41PM +0100, Uros Bizjak wrote:
> Jakub, do you plan to submit this patch?

That would be the following patch then, tested on x86_64-linux.
Unfortunately, it doesn't help for the avx512f-vmovdqu32-1.c
testcase: the __m512i type is V8DImode, and while the emitted
(unaligned) load is V16SImode, it is then cast to V8DImode, so the
combiner combines it into a V8DImode load and thus it is vmovdqu64
anyway.  So I'm not sure if this is worth it, your call...

But, while at it, is there any reason why we treat V64QImode and V32HImode
so badly?  As vec_initv64qi and vec_initv32hi aren't defined, for e.g.
foo_1 in avx512f-vec-init.c we generate ~180 instructions, when I'd say
	vmovd	%edi, %xmm0
	vpbroadcastb	%xmm0, %xmm0
	vpbroadcastq	%xmm0, %zmm0
	ret
would do the trick just fine.

2014-01-13  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/sse.md (*mov<mode>_internal): Only use
	vmovdqa64 or vmovdqu64 instructions for V?DImode, for other
	MODE_VECT_INT modes use vmovdqa32 or vmovdqu32.

	* gcc.target/i386/avx512f-vec-init.c: Expect vmovdqa32 instead
	of vmovdqa64.



	Jakub
diff mbox

Patch

--- gcc/config/i386/sse.md.jj	2014-01-04 10:56:54.795976470 +0100
+++ gcc/config/i386/sse.md	2014-01-13 20:30:04.052499798 +0100
@@ -705,7 +705,14 @@  (define_insn "*mov<mode>_internal"
 		return "vmovapd\t{%g1, %g0|%g0, %g1}";
 	      case MODE_OI:
 	      case MODE_TI:
-		return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
+		switch (<MODE>mode)
+		  {
+		  case V4DImode:
+		  case V2DImode:
+		    return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
+		  default:
+		    return "vmovdqa32\t{%g1, %g0|%g0, %g1}";
+		  }
 	      default:
 		gcc_unreachable ();
 	      }
@@ -743,9 +750,16 @@  (define_insn "*mov<mode>_internal"
 	case MODE_XI:
 	  if (misaligned_operand (operands[0], <MODE>mode)
 	      || misaligned_operand (operands[1], <MODE>mode))
-	    return "vmovdqu64\t{%1, %0|%0, %1}";
-	  else
+	    {
+	      if (<MODE>mode == V8DImode)
+		return "vmovdqu64\t{%1, %0|%0, %1}";
+	      else
+		return "vmovdqu32\t{%1, %0|%0, %1}";
+	    }
+	  else if (<MODE>mode == V8DImode)
 	    return "vmovdqa64\t{%1, %0|%0, %1}";
+	  else
+	    return "vmovdqa32\t{%1, %0|%0, %1}";
 
 	default:
 	  gcc_unreachable ();
--- gcc/testsuite/gcc.target/i386/avx512f-vec-init.c.jj	2013-12-31 12:51:09.000000000 +0100
+++ gcc/testsuite/gcc.target/i386/avx512f-vec-init.c	2014-01-13 21:42:48.410415601 +0100
@@ -1,6 +1,6 @@ 
 /* { dg-do compile } */
 /* { dg-options "-O3 -mavx512f" } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+%zmm" 2 } } */
+/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+%zmm" 2 } } */
 /* { dg-final { scan-assembler-times "vpbroadcastd" 1 } } */
 /* { dg-final { scan-assembler-times "vpbroadcastq" 1 } } */
 /* { dg-final { scan-assembler-times "vpbroadcastb" 2 } } */