diff mbox

[i386,AVX512,82/n] Add gather built-ins.

Message ID 20141022121707.GC11644@msticlxl57.ims.intel.com
State New
Headers show

Commit Message

Kirill Yukhin Oct. 22, 2014, 12:17 p.m. UTC
Hello,
This patch introduces built-ins for the AVX-512VL gather and scatter insns.

Bootstrapped.
All AVX-512* tests pass under the simulator
with this patch applied on top of the patch set.

Is it ok for trunk?

gcc/
	* config/i386/i386.c (ix86_builtins): Add IX86_BUILTIN_GATHER3ALTSIV4DF,
	IX86_BUILTIN_GATHER3ALTDIV8SF, IX86_BUILTIN_GATHER3ALTSIV4DI,
	IX86_BUILTIN_GATHER3ALTDIV8SI.
	(ix86_expand_builtin):
	Handle IX86_BUILTIN_GATHER3ALTDIV8SF, IX86_BUILTIN_GATHER3ALTDIV8SI,
	IX86_BUILTIN_SCATTERSIV4DF, IX86_BUILTIN_SCATTERSIV4DI,
	IX86_BUILTIN_SCATTERDIV2DF, IX86_BUILTIN_SCATTERDIV4DF,
	IX86_BUILTIN_GATHER3ALTSIV4DI, IX86_BUILTIN_GATHER3ALTSIV4DF,
	IX86_BUILTIN_SCATTERDIV4DI, IX86_BUILTIN_SCATTERDIV2DI,
	IX86_BUILTIN_GATHER3SIV8SI, IX86_BUILTIN_GATHER3DIV8SI,
	IX86_BUILTIN_GATHER3SIV4DF, IX86_BUILTIN_GATHER3SIV4DI,
	IX86_BUILTIN_GATHER3DIV8SF, IX86_BUILTIN_GATHER3SIV8SF,
	IX86_BUILTIN_GATHER3DIV4DF, IX86_BUILTIN_GATHER3DIV2DF,
	IX86_BUILTIN_GATHER3DIV2DI, IX86_BUILTIN_GATHER3DIV4DI,
	IX86_BUILTIN_SCATTERDIV4SF, IX86_BUILTIN_SCATTERSIV2DI,
	IX86_BUILTIN_GATHER3SIV2DI, IX86_BUILTIN_GATHER3SIV4SI,
	IX86_BUILTIN_GATHER3SIV4SF, IX86_BUILTIN_GATHER3SIV2DF,
	IX86_BUILTIN_SCATTERSIV2DF, IX86_BUILTIN_SCATTERDIV4SI,
	IX86_BUILTIN_SCATTERSIV4SF, IX86_BUILTIN_SCATTERSIV4SI,
	IX86_BUILTIN_SCATTERDIV8SI, IX86_BUILTIN_GATHER3DIV4SI,
	IX86_BUILTIN_SCATTERSIV8SI, IX86_BUILTIN_SCATTERSIV8SF,
	IX86_BUILTIN_GATHER3DIV4SF, IX86_BUILTIN_SCATTERDIV8SF.
	(ix86_vectorize_builtin_gather): Update V2DFmode, V4DFmode, V2DImode,
	V4DImode, V4SFmode, V8SFmode, V4SImode, V8SImode.

--
Thanks, K

Comments

Uros Bizjak Oct. 23, 2014, 5:51 p.m. UTC | #1
On Wed, Oct 22, 2014 at 2:17 PM, Kirill Yukhin <kirill.yukhin@gmail.com> wrote:
> Hello,
> This patch introduces built-ins for gather insns.
>
> Bootstrapped.
> AVX-512* tests on top of patch-set all pass
> under simulator.
>
> Is it ok for trunk?
>
> gcc/
>         * config/i386/i386.c (ix86_builtins): Add IX86_BUILTIN_GATHER3ALTSIV4DF,
>         IX86_BUILTIN_GATHER3ALTDIV8SF, IX86_BUILTIN_GATHER3ALTSIV4DI,
>         IX86_BUILTIN_GATHER3ALTDIV8SI.
>         (ix86_expand_builtin):
>         Handle IX86_BUILTIN_GATHER3ALTDIV8SF, IX86_BUILTIN_GATHER3ALTDIV8SI,
>         IX86_BUILTIN_SCATTERSIV4DF, IX86_BUILTIN_SCATTERSIV4DI,
>         IX86_BUILTIN_SCATTERDIV2DF, IX86_BUILTIN_SCATTERDIV4DF,
>         IX86_BUILTIN_GATHER3ALTSIV4DI, IX86_BUILTIN_GATHER3ALTSIV4DF,
>         IX86_BUILTIN_SCATTERDIV4DI, IX86_BUILTIN_SCATTERDIV2DI,
>         IX86_BUILTIN_GATHER3SIV8SI, IX86_BUILTIN_GATHER3DIV8SI,
>         IX86_BUILTIN_GATHER3SIV4DF, IX86_BUILTIN_GATHER3SIV4DI,
>         IX86_BUILTIN_GATHER3DIV8SF, IX86_BUILTIN_GATHER3SIV8SF,
>         IX86_BUILTIN_GATHER3DIV4DF, IX86_BUILTIN_GATHER3DIV2DF,
>         IX86_BUILTIN_GATHER3DIV2DI, IX86_BUILTIN_GATHER3DIV4DI,
>         IX86_BUILTIN_SCATTERDIV4SF, IX86_BUILTIN_SCATTERSIV2DI,
>         IX86_BUILTIN_GATHER3SIV2DI, IX86_BUILTIN_GATHER3SIV4SI,
>         IX86_BUILTIN_GATHER3SIV4SF, IX86_BUILTIN_GATHER3SIV2DF,
>         IX86_BUILTIN_SCATTERSIV2DF, IX86_BUILTIN_SCATTERDIV4SI,
>         IX86_BUILTIN_SCATTERSIV4SF, IX86_BUILTIN_SCATTERSIV4SI,
>         IX86_BUILTIN_SCATTERDIV8SI, IX86_BUILTIN_GATHER3DIV4SI,
>         IX86_BUILTIN_SCATTERSIV8SI, IX86_BUILTIN_SCATTERSIV8SF,
>         IX86_BUILTIN_GATHER3DIV4SF, IX86_BUILTIN_SCATTERDIV8SF.
>         (ix86_vectorize_builtin_gather): Update V2DFmode, V4DFmode, V2DImode,
>         V4DImode, V4SFmode, V8SFmode, V4SImode, V8SImode.

OK.

Thanks,
Uros.
diff mbox

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 87992b0..5869273 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -29632,6 +29632,10 @@  enum ix86_builtins
   IX86_BUILTIN_GATHERALTDIV8SI,
   IX86_BUILTIN_GATHER3ALTDIV16SF,
   IX86_BUILTIN_GATHER3ALTDIV16SI,
+  IX86_BUILTIN_GATHER3ALTSIV4DF,
+  IX86_BUILTIN_GATHER3ALTDIV8SF,
+  IX86_BUILTIN_GATHER3ALTSIV4DI,
+  IX86_BUILTIN_GATHER3ALTDIV8SI,
   IX86_BUILTIN_GATHER3ALTSIV8DF,
   IX86_BUILTIN_GATHER3ALTSIV8DI,
   IX86_BUILTIN_GATHER3DIV16SF,
@@ -38262,6 +38266,66 @@  addcarryx:
     case IX86_BUILTIN_GATHER3ALTDIV16SI:
       icode = CODE_FOR_avx512f_gatherdiv16si;
       goto gather_gen;
+    case IX86_BUILTIN_GATHER3SIV2DF:
+      icode = CODE_FOR_avx512vl_gathersiv2df;
+      goto gather_gen;
+    case IX86_BUILTIN_GATHER3SIV4DF:
+      icode = CODE_FOR_avx512vl_gathersiv4df;
+      goto gather_gen;
+    case IX86_BUILTIN_GATHER3DIV2DF:
+      icode = CODE_FOR_avx512vl_gatherdiv2df;
+      goto gather_gen;
+    case IX86_BUILTIN_GATHER3DIV4DF:
+      icode = CODE_FOR_avx512vl_gatherdiv4df;
+      goto gather_gen;
+    case IX86_BUILTIN_GATHER3SIV4SF:
+      icode = CODE_FOR_avx512vl_gathersiv4sf;
+      goto gather_gen;
+    case IX86_BUILTIN_GATHER3SIV8SF:
+      icode = CODE_FOR_avx512vl_gathersiv8sf;
+      goto gather_gen;
+    case IX86_BUILTIN_GATHER3DIV4SF:
+      icode = CODE_FOR_avx512vl_gatherdiv4sf;
+      goto gather_gen;
+    case IX86_BUILTIN_GATHER3DIV8SF:
+      icode = CODE_FOR_avx512vl_gatherdiv8sf;
+      goto gather_gen;
+    case IX86_BUILTIN_GATHER3SIV2DI:
+      icode = CODE_FOR_avx512vl_gathersiv2di;
+      goto gather_gen;
+    case IX86_BUILTIN_GATHER3SIV4DI:
+      icode = CODE_FOR_avx512vl_gathersiv4di;
+      goto gather_gen;
+    case IX86_BUILTIN_GATHER3DIV2DI:
+      icode = CODE_FOR_avx512vl_gatherdiv2di;
+      goto gather_gen;
+    case IX86_BUILTIN_GATHER3DIV4DI:
+      icode = CODE_FOR_avx512vl_gatherdiv4di;
+      goto gather_gen;
+    case IX86_BUILTIN_GATHER3SIV4SI:
+      icode = CODE_FOR_avx512vl_gathersiv4si;
+      goto gather_gen;
+    case IX86_BUILTIN_GATHER3SIV8SI:
+      icode = CODE_FOR_avx512vl_gathersiv8si;
+      goto gather_gen;
+    case IX86_BUILTIN_GATHER3DIV4SI:
+      icode = CODE_FOR_avx512vl_gatherdiv4si;
+      goto gather_gen;
+    case IX86_BUILTIN_GATHER3DIV8SI:
+      icode = CODE_FOR_avx512vl_gatherdiv8si;
+      goto gather_gen;
+    case IX86_BUILTIN_GATHER3ALTSIV4DF:
+      icode = CODE_FOR_avx512vl_gathersiv4df;
+      goto gather_gen;
+    case IX86_BUILTIN_GATHER3ALTDIV8SF:
+      icode = CODE_FOR_avx512vl_gatherdiv8sf;
+      goto gather_gen;
+    case IX86_BUILTIN_GATHER3ALTSIV4DI:
+      icode = CODE_FOR_avx512vl_gathersiv4di;
+      goto gather_gen;
+    case IX86_BUILTIN_GATHER3ALTDIV8SI:
+      icode = CODE_FOR_avx512vl_gatherdiv8si;
+      goto gather_gen;
     case IX86_BUILTIN_SCATTERSIV16SF:
       icode = CODE_FOR_avx512f_scattersiv16sf;
       goto scatter_gen;
@@ -38286,7 +38350,54 @@  addcarryx:
     case IX86_BUILTIN_SCATTERDIV8DI:
       icode = CODE_FOR_avx512f_scatterdiv8di;
       goto scatter_gen;
-
+    case IX86_BUILTIN_SCATTERSIV8SF:
+      icode = CODE_FOR_avx512vl_scattersiv8sf;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERSIV4SF:
+      icode = CODE_FOR_avx512vl_scattersiv4sf;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERSIV4DF:
+      icode = CODE_FOR_avx512vl_scattersiv4df;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERSIV2DF:
+      icode = CODE_FOR_avx512vl_scattersiv2df;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERDIV8SF:
+      icode = CODE_FOR_avx512vl_scatterdiv8sf;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERDIV4SF:
+      icode = CODE_FOR_avx512vl_scatterdiv4sf;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERDIV4DF:
+      icode = CODE_FOR_avx512vl_scatterdiv4df;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERDIV2DF:
+      icode = CODE_FOR_avx512vl_scatterdiv2df;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERSIV8SI:
+      icode = CODE_FOR_avx512vl_scattersiv8si;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERSIV4SI:
+      icode = CODE_FOR_avx512vl_scattersiv4si;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERSIV4DI:
+      icode = CODE_FOR_avx512vl_scattersiv4di;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERSIV2DI:
+      icode = CODE_FOR_avx512vl_scattersiv2di;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERDIV8SI:
+      icode = CODE_FOR_avx512vl_scatterdiv8si;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERDIV4SI:
+      icode = CODE_FOR_avx512vl_scatterdiv4si;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERDIV4DI:
+      icode = CODE_FOR_avx512vl_scatterdiv4di;
+      goto scatter_gen;
+    case IX86_BUILTIN_SCATTERDIV2DI:
+      icode = CODE_FOR_avx512vl_scatterdiv2di;
+      goto scatter_gen;
     case IX86_BUILTIN_GATHERPFDPD:
       icode = CODE_FOR_avx512pf_gatherpfv8sidf;
       goto vec_prefetch_gen;
@@ -38350,6 +38461,8 @@  addcarryx:
 	  emit_insn (gen_vec_extract_lo_v16si (half, op2));
 	  op2 = half;
 	  break;
+	case IX86_BUILTIN_GATHER3ALTSIV4DF:
+	case IX86_BUILTIN_GATHER3ALTSIV4DI:
 	case IX86_BUILTIN_GATHERALTSIV4DF:
 	case IX86_BUILTIN_GATHERALTSIV4DI:
 	  half = gen_reg_rtx (V4SImode);
@@ -38377,6 +38490,8 @@  addcarryx:
 	      op3 = half;
 	    }
 	  break;
+	case IX86_BUILTIN_GATHER3ALTDIV8SF:
+	case IX86_BUILTIN_GATHER3ALTDIV8SI:
 	case IX86_BUILTIN_GATHERALTDIV8SF:
 	case IX86_BUILTIN_GATHERALTDIV8SI:
 	  half = gen_reg_rtx (mode0);
@@ -38512,11 +38627,13 @@  addcarryx:
 	    target = gen_reg_rtx (V8SImode);
 	  emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
 	  break;
+	case IX86_BUILTIN_GATHER3DIV8SF:
 	case IX86_BUILTIN_GATHERDIV8SF:
 	  if (target == NULL_RTX)
 	    target = gen_reg_rtx (V4SFmode);
 	  emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
 	  break;
+	case IX86_BUILTIN_GATHER3DIV8SI:
 	case IX86_BUILTIN_GATHERDIV8SI:
 	  if (target == NULL_RTX)
 	    target = gen_reg_rtx (V4SImode);
@@ -39348,28 +39465,52 @@  ix86_vectorize_builtin_gather (const_tree mem_vectype,
   switch (TYPE_MODE (mem_vectype))
     {
     case V2DFmode:
-      code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
+      if (TARGET_AVX512VL)
+	code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
+      else
+	code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
       break;
     case V4DFmode:
-      code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
+      if (TARGET_AVX512VL)
+	code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
+      else
+	code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
       break;
     case V2DImode:
-      code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
+      if (TARGET_AVX512VL)
+	code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
+      else
+	code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
       break;
     case V4DImode:
-      code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
+      if (TARGET_AVX512VL)
+	code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
+      else
+	code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
       break;
     case V4SFmode:
-      code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
+      if (TARGET_AVX512VL)
+	code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
+      else
+	code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
       break;
     case V8SFmode:
-      code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
+      if (TARGET_AVX512VL)
+	code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
+      else
+	code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
       break;
     case V4SImode:
-      code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
+      if (TARGET_AVX512VL)
+	code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
+      else
+	code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
       break;
     case V8SImode:
-      code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
+      if (TARGET_AVX512VL)
+	code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
+      else
+	code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
       break;
     case V8DFmode:
       if (TARGET_AVX512F)