diff mbox

[RFC] Reducing GCC initialization time by merging builtins

Message ID CACy_Q+wT_ri=dPegZmBkYpAp_9zRH-wk77DM28iviZfO+YUY2A@mail.gmail.com
State New
Headers show

Commit Message

Maxim Blumental Feb. 24, 2015, 8:26 a.m. UTC
The attached patch illustrates the following idea: we can replace
several builtins that share the same signature with one auxiliary
builtin that takes a special extra argument. This argument specifies
which particular builtin the auxiliary one should expand to. At the
expand stage we pull the argument out and, based on its value,
determine exactly which builtin is wanted: we supply the ISA
restrictions and the corresponding named pattern. After that, the
builtin is expanded as usual.

The benefit is that at initialization time we only process as many
builtins as there are distinct signatures. If we take the three
static arrays with builtin descriptions in gcc/config/i386/i386.c
(namely bdesc_args, bdesc_special_args and bdesc_round_args) and apply
the idea, we can reduce 2163 builtins to only 677 (about 3.2 times
fewer, i.e. 1486 builtins less).
commit bd37499025b09c9bb0337077040068fcd750c1e5
Author: Maxim Blumenthal <maxim.blumenthal@intel.com>
Date:   Mon Feb 16 16:14:06 2015 +0300

    Combine several builtins that share the same signature
    into one auxiliary builtin with a special argument that
    specifies which particular builtin the auxiliary one
    should expand to.  We pull the argument out at the expand
    stage and, based on its value, supply the information
    about the builtin we want to get, i.e. ISA mask, named
    pattern, etc.
diff mbox

Patch

diff --git a/gcc/config/i386/avx512cdintrin.h b/gcc/config/i386/avx512cdintrin.h
index 4da5250..a4e8edf 100644
--- a/gcc/config/i386/avx512cdintrin.h
+++ b/gcc/config/i386/avx512cdintrin.h
@@ -34,6 +34,9 @@ 
 #define __DISABLE_AVX512CD__
 #endif /* __AVX512CD__ */
 
+/*Opcode argument to __builtin_ia32_v8di_ftype_qi*/
+#define BROADCASTMB512 0
+
 /* Internal data types for implementing the intrinsics.  */
 typedef long long __v8di __attribute__ ((__vector_size__ (64)));
 typedef int __v16si __attribute__ ((__vector_size__ (64)));
@@ -166,7 +169,7 @@  extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_broadcastmb_epi64 (__mmask8 __A)
 {
-  return (__m512i) __builtin_ia32_broadcastmb512 (__A);
+  return (__m512i) __builtin_ia32_v8di_ftype_qi (__A, BROADCASTMB512);
 }
 
 extern __inline __m512i
diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h
index b36ef48..053a391 100644
--- a/gcc/config/i386/avx512dqintrin.h
+++ b/gcc/config/i386/avx512dqintrin.h
@@ -34,6 +34,9 @@ 
 #define __DISABLE_AVX512DQ__
 #endif /* __AVX512DQ__ */
 
+/*Opcode argument to __builtin_ia32_v8di_ftype_qi*/
+#define CVTMASK2Q512 1
+
 extern __inline __m512d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_broadcast_f64x2 (__m128d __A)
@@ -534,7 +537,7 @@  extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_movm_epi64 (__mmask8 __A)
 {
-  return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
+  return (__m512i) __builtin_ia32_v8di_ftype_qi (__A, CVTMASK2Q512);
 }
 
 extern __inline __m512i
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 864d0ea..d59a88e 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -686,6 +686,7 @@  DEF_FUNCTION_TYPE (QI, V4SF, V4SF, INT, QI)
 DEF_FUNCTION_TYPE (QI, V4SF, V4SF, INT, QI, INT)
 DEF_FUNCTION_TYPE (V16SI, HI)
 DEF_FUNCTION_TYPE (V8DI, QI)
+DEF_FUNCTION_TYPE (V8DI, QI, INT)
 
 DEF_FUNCTION_TYPE (INT, V16QI, V16QI, INT)
 DEF_FUNCTION_TYPE (UCHAR, UINT, UINT, UINT)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 71a5b22..9d38151 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -30497,9 +30497,17 @@  enum ix86_builtins
   IX86_BUILTIN_READ_FLAGS,
   IX86_BUILTIN_WRITE_FLAGS,
 
+  IX86_BUILTIN_V8DI_FTYPE_QI,
   IX86_BUILTIN_MAX
 };
 
+enum V8DI_FTYPE_QI_type
+{
+  BROADCASTMB512,
+  CVTMASK2Q512,
+  V8DI_FTYPE_QI_type_MAX
+};
+
 /* Table for the ix86 builtin decls.  */
 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
 
@@ -31878,7 +31886,7 @@  static const struct builtin_description bdesc_args[] =
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
-  { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
+  { 0, CODE_FOR_nothing, "__builtin_ia32_v8di_ftype_qi", IX86_BUILTIN_V8DI_FTYPE_QI, UNKNOWN, (int) V8DI_FTYPE_QI_INT },
   { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
@@ -32795,7 +32803,6 @@  static const struct builtin_description bdesc_args[] =
   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
-  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
 
   /* AVX512BW.  */
   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
@@ -33389,6 +33396,11 @@  static const struct builtin_description bdesc_tm[] =
   { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
 };
 
+static const struct builtin_description bdesc_united[] =
+{
+  { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_v8di_ftype_qi", IX86_BUILTIN_V8DI_FTYPE_QI, UNKNOWN, V8DI_FTYPE_QI },
+  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_v8di_ftype_qi", IX86_BUILTIN_V8DI_FTYPE_QI, UNKNOWN, V8DI_FTYPE_QI }
+};
 /* TM callbacks.  */
 
 /* Return the builtin decl needed to load a vector of TYPE.  */
@@ -36505,7 +36517,6 @@  ix86_expand_args_builtin (const struct builtin_description *d,
   machine_mode rmode = VOIDmode;
   bool swap = false;
   enum rtx_code comparison = d->comparison;
-
   switch ((enum ix86_builtin_func_type) d->flag)
     {
     case V2DF_FTYPE_V2DF_ROUND:
@@ -36806,6 +36817,7 @@  ix86_expand_args_builtin (const struct builtin_description *d,
     case HI_FTYPE_HI_INT:
     case QI_FTYPE_V4SF_INT:
     case QI_FTYPE_V2DF_INT:
+    case V8DI_FTYPE_QI_INT:
       nargs = 2;
       nargs_constant = 1;
       break;
@@ -40006,12 +40018,37 @@  addcarryx:
     if (d->code == fcode)
       switch (fcode)
 	{
-	case IX86_BUILTIN_FABSQ:
+        case IX86_BUILTIN_V8DI_FTYPE_QI:
+          {
+            tree opcode_arg_tree = CALL_EXPR_ARG (exp, 1);
+            rtx opcode_arg_rtx = expand_normal (opcode_arg_tree);
+            int opcode = INTVAL(opcode_arg_rtx);
+            switch((enum V8DI_FTYPE_QI_type)opcode)
+              {
+              case BROADCASTMB512:
+                {
+                  ix86_builtins_isa[d->code].isa = OPTION_MASK_ISA_AVX512CD;
+                  return ix86_expand_args_builtin (&bdesc_united[opcode], exp, target);
+                }
+              case CVTMASK2Q512:
+                {
+                  ix86_builtins_isa[d->code].isa = OPTION_MASK_ISA_AVX512DQ;
+                  return ix86_expand_args_builtin (&bdesc_united[opcode], exp, target);
+                }
+              default:
+                error ("Invalid second argument \"%d\" was passed to \"__builtin_ia32_v8di_ftype_qi\". \
+                       Only values from \"0\" to \"%d\" are mapped to instructions.",
+                       opcode, (int)V8DI_FTYPE_QI_type_MAX - 1);
+	        return const0_rtx;
+              }
+            break;
+          }
+        case IX86_BUILTIN_FABSQ:
 	case IX86_BUILTIN_COPYSIGNQ:
 	  if (!TARGET_SSE)
 	    /* Emit a normal call if SSE isn't available.  */
 	    return expand_call (exp, target, ignore);
-	default:
+        default:
 	  return ix86_expand_args_builtin (d, exp, target);
 	}
 
diff --git a/gcc/testsuite/gcc.target/i386/testimm-11.c b/gcc/testsuite/gcc.target/i386/testimm-11.c
new file mode 100644
index 0000000..bb7db92
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/testimm-11.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O0 -mavx512f" } */
+
+#include <x86intrin.h>
+
+__m512i m512i;
+__mmask8 mmask8;
+
+void
+test (void)
+{
+  m512i = __builtin_ia32_v8di_ftype_qi (mmask8, -1); /* { dg-error "Invalid second argument" } */
+}