diff mbox

[i386,Pointer,Bounds,Checker,31/x] Pointer Bounds Checker builtins for i386 target

Message ID 20140918134734.GA50194@msticlxl57.ims.intel.com
State New
Headers show

Commit Message

Ilya Enkovich Sept. 18, 2014, 1:47 p.m. UTC
On 17 Sep 20:06, Uros Bizjak wrote:
> On Wed, Sep 17, 2014 at 6:31 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:
> 
> >> >> I don't like the way arguments are prepared. For the case above,
> >> >> bnd_ldx should have index_register_operand predicate in its pattern,
> >> >> and this predicate (and its mode) should be checked in the expander
> >> >> code. There are many examples of argument expansion in
> >> >> ix86_expand_builtin function, including how Pmode is handled.
> >> >>
> >> >> Also, please see how target is handled there. Target can be null, so
> >> >> REG_P predicate will crash.
> >> >>
> >> >> You should also select insn patterns depending on BNDmode, not TARGET_64BIT.
> >> >>
> >> >> Please use assign_386_stack_local so stack slots can be shared.
> >> >> SLOT_TEMP is intended for short-lived temporaries, you can introduce
> >> >> new slots if you need more live values at once.
> >> >>
> >> >> Uros.
> >> >
> >> > Thanks for comments!  Here is a new version in which I addressed all your concerns.
> >>
> >> Unfortunately, it doesn't. The patch only fixed one instance w.r.t to
> >> target handling, the one I referred as an example. You still have
> >> unchecked target, at least in IX86_BUILTIN_BNDMK.
> >>
> >> However, you have a general problems in your builtin expansion code,
> >> so please look at how other builtins are handled. E.g.:
> >>
> >>   if (optimize || !target
> >>       || GET_MODE (target) != tmode
> >>       || !register_operand(target, tmode))
> >>     target = gen_reg_rtx (tmode);
> >>
> >> also, here is an example how input operands are prepared:
> >>
> >>       op0 = expand_normal (arg0);
> >>       op1 = expand_normal (arg1);
> >>       op2 = expand_normal (arg2);
> >>       if (!register_operand (op0, Pmode))
> >>     op0 = ix86_zero_extend_to_Pmode (op0);
> >>       if (!register_operand (op1, SImode))
> >>     op1 = copy_to_mode_reg (SImode, op1);
> >>       if (!register_operand (op2, SImode))
> >>     op2 = copy_to_mode_reg (SImode, op2);
> >>
> >> So, Pmode is handled in a special way, even when x32 is not considered.
> >>
> >> BTW: I wonder if word_mode is needed here, Pmode can be SImode with
> >> address prefix (x32).
> >>
> >> Inside the expanders, please use expand_simple_binop and expand_unop
> >> on RTX, not tree expressions. Again, please see many examples.
> >
> > Thank you for additional explanations.  Hope this time I answer your concerns correctly :)
> 
> Yes, this version is MUCH better. There are further comments down the code.
> 
> > 2014-09-17  Ilya Enkovich  <ilya.enkovich@intel.com>
> >
> >         * config/i386/i386-builtin-types.def (BND): New.
> >         (ULONG): New.
> >         (BND_FTYPE_PCVOID_ULONG): New.
> >         (VOID_FTYPE_BND_PCVOID): New.
> >         (VOID_FTYPE_PCVOID_PCVOID_BND): New.
> >         (BND_FTYPE_PCVOID_PCVOID): New.
> >         (BND_FTYPE_PCVOID): New.
> >         (BND_FTYPE_BND_BND): New.
> >         (PVOID_FTYPE_PVOID_PVOID_ULONG): New.
> >         (PVOID_FTYPE_PCVOID_BND_ULONG): New.
> >         (ULONG_FTYPE_VOID): New.
> >         (PVOID_FTYPE_BND): New.
> >         * config/i386/i386.c: Include tree-chkp.h, rtl-chkp.h.
> >         (ix86_builtins): Add
> >         IX86_BUILTIN_BNDMK, IX86_BUILTIN_BNDSTX,
> >         IX86_BUILTIN_BNDLDX, IX86_BUILTIN_BNDCL,
> >         IX86_BUILTIN_BNDCU, IX86_BUILTIN_BNDRET,
> >         IX86_BUILTIN_BNDNARROW, IX86_BUILTIN_BNDINT,
> >         IX86_BUILTIN_SIZEOF, IX86_BUILTIN_BNDLOWER,
> >         IX86_BUILTIN_BNDUPPER.
> >         (builtin_isa): Add leaf_p and nothrow_p fields.
> >         (def_builtin): Initialize leaf_p and nothrow_p.
> >         (ix86_add_new_builtins): Handle leaf_p and nothrow_p
> >         flags.
> >         (bdesc_mpx): New.
> >         (bdesc_mpx_const): New.
> >         (ix86_init_mpx_builtins): New.
> >         (ix86_init_builtins): Call ix86_init_mpx_builtins.
> >         (ix86_emit_cmove): New.
> >         (ix86_emit_move_max): New.
> >         (ix86_expand_builtin): Expand IX86_BUILTIN_BNDMK,
> >         IX86_BUILTIN_BNDSTX, IX86_BUILTIN_BNDLDX,
> >         IX86_BUILTIN_BNDCL, IX86_BUILTIN_BNDCU,
> >         IX86_BUILTIN_BNDRET, IX86_BUILTIN_BNDNARROW,
> >         IX86_BUILTIN_BNDINT, IX86_BUILTIN_SIZEOF,
> >         IX86_BUILTIN_BNDLOWER, IX86_BUILTIN_BNDUPPER.
> >         * config/i386/i386.h (ix86_stack_slot): Added SLOT_BND_STORED.
> 
> ..
> 
> > +       /* We need to move bounds to memory before any computations.  */
> > +       if (!MEM_P (op1))
> > +         {
> > +           m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
> > +           emit_move_insn (m1, op1);
> > +         }
> > +       else
> > +         m1 = op1;
> 
> No negative conditions, please. Just swap the arms of if sentence. It
> is much more readable.
> 
> > +
> > +       /* Generate mem expression to be used for access to LB and UB.  */
> > +       m1h1 = gen_rtx_MEM (Pmode, XEXP (m1, 0));
> > +       m1h2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (m1, 0),
> > +                                                 GET_MODE_SIZE (Pmode)));
> 
> Please use adjust_address  instead of manually producing MEMs.
> 
> > +
> > +       t1 = gen_reg_rtx (Pmode);
> > +
> > +       /* Compute LB.  */
> > +       emit_move_insn (t1, m1h1);
> > +       ix86_emit_move_max (t1, lb);
> > +       emit_move_insn (m1h1, t1);
> > +
> > +       /* Compute UB.  UB is stored in 1's complement form.  Therefore
> > +          we also use max here.  */
> > +       emit_move_insn (t1, m1h2);
> > +       ix86_emit_move_max (t1, ub);
> > +       emit_move_insn (m1h2, t1);
> > +
> > +       op2 = gen_reg_rtx (BNDmode);
> > +       emit_move_insn (op2, m1);
> > +
> > +       return chkp_join_splitted_slot (lb, op2);
> > +      }
> > +
> > +    case IX86_BUILTIN_BNDINT:
> 
> The handling in this builtin looks strange.
> 
> I suggest to do it in a serial way, like this:
> 
> if (MEM_P (op0)
>   m0 = op0;
> else
>   {
>     m0 = stack (SLOT_TEMP)
>     move (op0, m0)
>   }
> 
> move parts of m0 to temporaries.
> 
> if (MEM_P (op1)
>   m1 = op1;
> else
>   {
>     m1 = stack (SLOT_TEMP)
>     move (op1, m1)
>   }
> 
> move parts of m1 to another temporaries.
> 
> Process temporaries.
> 
> res = stack (SLOT_BND_STORED)
> 
> move calculated stuff to res.
> 
> This will ensure that nobody will clobber your stack slot with the
> result, assuming consumer will soon use it. SLOT_TEMP is a short-lived
> slot, and can be reused.

I see that returning the result in memory is not a good idea at all, since the result may be corrupted by subsequent builtin calls.  And there is no reason for a new slot if the result is returned in a register and only one slot is in use at a time.  Therefore I removed SLOT_BND_STORED.

> 
> > +      {
> > +       unsigned bndsize = GET_MODE_SIZE (BNDmode);
> > +       unsigned psize = GET_MODE_SIZE (Pmode);
> > +       rtx res, m1, m2, m1h1, m1h2, m2h1, m2h2, t1, t2, rh1, rh2;
> > +
> > +       arg0 = CALL_EXPR_ARG (exp, 0);
> > +       arg1 = CALL_EXPR_ARG (exp, 1);
> > +
> > +       op0 = expand_normal (arg0);
> > +       op1 = expand_normal (arg1);
> > +
> > +       /* We need to move bounds to memory before any computations.  */
> > +       if (!MEM_P (op0))
> > +         {
> > +           m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
> > +           emit_move_insn (m1, op0);
> > +         }
> > +       else
> > +         m1 = op0;
> > +
> > +       if (!MEM_P (op1))
> > +         {
> > +           m2 = assign_386_stack_local (BNDmode,
> > +                                        MEM_P (op0)
> > +                                        ? SLOT_TEMP
> > +                                        : SLOT_BND_STORED);
> > +           emit_move_insn (m2, op1);
> > +         }
> > +       else
> > +         m2 = op1;
> > +
> > +       if (!MEM_P (op0))
> > +         res = m1;
> > +       else if (!MEM_P (op1))
> > +         res = m2;
> > +       else
> > +         res = assign_386_stack_local (BNDmode, SLOT_TEMP);
> 
> > +       /* Generate mem expression to be used for access to LB and UB.  */
> > +       m1h1 = gen_rtx_MEM (Pmode, XEXP (m1, 0));
> > +       m1h2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (m1, 0), psize));
> > +       m2h1 = gen_rtx_MEM (Pmode, XEXP (m2, 0));
> > +       m2h2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (m2, 0), psize));
> > +       rh1 = gen_rtx_MEM (Pmode, XEXP (res, 0));
> > +       rh2 = gen_rtx_MEM (Pmode, plus_constant (Pmode, XEXP (res, 0), psize));
> 
> Please use adjust_address here.
> 
> > +
> > +       /* Allocate temporaries.  */
> > +       t1 = gen_reg_rtx (Pmode);
> > +       t2 = gen_reg_rtx (Pmode);
> > +
> > +       /* Compute LB.  */
> > +       emit_move_insn (t1, m1h1);
> > +       emit_move_insn (t2, m2h1);
> > +       ix86_emit_move_max (t1, t2);
> > +       emit_move_insn (rh1, t1);
> > +
> > +       /* Compute UB.  UB is stored in 1's complement form.  Therefore
> > +          we also use max here.  */
> > +       emit_move_insn (t1, m1h2);
> > +       emit_move_insn (t2, m2h2);
> > +       ix86_emit_move_max (t1, t2);
> > +       emit_move_insn (rh2, t1);
> > +
> > +       return res;
> > +      }
> > +
> > +    case IX86_BUILTIN_SIZEOF:
> > +      {
> > +       enum machine_mode mode = Pmode;
> 
> No need for the above temporary...
> 
> > +       tree name;
> > +       rtx temp;
> > +
> > +       if (!target
> > +           || GET_MODE (target) != Pmode
> > +           || !register_operand (target, Pmode))
> > +         target = gen_reg_rtx (Pmode);
> > +
> > +       arg0 = CALL_EXPR_ARG (exp, 0);
> > +       gcc_assert (TREE_CODE (arg0) == VAR_DECL);
> > +
> > +       name = DECL_ASSEMBLER_NAME (arg0);
> > +       temp = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
> > +       temp = gen_rtx_UNSPEC (mode, gen_rtvec (1, temp), UNSPEC_SIZEOF);
> 
> Call expander directly, use target as an operand 0. You won't need move below.
> 
> > +       emit_move_insn (target, temp);
> > +
> > +       return target;
> > +      }
> > +
> > +    case IX86_BUILTIN_BNDLOWER:
> > +      {
> > +       rtx mem, hmem;
> > +
> > +       if (!target
> > +           || GET_MODE (target) != Pmode
> > +           || !register_operand (target, Pmode))
> > +         target = gen_reg_rtx (Pmode);
> > +
> > +       arg0 = CALL_EXPR_ARG (exp, 0);
> > +       op0 = expand_normal (arg0);
> > +
> > +       /* We need to move bounds to memory first.  */
> > +       if (!MEM_P (op0))
> > +         {
> > +           mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
> > +           emit_move_insn (mem, op0);
> > +         }
> > +       else
> > +         mem = op0;
> 
> No negative conditions.
> 
> > +       /* Generate mem expression to access LB and load it.  */
> > +       hmem = gen_rtx_MEM (Pmode, XEXP (mem, 0));
> 
> adjust_address again.
> 
> > +       emit_move_insn (target, hmem);
> > +
> > +       return target;
> > +      }
> > +
> > +    case IX86_BUILTIN_BNDUPPER:
> > +      {
> > +       rtx mem, hmem;
> > +
> > +       if (!target
> > +           || GET_MODE (target) != Pmode
> > +           || !register_operand (target, Pmode))
> > +         target = gen_reg_rtx (Pmode);
> > +
> > +       arg0 = CALL_EXPR_ARG (exp, 0);
> > +       op0 = expand_normal (arg0);
> > +
> > +       /* We need to move bounds to memory first.  */
> > +       if (!MEM_P (op0))
> > +         {
> > +           mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
> > +           emit_move_insn (mem, op0);
> > +         }
> > +       else
> > +         mem = op0;
> 
> No negative conditions.
> 
> > +       /* Generate mem expression to access UB and load it.  */
> > +       hmem = gen_rtx_MEM (Pmode,
> > +                           gen_rtx_PLUS (Pmode, XEXP (mem, 0),
> > +                                         GEN_INT (GET_MODE_SIZE (Pmode))));
> 
> adjust_address again.
> 
> > +       emit_move_insn (target, hmem);
> > +
> > +       /* We need to inverse all bits of UB.  */
> > +       emit_move_insn (target, gen_rtx_NOT (Pmode, target));
> 
> Use emit_simple_unop here.
> 
> > +
> > +       return target;
> > +      }
> > +
> >      case IX86_BUILTIN_MASKMOVQ:
> >      case IX86_BUILTIN_MASKMOVDQU:
> >        icode = (fcode == IX86_BUILTIN_MASKMOVQ
> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> > index a38c5d1..ededa67 100644
> > --- a/gcc/config/i386/i386.h
> > +++ b/gcc/config/i386/i386.h
> > @@ -2317,6 +2317,7 @@ enum ix86_stack_slot
> >    SLOT_CW_FLOOR,
> >    SLOT_CW_CEIL,
> >    SLOT_CW_MASK_PM,
> > +  SLOT_BND_STORED,
> >    MAX_386_STACK_LOCALS
> >  };
> >
> 
> Uros.

Thanks for your comments.  Below is a fixed version.

Ilya
--
2014-09-17  Ilya Enkovich  <ilya.enkovich@intel.com>

	* config/i386/i386-builtin-types.def (BND): New.
	(ULONG): New.
	(BND_FTYPE_PCVOID_ULONG): New.
	(VOID_FTYPE_BND_PCVOID): New.
	(VOID_FTYPE_PCVOID_PCVOID_BND): New.
	(BND_FTYPE_PCVOID_PCVOID): New.
	(BND_FTYPE_PCVOID): New.
	(BND_FTYPE_BND_BND): New.
	(PVOID_FTYPE_PVOID_PVOID_ULONG): New.
	(PVOID_FTYPE_PCVOID_BND_ULONG): New.
	(ULONG_FTYPE_VOID): New.
	(PVOID_FTYPE_BND): New.
	* config/i386/i386.c: Include tree-chkp.h, rtl-chkp.h.
	(ix86_builtins): Add
	IX86_BUILTIN_BNDMK, IX86_BUILTIN_BNDSTX,
	IX86_BUILTIN_BNDLDX, IX86_BUILTIN_BNDCL,
	IX86_BUILTIN_BNDCU, IX86_BUILTIN_BNDRET,
	IX86_BUILTIN_BNDNARROW, IX86_BUILTIN_BNDINT,
	IX86_BUILTIN_SIZEOF, IX86_BUILTIN_BNDLOWER,
	IX86_BUILTIN_BNDUPPER.
	(builtin_isa): Add leaf_p and nothrow_p fields.
	(def_builtin): Initialize leaf_p and nothrow_p.
	(ix86_add_new_builtins): Handle leaf_p and nothrow_p
	flags.
	(bdesc_mpx): New.
	(bdesc_mpx_const): New.
	(ix86_init_mpx_builtins): New.
	(ix86_init_builtins): Call ix86_init_mpx_builtins.
	(ix86_emit_cmove): New.
	(ix86_emit_move_max): New.
	(ix86_expand_builtin): Expand IX86_BUILTIN_BNDMK,
	IX86_BUILTIN_BNDSTX, IX86_BUILTIN_BNDLDX,
	IX86_BUILTIN_BNDCL, IX86_BUILTIN_BNDCU,
	IX86_BUILTIN_BNDRET, IX86_BUILTIN_BNDNARROW,
	IX86_BUILTIN_BNDINT, IX86_BUILTIN_SIZEOF,
	IX86_BUILTIN_BNDLOWER, IX86_BUILTIN_BNDUPPER.

Comments

Uros Bizjak Sept. 18, 2014, 5:33 p.m. UTC | #1
On Thu, Sep 18, 2014 at 3:47 PM, Ilya Enkovich <enkovich.gnu@gmail.com> wrote:

> Thanks for your comments.  Below is a fixed version.
>
> Ilya
> --
> 2014-09-17  Ilya Enkovich  <ilya.enkovich@intel.com>
>
>         * config/i386/i386-builtin-types.def (BND): New.
>         (ULONG): New.
>         (BND_FTYPE_PCVOID_ULONG): New.
>         (VOID_FTYPE_BND_PCVOID): New.
>         (VOID_FTYPE_PCVOID_PCVOID_BND): New.
>         (BND_FTYPE_PCVOID_PCVOID): New.
>         (BND_FTYPE_PCVOID): New.
>         (BND_FTYPE_BND_BND): New.
>         (PVOID_FTYPE_PVOID_PVOID_ULONG): New.
>         (PVOID_FTYPE_PCVOID_BND_ULONG): New.
>         (ULONG_FTYPE_VOID): New.
>         (PVOID_FTYPE_BND): New.
>         * config/i386/i386.c: Include tree-chkp.h, rtl-chkp.h.
>         (ix86_builtins): Add
>         IX86_BUILTIN_BNDMK, IX86_BUILTIN_BNDSTX,
>         IX86_BUILTIN_BNDLDX, IX86_BUILTIN_BNDCL,
>         IX86_BUILTIN_BNDCU, IX86_BUILTIN_BNDRET,
>         IX86_BUILTIN_BNDNARROW, IX86_BUILTIN_BNDINT,
>         IX86_BUILTIN_SIZEOF, IX86_BUILTIN_BNDLOWER,
>         IX86_BUILTIN_BNDUPPER.
>         (builtin_isa): Add leaf_p and nothrow_p fields.
>         (def_builtin): Initialize leaf_p and nothrow_p.
>         (ix86_add_new_builtins): Handle leaf_p and nothrow_p
>         flags.
>         (bdesc_mpx): New.
>         (bdesc_mpx_const): New.
>         (ix86_init_mpx_builtins): New.
>         (ix86_init_builtins): Call ix86_init_mpx_builtins.
>         (ix86_emit_cmove): New.
>         (ix86_emit_move_max): New.
>         (ix86_expand_builtin): Expand IX86_BUILTIN_BNDMK,
>         IX86_BUILTIN_BNDSTX, IX86_BUILTIN_BNDLDX,
>         IX86_BUILTIN_BNDCL, IX86_BUILTIN_BNDCU,
>         IX86_BUILTIN_BNDRET, IX86_BUILTIN_BNDNARROW,
>         IX86_BUILTIN_BNDINT, IX86_BUILTIN_SIZEOF,
>         IX86_BUILTIN_BNDLOWER, IX86_BUILTIN_BNDUPPER.

OK with a few nits below.

Thanks,
Uros.

>
> diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
> index 35c0035..989297a 100644
> --- a/gcc/config/i386/i386-builtin-types.def
> +++ b/gcc/config/i386/i386-builtin-types.def
> @@ -47,6 +47,7 @@ DEF_PRIMITIVE_TYPE (UCHAR, unsigned_char_type_node)
>  DEF_PRIMITIVE_TYPE (QI, char_type_node)
>  DEF_PRIMITIVE_TYPE (HI, intHI_type_node)
>  DEF_PRIMITIVE_TYPE (SI, intSI_type_node)
> +DEF_PRIMITIVE_TYPE (BND, pointer_bounds_type_node)
>  # ??? Logically this should be intDI_type_node, but that maps to "long"
>  # with 64-bit, and that's not how the emmintrin.h is written.  Again,
>  # changing this would change name mangling.
> @@ -60,6 +61,7 @@ DEF_PRIMITIVE_TYPE (USHORT, short_unsigned_type_node)
>  DEF_PRIMITIVE_TYPE (INT, integer_type_node)
>  DEF_PRIMITIVE_TYPE (UINT, unsigned_type_node)
>  DEF_PRIMITIVE_TYPE (UNSIGNED, unsigned_type_node)
> +DEF_PRIMITIVE_TYPE (ULONG, long_unsigned_type_node)
>  DEF_PRIMITIVE_TYPE (LONGLONG, long_long_integer_type_node)
>  DEF_PRIMITIVE_TYPE (ULONGLONG, long_long_unsigned_type_node)
>  DEF_PRIMITIVE_TYPE (UINT8, unsigned_char_type_node)
> @@ -806,3 +808,15 @@ DEF_FUNCTION_TYPE_ALIAS (V2DI_FTYPE_V2DI_V2DI, TF)
>  DEF_FUNCTION_TYPE_ALIAS (V4SF_FTYPE_V4SF_V4SF, TF)
>  DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V4SI_V4SI, TF)
>  DEF_FUNCTION_TYPE_ALIAS (V8HI_FTYPE_V8HI_V8HI, TF)
> +
> +# MPX builtins
> +DEF_FUNCTION_TYPE (BND, PCVOID, ULONG)
> +DEF_FUNCTION_TYPE (VOID, PCVOID, BND)
> +DEF_FUNCTION_TYPE (VOID, PCVOID, BND, PCVOID)
> +DEF_FUNCTION_TYPE (BND, PCVOID, PCVOID)
> +DEF_FUNCTION_TYPE (BND, PCVOID)
> +DEF_FUNCTION_TYPE (BND, BND, BND)
> +DEF_FUNCTION_TYPE (PVOID, PVOID, PVOID, ULONG)
> +DEF_FUNCTION_TYPE (PVOID, PCVOID, BND, ULONG)
> +DEF_FUNCTION_TYPE (ULONG, VOID)
> +DEF_FUNCTION_TYPE (PVOID, BND)
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index d0f58b1..6082f86 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -85,6 +85,8 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-vectorizer.h"
>  #include "shrink-wrap.h"
>  #include "builtins.h"
> +#include "tree-chkp.h"
> +#include "rtl-chkp.h"
>
>  static rtx legitimize_dllimport_symbol (rtx, bool);
>  static rtx legitimize_pe_coff_extern_decl (rtx, bool);
> @@ -28775,6 +28777,19 @@ enum ix86_builtins
>    IX86_BUILTIN_XABORT,
>    IX86_BUILTIN_XTEST,
>
> +  /* MPX */
> +  IX86_BUILTIN_BNDMK,
> +  IX86_BUILTIN_BNDSTX,
> +  IX86_BUILTIN_BNDLDX,
> +  IX86_BUILTIN_BNDCL,
> +  IX86_BUILTIN_BNDCU,
> +  IX86_BUILTIN_BNDRET,
> +  IX86_BUILTIN_BNDNARROW,
> +  IX86_BUILTIN_BNDINT,
> +  IX86_BUILTIN_SIZEOF,
> +  IX86_BUILTIN_BNDLOWER,
> +  IX86_BUILTIN_BNDUPPER,
> +
>    /* BMI instructions.  */
>    IX86_BUILTIN_BEXTR32,
>    IX86_BUILTIN_BEXTR64,
> @@ -28848,6 +28863,8 @@ struct builtin_isa {
>    enum ix86_builtin_func_type tcode; /* type to use in the declaration */
>    HOST_WIDE_INT isa;           /* isa_flags this builtin is defined for */
>    bool const_p;                        /* true if the declaration is constant */
> +  bool leaf_p;                 /* true if the declaration has leaf attribute */
> +  bool nothrow_p;              /* true if the declaration has nothrow attribute */
>    bool set_and_not_built_p;
>  };
>
> @@ -28899,6 +28916,8 @@ def_builtin (HOST_WIDE_INT mask, const char *name,
>           ix86_builtins[(int) code] = NULL_TREE;
>           ix86_builtins_isa[(int) code].tcode = tcode;
>           ix86_builtins_isa[(int) code].name = name;
> +         ix86_builtins_isa[(int) code].leaf_p = false;
> +         ix86_builtins_isa[(int) code].nothrow_p = false;
>           ix86_builtins_isa[(int) code].const_p = false;
>           ix86_builtins_isa[(int) code].set_and_not_built_p = true;
>         }
> @@ -28949,6 +28968,11 @@ ix86_add_new_builtins (HOST_WIDE_INT isa)
>           ix86_builtins[i] = decl;
>           if (ix86_builtins_isa[i].const_p)
>             TREE_READONLY (decl) = 1;
> +         if (ix86_builtins_isa[i].leaf_p)
> +           DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
> +                                                     NULL_TREE);
> +         if (ix86_builtins_isa[i].nothrow_p)
> +           TREE_NOTHROW (decl) = 1;
>         }
>      }
>  }
> @@ -30402,6 +30426,27 @@ static const struct builtin_description bdesc_round_args[] =
>    { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
>  };
>
> +/* Bultins for MPX.  */
> +static const struct builtin_description bdesc_mpx[] =
> +{
> +  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
> +  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
> +  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
> +};
> +
> +/* Const builtins for MPX.  */
> +static const struct builtin_description bdesc_mpx_const[] =
> +{
> +  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
> +  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
> +  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
> +  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
> +  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
> +  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
> +  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
> +  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
> +};
> +
>  /* FMA4 and XOP.  */
>  #define MULTI_ARG_4_DF2_DI_I   V2DF_FTYPE_V2DF_V2DF_V2DI_INT
>  #define MULTI_ARG_4_DF2_DI_I1  V4DF_FTYPE_V4DF_V4DF_V4DI_INT
> @@ -31250,6 +31295,67 @@ ix86_init_mmx_sse_builtins (void)
>      }
>  }
>
> +static void
> +ix86_init_mpx_builtins ()
> +{
> +  const struct builtin_description * d;
> +  enum ix86_builtin_func_type ftype;
> +  tree decl;
> +  size_t i;
> +
> +  for (i = 0, d = bdesc_mpx;
> +       i < ARRAY_SIZE (bdesc_mpx);
> +       i++, d++)
> +    {
> +      if (d->name == 0)
> +       continue;
> +
> +      ftype = (enum ix86_builtin_func_type) d->flag;
> +      decl = def_builtin (d->mask, d->name, ftype, d->code);
> +
> +      /* With no leaf and nothrow flags for MPX builtins
> +        abnormal edges may follow its call when setjmp
> +        presents in the function.  Since we may have a lot
> +        of MPX builtins calls it causes lots of useless
> +        edges and enormous PHI nodes.  To avoid this we mark
> +        MPX builtins as leaf and nothrow.  */
> +      if (decl)
> +       {
> +         DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
> +                                                   NULL_TREE);
> +         TREE_NOTHROW (decl) = 1;
> +       }
> +      else
> +       {
> +         ix86_builtins_isa[(int)d->code].leaf_p = true;
> +         ix86_builtins_isa[(int)d->code].nothrow_p = true;
> +       }
> +    }
> +
> +  for (i = 0, d = bdesc_mpx_const;
> +       i < ARRAY_SIZE (bdesc_mpx_const);
> +       i++, d++)
> +    {
> +      if (d->name == 0)
> +       continue;
> +
> +      ftype = (enum ix86_builtin_func_type) d->flag;
> +      decl = def_builtin_const (d->mask, d->name, ftype, d->code);
> +
> +      if (decl)
> +       {
> +         DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
> +                                                   NULL_TREE);
> +         TREE_NOTHROW (decl) = 1;
> +       }
> +      else
> +       {
> +         ix86_builtins_isa[(int)d->code].leaf_p = true;
> +         ix86_builtins_isa[(int)d->code].nothrow_p = true;
> +       }
> +    }
> +}
> +
>  /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
>     to return a pointer to VERSION_DECL if the outcome of the expression
>     formed by PREDICATE_CHAIN is true.  This function will be called during
> @@ -32788,6 +32894,7 @@ ix86_init_builtins (void)
>
>    ix86_init_tm_builtins ();
>    ix86_init_mmx_sse_builtins ();
> +  ix86_init_mpx_builtins ();
>
>    if (TARGET_LP64)
>      ix86_init_builtins_va_builtins_abi ();
> @@ -35053,6 +35160,37 @@ ix86_expand_vec_set_builtin (tree exp)
>    return target;
>  }
>
> +/* Emit conditional move of SRC to DST with condition
> +   OP1 CODE OP2.  */
> +static void
> +ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
> +{
> +  rtx t;
> +
> +  if (TARGET_CMOVE)
> +    {
> +      t = ix86_expand_compare (code, op1, op2);
> +      emit_insn (gen_rtx_SET (VOIDmode, dst,
> +                             gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
> +                                                   src, dst)));
> +    }
> +  else
> +    {
> +      rtx nomove = gen_label_rtx ();
> +      emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
> +                              const0_rtx, GET_MODE (op1), 1, nomove);
> +      emit_move_insn (dst, src);
> +      emit_label (nomove);
> +    }
> +}
> +
> +/* Choose max of DST and SRC and put it to DST.  */
> +static void
> +ix86_emit_move_max (rtx dst, rtx src)
> +{
> +  ix86_emit_cmove (dst, src, LTU, dst, src);
> +}
> +
>  /* Expand an expression EXP that calls a built-in function,
>     with result going to TARGET if that's convenient
>     (and in mode MODE if that's convenient).
> @@ -35118,6 +35256,339 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
>
>    switch (fcode)
>      {
> +    case IX86_BUILTIN_BNDMK:
> +      if (!target
> +         || GET_MODE (target) != BNDmode
> +         || !register_operand (target, BNDmode))
> +       target = gen_reg_rtx (BNDmode);
> +
> +      arg0 = CALL_EXPR_ARG (exp, 0);
> +      arg1 = CALL_EXPR_ARG (exp, 1);
> +
> +      op0 = expand_normal (arg0);
> +      op1 = expand_normal (arg1);
> +
> +      if (!register_operand (op0, Pmode))
> +       op0 = ix86_zero_extend_to_Pmode (op0);
> +      if (!register_operand (op1, Pmode))
> +       op1 = ix86_zero_extend_to_Pmode (op1);
> +
> +      /* Builtin arg1 is size of block but instruction op1 should
> +        be (size - 1).  */
> +      op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
> +                                op1, 1, OPTAB_DIRECT);
> +
> +      emit_insn (BNDmode == BND64mode
> +                 ? gen_bnd64_mk (target, op0, op1)
> +                 : gen_bnd32_mk (target, op0, op1));
> +      return target;
> +
> +    case IX86_BUILTIN_BNDSTX:
> +      arg0 = CALL_EXPR_ARG (exp, 0);
> +      arg1 = CALL_EXPR_ARG (exp, 1);
> +      arg2 = CALL_EXPR_ARG (exp, 2);
> +
> +      op0 = expand_normal (arg0);
> +      op1 = expand_normal (arg1);
> +      op2 = expand_normal (arg2);
> +
> +      if (!register_operand (op0, Pmode))
> +       op0 = ix86_zero_extend_to_Pmode (op0);
> +      if (!register_operand (op1, BNDmode))
> +       op1 = copy_to_mode_reg (BNDmode, op1);
> +      if (!register_operand (op2, Pmode))
> +       op2 = ix86_zero_extend_to_Pmode (op2);
> +
> +      emit_insn (BNDmode == BND64mode
> +                 ? gen_bnd64_stx (op2, op0, op1)
> +                 : gen_bnd32_stx (op2, op0, op1));
> +      return 0;
> +
> +    case IX86_BUILTIN_BNDLDX:
> +      if (!target
> +         || GET_MODE (target) != BNDmode
> +         || !register_operand (target, BNDmode))
> +       target = gen_reg_rtx (BNDmode);
> +
> +      arg0 = CALL_EXPR_ARG (exp, 0);
> +      arg1 = CALL_EXPR_ARG (exp, 1);
> +
> +      op0 = expand_normal (arg0);
> +      op1 = expand_normal (arg1);
> +
> +      if (!register_operand (op0, Pmode))
> +       op0 = ix86_zero_extend_to_Pmode (op0);
> +      if (!register_operand (op1, Pmode))
> +       op1 = ix86_zero_extend_to_Pmode (op1);
> +
> +      emit_insn (BNDmode == BND64mode
> +                ? gen_bnd64_ldx (target, op0, op1)
> +                : gen_bnd32_ldx (target, op0, op1));
> +      return target;
> +
> +    case IX86_BUILTIN_BNDCL:
> +      arg0 = CALL_EXPR_ARG (exp, 0);
> +      arg1 = CALL_EXPR_ARG (exp, 1);
> +
> +      op0 = expand_normal (arg0);
> +      op1 = expand_normal (arg1);
> +
> +      if (!register_operand (op0, Pmode))
> +       op0 = ix86_zero_extend_to_Pmode (op0);
> +      if (!register_operand (op1, BNDmode))
> +       op1 = copy_to_mode_reg (BNDmode, op1);
> +
> +      emit_insn (BNDmode == BND64mode
> +                 ? gen_bnd64_cl (op1, op0)
> +                 : gen_bnd32_cl (op1, op0));
> +      return 0;
> +
> +    case IX86_BUILTIN_BNDCU:
> +      arg0 = CALL_EXPR_ARG (exp, 0);
> +      arg1 = CALL_EXPR_ARG (exp, 1);
> +
> +      op0 = expand_normal (arg0);
> +      op1 = expand_normal (arg1);
> +
> +      if (!register_operand (op0, Pmode))
> +       op0 = ix86_zero_extend_to_Pmode (op0);
> +      if (!register_operand (op1, BNDmode))
> +       op1 = copy_to_mode_reg (BNDmode, op1);
> +
> +      emit_insn (BNDmode == BND64mode
> +                 ? gen_bnd64_cu (op1, op0)
> +                 : gen_bnd32_cu (op1, op0));
> +      return 0;
> +
> +    case IX86_BUILTIN_BNDRET:
> +      arg0 = CALL_EXPR_ARG (exp, 0);
> +      gcc_assert (TREE_CODE (arg0) == SSA_NAME);
> +      target = chkp_get_rtl_bounds (arg0);

Please add vertical space here ...

> +      /* If no bounds were specified for returned value,
> +        then use INIT bounds.  It usually happens when
> +        some built-in function is expanded.  */
> +      if (!target)
> +       {
> +         rtx t1 = gen_reg_rtx (Pmode);
> +         rtx t2 = gen_reg_rtx (Pmode);
> +         target = gen_reg_rtx (BNDmode);
> +         emit_move_insn (t1, const0_rtx);
> +         emit_move_insn (t2, constm1_rtx);
> +         emit_insn (BNDmode == BND64mode
> +                    ? gen_bnd64_mk (target, t1, t2)
> +                    : gen_bnd32_mk (target, t1, t2));
> +       }

... and here.

> +      gcc_assert (target && REG_P (target));
> +      return target;
> +
> +    case IX86_BUILTIN_BNDNARROW:
> +      {
> +       rtx m1, m1h1, m1h2, lb, ub, t1;
> +
> +       /* Return value and lb.  */
> +       arg0 = CALL_EXPR_ARG (exp, 0);
> +       /* Bounds.  */
> +       arg1 = CALL_EXPR_ARG (exp, 1);
> +       /* Size.  */
> +       arg2 = CALL_EXPR_ARG (exp, 2);
> +
> +       lb = expand_normal (arg0);
> +       op1 = expand_normal (arg1);
> +       op2 = expand_normal (arg2);
> +
> +       /* Size was passed but we need to use (size - 1) as for bndmk.  */
> +       op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
> +                                  op2, 1, OPTAB_DIRECT);
> +
> +       /* Add LB to size and inverse to get UB.  */
> +       op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
> +                                  op2, 1, OPTAB_DIRECT);
> +       ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
> +
> +       if (!register_operand (lb, Pmode))
> +         lb = ix86_zero_extend_to_Pmode (lb);
> +       if (!register_operand (op2, Pmode))
> +         ub = ix86_zero_extend_to_Pmode (op2);
> +
> +       /* We need to move bounds to memory before any computations.  */
> +       if (MEM_P (op1))
> +         m1 = op1;
> +       else
> +         {
> +           m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
> +           emit_move_insn (m1, op1);
> +         }
> +
> +       /* Generate mem expression to be used for access to LB and UB.  */
> +       m1h1 = adjust_address (m1, Pmode, 0);
> +       m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
> +
> +       t1 = gen_reg_rtx (Pmode);
> +
> +       /* Compute LB.  */
> +       emit_move_insn (t1, m1h1);
> +       ix86_emit_move_max (t1, lb);
> +       emit_move_insn (m1h1, t1);
> +
> +       /* Compute UB.  UB is stored in 1's complement form.  Therefore
> +          we also use max here.  */
> +       emit_move_insn (t1, m1h2);
> +       ix86_emit_move_max (t1, ub);
> +       emit_move_insn (m1h2, t1);
> +
> +       op2 = gen_reg_rtx (BNDmode);
> +       emit_move_insn (op2, m1);
> +
> +       return chkp_join_splitted_slot (lb, op2);
> +      }
> +
> +    case IX86_BUILTIN_BNDINT:
> +      {
> +       rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
> +
> +       if (!target
> +           || GET_MODE (target) != BNDmode
> +           || !register_operand (target, BNDmode))
> +         target = gen_reg_rtx (BNDmode);
> +
> +       arg0 = CALL_EXPR_ARG (exp, 0);
> +       arg1 = CALL_EXPR_ARG (exp, 1);
> +
> +       op0 = expand_normal (arg0);
> +       op1 = expand_normal (arg1);
> +
> +       res = assign_386_stack_local (BNDmode, SLOT_TEMP);
> +       rh1 = adjust_address (res, Pmode, 0);
> +       rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
> +
> +       /* Put first bounds to temporaries.  */
> +       lb1 = gen_reg_rtx (Pmode);
> +       ub1 = gen_reg_rtx (Pmode);
> +       if (MEM_P (op0))
> +         {
> +           emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
> +           emit_move_insn (ub1, adjust_address (op0, Pmode,
> +                                                GET_MODE_SIZE (Pmode)));
> +         }
> +       else
> +         {
> +           emit_move_insn (res, op0);
> +           emit_move_insn (lb1, rh1);
> +           emit_move_insn (ub1, rh2);
> +         }
> +
> +       /* Put second bounds to temporaries.  */
> +       lb2 = gen_reg_rtx (Pmode);
> +       ub2 = gen_reg_rtx (Pmode);
> +       if (MEM_P (op1))
> +         {
> +           emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
> +           emit_move_insn (ub2, adjust_address (op1, Pmode,
> +                                                GET_MODE_SIZE (Pmode)));
> +         }
> +       else
> +         {
> +           emit_move_insn (res, op1);
> +           emit_move_insn (lb2, rh1);
> +           emit_move_insn (ub2, rh2);
> +         }
> +
> +       /* Compute LB.  */
> +       ix86_emit_move_max (lb1, lb2);
> +       emit_move_insn (rh1, lb1);
> +
> +       /* Compute UB.  UB is stored in 1's complement form.  Therefore
> +          we also use max here.  */
> +       ix86_emit_move_max (ub1, ub2);
> +       emit_move_insn (rh2, ub1);
> +
> +       emit_move_insn (target, res);
> +
> +       return target;
> +      }
> +
> +    case IX86_BUILTIN_SIZEOF:
> +      {
> +       tree name;
> +       rtx symbol;
> +
> +       if (!target
> +           || GET_MODE (target) != Pmode
> +           || !register_operand (target, Pmode))
> +         target = gen_reg_rtx (Pmode);
> +
> +       arg0 = CALL_EXPR_ARG (exp, 0);
> +       gcc_assert (TREE_CODE (arg0) == VAR_DECL);
> +
> +       name = DECL_ASSEMBLER_NAME (arg0);
> +       symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
> +
> +       emit_insn (Pmode == SImode
> +                  ? gen_move_size_reloc_si (target, symbol)
> +                  : gen_move_size_reloc_di (target, symbol));
> +
> +       return target;
> +      }
> +
> +    case IX86_BUILTIN_BNDLOWER:
> +      {
> +       rtx mem, hmem;
> +
> +       if (!target
> +           || GET_MODE (target) != Pmode
> +           || !register_operand (target, Pmode))
> +         target = gen_reg_rtx (Pmode);
> +
> +       arg0 = CALL_EXPR_ARG (exp, 0);
> +       op0 = expand_normal (arg0);
> +
> +       /* We need to move bounds to memory first.  */
> +       if (MEM_P (op0))
> +         mem = op0;
> +       else
> +         {
> +           mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
> +           emit_move_insn (mem, op0);
> +         }
> +
> +       /* Generate mem expression to access LB and load it.  */
> +       hmem = adjust_address (mem, Pmode, 0);
> +       emit_move_insn (target, hmem);
> +
> +       return target;
> +      }
> +
> +    case IX86_BUILTIN_BNDUPPER:
> +      {
> +       rtx mem, hmem, res;
> +
> +       if (!target
> +           || GET_MODE (target) != Pmode
> +           || !register_operand (target, Pmode))
> +         target = gen_reg_rtx (Pmode);
> +
> +       arg0 = CALL_EXPR_ARG (exp, 0);
> +       op0 = expand_normal (arg0);
> +
> +       /* We need to move bounds to memory first.  */
> +       if (MEM_P (op0))
> +         mem = op0;
> +       else
> +         {
> +           mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
> +           emit_move_insn (mem, op0);
> +         }
> +
> +       /* Generate mem expression to access UB.  */
> +       hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));

Vertical space here ...
> +       /* We need to inverse all bits of UB.  */
> +       res = expand_simple_unop (Pmode, NOT, hmem, target, 1);

... and here.

> +       if (res != target)
> +         emit_move_insn (target, res);
> +
> +       return target;
> +      }
> +
>      case IX86_BUILTIN_MASKMOVQ:
>      case IX86_BUILTIN_MASKMOVDQU:
>        icode = (fcode == IX86_BUILTIN_MASKMOVQ
diff mbox

Patch

diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 35c0035..989297a 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -47,6 +47,7 @@  DEF_PRIMITIVE_TYPE (UCHAR, unsigned_char_type_node)
 DEF_PRIMITIVE_TYPE (QI, char_type_node)
 DEF_PRIMITIVE_TYPE (HI, intHI_type_node)
 DEF_PRIMITIVE_TYPE (SI, intSI_type_node)
+DEF_PRIMITIVE_TYPE (BND, pointer_bounds_type_node)
 # ??? Logically this should be intDI_type_node, but that maps to "long"
 # with 64-bit, and that's not how the emmintrin.h is written.  Again, 
 # changing this would change name mangling.
@@ -60,6 +61,7 @@  DEF_PRIMITIVE_TYPE (USHORT, short_unsigned_type_node)
 DEF_PRIMITIVE_TYPE (INT, integer_type_node)
 DEF_PRIMITIVE_TYPE (UINT, unsigned_type_node)
 DEF_PRIMITIVE_TYPE (UNSIGNED, unsigned_type_node)
+DEF_PRIMITIVE_TYPE (ULONG, long_unsigned_type_node)
 DEF_PRIMITIVE_TYPE (LONGLONG, long_long_integer_type_node)
 DEF_PRIMITIVE_TYPE (ULONGLONG, long_long_unsigned_type_node)
 DEF_PRIMITIVE_TYPE (UINT8, unsigned_char_type_node)
@@ -806,3 +808,15 @@  DEF_FUNCTION_TYPE_ALIAS (V2DI_FTYPE_V2DI_V2DI, TF)
 DEF_FUNCTION_TYPE_ALIAS (V4SF_FTYPE_V4SF_V4SF, TF)
 DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V4SI_V4SI, TF)
 DEF_FUNCTION_TYPE_ALIAS (V8HI_FTYPE_V8HI_V8HI, TF)
+
+# MPX builtins
+DEF_FUNCTION_TYPE (BND, PCVOID, ULONG)
+DEF_FUNCTION_TYPE (VOID, PCVOID, BND)
+DEF_FUNCTION_TYPE (VOID, PCVOID, BND, PCVOID)
+DEF_FUNCTION_TYPE (BND, PCVOID, PCVOID)
+DEF_FUNCTION_TYPE (BND, PCVOID)
+DEF_FUNCTION_TYPE (BND, BND, BND)
+DEF_FUNCTION_TYPE (PVOID, PVOID, PVOID, ULONG)
+DEF_FUNCTION_TYPE (PVOID, PCVOID, BND, ULONG)
+DEF_FUNCTION_TYPE (ULONG, VOID)
+DEF_FUNCTION_TYPE (PVOID, BND)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index d0f58b1..6082f86 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -85,6 +85,8 @@  along with GCC; see the file COPYING3.  If not see
 #include "tree-vectorizer.h"
 #include "shrink-wrap.h"
 #include "builtins.h"
+#include "tree-chkp.h"
+#include "rtl-chkp.h"
 
 static rtx legitimize_dllimport_symbol (rtx, bool);
 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
@@ -28775,6 +28777,19 @@  enum ix86_builtins
   IX86_BUILTIN_XABORT,
   IX86_BUILTIN_XTEST,
 
+  /* MPX */
+  IX86_BUILTIN_BNDMK,
+  IX86_BUILTIN_BNDSTX,
+  IX86_BUILTIN_BNDLDX,
+  IX86_BUILTIN_BNDCL,
+  IX86_BUILTIN_BNDCU,
+  IX86_BUILTIN_BNDRET,
+  IX86_BUILTIN_BNDNARROW,
+  IX86_BUILTIN_BNDINT,
+  IX86_BUILTIN_SIZEOF,
+  IX86_BUILTIN_BNDLOWER,
+  IX86_BUILTIN_BNDUPPER,
+
   /* BMI instructions.  */
   IX86_BUILTIN_BEXTR32,
   IX86_BUILTIN_BEXTR64,
@@ -28848,6 +28863,8 @@  struct builtin_isa {
   enum ix86_builtin_func_type tcode; /* type to use in the declaration */
   HOST_WIDE_INT isa;		/* isa_flags this builtin is defined for */
   bool const_p;			/* true if the declaration is constant */
+  bool leaf_p;			/* true if the declaration has leaf attribute */
+  bool nothrow_p;		/* true if the declaration has nothrow attribute */
   bool set_and_not_built_p;
 };
 
@@ -28899,6 +28916,8 @@  def_builtin (HOST_WIDE_INT mask, const char *name,
 	  ix86_builtins[(int) code] = NULL_TREE;
 	  ix86_builtins_isa[(int) code].tcode = tcode;
 	  ix86_builtins_isa[(int) code].name = name;
+	  ix86_builtins_isa[(int) code].leaf_p = false;
+	  ix86_builtins_isa[(int) code].nothrow_p = false;
 	  ix86_builtins_isa[(int) code].const_p = false;
 	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
 	}
@@ -28949,6 +28968,11 @@  ix86_add_new_builtins (HOST_WIDE_INT isa)
 	  ix86_builtins[i] = decl;
 	  if (ix86_builtins_isa[i].const_p)
 	    TREE_READONLY (decl) = 1;
+	  if (ix86_builtins_isa[i].leaf_p)
+	    DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+						      NULL_TREE);
+	  if (ix86_builtins_isa[i].nothrow_p)
+	    TREE_NOTHROW (decl) = 1;
 	}
     }
 }
@@ -30402,6 +30426,27 @@  static const struct builtin_description bdesc_round_args[] =
   { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
 };
 
+/* Builtins for MPX.  */
+static const struct builtin_description bdesc_mpx[] =
+{
+  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
+  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
+  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
+};
+
+/* Const builtins for MPX.  */
+static const struct builtin_description bdesc_mpx_const[] =
+{
+  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
+  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
+  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
+  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
+  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
+  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
+  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
+  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
+};
+
 /* FMA4 and XOP.  */
 #define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
 #define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
@@ -31250,6 +31295,67 @@  ix86_init_mmx_sse_builtins (void)
     }
 }
 
+static void
+ix86_init_mpx_builtins ()
+{
+  const struct builtin_description * d;
+  enum ix86_builtin_func_type ftype;
+  tree decl;
+  size_t i;
+
+  for (i = 0, d = bdesc_mpx;
+       i < ARRAY_SIZE (bdesc_mpx);
+       i++, d++)
+    {
+      if (d->name == 0)
+	continue;
+
+      ftype = (enum ix86_builtin_func_type) d->flag;
+      decl = def_builtin (d->mask, d->name, ftype, d->code);
+
+      /* Without leaf and nothrow flags on MPX builtins,
+	 abnormal edges may follow their calls when setjmp
+	 is present in the function.  Since we may have many
+	 MPX builtin calls, this causes lots of useless
+	 edges and enormous PHI nodes.  To avoid this we mark
+	 MPX builtins as leaf and nothrow.  */
+      if (decl)
+	{
+	  DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+						    NULL_TREE);
+	  TREE_NOTHROW (decl) = 1;
+	}
+      else
+	{
+	  ix86_builtins_isa[(int)d->code].leaf_p = true;
+	  ix86_builtins_isa[(int)d->code].nothrow_p = true;
+	}
+    }
+
+  for (i = 0, d = bdesc_mpx_const;
+       i < ARRAY_SIZE (bdesc_mpx_const);
+       i++, d++)
+    {
+      if (d->name == 0)
+	continue;
+
+      ftype = (enum ix86_builtin_func_type) d->flag;
+      decl = def_builtin_const (d->mask, d->name, ftype, d->code);
+
+      if (decl)
+	{
+	  DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+						    NULL_TREE);
+	  TREE_NOTHROW (decl) = 1;
+	}
+      else
+	{
+	  ix86_builtins_isa[(int)d->code].leaf_p = true;
+	  ix86_builtins_isa[(int)d->code].nothrow_p = true;
+	}
+    }
+}
+
 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
    to return a pointer to VERSION_DECL if the outcome of the expression
    formed by PREDICATE_CHAIN is true.  This function will be called during
@@ -32788,6 +32894,7 @@  ix86_init_builtins (void)
 
   ix86_init_tm_builtins ();
   ix86_init_mmx_sse_builtins ();
+  ix86_init_mpx_builtins ();
 
   if (TARGET_LP64)
     ix86_init_builtins_va_builtins_abi ();
@@ -35053,6 +35160,37 @@  ix86_expand_vec_set_builtin (tree exp)
   return target;
 }
 
+/* Emit conditional move of SRC to DST with condition
+   OP1 CODE OP2.  */
+static void
+ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
+{
+  rtx t;
+
+  if (TARGET_CMOVE)
+    {
+      t = ix86_expand_compare (code, op1, op2);
+      emit_insn (gen_rtx_SET (VOIDmode, dst,
+			      gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
+						    src, dst)));
+    }
+  else
+    {
+      rtx nomove = gen_label_rtx ();
+      emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
+			       const0_rtx, GET_MODE (op1), 1, nomove);
+      emit_move_insn (dst, src);
+      emit_label (nomove);
+    }
+}
+
+/* Choose max of DST and SRC and put it to DST.  */
+static void
+ix86_emit_move_max (rtx dst, rtx src)
+{
+  ix86_emit_cmove (dst, src, LTU, dst, src);
+}
+
 /* Expand an expression EXP that calls a built-in function,
    with result going to TARGET if that's convenient
    (and in mode MODE if that's convenient).
@@ -35118,6 +35256,339 @@  ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
 
   switch (fcode)
     {
+    case IX86_BUILTIN_BNDMK:
+      if (!target
+	  || GET_MODE (target) != BNDmode
+	  || !register_operand (target, BNDmode))
+	target = gen_reg_rtx (BNDmode);
+
+      arg0 = CALL_EXPR_ARG (exp, 0);
+      arg1 = CALL_EXPR_ARG (exp, 1);
+
+      op0 = expand_normal (arg0);
+      op1 = expand_normal (arg1);
+
+      if (!register_operand (op0, Pmode))
+	op0 = ix86_zero_extend_to_Pmode (op0);
+      if (!register_operand (op1, Pmode))
+	op1 = ix86_zero_extend_to_Pmode (op1);
+
+      /* Builtin arg1 is size of block but instruction op1 should
+	 be (size - 1).  */
+      op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
+				 op1, 1, OPTAB_DIRECT);
+
+      emit_insn (BNDmode == BND64mode
+                 ? gen_bnd64_mk (target, op0, op1)
+                 : gen_bnd32_mk (target, op0, op1));
+      return target;
+
+    case IX86_BUILTIN_BNDSTX:
+      arg0 = CALL_EXPR_ARG (exp, 0);
+      arg1 = CALL_EXPR_ARG (exp, 1);
+      arg2 = CALL_EXPR_ARG (exp, 2);
+
+      op0 = expand_normal (arg0);
+      op1 = expand_normal (arg1);
+      op2 = expand_normal (arg2);
+
+      if (!register_operand (op0, Pmode))
+	op0 = ix86_zero_extend_to_Pmode (op0);
+      if (!register_operand (op1, BNDmode))
+	op1 = copy_to_mode_reg (BNDmode, op1);
+      if (!register_operand (op2, Pmode))
+	op2 = ix86_zero_extend_to_Pmode (op2);
+
+      emit_insn (BNDmode == BND64mode
+                 ? gen_bnd64_stx (op2, op0, op1)
+                 : gen_bnd32_stx (op2, op0, op1));
+      return 0;
+
+    case IX86_BUILTIN_BNDLDX:
+      if (!target
+	  || GET_MODE (target) != BNDmode
+	  || !register_operand (target, BNDmode))
+	target = gen_reg_rtx (BNDmode);
+
+      arg0 = CALL_EXPR_ARG (exp, 0);
+      arg1 = CALL_EXPR_ARG (exp, 1);
+
+      op0 = expand_normal (arg0);
+      op1 = expand_normal (arg1);
+
+      if (!register_operand (op0, Pmode))
+	op0 = ix86_zero_extend_to_Pmode (op0);
+      if (!register_operand (op1, Pmode))
+	op1 = ix86_zero_extend_to_Pmode (op1);
+
+      emit_insn (BNDmode == BND64mode
+		 ? gen_bnd64_ldx (target, op0, op1)
+		 : gen_bnd32_ldx (target, op0, op1));
+      return target;
+
+    case IX86_BUILTIN_BNDCL:
+      arg0 = CALL_EXPR_ARG (exp, 0);
+      arg1 = CALL_EXPR_ARG (exp, 1);
+
+      op0 = expand_normal (arg0);
+      op1 = expand_normal (arg1);
+
+      if (!register_operand (op0, Pmode))
+	op0 = ix86_zero_extend_to_Pmode (op0);
+      if (!register_operand (op1, BNDmode))
+	op1 = copy_to_mode_reg (BNDmode, op1);
+
+      emit_insn (BNDmode == BND64mode
+                 ? gen_bnd64_cl (op1, op0)
+                 : gen_bnd32_cl (op1, op0));
+      return 0;
+
+    case IX86_BUILTIN_BNDCU:
+      arg0 = CALL_EXPR_ARG (exp, 0);
+      arg1 = CALL_EXPR_ARG (exp, 1);
+
+      op0 = expand_normal (arg0);
+      op1 = expand_normal (arg1);
+
+      if (!register_operand (op0, Pmode))
+	op0 = ix86_zero_extend_to_Pmode (op0);
+      if (!register_operand (op1, BNDmode))
+	op1 = copy_to_mode_reg (BNDmode, op1);
+
+      emit_insn (BNDmode == BND64mode
+                 ? gen_bnd64_cu (op1, op0)
+                 : gen_bnd32_cu (op1, op0));
+      return 0;
+
+    case IX86_BUILTIN_BNDRET:
+      arg0 = CALL_EXPR_ARG (exp, 0);
+      gcc_assert (TREE_CODE (arg0) == SSA_NAME);
+      target = chkp_get_rtl_bounds (arg0);
+      /* If no bounds were specified for returned value,
+	 then use INIT bounds.  It usually happens when
+	 some built-in function is expanded.  */
+      if (!target)
+	{
+	  rtx t1 = gen_reg_rtx (Pmode);
+	  rtx t2 = gen_reg_rtx (Pmode);
+	  target = gen_reg_rtx (BNDmode);
+	  emit_move_insn (t1, const0_rtx);
+	  emit_move_insn (t2, constm1_rtx);
+	  emit_insn (BNDmode == BND64mode
+		     ? gen_bnd64_mk (target, t1, t2)
+		     : gen_bnd32_mk (target, t1, t2));
+	}
+      gcc_assert (target && REG_P (target));
+      return target;
+
+    case IX86_BUILTIN_BNDNARROW:
+      {
+	rtx m1, m1h1, m1h2, lb, ub, t1;
+
+	/* Return value and lb.  */
+	arg0 = CALL_EXPR_ARG (exp, 0);
+	/* Bounds.  */
+	arg1 = CALL_EXPR_ARG (exp, 1);
+	/* Size.  */
+	arg2 = CALL_EXPR_ARG (exp, 2);
+
+	lb = expand_normal (arg0);
+	op1 = expand_normal (arg1);
+	op2 = expand_normal (arg2);
+
+	/* Size was passed but we need to use (size - 1) as for bndmk.  */
+	op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
+				   op2, 1, OPTAB_DIRECT);
+
+	/* Add LB to size and invert to get UB.  */
+	op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
+				   op2, 1, OPTAB_DIRECT);
+	ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
+
+	if (!register_operand (lb, Pmode))
+	  lb = ix86_zero_extend_to_Pmode (lb);
+	if (!register_operand (op2, Pmode))
+	  ub = ix86_zero_extend_to_Pmode (op2);
+
+	/* We need to move bounds to memory before any computations.  */
+	if (MEM_P (op1))
+	  m1 = op1;
+	else
+	  {
+	    m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
+	    emit_move_insn (m1, op1);
+	  }
+
+	/* Generate mem expression to be used for access to LB and UB.  */
+	m1h1 = adjust_address (m1, Pmode, 0);
+	m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
+
+	t1 = gen_reg_rtx (Pmode);
+
+	/* Compute LB.  */
+	emit_move_insn (t1, m1h1);
+	ix86_emit_move_max (t1, lb);
+	emit_move_insn (m1h1, t1);
+
+	/* Compute UB.  UB is stored in 1's complement form.  Therefore
+	   we also use max here.  */
+	emit_move_insn (t1, m1h2);
+	ix86_emit_move_max (t1, ub);
+	emit_move_insn (m1h2, t1);
+
+	op2 = gen_reg_rtx (BNDmode);
+	emit_move_insn (op2, m1);
+
+	return chkp_join_splitted_slot (lb, op2);
+      }
+
+    case IX86_BUILTIN_BNDINT:
+      {
+	rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
+
+	if (!target
+	    || GET_MODE (target) != BNDmode
+	    || !register_operand (target, BNDmode))
+	  target = gen_reg_rtx (BNDmode);
+
+	arg0 = CALL_EXPR_ARG (exp, 0);
+	arg1 = CALL_EXPR_ARG (exp, 1);
+
+	op0 = expand_normal (arg0);
+	op1 = expand_normal (arg1);
+
+	res = assign_386_stack_local (BNDmode, SLOT_TEMP);
+	rh1 = adjust_address (res, Pmode, 0);
+	rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
+
+	/* Put first bounds to temporaries.  */
+	lb1 = gen_reg_rtx (Pmode);
+	ub1 = gen_reg_rtx (Pmode);
+	if (MEM_P (op0))
+	  {
+	    emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
+	    emit_move_insn (ub1, adjust_address (op0, Pmode,
+						 GET_MODE_SIZE (Pmode)));
+	  }
+	else
+	  {
+	    emit_move_insn (res, op0);
+	    emit_move_insn (lb1, rh1);
+	    emit_move_insn (ub1, rh2);
+	  }
+
+	/* Put second bounds to temporaries.  */
+	lb2 = gen_reg_rtx (Pmode);
+	ub2 = gen_reg_rtx (Pmode);
+	if (MEM_P (op1))
+	  {
+	    emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
+	    emit_move_insn (ub2, adjust_address (op1, Pmode,
+						 GET_MODE_SIZE (Pmode)));
+	  }
+	else
+	  {
+	    emit_move_insn (res, op1);
+	    emit_move_insn (lb2, rh1);
+	    emit_move_insn (ub2, rh2);
+	  }
+
+	/* Compute LB.  */
+	ix86_emit_move_max (lb1, lb2);
+	emit_move_insn (rh1, lb1);
+
+	/* Compute UB.  UB is stored in 1's complement form.  Therefore
+	   we also use max here.  */
+	ix86_emit_move_max (ub1, ub2);
+	emit_move_insn (rh2, ub1);
+
+	emit_move_insn (target, res);
+
+	return target;
+      }
+
+    case IX86_BUILTIN_SIZEOF:
+      {
+	tree name;
+	rtx symbol;
+
+	if (!target
+	    || GET_MODE (target) != Pmode
+	    || !register_operand (target, Pmode))
+	  target = gen_reg_rtx (Pmode);
+
+	arg0 = CALL_EXPR_ARG (exp, 0);
+	gcc_assert (TREE_CODE (arg0) == VAR_DECL);
+
+	name = DECL_ASSEMBLER_NAME (arg0);
+	symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
+
+	emit_insn (Pmode == SImode
+		   ? gen_move_size_reloc_si (target, symbol)
+		   : gen_move_size_reloc_di (target, symbol));
+
+	return target;
+      }
+
+    case IX86_BUILTIN_BNDLOWER:
+      {
+	rtx mem, hmem;
+
+	if (!target
+	    || GET_MODE (target) != Pmode
+	    || !register_operand (target, Pmode))
+	  target = gen_reg_rtx (Pmode);
+
+	arg0 = CALL_EXPR_ARG (exp, 0);
+	op0 = expand_normal (arg0);
+
+	/* We need to move bounds to memory first.  */
+	if (MEM_P (op0))
+	  mem = op0;
+	else
+	  {
+	    mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
+	    emit_move_insn (mem, op0);
+	  }
+
+	/* Generate mem expression to access LB and load it.  */
+	hmem = adjust_address (mem, Pmode, 0);
+	emit_move_insn (target, hmem);
+
+	return target;
+      }
+
+    case IX86_BUILTIN_BNDUPPER:
+      {
+	rtx mem, hmem, res;
+
+	if (!target
+	    || GET_MODE (target) != Pmode
+	    || !register_operand (target, Pmode))
+	  target = gen_reg_rtx (Pmode);
+
+	arg0 = CALL_EXPR_ARG (exp, 0);
+	op0 = expand_normal (arg0);
+
+	/* We need to move bounds to memory first.  */
+	if (MEM_P (op0))
+	  mem = op0;
+	else
+	  {
+	    mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
+	    emit_move_insn (mem, op0);
+	  }
+
+	/* Generate mem expression to access UB.  */
+	hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
+	/* We need to invert all bits of UB.  */
+	res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
+	if (res != target)
+	  emit_move_insn (target, res);
+
+	return target;
+      }
+
     case IX86_BUILTIN_MASKMOVQ:
     case IX86_BUILTIN_MASKMOVDQU:
       icode = (fcode == IX86_BUILTIN_MASKMOVQ