diff mbox

[i386,MPX,2/X] Pointers Checker [24/25] MPX ABI support

Message ID 20131119204846.GV21297@msticlxl57.ims.intel.com
State New
Headers show

Commit Message

Ilya Enkovich Nov. 19, 2013, 8:48 p.m. UTC
Hi,

Here is a patch to support the MPX ABI (http://software.intel.com/en-us/articles/linux-abi).

Thanks,
Ilya
--
2013-11-13  Ilya Enkovich  <ilya.enkovich@intel.com>

	* config/i386/i386.c (x86_64_reg_class): Add
	X86_64_BOUNDED_INTEGER_CLASS and
	X86_64_BOUNDED_INTEGERSI_CLASS.
	(ix86_option_override_internal): Issue an error if MPX is
	enabled for x32.
	(ix86_return_pops_args): Pass function type to
	aggregate_value_p if function decl is not available.
	(init_cumulative_args): Initialize bound regs info
	and stdarg info.
	(merge_classes): Support X86_64_BOUNDED_INTEGER_CLASS
	and X86_64_BOUNDED_INTEGERSI_CLASS.
	(classify_argument): Use bounded classes for bounded
	types when Pointer Bounds Checker is on.
	(examine_argument): Support X86_64_BOUNDED_INTEGER_CLASS
	and X86_64_BOUNDED_INTEGERSI_CLASS.
	(construct_container): Likewise.
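	(function_arg_advance_32): Support bound regs.
	(function_arg_advance_64): Likewise.
	(function_arg_32): Return PARALLEL holding value and bounds
	for arguments with pointers.
	(function_arg_64): Adjust construct_container call.
	(ix86_function_value_regno_p): Support bound regs.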
	(function_value_64): Adjust construct_container call.
	(function_value_32): Add bound register for bounded values.
	(function_value_ms_64): Likewise.
	(return_in_memory_64): Adjust examine_argument call.
	(ix86_gimplify_va_arg): Adjust construct_container and
	examine_argument calls.
	(ix86_print_operand): Pass current_output_insn to
	ix86_bnd_prefixed_insn_p.
	(ix86_expand_call): Generate returned bounds.
	(ix86_bnd_prefixed_insn_p): Check if we have instrumented call
	or function.
	* config/i386/i386.h (ix86_args): Add bnd_nregs, bnd_regno
	and stdarg fields.
	* config/i386/i386.md (UNSPEC_BNDRET): New.
	(*call_value): Add returned bounds.
	(*sibcall_value): Likewise.
	(*call_value_rex64_ms_sysv): Likewise.
	(*call_value_pop): Likewise.
	(*sibcall_value_pop): Likewise.
	* config/i386/predicates.md (call_rex64_ms_sysv_operation): Adjust
	to changed call patterns.
diff mbox

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index d401c4f..1077168 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2195,6 +2195,9 @@  tree x86_mfence;
    of SSESF, SSEDF classes, that are basically SSE class, just gcc will
    use SF or DFmode move instead of DImode to avoid reformatting penalties.
 
+   X86_64_BOUNDED* classes are similar to integer classes but additionally
+   mean bounds should be passed for the argument.
+
    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
    whenever possible (upper half does contain padding).  */
 enum x86_64_reg_class
@@ -2209,7 +2212,9 @@  enum x86_64_reg_class
     X86_64_X87_CLASS,
     X86_64_X87UP_CLASS,
     X86_64_COMPLEX_X87_CLASS,
-    X86_64_MEMORY_CLASS
+    X86_64_MEMORY_CLASS,
+    X86_64_BOUNDED_INTEGER_CLASS,
+    X86_64_BOUNDED_INTEGERSI_CLASS
   };
 
 #define MAX_CLASSES 4
@@ -3523,6 +3528,9 @@  ix86_option_override_internal (bool main_args_p,
 	break;
       }
 
+  if (TARGET_X32 && (ix86_isa_flags & OPTION_MASK_ISA_MPX))
+    error ("Intel MPX does not support x32");
+
   if (!strcmp (opts->x_ix86_arch_string, "generic"))
     error ("generic CPU can be used only for %stune=%s %s",
 	   prefix, suffix, sw);
@@ -5551,7 +5559,7 @@  ix86_return_pops_args (tree fundecl, tree funtype, int size)
     return size;
 
   /* Lose any fake structure return argument if it is passed on the stack.  */
-  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
+  if (aggregate_value_p (TREE_TYPE (funtype), fundecl ? fundecl : funtype)
       && !ix86_keep_aggregate_return_pointer (funtype))
     {
       int nregs = ix86_function_regparm (funtype, fundecl);
@@ -5888,6 +5896,9 @@  init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
       cum->nregs = (cum->call_abi == SYSV_ABI
                    ? X86_64_REGPARM_MAX
                    : X86_64_MS_REGPARM_MAX);
+
+      /* All bound registers are available for argument passing.  */
+      cum->bnd_nregs = LAST_BND_REG - FIRST_BND_REG + 1;
     }
   if (TARGET_SSE)
     {
@@ -5913,6 +5924,7 @@  init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
      FIXME: once typesytem is fixed, we won't need this code anymore.  */
   if (i && i->local && i->can_change_signature)
     fntype = TREE_TYPE (fndecl);
+  cum->stdarg = fntype ? stdarg_p (fntype) : false;
   cum->maybe_vaarg = (fntype
 		      ? (!prototype_p (fntype) || stdarg_p (fntype))
 		      : !libname);
@@ -5929,6 +5941,7 @@  init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
 	  cum->warn_avx = 0;
 	  cum->warn_sse = 0;
 	  cum->warn_mmx = 0;
+	  cum->bnd_nregs = 0;
 	  return;
 	}
 
@@ -5949,12 +5962,15 @@  init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
 	    }
 	  else
 	    cum->nregs = ix86_function_regparm (fntype, fndecl);
+	  cum->bnd_nregs = cum->nregs;
 	}
 
       /* Set up the number of SSE registers used for passing SFmode
 	 and DFmode arguments.  Warn for mismatching ABI.  */
       cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
     }
+
+  cum->bnd_regno = FIRST_BND_REG;
 }
 
 /* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
@@ -6083,9 +6099,16 @@  merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
     return X86_64_MEMORY_CLASS;
 
   /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
+  /* Rule #4.1: If one of the classes is BOUNDED, the result is BOUNDED.  */
   if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
       || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
     return X86_64_INTEGERSI_CLASS;
+  if ((class1 == X86_64_BOUNDED_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
+      || (class2 == X86_64_BOUNDED_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
+    return X86_64_BOUNDED_INTEGERSI_CLASS;
+  if (class1 == X86_64_BOUNDED_INTEGER_CLASS || class1 == X86_64_BOUNDED_INTEGERSI_CLASS
+      || class2 == X86_64_BOUNDED_INTEGER_CLASS || class2 == X86_64_BOUNDED_INTEGERSI_CLASS)
+    return X86_64_BOUNDED_INTEGER_CLASS;
   if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
       || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
     return X86_64_INTEGER_CLASS;
@@ -6118,7 +6141,8 @@  merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
 
 static int
 classify_argument (enum machine_mode mode, const_tree type,
-		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
+		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset,
+		   bool stdarg)
 {
   HOST_WIDE_INT bytes =
     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
@@ -6211,7 +6235,7 @@  classify_argument (enum machine_mode mode, const_tree type,
 		      num = classify_argument (TYPE_MODE (type), type,
 					       subclasses,
 					       (int_bit_position (field)
-						+ bit_offset) % 256);
+						+ bit_offset) % 256, stdarg);
 		      if (!num)
 			return 0;
 		      pos = (int_bit_position (field)
@@ -6229,7 +6253,7 @@  classify_argument (enum machine_mode mode, const_tree type,
 	  {
 	    int num;
 	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
-				     TREE_TYPE (type), subclasses, bit_offset);
+				     TREE_TYPE (type), subclasses, bit_offset, stdarg);
 	    if (!num)
 	      return 0;
 
@@ -6260,7 +6284,7 @@  classify_argument (enum machine_mode mode, const_tree type,
 
 		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
 					   TREE_TYPE (field), subclasses,
-					   bit_offset);
+					   bit_offset, stdarg);
 		  if (!num)
 		    return 0;
 		  for (i = 0; i < num; i++)
@@ -6373,12 +6397,20 @@  classify_argument (enum machine_mode mode, const_tree type,
 
 	if (size <= 32)
 	  {
-	    classes[0] = X86_64_INTEGERSI_CLASS;
+	    /* Pass bounds for pointers and unnamed integers.  */
+	    classes[0] = chkp_function_instrumented_p (current_function_decl)
+	      && ((type && BOUNDED_TYPE_P (type)) || stdarg)
+	      ? X86_64_BOUNDED_INTEGERSI_CLASS
+	      : X86_64_INTEGERSI_CLASS;
 	    return 1;
 	  }
 	else if (size <= 64)
 	  {
-	    classes[0] = X86_64_INTEGER_CLASS;
+	    /* Pass bounds for pointers and unnamed integers.  */
+	    classes[0] = chkp_function_instrumented_p (current_function_decl)
+	      && ((type && BOUNDED_TYPE_P (type)) || stdarg)
+	      ? X86_64_BOUNDED_INTEGER_CLASS
+	      : X86_64_INTEGER_CLASS;
 	    return 1;
 	  }
 	else if (size <= 64+32)
@@ -6478,6 +6510,8 @@  classify_argument (enum machine_mode mode, const_tree type,
     case V8QImode:
       classes[0] = X86_64_SSE_CLASS;
       return 1;
+    case BND32mode:
+    case BND64mode:
     case BLKmode:
     case VOIDmode:
       return 0;
@@ -6502,13 +6536,14 @@  classify_argument (enum machine_mode mode, const_tree type,
    class.  Return 0 iff parameter should be passed in memory.  */
 static int
 examine_argument (enum machine_mode mode, const_tree type, int in_return,
-		  int *int_nregs, int *sse_nregs)
+		  int *int_nregs, int *sse_nregs, int *bnd_nregs, bool stdarg)
 {
   enum x86_64_reg_class regclass[MAX_CLASSES];
-  int n = classify_argument (mode, type, regclass, 0);
+  int n = classify_argument (mode, type, regclass, 0, stdarg);
 
   *int_nregs = 0;
   *sse_nregs = 0;
+  *bnd_nregs = 0;
   if (!n)
     return 0;
   for (n--; n >= 0; n--)
@@ -6518,6 +6553,11 @@  examine_argument (enum machine_mode mode, const_tree type, int in_return,
       case X86_64_INTEGERSI_CLASS:
 	(*int_nregs)++;
 	break;
+      case X86_64_BOUNDED_INTEGER_CLASS:
+      case X86_64_BOUNDED_INTEGERSI_CLASS:
+	(*int_nregs)++;
+	(*bnd_nregs)++;
+	break;
       case X86_64_SSE_CLASS:
       case X86_64_SSESF_CLASS:
       case X86_64_SSEDF_CLASS:
@@ -6545,7 +6585,8 @@  examine_argument (enum machine_mode mode, const_tree type, int in_return,
 static rtx
 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
 		     const_tree type, int in_return, int nintregs, int nsseregs,
-		     const int *intreg, int sse_regno)
+		     const int *intreg, int sse_regno,
+		     int bnd_regno, bool stdarg)
 {
   /* The following variables hold the static issued_error state.  */
   static bool issued_sse_arg_error;
@@ -6559,15 +6600,15 @@  construct_container (enum machine_mode mode, enum machine_mode orig_mode,
   int n;
   int i;
   int nexps = 0;
-  int needed_sseregs, needed_intregs;
+  int needed_sseregs, needed_intregs, needed_bndregs;
   rtx exp[MAX_CLASSES];
   rtx ret;
 
-  n = classify_argument (mode, type, regclass, 0);
+  n = classify_argument (mode, type, regclass, 0, stdarg);
   if (!n)
     return NULL;
   if (!examine_argument (mode, type, in_return, &needed_intregs,
-			 &needed_sseregs))
+			 &needed_sseregs, &needed_bndregs, stdarg))
     return NULL;
   if (needed_intregs > nintregs || needed_sseregs > nsseregs)
     return NULL;
@@ -6613,6 +6654,14 @@  construct_container (enum machine_mode mode, enum machine_mode orig_mode,
   if (n == 1 && mode != SCmode)
     switch (regclass[0])
       {
+      case X86_64_BOUNDED_INTEGER_CLASS:
+      case X86_64_BOUNDED_INTEGERSI_CLASS:
+	ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (2));
+	XVECEXP (ret, 0, 0) = bnd_regno <= LAST_BND_REG
+	  ? gen_rtx_REG (BNDmode, bnd_regno)
+	  : GEN_INT (bnd_regno - LAST_BND_REG);
+	XVECEXP (ret, 0, 1) = gen_rtx_REG (mode, intreg[0]);
+	return ret;
       case X86_64_INTEGER_CLASS:
       case X86_64_INTEGERSI_CLASS:
 	return gen_rtx_REG (mode, intreg[0]);
@@ -6667,6 +6716,17 @@  construct_container (enum machine_mode mode, enum machine_mode orig_mode,
         {
 	  case X86_64_NO_CLASS:
 	    break;
+	  case X86_64_BOUNDED_INTEGER_CLASS:
+	  case X86_64_BOUNDED_INTEGERSI_CLASS:
+	    exp [nexps++]
+	      = gen_rtx_EXPR_LIST (VOIDmode,
+				   bnd_regno <= LAST_BND_REG
+				   ? gen_rtx_REG (BNDmode, bnd_regno)
+				   : GEN_INT (bnd_regno - LAST_BND_REG),
+				   GEN_INT (i*8));
+	    bnd_regno++;
+	    /* FALLTHRU */
+
 	  case X86_64_INTEGER_CLASS:
 	  case X86_64_INTEGERSI_CLASS:
 	    /* Merge TImodes on aligned occasions here too.  */
@@ -6778,6 +6838,14 @@  function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
       cum->words += words;
       cum->nregs -= words;
       cum->regno += words;
+      if (chkp_function_instrumented_p (current_function_decl)
+	  && type
+	  && chkp_type_has_pointer (type))
+	{
+	  unsigned count = chkp_type_bounds_count (type);
+	  cum->bnd_nregs -= count;
+	  cum->bnd_regno += count;
+	}
 
       if (cum->nregs <= 0)
 	{
@@ -6849,19 +6917,23 @@  static void
 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
 			 const_tree type, HOST_WIDE_INT words, bool named)
 {
-  int int_nregs, sse_nregs;
+  int int_nregs, sse_nregs, bnd_nregs, exam;
 
   /* Unnamed 256bit vector mode parameters are passed on stack.  */
   if (!named && VALID_AVX256_REG_MODE (mode))
     return;
 
-  if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
+  exam = examine_argument (mode, type, 0, &int_nregs, &sse_nregs, &bnd_nregs, cum->stdarg);
+
+  if (exam
       && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
     {
       cum->nregs -= int_nregs;
       cum->sse_nregs -= sse_nregs;
+      cum->bnd_nregs -= bnd_nregs;
       cum->regno += int_nregs;
       cum->sse_regno += sse_nregs;
+      cum->bnd_regno += bnd_nregs;
     }
   else
     {
@@ -6969,7 +7041,57 @@  function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
 	      if (regno == AX_REG)
 		regno = CX_REG;
 	    }
-	  return gen_rtx_REG (mode, regno);
+
+	  /* Add bounds slot for each passed pointer.  */
+	  if (chkp_function_instrumented_p (current_function_decl)
+	      && type
+	      && BOUNDED_TYPE_P (type))
+	    {
+	      rtx bnd = gen_rtx_REG (BNDmode, cum->bnd_regno);
+	      rtx val = gen_rtx_REG (mode, regno);
+	      rtx ret = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+	      XVECEXP (ret, 0, 0) = bnd;
+	      XVECEXP (ret, 0, 1) = val;
+	      return ret;
+	    }
+	  else if (chkp_function_instrumented_p (current_function_decl)
+	      && type
+	      && chkp_type_has_pointer (type))
+	    {
+	      unsigned int i;
+	      vec<bool> has_bounds = chkp_find_bound_slots (type);
+	      unsigned int bnd_num = 0;
+	      unsigned int bnd_no = 1;
+	      unsigned int bnd_regno = cum->bnd_regno;
+	      rtx ret;
+
+	      /* Compute number of passed bounds.  */
+	      for (i = 0; i < has_bounds.length (); i++)
+		if (has_bounds[i])
+		  bnd_num++;
+
+	      /* We return PARALLEL holding value reg and all bounds
+		 slots.  */
+	      ret = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (bnd_num + 1));
+	      /* Add value to the resulting PARALLEL.  */
+	      XVECEXP (ret, 0, 0) = gen_rtx_REG (mode, regno);
+
+	      /* Add bounds to the resulting PARALLEL.  */
+	      for (i = 0; i < has_bounds.length (); i++)
+		if (has_bounds[i])
+		  {
+		    rtx bnd = bnd_regno <= LAST_BND_REG
+		      ? gen_rtx_REG (BNDmode, bnd_regno)
+		      : GEN_INT (bnd_regno - LAST_BND_REG);
+		    bnd = gen_rtx_EXPR_LIST (VOIDmode, bnd, GEN_INT (i*8));
+		    XVECEXP (ret, 0, bnd_no++) = bnd;
+		    bnd_regno++;
+		  }
+	      has_bounds.release ();
+	      return ret;
+	    }
+	  else
+	    return gen_rtx_REG (mode, regno);
 	}
       break;
 
@@ -7077,7 +7199,7 @@  function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
   return construct_container (mode, orig_mode, type, 0, cum->nregs,
 			      cum->sse_nregs,
 			      &x86_64_int_parameter_registers [cum->regno],
-			      cum->sse_regno);
+			      cum->sse_regno, cum->bnd_regno, cum->stdarg);
 }
 
 static rtx
@@ -7419,6 +7541,9 @@  ix86_function_value_regno_p (const unsigned int regno)
     case SI_REG:
       return TARGET_64BIT && ix86_abi != MS_ABI;
 
+    case FIRST_BND_REG:
+      return chkp_function_instrumented_p (current_function_decl);
+
       /* Complex values are returned in %st(0)/%st(1) pair.  */
     case ST0_REG:
     case ST1_REG:
@@ -7453,6 +7578,7 @@  function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
 		   const_tree fntype, const_tree fn)
 {
   unsigned int regno;
+  rtx res;
 
   /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
      we normally prevent this case when mmx is not available.  However
@@ -7491,7 +7617,18 @@  function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
   /* OImode shouldn't be used directly.  */
   gcc_assert (mode != OImode);
 
-  return gen_rtx_REG (orig_mode, regno);
+  res = gen_rtx_REG (orig_mode, regno);
+
+  /* Add bound register if bounds are returned in addition to
+     function value.  */
+  if (chkp_function_instrumented_p (current_function_decl)
+      && (!fntype || BOUNDED_P (fntype)) && regno == AX_REG)
+    {
+      rtx b0 = gen_rtx_REG (BNDmode, FIRST_BND_REG);
+      res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, res, b0));
+    }
+
+  return res;
 }
 
 static rtx
@@ -7537,7 +7674,7 @@  function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
 
   ret = construct_container (mode, orig_mode, valtype, 1,
 			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
-			     x86_64_int_return_registers, 0);
+			     x86_64_int_return_registers, 0, FIRST_BND_REG, false);
 
   /* For zero sized structures, construct_container returns NULL, but we
      need to keep rest of compiler happy by returning meaningful value.  */
@@ -7552,8 +7689,11 @@  function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
 		      const_tree valtype)
 {
   unsigned int regno = AX_REG;
+  rtx res;
 
-  if (TARGET_SSE)
+  if (mode == BND64mode)
+    regno = FIRST_BND_REG;
+  else if (TARGET_SSE)
     {
       switch (GET_MODE_SIZE (mode))
 	{
@@ -7577,7 +7717,19 @@  function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
 	  break;
         }
     }
-  return gen_rtx_REG (orig_mode, regno);
+
+  res = gen_rtx_REG (orig_mode, regno);
+
+  /* Add bound register if bounds are returned in addition to
+     function value.  */
+  if (chkp_function_instrumented_p (current_function_decl)
+      && BOUNDED_TYPE_P (valtype))
+    {
+      rtx b0 = gen_rtx_REG (BNDmode, FIRST_BND_REG);
+      res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, res, b0));
+    }
+
+  return res;
 }
 
 static rtx
@@ -7695,8 +7847,9 @@  return_in_memory_32 (const_tree type, enum machine_mode mode)
 static bool ATTRIBUTE_UNUSED
 return_in_memory_64 (const_tree type, enum machine_mode mode)
 {
-  int needed_intregs, needed_sseregs;
-  return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
+  int needed_intregs, needed_sseregs, needed_bndregs;
+  return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs,
+			    &needed_bndregs, true);
 }
 
 static bool ATTRIBUTE_UNUSED
@@ -8177,7 +8330,7 @@  ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
   int size, rsize;
   tree lab_false, lab_over = NULL_TREE;
   tree addr, t2;
-  rtx container;
+  rtx container, bndcontainer = NULL;
   int indirect_p = 0;
   tree ptrtype;
   enum machine_mode nat_mode;
@@ -8225,7 +8378,8 @@  ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
       container = construct_container (nat_mode, TYPE_MODE (type),
 				       type, 0, X86_64_REGPARM_MAX,
 				       X86_64_SSE_REGPARM_MAX, intreg,
-				       0);
+				       0, 0, false);
+      chkp_split_slot (container, &container, &bndcontainer);
       break;
     }
 
@@ -8235,14 +8389,15 @@  ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
 
   if (container)
     {
-      int needed_intregs, needed_sseregs;
+      int needed_intregs, needed_sseregs, needed_bndregs;
       bool need_temp;
       tree int_addr, sse_addr;
 
       lab_false = create_artificial_label (UNKNOWN_LOCATION);
       lab_over = create_artificial_label (UNKNOWN_LOCATION);
 
-      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
+      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs,
+			&needed_bndregs, true);
 
       need_temp = (!REG_P (container)
 		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
@@ -14945,7 +15100,7 @@  ix86_print_operand (FILE *file, rtx x, int code)
 	  return;
 
 	case '!':
-	  if (ix86_bnd_prefixed_insn_p (NULL_RTX))
+	  if (ix86_bnd_prefixed_insn_p (current_output_insn))
 	    fputs ("bnd ", file);
 	  return;
 
@@ -23427,7 +23582,6 @@  decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
 	  || !alg_usable_p (algs->unknown_size, memset)))
     {
       enum stringop_alg alg;
-
       /* If there aren't any usable algorithms, then recursing on
          smaller sizes isn't going to find anything.  Just return the
          simple byte-at-a-time copy loop.  */
@@ -24403,10 +24557,32 @@  ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
     }
 
   call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
+
   if (retval)
-    call = gen_rtx_SET (VOIDmode, retval, call);
+    {
+      /* For instrumented code we may have GPR + BR in parallel but
+	 it will confuse DF and we need to put each reg
+	 under EXPR_LIST.  */
+      if (chkp_function_instrumented_p (current_function_decl))
+	chkp_put_regs_to_expr_list (retval);
+
+      call = gen_rtx_SET (VOIDmode, retval, call);
+    }
   vec[vec_len++] = call;
 
+  /* b0 and b1 registers hold bounds for returned value.  */
+  if (retval)
+    {
+      rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
+      rtx unspec0 = gen_rtx_UNSPEC (BND64mode,
+				    gen_rtvec (1, b0), UNSPEC_BNDRET);
+      rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
+      rtx unspec1 = gen_rtx_UNSPEC (BND64mode,
+				    gen_rtvec (1, b1), UNSPEC_BNDRET);
+      vec[vec_len++] = gen_rtx_SET (BND64mode, b0, unspec0);
+      vec[vec_len++] = gen_rtx_SET (BND64mode, b1, unspec1);
+    }
+
   if (pop)
     {
       pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
@@ -42988,9 +43164,18 @@  ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
    bnd by default for current function.  */
 
 bool
-ix86_bnd_prefixed_insn_p (rtx insn ATTRIBUTE_UNUSED)
+ix86_bnd_prefixed_insn_p (rtx insn)
 {
-  return false;
+  /* For call insns check special flag.  */
+  if (insn && CALL_P (insn))
+    {
+      rtx call = get_call_rtx_from (insn);
+      if (call)
+	return CALL_EXPR_WITH_BOUNDS_P (call);
+    }
+
+  /* All other insns are prefixed only if function is instrumented.  */
+  return chkp_function_instrumented_p (current_function_decl);
 }
 
 /* Calculate integer abs() using only SSE2 instructions.  */
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 123e3fa..1369c4d 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1664,6 +1664,9 @@  typedef struct ix86_args {
   int float_in_sse;		/* Set to 1 or 2 for 32bit targets if
 				   SFmode/DFmode arguments should be passed
 				   in SSE registers.  Otherwise 0.  */
+  int bnd_nregs;                /* # bnd registers available for passing */
+  int bnd_regno;                /* next available bnd register number */
+  int stdarg;                   /* Set to 1 if function is stdarg.  */
   enum calling_abi call_abi;	/* Set to SYSV_ABI for sysv abi. Otherwise
  				   MS_ABI for ms abi.  */
 } CUMULATIVE_ARGS;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index dc7d81a..427ae0c 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -190,6 +190,7 @@ 
   UNSPEC_BNDCU
   UNSPEC_BNDCN
   UNSPEC_MPX_FENCE
+  UNSPEC_BNDRET
 ])
 
 (define_c_enum "unspecv" [
@@ -11565,7 +11566,9 @@ 
 (define_insn "*call_value"
   [(set (match_operand 0)
 	(call (mem:QI (match_operand:W 1 "call_insn_operand" "<c>zw"))
-	      (match_operand 2)))]
+	      (match_operand 2)))
+   (set (reg:BND64 BND0_REG) (unspec [(reg:BND64 BND0_REG)] UNSPEC_BNDRET))
+   (set (reg:BND64 BND1_REG) (unspec [(reg:BND64 BND1_REG)] UNSPEC_BNDRET))]
   "!SIBLING_CALL_P (insn)"
   "* return ix86_output_call_insn (insn, operands[1]);"
   [(set_attr "type" "callv")])
@@ -11573,7 +11576,9 @@ 
 (define_insn "*sibcall_value"
   [(set (match_operand 0)
 	(call (mem:QI (match_operand:W 1 "sibcall_insn_operand" "Uz"))
-	      (match_operand 2)))]
+	      (match_operand 2)))
+   (set (reg:BND64 BND0_REG) (unspec [(reg:BND64 BND0_REG)] UNSPEC_BNDRET))
+   (set (reg:BND64 BND1_REG) (unspec [(reg:BND64 BND1_REG)] UNSPEC_BNDRET))]
   "SIBLING_CALL_P (insn)"
   "* return ix86_output_call_insn (insn, operands[1]);"
   [(set_attr "type" "callv")])
@@ -11583,6 +11588,8 @@ 
     [(set (match_operand 0)
 	  (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rzw"))
 		(match_operand 2)))
+     (set (reg:BND64 BND0_REG) (unspec [(reg:BND64 BND0_REG)] UNSPEC_BNDRET))
+     (set (reg:BND64 BND1_REG) (unspec [(reg:BND64 BND1_REG)] UNSPEC_BNDRET))
      (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)])]
  "TARGET_64BIT && !SIBLING_CALL_P (insn)"
   "* return ix86_output_call_insn (insn, operands[1]);"
@@ -11606,6 +11613,8 @@ 
   [(set (match_operand 0)
 	(call (mem:QI (match_operand:SI 1 "call_insn_operand" "lzm"))
 	      (match_operand 2)))
+   (set (reg:BND64 BND0_REG) (unspec [(reg:BND64 BND0_REG)] UNSPEC_BNDRET))
+   (set (reg:BND64 BND1_REG) (unspec [(reg:BND64 BND1_REG)] UNSPEC_BNDRET))
    (set (reg:SI SP_REG)
 	(plus:SI (reg:SI SP_REG)
 		 (match_operand:SI 3 "immediate_operand" "i")))]
@@ -11617,6 +11626,8 @@ 
   [(set (match_operand 0)
 	(call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "Uz"))
 	      (match_operand 2)))
+   (set (reg:BND64 BND0_REG) (unspec [(reg:BND64 BND0_REG)] UNSPEC_BNDRET))
+   (set (reg:BND64 BND1_REG) (unspec [(reg:BND64 BND1_REG)] UNSPEC_BNDRET))
    (set (reg:SI SP_REG)
 	(plus:SI (reg:SI SP_REG)
 		 (match_operand:SI 3 "immediate_operand" "i")))]
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 5c758ab..7dd8f99 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -597,14 +597,17 @@ 
   (match_code "parallel")
 {
   unsigned creg_size = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
+  unsigned adop = GET_CODE (XVECEXP (op, 0, 0)) == SET
+                  ? 4
+		  : 2;
   unsigned i;
 
-  if ((unsigned) XVECLEN (op, 0) != creg_size + 2)
+  if ((unsigned) XVECLEN (op, 0) != creg_size + adop)
     return false;
 
   for (i = 0; i < creg_size; i++)
     {
-      rtx elt = XVECEXP (op, 0, i+2);
+      rtx elt = XVECEXP (op, 0, i+adop);
       enum machine_mode mode;
       unsigned regno;