Patchwork [RFC] Implement Undefined Behavior Sanitizer (take 2)

login
register
mail settings
Submitter Marek Polacek
Date June 8, 2013, 4:43 p.m.
Message ID <20130608164348.GF4160@redhat.com>
Download mbox | patch
Permalink /patch/249965/
State New
Headers show

Comments

Marek Polacek - June 8, 2013, 4:43 p.m.
Thanks for the reviews, here is another version.  I haven't touched
the division by zero instrumentation, but the shift instrumentation is
revamped; what it should instrument now is, as Jakub wrote:
1) if ((unsigned) y > precm1) ub
plus for signed x << y:
2) for C99/C11 if ((unsigned) x >> (precm1 - y)) ub
3) for C++11/C++14 if (x < 0 || ((unsigned) x >> (precm1 - y)) > 1) ub
Is there anything left to do for shifts?

The c-ubsan.c now resides in c-family/.
I also fixed the ICE with long/long long type (the resulting type of
a RSHIFT_EXPR should really be that of the left operand).
But I have no clue how it behaves with e.g. constexpr specifier.

When/if this is ok enough, I think we could put this on a branch,
together with Jakub's patches from
http://gcc.gnu.org/ml/gcc-patches/2013-06/msg00291.html

The next part will be to construct arguments for the ubsan library;
hopefully I can (mis)use parts of asan.c code.  Then the command-line
parsing (parse options into the flag_sanitize bitmask).  And after this
is done, I'd say we should add a testsuite, ideally something in
c-c++-common/ and something in g++.dg/ (templates and other stuff).

Regtested/bootstrapped on x86_64-linux.

2013-06-07  Marek Polacek  <polacek@redhat.com>

	* Makefile.in: Add ubsan.c.
	* common.opt: Add -fsanitize=undefined option.
	* doc/invoke.texi: Document the new flag.
	* sanitizer.def (DEF_SANITIZER_BUILTIN): Define.
	* builtin-attrs.def (ATTR_COLD): Define.
	* asan.c (initialize_sanitizer_builtins): Build
	BT_FN_VOID_PTR_PTR_PTR.
	* builtins.def (BUILT_IN_UBSAN_HANDLE_DIVREM_OVERFLOW,
	BUILT_IN_UBSAN_HANDLE_SHIFT_OUT_OF_BOUNDS): Define.

c-family/
	* c-ubsan.c: New file.
	* c-ubsan.h: New file.

cp/
	* typeck.c (cp_build_binary_op): Add division by zero and shift
	instrumentation.

c/
	* c-typeck.c (build_binary_op): Add division by zero and shift
	instrumentation.

 
	Marek
Marc Glisse - June 8, 2013, 5:48 p.m.
Hello,

thanks for working on this. Just a few questions inline:

On Sat, 8 Jun 2013, Marek Polacek wrote:

> +/* Instrument division by zero and INT_MIN / -1.  */
> +
> +tree
> +ubsan_instrument_division (location_t loc, enum tree_code code,
> +			   tree op0, tree op1)
> +{
> +  tree t, tt;
> +  tree orig = build2 (code, TREE_TYPE (op0), op0, op1);
> +
> +  if (TREE_CODE (TREE_TYPE (op0)) != INTEGER_TYPE
> +      || TREE_CODE (TREE_TYPE (op1)) != INTEGER_TYPE)
> +    return orig;

Type promotion means that INTEGRAL_TYPE_P wouldn't catch anything this 
doesn't?

> +  /* If we *know* that the divisor is not -1 or 0, we don't have to
> +     instrument this expression.
> +     ??? We could use decl_constant_value to cover up more cases.  */
> +  if (TREE_CODE (op1) == INTEGER_CST
> +      && integer_nonzerop (op1)
> +      && !integer_minus_onep (op1))
> +    return orig;

This is just to speed up compilation a bit, I assume, since fold would 
remove the instrumentation anyway.

> +  tt = fold_build2 (EQ_EXPR, boolean_type_node, op1,
> +		    integer_minus_one_node);

Don't we usually try to have both operands of a comparison of the same 
type?

> +  t = fold_build2 (EQ_EXPR, boolean_type_node, op0,
> +		   TYPE_MIN_VALUE (TREE_TYPE (op0)));

I didn't see where this test was restricted to the signed case (0u/-1 
is well defined)?

> +  t = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, t, tt);
> +  tt = build2 (EQ_EXPR, boolean_type_node,
> +	       op1, integer_zero_node);

Why not fold this one?

> +  t = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, tt, t);
> +  tt = builtin_decl_explicit (BUILT_IN_UBSAN_HANDLE_DIVREM_OVERFLOW);
> +  tt = build_call_expr_loc (loc, tt, 0);
> +  t = fold_build3 (COND_EXPR, void_type_node, t, tt, void_zero_node);
> +  t = fold_build2 (COMPOUND_EXPR, TREE_TYPE (orig), t, orig);
> +
> +  return t;
> +}
> +
> +/* Instrument left and right shifts.  */
> +
> +tree
> +ubsan_instrument_shift (location_t loc, enum tree_code code,
> +			tree op0, tree op1)
> +{
> +  tree t, tt = NULL_TREE;
> +  tree orig = build2 (code, TREE_TYPE (op0), op0, op1);
> +  tree uprecm1 = build_int_cst (unsigned_type_for (TREE_TYPE (op1)),
> +			       TYPE_PRECISION (TREE_TYPE (op0)) - 1);
> +  tree precm1 = build_int_cst (TREE_TYPE (op1),
> +			       TYPE_PRECISION (TREE_TYPE (op0)) - 1);

(if we later want to extend this to vector-scalar shifts, 
element_precision will be better than TYPE_PRECISION)

Name unsigned_type_for (TREE_TYPE (op1)) and TYPE_PRECISION (TREE_TYPE 
(op0)) that are used several times?

> +  t = fold_convert_loc (loc, unsigned_type_for (TREE_TYPE (op1)), op1);
> +  t = fold_build2 (GT_EXPR, boolean_type_node, t, uprecm1);
[...]
> --- gcc/cp/typeck.c.mp	2013-06-07 23:01:16.779536165 +0200
> +++ gcc/cp/typeck.c	2013-06-07 23:04:47.625161351 +0200
> @@ -37,6 +37,7 @@ along with GCC; see the file COPYING3.
> #include "convert.h"
> #include "c-family/c-common.h"
> #include "c-family/c-objc.h"
> +#include "c-family/c-ubsan.h"
> #include "params.h"
>
> static tree pfn_from_ptrmemfunc (tree);
> @@ -3891,6 +3892,12 @@ cp_build_binary_op (location_t location,
>   op0 = orig_op0;
>   op1 = orig_op1;
>
> +  /* Remember whether we're doing / or %.  */
> +  bool doing_div_or_mod = false;
> +
> +  /* Remember whether we're doing << or >>.  */
> +  bool doing_shift = false;
> +
>   if (code == TRUTH_AND_EXPR || code == TRUTH_ANDIF_EXPR
>       || code == TRUTH_OR_EXPR || code == TRUTH_ORIF_EXPR
>       || code == TRUTH_XOR_EXPR)
> @@ -4070,8 +4077,15 @@ cp_build_binary_op (location_t location,
> 	{
> 	  enum tree_code tcode0 = code0, tcode1 = code1;
> 	  tree cop1 = fold_non_dependent_expr_sfinae (op1, tf_none);
> +	  cop1 = maybe_constant_value (cop1);
>
> -	  warn_for_div_by_zero (location, maybe_constant_value (cop1));
> +	  if (!processing_template_decl && tcode0 == INTEGER_TYPE
> +	      && (TREE_CODE (cop1) != INTEGER_CST
> +		  || integer_zerop (cop1)
> +		  || integer_minus_onep (cop1)))
> +	    doing_div_or_mod = true;

Aren't you already doing this test in ubsan_instrument_division?
Jakub Jelinek - June 8, 2013, 6:22 p.m.
On Sat, Jun 08, 2013 at 07:48:27PM +0200, Marc Glisse wrote:
> >+/* Instrument division by zero and INT_MIN / -1.  */
> >+
> >+tree
> >+ubsan_instrument_division (location_t loc, enum tree_code code,
> >+			   tree op0, tree op1)
> >+{
> >+  tree t, tt;
> >+  tree orig = build2 (code, TREE_TYPE (op0), op0, op1);
> >+
> >+  if (TREE_CODE (TREE_TYPE (op0)) != INTEGER_TYPE
> >+      || TREE_CODE (TREE_TYPE (op1)) != INTEGER_TYPE)
> >+    return orig;
> 
> Type promotion means that INTEGRAL_TYPE_P wouldn't catch anything
> this doesn't?

This is after type promotion, and e.g. cp_build_binary_op already checks
for == INTEGER_TYPE, so I think enums/bools won't show up here.

> >+  /* If we *know* that the divisor is not -1 or 0, we don't have to
> >+     instrument this expression.
> >+     ??? We could use decl_constant_value to cover up more cases.  */
> >+  if (TREE_CODE (op1) == INTEGER_CST
> >+      && integer_nonzerop (op1)
> >+      && !integer_minus_onep (op1))
> >+    return orig;
> 
> This is just to speed up compilation a bit, I assume, since fold
> would remove the instrumentation anyway.

Yeah.
> 
> >+  tt = fold_build2 (EQ_EXPR, boolean_type_node, op1,
> >+		    integer_minus_one_node);
> 
> Don't we usually try to have both operands of a comparison of the
> same type?

Not just usually, it really has to be build_int_cst (TREE_TYPE (op1), -1).
And, more importantly, at least in cp_build_binary_op the calls need to be
moved further down in the function, at least after if (processing_template_decl)
but e.g. for division the trouble is that shorten_binary_op is performed
before actually promoting one or both operand to the result_type.  I guess
for the diagnostics which prints the types, it would be best to diagnose
using the promoted types and result_type constructed out of that, but
without shorten_binary_op etc., that is just an optimization I think.
So, maybe record the original result_type before shortening, and if
shortening changed that, convert the arguments for the instrumentation only
to the original result_type, otherwise use the conversion done normally.
For shifts this isn't a big deal, because they always use result_type of the
first operand after promotion, and the ubsan handler wants to see two types
there (the question is, does it want for the shift amount look for the
original shift count type, or the one converted to int)?

Also, perhaps it would be better if these ubsan_instrument* functions
didn't return a COMPOUND_EXPR, but instead just the lhs of that (i.e. the
actual instrumentation) and let the caller set some var to that and if that
var is non-NULL, after building the binary operation build a COMPOUND_EXPR
with lhs being the instrumentation and rhs the binary operation itself.

> 
> >+  t = fold_build2 (EQ_EXPR, boolean_type_node, op0,
> >+		   TYPE_MIN_VALUE (TREE_TYPE (op0)));
> 
> I didn't see where this test was restricted to the signed case
> (0u/-1 is well defined)?
> 
> >+  t = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, t, tt);
> >+  tt = build2 (EQ_EXPR, boolean_type_node,
> >+	       op1, integer_zero_node);
> 
> Why not fold this one?

Sure.  And yeah, the INT_MIN/-1 checking needs to be done for signed types
only.

> >+tree
> >+ubsan_instrument_shift (location_t loc, enum tree_code code,
> >+			tree op0, tree op1)
> >+{
> >+  tree t, tt = NULL_TREE;
> >+  tree orig = build2 (code, TREE_TYPE (op0), op0, op1);
> >+  tree uprecm1 = build_int_cst (unsigned_type_for (TREE_TYPE (op1)),
> >+			       TYPE_PRECISION (TREE_TYPE (op0)) - 1);
> >+  tree precm1 = build_int_cst (TREE_TYPE (op1),
> >+			       TYPE_PRECISION (TREE_TYPE (op0)) - 1);
> 
> (if we later want to extend this to vector-scalar shifts,
> element_precision will be better than TYPE_PRECISION)
> 
> Name unsigned_type_for (TREE_TYPE (op1)) and TYPE_PRECISION
> (TREE_TYPE (op0)) that are used several times?

Yes.  Note that for vector-scalar shifts (and even vector-vector) we'd need
library support to handle that.

	Jakub
Marek Polacek - June 10, 2013, 9:24 a.m.
On Sat, Jun 08, 2013 at 07:48:27PM +0200, Marc Glisse wrote:
> >+  tt = fold_build2 (EQ_EXPR, boolean_type_node, op1,
> >+		    integer_minus_one_node);
> 
> Don't we usually try to have both operands of a comparison of the
> same type?

Will fix.

> >+  t = fold_build2 (EQ_EXPR, boolean_type_node, op0,
> >+		   TYPE_MIN_VALUE (TREE_TYPE (op0)));
> 
> I didn't see where this test was restricted to the signed case
> (0u/-1 is well defined)?

Will fix.

> >+  t = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, t, tt);
> >+  tt = build2 (EQ_EXPR, boolean_type_node,
> >+	       op1, integer_zero_node);
> 
> Why not fold this one?

Sure, will do.

> Name unsigned_type_for (TREE_TYPE (op1)) and TYPE_PRECISION
> (TREE_TYPE (op0)) that are used several times?

Yeah.

> >@@ -4070,8 +4077,15 @@ cp_build_binary_op (location_t location,
> >	{
> >	  enum tree_code tcode0 = code0, tcode1 = code1;
> >	  tree cop1 = fold_non_dependent_expr_sfinae (op1, tf_none);
> >+	  cop1 = maybe_constant_value (cop1);
> >
> >-	  warn_for_div_by_zero (location, maybe_constant_value (cop1));
> >+	  if (!processing_template_decl && tcode0 == INTEGER_TYPE
> >+	      && (TREE_CODE (cop1) != INTEGER_CST
> >+		  || integer_zerop (cop1)
> >+		  || integer_minus_onep (cop1)))
> >+	    doing_div_or_mod = true;
> 
> Aren't you already doing this test in ubsan_instrument_division?

Yep, I'll throw it out of cp/typeck.c.

Thanks for the review!

	Marek
Jakub Jelinek - June 10, 2013, 9:32 a.m.
On Mon, Jun 10, 2013 at 11:24:16AM +0200, Marek Polacek wrote:
> > >@@ -4070,8 +4077,15 @@ cp_build_binary_op (location_t location,
> > >	{
> > >	  enum tree_code tcode0 = code0, tcode1 = code1;
> > >	  tree cop1 = fold_non_dependent_expr_sfinae (op1, tf_none);
> > >+	  cop1 = maybe_constant_value (cop1);
> > >
> > >-	  warn_for_div_by_zero (location, maybe_constant_value (cop1));
> > >+	  if (!processing_template_decl && tcode0 == INTEGER_TYPE
> > >+	      && (TREE_CODE (cop1) != INTEGER_CST
> > >+		  || integer_zerop (cop1)
> > >+		  || integer_minus_onep (cop1)))
> > >+	    doing_div_or_mod = true;
> > 
> > Aren't you already doing this test in ubsan_instrument_division?
> 
> Yep, I'll throw it out of cp/typeck.c.

Note that the above one actually performs more than what you do in
ubsan_instrument_division, because it works on maybe_constant_value result.
So, perhaps typeck.c should ensure that the ubsan functions are always
called with arguments passed through
maybe_constant_value (fold_non_dependent_expr_sfinae (opX, tf_none)).

	Jakub
Marek Polacek - June 10, 2013, 9:49 a.m.
On Mon, Jun 10, 2013 at 11:32:22AM +0200, Jakub Jelinek wrote:
> On Mon, Jun 10, 2013 at 11:24:16AM +0200, Marek Polacek wrote:
> > > >@@ -4070,8 +4077,15 @@ cp_build_binary_op (location_t location,
> > > >	{
> > > >	  enum tree_code tcode0 = code0, tcode1 = code1;
> > > >	  tree cop1 = fold_non_dependent_expr_sfinae (op1, tf_none);
> > > >+	  cop1 = maybe_constant_value (cop1);
> > > >
> > > >-	  warn_for_div_by_zero (location, maybe_constant_value (cop1));
> > > >+	  if (!processing_template_decl && tcode0 == INTEGER_TYPE
> > > >+	      && (TREE_CODE (cop1) != INTEGER_CST
> > > >+		  || integer_zerop (cop1)
> > > >+		  || integer_minus_onep (cop1)))
> > > >+	    doing_div_or_mod = true;
> > > 
> > > Aren't you already doing this test in ubsan_instrument_division?
> > 
> > Yep, I'll throw it out of cp/typeck.c.
> 
> Note that the above one actually performs more than what you do in
> ubsan_instrument_division, because it works on maybe_constant_value result.
> So, perhaps typeck.c should ensure that the ubsan functions are always
> called with arguments passed through
> maybe_constant_value (fold_non_dependent_expr_sfinae (opX, tf_none)).

Ah, ok, will add it there.  Thanks.

	Marek
Joseph S. Myers - June 10, 2013, 2:29 p.m.
On Sat, 8 Jun 2013, Marek Polacek wrote:

> +  if (code == LSHIFT_EXPR
> +      && !TYPE_UNSIGNED (TREE_TYPE (op0))
> +      && (flag_isoc99 || flag_isoc11))

flag_isoc11 implies flag_isoc99, you only need to check flag_isoc99 here.
Marek Polacek - June 11, 2013, 1:48 a.m.
On Mon, Jun 10, 2013 at 02:29:25PM +0000, Joseph S. Myers wrote:
> On Sat, 8 Jun 2013, Marek Polacek wrote:
> 
> > +  if (code == LSHIFT_EXPR
> > +      && !TYPE_UNSIGNED (TREE_TYPE (op0))
> > +      && (flag_isoc99 || flag_isoc11))
> 
> flag_isoc11 implies flag_isoc99, you only need to check flag_isoc99 here.

Ah, sure.  Thanks,

	Marek

Patch

--- gcc/c-family/c-ubsan.c.mp	2013-06-07 22:58:18.084990548 +0200
+++ gcc/c-family/c-ubsan.c	2013-06-07 22:29:34.677588474 +0200
@@ -0,0 +1,120 @@ 
+/* UndefinedBehaviorSanitizer, undefined behavior detector.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   Contributed by Marek Polacek <polacek@redhat.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+#include "c-family/c-common.h"
+#include "c-family/c-ubsan.h"
+
+/* Instrument division by zero and INT_MIN / -1.  */
+
+tree
+ubsan_instrument_division (location_t loc, enum tree_code code,
+			   tree op0, tree op1)
+{
+  tree t, tt;
+  tree orig = build2 (code, TREE_TYPE (op0), op0, op1);
+
+  if (TREE_CODE (TREE_TYPE (op0)) != INTEGER_TYPE
+      || TREE_CODE (TREE_TYPE (op1)) != INTEGER_TYPE)
+    return orig;
+
+  /* If we *know* that the divisor is not -1 or 0, we don't have to
+     instrument this expression.
+     ??? We could use decl_constant_value to cover up more cases.  */
+  if (TREE_CODE (op1) == INTEGER_CST
+      && integer_nonzerop (op1)
+      && !integer_minus_onep (op1))
+    return orig;
+
+  tt = fold_build2 (EQ_EXPR, boolean_type_node, op1,
+		    integer_minus_one_node);
+  t = fold_build2 (EQ_EXPR, boolean_type_node, op0,
+		   TYPE_MIN_VALUE (TREE_TYPE (op0)));
+  t = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, t, tt);
+  tt = build2 (EQ_EXPR, boolean_type_node,
+	       op1, integer_zero_node);
+  t = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, tt, t);
+  tt = builtin_decl_explicit (BUILT_IN_UBSAN_HANDLE_DIVREM_OVERFLOW);
+  tt = build_call_expr_loc (loc, tt, 0);
+  t = fold_build3 (COND_EXPR, void_type_node, t, tt, void_zero_node);
+  t = fold_build2 (COMPOUND_EXPR, TREE_TYPE (orig), t, orig);
+
+  return t;
+}
+
+/* Instrument left and right shifts.  */
+
+tree
+ubsan_instrument_shift (location_t loc, enum tree_code code,
+			tree op0, tree op1)
+{
+  tree t, tt = NULL_TREE;
+  tree orig = build2 (code, TREE_TYPE (op0), op0, op1);
+  tree uprecm1 = build_int_cst (unsigned_type_for (TREE_TYPE (op1)),
+			       TYPE_PRECISION (TREE_TYPE (op0)) - 1);
+  tree precm1 = build_int_cst (TREE_TYPE (op1),
+			       TYPE_PRECISION (TREE_TYPE (op0)) - 1);
+
+  t = fold_convert_loc (loc, unsigned_type_for (TREE_TYPE (op1)), op1);
+  t = fold_build2 (GT_EXPR, boolean_type_node, t, uprecm1);
+
+  /* For signed x << y, in C99/C11, the following:
+     (unsigned) x >> (precm1 - y)
+     if non-zero, is undefined.  */
+  if (code == LSHIFT_EXPR
+      && !TYPE_UNSIGNED (TREE_TYPE (op0))
+      && (flag_isoc99 || flag_isoc11))
+    {
+      tree x = fold_build2 (MINUS_EXPR, integer_type_node, precm1, op1);
+      tt = fold_convert_loc (loc, unsigned_type_for (TREE_TYPE (op0)), op0);
+      tt = fold_build2 (RSHIFT_EXPR, TREE_TYPE (tt), tt, x);
+      tt = fold_build2 (NE_EXPR, boolean_type_node, tt,
+			build_int_cst (TREE_TYPE (tt), 0));
+    }
+
+  /* For signed x << y, in C++11/C++14, the following:
+     x < 0 || ((unsigned) x >> (precm1 - y))
+     if > 1, is undefined.  */
+  if (code == LSHIFT_EXPR
+      && !TYPE_UNSIGNED (TREE_TYPE (op0))
+      && (cxx_dialect == cxx11 || cxx_dialect == cxx1y))
+    {
+      tree x = fold_build2 (MINUS_EXPR, integer_type_node, precm1, op1);
+      tt = fold_convert_loc (loc, unsigned_type_for (TREE_TYPE (op0)), op0);
+      tt = fold_build2 (RSHIFT_EXPR, TREE_TYPE (tt), tt, x);
+      tt = fold_build2 (GT_EXPR, boolean_type_node, tt,
+			build_int_cst (TREE_TYPE (tt), 1));
+      x = fold_build2 (LT_EXPR, boolean_type_node, op0,
+		       build_int_cst (TREE_TYPE (op0), 0));
+      tt = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, x, tt);
+    }
+
+  t = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, t,
+		   tt ? tt : integer_zero_node);
+  tt = builtin_decl_explicit (BUILT_IN_UBSAN_HANDLE_SHIFT_OUT_OF_BOUNDS);
+  tt = build_call_expr_loc (loc, tt, 0);
+  t = fold_build3 (COND_EXPR, void_type_node, t, tt, void_zero_node);
+  t = fold_build2 (COMPOUND_EXPR, TREE_TYPE (orig), t, orig);
+
+  return t;
+}
--- gcc/c-family/c-ubsan.h.mp	2013-06-07 22:58:23.009004841 +0200
+++ gcc/c-family/c-ubsan.h	2013-06-05 18:10:21.284693807 +0200
@@ -0,0 +1,27 @@ 
+/* UndefinedBehaviorSanitizer, undefined behavior detector.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   Contributed by Marek Polacek <polacek@redhat.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_UBSAN_H
+#define GCC_UBSAN_H
+
+extern tree ubsan_instrument_division (location_t, enum tree_code, tree, tree);
+extern tree ubsan_instrument_shift (location_t, enum tree_code, tree, tree);
+
+#endif  /* GCC_UBSAN_H  */
--- gcc/sanitizer.def.mp	2013-06-07 23:01:16.783536178 +0200
+++ gcc/sanitizer.def	2013-06-07 23:04:47.646161427 +0200
@@ -283,3 +283,13 @@  DEF_SANITIZER_BUILTIN(BUILT_IN_TSAN_ATOM
 DEF_SANITIZER_BUILTIN(BUILT_IN_TSAN_ATOMIC_SIGNAL_FENCE,
 		      "__tsan_atomic_signal_fence",
 		      BT_FN_VOID_INT, ATTR_NOTHROW_LEAF_LIST)
+
+/* Undefined Behavior Sanitizer */
+DEF_SANITIZER_BUILTIN(BUILT_IN_UBSAN_HANDLE_DIVREM_OVERFLOW,
+		      "__ubsan_handle_divrem_overflow",
+		      BT_FN_VOID_PTR_PTR_PTR,
+		      ATTR_COLD_NORETURN_NOTHROW_LEAF_LIST)
+DEF_SANITIZER_BUILTIN(BUILT_IN_UBSAN_HANDLE_SHIFT_OUT_OF_BOUNDS,
+		      "__ubsan_handle_shift_out_of_bounds",
+		      BT_FN_VOID_PTR_PTR_PTR,
+		      ATTR_COLD_NORETURN_NOTHROW_LEAF_LIST)
--- gcc/builtins.def.mp	2013-06-07 23:01:16.775536150 +0200
+++ gcc/builtins.def	2013-06-07 23:04:47.611161301 +0200
@@ -155,7 +155,7 @@  along with GCC; see the file COPYING3.
 #define DEF_SANITIZER_BUILTIN(ENUM, NAME, TYPE, ATTRS) \
   DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE,    \
 	       true, true, true, ATTRS, true, \
-	       (flag_asan || flag_tsan))
+	       (flag_asan || flag_tsan || flag_ubsan))
 
 #undef DEF_CILKPLUS_BUILTIN
 #define DEF_CILKPLUS_BUILTIN(ENUM, NAME, TYPE, ATTRS) \
--- gcc/Makefile.in.mp	2013-06-07 23:01:16.771536135 +0200
+++ gcc/Makefile.in	2013-06-07 23:04:47.602161268 +0200
@@ -1150,7 +1150,7 @@  C_COMMON_OBJS = c-family/c-common.o c-fa
   c-family/c-omp.o c-family/c-opts.o c-family/c-pch.o \
   c-family/c-ppoutput.o c-family/c-pragma.o c-family/c-pretty-print.o \
   c-family/c-semantics.o c-family/c-ada-spec.o tree-mudflap.o \
-  c-family/array-notation-common.o
+  c-family/array-notation-common.o c-family/c-ubsan.o
 
 # Language-independent object files.
 # We put the insn-*.o files first so that a parallel make will build
@@ -2021,6 +2021,9 @@  c-family/array-notation-common.o : c-fam
 c-family/stub-objc.o : c-family/stub-objc.c $(CONFIG_H) $(SYSTEM_H) \
 	coretypes.h $(TREE_H) $(C_COMMON_H) c-family/c-objc.h
 
+c-family/c-ubsan.o : c-family/c-ubsan.c $(CONFIG_H) $(SYSTEM_H) \
+	coretypes.h $(TREE_H) $(C_COMMON_H) c-family/c-ubsan.h
+
 default-c.o: config/default-c.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
   $(C_TARGET_H) $(C_TARGET_DEF_H)
 	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) \
--- gcc/doc/invoke.texi.mp	2013-06-07 23:01:16.781536172 +0200
+++ gcc/doc/invoke.texi	2013-06-07 23:04:47.639161403 +0200
@@ -5143,6 +5143,11 @@  Memory access instructions will be instr
 data race bugs.
 See @uref{http://code.google.com/p/data-race-test/wiki/ThreadSanitizer} for more details.
 
+@item -fsanitize=undefined
+Enable UndefinedBehaviorSanitizer, a fast undefined behavior detector
+Various computations will be instrumented to detect
+undefined behavior, e.g.@: division by zero or various overflows.
+
 @item -fdump-final-insns@r{[}=@var{file}@r{]}
 @opindex fdump-final-insns
 Dump the final internal representation (RTL) to @var{file}.  If the
--- gcc/cp/typeck.c.mp	2013-06-07 23:01:16.779536165 +0200
+++ gcc/cp/typeck.c	2013-06-07 23:04:47.625161351 +0200
@@ -37,6 +37,7 @@  along with GCC; see the file COPYING3.
 #include "convert.h"
 #include "c-family/c-common.h"
 #include "c-family/c-objc.h"
+#include "c-family/c-ubsan.h"
 #include "params.h"
 
 static tree pfn_from_ptrmemfunc (tree);
@@ -3891,6 +3892,12 @@  cp_build_binary_op (location_t location,
   op0 = orig_op0;
   op1 = orig_op1;
 
+  /* Remember whether we're doing / or %.  */
+  bool doing_div_or_mod = false;
+
+  /* Remember whether we're doing << or >>.  */
+  bool doing_shift = false;
+
   if (code == TRUTH_AND_EXPR || code == TRUTH_ANDIF_EXPR
       || code == TRUTH_OR_EXPR || code == TRUTH_ORIF_EXPR
       || code == TRUTH_XOR_EXPR)
@@ -4070,8 +4077,15 @@  cp_build_binary_op (location_t location,
 	{
 	  enum tree_code tcode0 = code0, tcode1 = code1;
 	  tree cop1 = fold_non_dependent_expr_sfinae (op1, tf_none);
+	  cop1 = maybe_constant_value (cop1);
 
-	  warn_for_div_by_zero (location, maybe_constant_value (cop1));
+	  if (!processing_template_decl && tcode0 == INTEGER_TYPE
+	      && (TREE_CODE (cop1) != INTEGER_CST
+		  || integer_zerop (cop1)
+		  || integer_minus_onep (cop1)))
+	    doing_div_or_mod = true;
+
+	  warn_for_div_by_zero (location, cop1);
 
 	  if (tcode0 == COMPLEX_TYPE || tcode0 == VECTOR_TYPE)
 	    tcode0 = TREE_CODE (TREE_TYPE (TREE_TYPE (op0)));
@@ -4109,8 +4123,14 @@  cp_build_binary_op (location_t location,
     case FLOOR_MOD_EXPR:
       {
 	tree cop1 = fold_non_dependent_expr_sfinae (op1, tf_none);
+	cop1 = maybe_constant_value (cop1);
 
-	warn_for_div_by_zero (location, maybe_constant_value (cop1));
+	if (!processing_template_decl && code0 == INTEGER_TYPE
+	    && (TREE_CODE (cop1) != INTEGER_CST
+		|| integer_zerop (cop1)
+		|| integer_minus_onep (cop1)))
+	  doing_div_or_mod = true;
+	warn_for_div_by_zero (location, cop1);
       }
 
       if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE
@@ -4164,6 +4184,7 @@  cp_build_binary_op (location_t location,
 	  if (TREE_CODE (const_op1) != INTEGER_CST)
 	    const_op1 = op1;
 	  result_type = type0;
+	  doing_shift = true;
 	  if (TREE_CODE (const_op1) == INTEGER_CST)
 	    {
 	      if (tree_int_cst_lt (const_op1, integer_zero_node))
@@ -4211,6 +4232,7 @@  cp_build_binary_op (location_t location,
 	  if (TREE_CODE (const_op1) != INTEGER_CST)
 	    const_op1 = op1;
 	  result_type = type0;
+	  doing_shift = true;
 	  if (TREE_CODE (const_op1) == INTEGER_CST)
 	    {
 	      if (tree_int_cst_lt (const_op1, integer_zero_node))
@@ -4607,6 +4629,17 @@  cp_build_binary_op (location_t location,
       break;
     }
 
+  if (flag_ubsan && doing_div_or_mod && !processing_template_decl)
+    {
+      resultcode = COMPOUND_EXPR;
+      return ubsan_instrument_division (location, code, op0, op1);
+    }
+  else if (flag_ubsan && doing_shift && !processing_template_decl)
+    {
+      resultcode = COMPOUND_EXPR;
+      return ubsan_instrument_shift (location, code, op0, op1);
+    }
+
   if (((code0 == INTEGER_TYPE || code0 == REAL_TYPE || code0 == COMPLEX_TYPE
 	|| code0 == ENUMERAL_TYPE)
        && (code1 == INTEGER_TYPE || code1 == REAL_TYPE
--- gcc/common.opt.mp	2013-06-07 23:01:16.778536161 +0200
+++ gcc/common.opt	2013-06-07 23:04:47.621161337 +0200
@@ -858,6 +858,10 @@  fsanitize=thread
 Common Report Var(flag_tsan)
 Enable ThreadSanitizer, a data race detector
 
+fsanitize=undefined
+Common Report Var(flag_ubsan)
+Enable UndefinedBehaviorSanitizer, an undefined behavior detector
+
 fasynchronous-unwind-tables
 Common Report Var(flag_asynchronous_unwind_tables) Optimization
 Generate unwind tables that are exact at each instruction boundary
--- gcc/builtin-attrs.def.mp	2013-06-07 23:01:16.774536147 +0200
+++ gcc/builtin-attrs.def	2013-06-07 23:04:47.609161293 +0200
@@ -83,6 +83,7 @@  DEF_LIST_INT_INT (5,6)
 #undef DEF_LIST_INT_INT
 
 /* Construct trees for identifiers.  */
+DEF_ATTR_IDENT (ATTR_COLD, "cold")
 DEF_ATTR_IDENT (ATTR_CONST, "const")
 DEF_ATTR_IDENT (ATTR_FORMAT, "format")
 DEF_ATTR_IDENT (ATTR_FORMAT_ARG, "format_arg")
@@ -130,6 +131,8 @@  DEF_ATTR_TREE_LIST (ATTR_NORETURN_NOTHRO
 			ATTR_NULL, ATTR_NOTHROW_LIST)
 DEF_ATTR_TREE_LIST (ATTR_NORETURN_NOTHROW_LEAF_LIST, ATTR_NORETURN,\
 			ATTR_NULL, ATTR_NOTHROW_LEAF_LIST)
+DEF_ATTR_TREE_LIST (ATTR_COLD_NORETURN_NOTHROW_LEAF_LIST, ATTR_COLD,\
+			ATTR_NULL, ATTR_NORETURN_NOTHROW_LEAF_LIST)
 DEF_ATTR_TREE_LIST (ATTR_CONST_NORETURN_NOTHROW_LEAF_LIST, ATTR_CONST,\
 			ATTR_NULL, ATTR_NORETURN_NOTHROW_LEAF_LIST)
 DEF_ATTR_TREE_LIST (ATTR_MALLOC_NOTHROW_LIST, ATTR_MALLOC,	\
--- gcc/c/c-typeck.c.mp	2013-06-07 23:01:16.776536153 +0200
+++ gcc/c/c-typeck.c	2013-06-07 23:04:47.617161321 +0200
@@ -39,6 +39,7 @@  along with GCC; see the file COPYING3.
 #include "gimple.h"
 #include "c-family/c-objc.h"
 #include "c-family/c-common.h"
+#include "c-family/c-ubsan.h"
 
 /* Possible cases of implicit bad conversions.  Used to select
    diagnostic messages in convert_for_assignment.  */
@@ -9527,6 +9528,12 @@  build_binary_op (location_t location, en
      operands to truth-values.  */
   bool boolean_op = false;
 
+  /* Remember whether we're doing / or %.  */
+  bool doing_div_or_mod = false;
+
+  /* Remember whether we're doing << or >>.  */
+  bool doing_shift = false;
+
   if (location == UNKNOWN_LOCATION)
     location = input_location;
 
@@ -9728,6 +9735,7 @@  build_binary_op (location_t location, en
     case FLOOR_DIV_EXPR:
     case ROUND_DIV_EXPR:
     case EXACT_DIV_EXPR:
+      doing_div_or_mod = true;
       warn_for_div_by_zero (location, op1);
 
       if ((code0 == INTEGER_TYPE || code0 == REAL_TYPE
@@ -9775,6 +9783,7 @@  build_binary_op (location_t location, en
 
     case TRUNC_MOD_EXPR:
     case FLOOR_MOD_EXPR:
+      doing_div_or_mod = true;
       warn_for_div_by_zero (location, op1);
 
       if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE
@@ -9873,6 +9882,7 @@  build_binary_op (location_t location, en
       else if ((code0 == INTEGER_TYPE || code0 == FIXED_POINT_TYPE)
 	  && code1 == INTEGER_TYPE)
 	{
+	  doing_shift = true;
 	  if (TREE_CODE (op1) == INTEGER_CST)
 	    {
 	      if (tree_int_cst_sgn (op1) < 0)
@@ -9925,6 +9935,7 @@  build_binary_op (location_t location, en
       else if ((code0 == INTEGER_TYPE || code0 == FIXED_POINT_TYPE)
 	  && code1 == INTEGER_TYPE)
 	{
+	  doing_shift = true;
 	  if (TREE_CODE (op1) == INTEGER_CST)
 	    {
 	      if (tree_int_cst_sgn (op1) < 0)
@@ -10209,6 +10220,19 @@  build_binary_op (location_t location, en
       return error_mark_node;
     }
 
+  if (flag_ubsan && doing_div_or_mod)
+    {
+      ret = ubsan_instrument_division (location, code, op0, op1);
+      resultcode = COMPOUND_EXPR;
+      goto return_build_binary_op;
+    }
+  else if (flag_ubsan && doing_shift)
+    {
+      ret = ubsan_instrument_shift (location, code, op0, op1);
+      resultcode = COMPOUND_EXPR;
+      goto return_build_binary_op;
+    }
+
   if ((code0 == INTEGER_TYPE || code0 == REAL_TYPE || code0 == COMPLEX_TYPE
        || code0 == FIXED_POINT_TYPE || code0 == VECTOR_TYPE)
       &&
--- gcc/asan.c.mp	2013-06-07 23:01:16.773536143 +0200
+++ gcc/asan.c	2013-06-07 23:04:47.603161271 +0200
@@ -2034,6 +2034,9 @@  initialize_sanitizer_builtins (void)
   tree BT_FN_VOID = build_function_type_list (void_type_node, NULL_TREE);
   tree BT_FN_VOID_PTR
     = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
+  tree BT_FN_VOID_PTR_PTR_PTR
+    = build_function_type_list (void_type_node, ptr_type_node,
+				ptr_type_node, ptr_type_node, NULL_TREE);
   tree BT_FN_VOID_PTR_PTRMODE
     = build_function_type_list (void_type_node, ptr_type_node,
 				build_nonstandard_integer_type (POINTER_SIZE,
@@ -2099,6 +2102,9 @@  initialize_sanitizer_builtins (void)
 #undef ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST
 #define ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST \
   ECF_TM_PURE | ATTR_NORETURN_NOTHROW_LEAF_LIST
+#undef ATTR_COLD_NORETURN_NOTHROW_LEAF_LIST
+#define ATTR_COLD_NORETURN_NOTHROW_LEAF_LIST \
+  /* ECF_COLD missing */ ATTR_NORETURN_NOTHROW_LEAF_LIST
 #undef DEF_SANITIZER_BUILTIN
 #define DEF_SANITIZER_BUILTIN(ENUM, NAME, TYPE, ATTRS) \
   decl = add_builtin_function ("__builtin_" NAME, TYPE, ENUM,		\