diff mbox

Optimize a >= 0 && a < b if VR info says b >= 0 (PR tree-optimization/77664)

Message ID 20161006212208.GB7282@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Oct. 6, 2016, 9:22 p.m. UTC
Hi!

For signed a, if we see a >= 0 && a < b and from VR info we know that b >= 0,
we can optimize those 2 comparisons into one unsigned comparison -
(unsigned) a < (unsigned) b.  Similarly for a < 0 || a > b (and also
for a <= b in the first and a >= b in the second).

The patch implements it in the reassoc optimize_range_tests optimization,
so that it can handle even comparisons that aren't adjacent in the && or ||
or & or | conditions, and can handle the conditions across multiple bbs as
well.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-10-06  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/77664
	* tree-ssa-reassoc.c (update_range_test): Also clear low and high
	for the other ranges.
	(optimize_range_tests_diff): Fix up formatting.
	(optimize_range_tests_var_bound): New function.
	(optimize_range_tests): Use it.

	* gcc.dg/tree-ssa/pr77664.c: New test.
	* gcc.dg/pr77664.c: New test.


	Jakub

Comments

Marc Glisse Oct. 6, 2016, 9:55 p.m. UTC | #1
On Thu, 6 Oct 2016, Jakub Jelinek wrote:

> For signed a, if we see a >= 0 && a < b and from VR know that b >= 0,
> we can optimize those 2 comparisons into one unsigned -
> (unsigned) a < (unsigned) b.  Similarly for a < 0 || a > b (and also
> for a <= b in the first and a >= b in the second).

I guess that the slightly more general:
X >= A && X < B where we know that A <= B
--> (unsigned)X - (unsigned)A < (unsigned)B - (unsigned)A

generates too many operations and does not gain anything? The case where A 
and B are constants seems to be handled by ifcombine, currently.
Jakub Jelinek Oct. 6, 2016, 10:01 p.m. UTC | #2
On Thu, Oct 06, 2016 at 11:55:00PM +0200, Marc Glisse wrote:
> On Thu, 6 Oct 2016, Jakub Jelinek wrote:
> 
> >For signed a, if we see a >= 0 && a < b and from VR know that b >= 0,
> >we can optimize those 2 comparisons into one unsigned -
> >(unsigned) a < (unsigned) b.  Similarly for a < 0 || a > b (and also
> >for a <= b in the first and a >= b in the second).
> 
> I guess that the slightly more general:
> X >= A && X < B where we know that A <= B
> --> (unsigned)X - (unsigned)A < (unsigned)B - (unsigned)A
> 
> generates too many operations and does not gain anything? 

Yeah, I think replacing 2 comparisons and one && with 2 subtractions and one
comparison isn't a clear win.
For constant A and B the reassoc optimize_range_tests of course handles it
for a long time already.

> The case where A
> and B are constants seems to be handled by ifcombine, currently.

	Jakub
Richard Biener Oct. 7, 2016, 7:40 a.m. UTC | #3
On Thu, 6 Oct 2016, Jakub Jelinek wrote:

> Hi!
> 
> For signed a, if we see a >= 0 && a < b and from VR know that b >= 0,
> we can optimize those 2 comparisons into one unsigned -
> (unsigned) a < (unsigned) b.  Similarly for a < 0 || a > b (and also
> for a <= b in the first and a >= b in the second).
> 
> The patch implements it in the reassoc optimize_range_tests optimization,
> so that it can handle even comparisons that aren't adjacent in the && or ||
> or & or | conditions, and can handle the conditions across multiple bbs as
> well.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Thanks,
Richard.

> 2016-10-06  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/77664
> 	* tree-ssa-reassoc.c (update_range_test): Also clear low and high
> 	for the other ranges.
> 	(optimize_range_tests_diff): Fix up formatting.
> 	(optimize_range_tests_var_bound): New function.
> 	(optimize_range_tests): Use it.
> 
> 	* gcc.dg/tree-ssa/pr77664.c: New test.
> 	* gcc.dg/pr77664.c: New test.
> 
> --- gcc/tree-ssa-reassoc.c.jj	2016-10-05 17:01:28.724673717 +0200
> +++ gcc/tree-ssa-reassoc.c	2016-10-06 13:34:08.872512318 +0200
> @@ -2428,6 +2428,8 @@ update_range_test (struct range_entry *r
>        else
>  	oe->op = error_mark_node;
>        range->exp = NULL_TREE;
> +      range->low = NULL_TREE;
> +      range->high = NULL_TREE;
>      }
>    return true;
>  }
> @@ -2485,10 +2487,10 @@ optimize_range_tests_xor (enum tree_code
>     ((X - 43U) & ~(75U - 43U)) <= 3U.  */
>  static bool
>  optimize_range_tests_diff (enum tree_code opcode, tree type,
> -			    tree lowi, tree lowj, tree highi, tree highj,
> -			    vec<operand_entry *> *ops,
> -			    struct range_entry *rangei,
> -			    struct range_entry *rangej)
> +			   tree lowi, tree lowj, tree highi, tree highj,
> +			   vec<operand_entry *> *ops,
> +			   struct range_entry *rangei,
> +			   struct range_entry *rangej)
>  {
>    tree tem1, tem2, mask;
>    /* Check highi - lowi == highj - lowj.  */
> @@ -2829,6 +2831,197 @@ optimize_range_tests_to_bit_test (enum t
>    return any_changes;
>  }
>  
> +/* Attempt to optimize for signed a and b where b is known to be >= 0:
> +   a >= 0 && a < b into (unsigned) a < (unsigned) b
> +   a >= 0 && a <= b into (unsigned) a <= (unsigned) b  */
> +
> +static bool
> +optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
> +				vec<operand_entry *> *ops,
> +				struct range_entry *ranges)
> +{
> +  int i;
> +  bool any_changes = false;
> +  hash_map<tree, int> *map = NULL;
> +
> +  for (i = first; i < length; i++)
> +    {
> +      if (ranges[i].exp == NULL_TREE || !ranges[i].in_p)
> +	continue;
> +
> +      tree type = TREE_TYPE (ranges[i].exp);
> +      if (!INTEGRAL_TYPE_P (type)
> +	  || TYPE_UNSIGNED (type)
> +	  || ranges[i].low == NULL_TREE
> +	  || !integer_zerop (ranges[i].low)
> +	  || ranges[i].high != NULL_TREE)
> +	continue;
> +      /* EXP >= 0 here.  */
> +      if (map == NULL)
> +	map = new hash_map <tree, int>;
> +      map->put (ranges[i].exp, i);
> +    }
> +
> +  if (map == NULL)
> +    return false;
> +
> +  for (i = 0; i < length; i++)
> +    {
> +      if (ranges[i].low == NULL_TREE
> +	  || ranges[i].high == NULL_TREE
> +	  || !integer_zerop (ranges[i].low)
> +	  || !integer_zerop (ranges[i].high))
> +	continue;
> +
> +      gimple *stmt;
> +      tree_code ccode;
> +      tree rhs1, rhs2;
> +      if (ranges[i].exp)
> +	{
> +	  stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
> +	  if (!is_gimple_assign (stmt))
> +	    continue;
> +	  ccode = gimple_assign_rhs_code (stmt);
> +	  rhs1 = gimple_assign_rhs1 (stmt);
> +	  rhs2 = gimple_assign_rhs2 (stmt);
> +	}
> +      else
> +	{
> +	  operand_entry *oe = (*ops)[ranges[i].idx];
> +	  stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
> +	  if (gimple_code (stmt) != GIMPLE_COND)
> +	    continue;
> +	  ccode = gimple_cond_code (stmt);
> +	  rhs1 = gimple_cond_lhs (stmt);
> +	  rhs2 = gimple_cond_rhs (stmt);
> +	}
> +
> +      if (TREE_CODE (rhs1) != SSA_NAME
> +	  || rhs2 == NULL_TREE
> +	  || TREE_CODE (rhs2) != SSA_NAME)
> +	continue;
> +
> +      switch (ccode)
> +	{
> +	case GT_EXPR:
> +	case GE_EXPR:
> +	  if (!ranges[i].in_p)
> +	    std::swap (rhs1, rhs2);
> +	  ccode = swap_tree_comparison (ccode);
> +	  break;
> +	case LT_EXPR:
> +	case LE_EXPR:
> +	  if (ranges[i].in_p)
> +	    std::swap (rhs1, rhs2);
> +	  break;
> +	default:
> +	  continue;
> +	}
> +
> +      int *idx = map->get (rhs1);
> +      if (idx == NULL)
> +	continue;
> +
> +      wide_int nz = get_nonzero_bits (rhs2);
> +      if (wi::neg_p (nz))
> +	continue;
> +
> +      /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
> +	 and RHS2 is known to be RHS2 >= 0.  */
> +      tree utype = unsigned_type_for (TREE_TYPE (rhs1));
> +
> +      enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
> +      if ((ranges[*idx].strict_overflow_p
> +	   || ranges[i].strict_overflow_p)
> +	  && issue_strict_overflow_warning (wc))
> +	warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
> +		    "assuming signed overflow does not occur "
> +		    "when simplifying range test");
> +
> +      if (dump_file && (dump_flags & TDF_DETAILS))
> +	{
> +	  struct range_entry *r = &ranges[*idx];
> +	  fprintf (dump_file, "Optimizing range test ");
> +	  print_generic_expr (dump_file, r->exp, 0);
> +	  fprintf (dump_file, " +[");
> +	  print_generic_expr (dump_file, r->low, 0);
> +	  fprintf (dump_file, ", ");
> +	  print_generic_expr (dump_file, r->high, 0);
> +	  fprintf (dump_file, "] and comparison ");
> +	  print_generic_expr (dump_file, rhs1, 0);
> +	  fprintf (dump_file, " %s ", op_symbol_code (ccode));
> +	  print_generic_expr (dump_file, rhs2, 0);
> +	  fprintf (dump_file, "\n into (");
> +	  print_generic_expr (dump_file, utype, 0);
> +	  fprintf (dump_file, ") ");
> +	  print_generic_expr (dump_file, rhs1, 0);
> +	  fprintf (dump_file, " %s (", op_symbol_code (ccode));
> +	  print_generic_expr (dump_file, utype, 0);
> +	  fprintf (dump_file, ") ");
> +	  print_generic_expr (dump_file, rhs2, 0);
> +	  fprintf (dump_file, "\n");
> +	}
> +
> +      if (ranges[i].in_p)
> +	std::swap (rhs1, rhs2);
> +
> +      unsigned int uid = gimple_uid (stmt);
> +      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
> +      gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
> +      gimple_set_uid (g, uid);
> +      rhs1 = gimple_assign_lhs (g);
> +      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> +      g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
> +      gimple_set_uid (g, uid);
> +      rhs2 = gimple_assign_lhs (g);
> +      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> +      if (tree_swap_operands_p (rhs1, rhs2, false))
> +	{
> +	  std::swap (rhs1, rhs2);
> +	  ccode = swap_tree_comparison (ccode);
> +	}
> +      if (gimple_code (stmt) == GIMPLE_COND)
> +	{
> +	  gcond *c = as_a <gcond *> (stmt);
> +	  gimple_cond_set_code (c, ccode);
> +	  gimple_cond_set_lhs (c, rhs1);
> +	  gimple_cond_set_rhs (c, rhs2);
> +	  update_stmt (stmt);
> +	}
> +      else
> +	{
> +	  g = gimple_build_assign (make_ssa_name (TREE_TYPE (ranges[i].exp)),
> +				   ccode, rhs1, rhs2);
> +	  gimple_set_uid (g, uid);
> +	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> +	  ranges[i].exp = gimple_assign_lhs (g);
> +	  (*ops)[ranges[i].idx]->op = ranges[i].exp;
> +	}
> +      ranges[i].strict_overflow_p = false;
> +      operand_entry *oe = (*ops)[ranges[*idx].idx];
> +      /* Now change all the other range test immediate uses, so that
> +	 those tests will be optimized away.  */
> +      if (opcode == ERROR_MARK)
> +	{
> +	  if (oe->op)
> +	    oe->op = build_int_cst (TREE_TYPE (oe->op),
> +				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
> +	  else
> +	    oe->op = (oe->rank == BIT_IOR_EXPR
> +		      ? boolean_false_node : boolean_true_node);
> +	}
> +      else
> +	oe->op = error_mark_node;
> +      ranges[*idx].exp = NULL_TREE;
> +      ranges[*idx].low = NULL_TREE;
> +      ranges[*idx].high = NULL_TREE;
> +      any_changes = true;
> +    }
> +
> +  delete map;
> +  return any_changes;
> +}
> +
>  /* Optimize range tests, similarly how fold_range_test optimizes
>     it on trees.  The tree code for the binary
>     operation between all the operands is OPCODE.
> @@ -2917,6 +3110,8 @@ optimize_range_tests (enum tree_code opc
>    if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
>      any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
>  						     ops, ranges);
> +  any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
> +						 ranges);
>  
>    if (any_changes && opcode != ERROR_MARK)
>      {
> --- gcc/testsuite/gcc.dg/tree-ssa/pr77664.c.jj	2016-10-06 13:04:18.069234540 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr77664.c	2016-10-06 13:45:48.669579669 +0200
> @@ -0,0 +1,49 @@
> +/* PR tree-optimization/77664 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-reassoc1-details" } */
> +
> +extern void foo (void);
> +
> +/* { dg-final { scan-tree-dump-times "Optimizing range test \[^\n\r]* and comparison" 6 "reassoc1" } } */
> +
> +__attribute__((noinline, noclone)) void
> +fn1 (long long int a, unsigned short b, int c)
> +{
> +  if (a >= 0 && c && a < b)
> +    foo ();
> +}
> +
> +__attribute__((noinline, noclone)) void
> +fn2 (long long int a, unsigned short b, int c)
> +{
> +  if (a < 0 || c || a >= b)
> +    foo ();
> +}
> +
> +__attribute__((noinline, noclone)) void
> +fn3 (long long int a, unsigned short b)
> +{
> +  if (a < 0 || b < a)
> +    foo ();
> +}
> +
> +__attribute__((noinline, noclone)) void
> +fn4 (long long int a, unsigned short b)
> +{
> +  if (a <= b && a >= 0)
> +    foo ();
> +}
> +
> +__attribute__((noinline, noclone)) void
> +fn5 (long long int a, unsigned short b)
> +{
> +  if (a < 0 | a > b)
> +    foo ();
> +}
> +
> +__attribute__((noinline, noclone)) void
> +fn6 (long long int a, unsigned short b)
> +{
> +  if (b >= a & a >= 0)
> +    foo ();
> +}
> --- gcc/testsuite/gcc.dg/pr77664.c.jj	2016-10-06 13:08:58.892676906 +0200
> +++ gcc/testsuite/gcc.dg/pr77664.c	2016-10-06 13:45:14.018025091 +0200
> @@ -0,0 +1,55 @@
> +/* PR tree-optimization/77664 */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +#include "tree-ssa/pr77664.c"
> +
> +int cnt;
> +
> +__attribute__((noinline, noclone)) void
> +foo (void)
> +{
> +  cnt++;
> +}
> +
> +int
> +main ()
> +{
> +  fn1 (65534U, 65535U, 7); if (cnt != 1) __builtin_abort ();
> +  fn1 (65534U, 65535U, 0); if (cnt != 1) __builtin_abort ();
> +  fn1 (65535U, 65535U, 1); if (cnt != 1) __builtin_abort ();
> +  fn1 (0, 65535U, 1); if (cnt != 2) __builtin_abort ();
> +  fn1 (-1, 65535U, 1); if (cnt != 2) __builtin_abort ();
> +  fn1 (0, 0, 1); if (cnt != 2) __builtin_abort ();
> +  fn2 (65534U, 65535U, 7); if (cnt != 3) __builtin_abort ();
> +  fn2 (65534U, 65535U, 0); if (cnt != 3) __builtin_abort ();
> +  fn2 (65535U, 65535U, 0); if (cnt != 4) __builtin_abort ();
> +  fn2 (0, 65535U, 0); if (cnt != 4) __builtin_abort ();
> +  fn2 (-1, 65535U, 0); if (cnt != 5) __builtin_abort ();
> +  fn2 (0, 0, 0); if (cnt != 6) __builtin_abort ();
> +  fn3 (-1, 65534U); if (cnt != 7) __builtin_abort ();
> +  fn3 (0, 65534U); if (cnt != 7) __builtin_abort ();
> +  fn3 (65534U, 65534U); if (cnt != 7) __builtin_abort ();
> +  fn3 (65535U, 65534U); if (cnt != 8) __builtin_abort ();
> +  fn3 (0, 0); if (cnt != 8) __builtin_abort ();
> +  fn3 (1, 0); if (cnt != 9) __builtin_abort ();
> +  fn4 (-1, 65534U); if (cnt != 9) __builtin_abort ();
> +  fn4 (0, 65534U); if (cnt != 10) __builtin_abort ();
> +  fn4 (65534U, 65534U); if (cnt != 11) __builtin_abort ();
> +  fn4 (65535U, 65534U); if (cnt != 11) __builtin_abort ();
> +  fn4 (0, 0); if (cnt != 12) __builtin_abort ();
> +  fn4 (1, 0); if (cnt != 12) __builtin_abort ();
> +  fn5 (-1, 65534U); if (cnt != 13) __builtin_abort ();
> +  fn5 (0, 65534U); if (cnt != 13) __builtin_abort ();
> +  fn5 (65534U, 65534U); if (cnt != 13) __builtin_abort ();
> +  fn5 (65535U, 65534U); if (cnt != 14) __builtin_abort ();
> +  fn5 (0, 0); if (cnt != 14) __builtin_abort ();
> +  fn5 (1, 0); if (cnt != 15) __builtin_abort ();
> +  fn6 (-1, 65534U); if (cnt != 15) __builtin_abort ();
> +  fn6 (0, 65534U); if (cnt != 16) __builtin_abort ();
> +  fn6 (65534U, 65534U); if (cnt != 17) __builtin_abort ();
> +  fn6 (65535U, 65534U); if (cnt != 17) __builtin_abort ();
> +  fn6 (0, 0); if (cnt != 18) __builtin_abort ();
> +  fn6 (1, 0); if (cnt != 18) __builtin_abort ();
> +  return 0;
> +}
> 
> 	Jakub
> 
>
diff mbox

Patch

--- gcc/tree-ssa-reassoc.c.jj	2016-10-05 17:01:28.724673717 +0200
+++ gcc/tree-ssa-reassoc.c	2016-10-06 13:34:08.872512318 +0200
@@ -2428,6 +2428,8 @@  update_range_test (struct range_entry *r
       else
 	oe->op = error_mark_node;
       range->exp = NULL_TREE;
+      range->low = NULL_TREE;
+      range->high = NULL_TREE;
     }
   return true;
 }
@@ -2485,10 +2487,10 @@  optimize_range_tests_xor (enum tree_code
    ((X - 43U) & ~(75U - 43U)) <= 3U.  */
 static bool
 optimize_range_tests_diff (enum tree_code opcode, tree type,
-			    tree lowi, tree lowj, tree highi, tree highj,
-			    vec<operand_entry *> *ops,
-			    struct range_entry *rangei,
-			    struct range_entry *rangej)
+			   tree lowi, tree lowj, tree highi, tree highj,
+			   vec<operand_entry *> *ops,
+			   struct range_entry *rangei,
+			   struct range_entry *rangej)
 {
   tree tem1, tem2, mask;
   /* Check highi - lowi == highj - lowj.  */
@@ -2829,6 +2831,197 @@  optimize_range_tests_to_bit_test (enum t
   return any_changes;
 }
 
+/* Attempt to optimize for signed a and b where b is known to be >= 0:
+   a >= 0 && a < b into (unsigned) a < (unsigned) b
+   a >= 0 && a <= b into (unsigned) a <= (unsigned) b  */
+
+static bool
+optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
+				vec<operand_entry *> *ops,
+				struct range_entry *ranges)
+{
+  int i;
+  bool any_changes = false;
+  hash_map<tree, int> *map = NULL;
+
+  for (i = first; i < length; i++)
+    {
+      if (ranges[i].exp == NULL_TREE || !ranges[i].in_p)
+	continue;
+
+      tree type = TREE_TYPE (ranges[i].exp);
+      if (!INTEGRAL_TYPE_P (type)
+	  || TYPE_UNSIGNED (type)
+	  || ranges[i].low == NULL_TREE
+	  || !integer_zerop (ranges[i].low)
+	  || ranges[i].high != NULL_TREE)
+	continue;
+      /* EXP >= 0 here.  */
+      if (map == NULL)
+	map = new hash_map <tree, int>;
+      map->put (ranges[i].exp, i);
+    }
+
+  if (map == NULL)
+    return false;
+
+  for (i = 0; i < length; i++)
+    {
+      if (ranges[i].low == NULL_TREE
+	  || ranges[i].high == NULL_TREE
+	  || !integer_zerop (ranges[i].low)
+	  || !integer_zerop (ranges[i].high))
+	continue;
+
+      gimple *stmt;
+      tree_code ccode;
+      tree rhs1, rhs2;
+      if (ranges[i].exp)
+	{
+	  stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
+	  if (!is_gimple_assign (stmt))
+	    continue;
+	  ccode = gimple_assign_rhs_code (stmt);
+	  rhs1 = gimple_assign_rhs1 (stmt);
+	  rhs2 = gimple_assign_rhs2 (stmt);
+	}
+      else
+	{
+	  operand_entry *oe = (*ops)[ranges[i].idx];
+	  stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
+	  if (gimple_code (stmt) != GIMPLE_COND)
+	    continue;
+	  ccode = gimple_cond_code (stmt);
+	  rhs1 = gimple_cond_lhs (stmt);
+	  rhs2 = gimple_cond_rhs (stmt);
+	}
+
+      if (TREE_CODE (rhs1) != SSA_NAME
+	  || rhs2 == NULL_TREE
+	  || TREE_CODE (rhs2) != SSA_NAME)
+	continue;
+
+      switch (ccode)
+	{
+	case GT_EXPR:
+	case GE_EXPR:
+	  if (!ranges[i].in_p)
+	    std::swap (rhs1, rhs2);
+	  ccode = swap_tree_comparison (ccode);
+	  break;
+	case LT_EXPR:
+	case LE_EXPR:
+	  if (ranges[i].in_p)
+	    std::swap (rhs1, rhs2);
+	  break;
+	default:
+	  continue;
+	}
+
+      int *idx = map->get (rhs1);
+      if (idx == NULL)
+	continue;
+
+      wide_int nz = get_nonzero_bits (rhs2);
+      if (wi::neg_p (nz))
+	continue;
+
+      /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
+	 and RHS2 is known to be RHS2 >= 0.  */
+      tree utype = unsigned_type_for (TREE_TYPE (rhs1));
+
+      enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
+      if ((ranges[*idx].strict_overflow_p
+	   || ranges[i].strict_overflow_p)
+	  && issue_strict_overflow_warning (wc))
+	warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
+		    "assuming signed overflow does not occur "
+		    "when simplifying range test");
+
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	{
+	  struct range_entry *r = &ranges[*idx];
+	  fprintf (dump_file, "Optimizing range test ");
+	  print_generic_expr (dump_file, r->exp, 0);
+	  fprintf (dump_file, " +[");
+	  print_generic_expr (dump_file, r->low, 0);
+	  fprintf (dump_file, ", ");
+	  print_generic_expr (dump_file, r->high, 0);
+	  fprintf (dump_file, "] and comparison ");
+	  print_generic_expr (dump_file, rhs1, 0);
+	  fprintf (dump_file, " %s ", op_symbol_code (ccode));
+	  print_generic_expr (dump_file, rhs2, 0);
+	  fprintf (dump_file, "\n into (");
+	  print_generic_expr (dump_file, utype, 0);
+	  fprintf (dump_file, ") ");
+	  print_generic_expr (dump_file, rhs1, 0);
+	  fprintf (dump_file, " %s (", op_symbol_code (ccode));
+	  print_generic_expr (dump_file, utype, 0);
+	  fprintf (dump_file, ") ");
+	  print_generic_expr (dump_file, rhs2, 0);
+	  fprintf (dump_file, "\n");
+	}
+
+      if (ranges[i].in_p)
+	std::swap (rhs1, rhs2);
+
+      unsigned int uid = gimple_uid (stmt);
+      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+      gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
+      gimple_set_uid (g, uid);
+      rhs1 = gimple_assign_lhs (g);
+      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+      g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
+      gimple_set_uid (g, uid);
+      rhs2 = gimple_assign_lhs (g);
+      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+      if (tree_swap_operands_p (rhs1, rhs2, false))
+	{
+	  std::swap (rhs1, rhs2);
+	  ccode = swap_tree_comparison (ccode);
+	}
+      if (gimple_code (stmt) == GIMPLE_COND)
+	{
+	  gcond *c = as_a <gcond *> (stmt);
+	  gimple_cond_set_code (c, ccode);
+	  gimple_cond_set_lhs (c, rhs1);
+	  gimple_cond_set_rhs (c, rhs2);
+	  update_stmt (stmt);
+	}
+      else
+	{
+	  g = gimple_build_assign (make_ssa_name (TREE_TYPE (ranges[i].exp)),
+				   ccode, rhs1, rhs2);
+	  gimple_set_uid (g, uid);
+	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+	  ranges[i].exp = gimple_assign_lhs (g);
+	  (*ops)[ranges[i].idx]->op = ranges[i].exp;
+	}
+      ranges[i].strict_overflow_p = false;
+      operand_entry *oe = (*ops)[ranges[*idx].idx];
+      /* Now change all the other range test immediate uses, so that
+	 those tests will be optimized away.  */
+      if (opcode == ERROR_MARK)
+	{
+	  if (oe->op)
+	    oe->op = build_int_cst (TREE_TYPE (oe->op),
+				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
+	  else
+	    oe->op = (oe->rank == BIT_IOR_EXPR
+		      ? boolean_false_node : boolean_true_node);
+	}
+      else
+	oe->op = error_mark_node;
+      ranges[*idx].exp = NULL_TREE;
+      ranges[*idx].low = NULL_TREE;
+      ranges[*idx].high = NULL_TREE;
+      any_changes = true;
+    }
+
+  delete map;
+  return any_changes;
+}
+
 /* Optimize range tests, similarly how fold_range_test optimizes
    it on trees.  The tree code for the binary
    operation between all the operands is OPCODE.
@@ -2917,6 +3110,8 @@  optimize_range_tests (enum tree_code opc
   if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
     any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
 						     ops, ranges);
+  any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
+						 ranges);
 
   if (any_changes && opcode != ERROR_MARK)
     {
--- gcc/testsuite/gcc.dg/tree-ssa/pr77664.c.jj	2016-10-06 13:04:18.069234540 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr77664.c	2016-10-06 13:45:48.669579669 +0200
@@ -0,0 +1,49 @@ 
+/* PR tree-optimization/77664 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-reassoc1-details" } */
+
+extern void foo (void);
+
+/* { dg-final { scan-tree-dump-times "Optimizing range test \[^\n\r]* and comparison" 6 "reassoc1" } } */
+
+__attribute__((noinline, noclone)) void
+fn1 (long long int a, unsigned short b, int c)
+{
+  if (a >= 0 && c && a < b)
+    foo ();
+}
+
+__attribute__((noinline, noclone)) void
+fn2 (long long int a, unsigned short b, int c)
+{
+  if (a < 0 || c || a >= b)
+    foo ();
+}
+
+__attribute__((noinline, noclone)) void
+fn3 (long long int a, unsigned short b)
+{
+  if (a < 0 || b < a)
+    foo ();
+}
+
+__attribute__((noinline, noclone)) void
+fn4 (long long int a, unsigned short b)
+{
+  if (a <= b && a >= 0)
+    foo ();
+}
+
+__attribute__((noinline, noclone)) void
+fn5 (long long int a, unsigned short b)
+{
+  if (a < 0 | a > b)
+    foo ();
+}
+
+__attribute__((noinline, noclone)) void
+fn6 (long long int a, unsigned short b)
+{
+  if (b >= a & a >= 0)
+    foo ();
+}
--- gcc/testsuite/gcc.dg/pr77664.c.jj	2016-10-06 13:08:58.892676906 +0200
+++ gcc/testsuite/gcc.dg/pr77664.c	2016-10-06 13:45:14.018025091 +0200
@@ -0,0 +1,55 @@ 
+/* PR tree-optimization/77664 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include "tree-ssa/pr77664.c"
+
+int cnt;
+
+__attribute__((noinline, noclone)) void
+foo (void)
+{
+  cnt++;
+}
+
+int
+main ()
+{
+  fn1 (65534U, 65535U, 7); if (cnt != 1) __builtin_abort ();
+  fn1 (65534U, 65535U, 0); if (cnt != 1) __builtin_abort ();
+  fn1 (65535U, 65535U, 1); if (cnt != 1) __builtin_abort ();
+  fn1 (0, 65535U, 1); if (cnt != 2) __builtin_abort ();
+  fn1 (-1, 65535U, 1); if (cnt != 2) __builtin_abort ();
+  fn1 (0, 0, 1); if (cnt != 2) __builtin_abort ();
+  fn2 (65534U, 65535U, 7); if (cnt != 3) __builtin_abort ();
+  fn2 (65534U, 65535U, 0); if (cnt != 3) __builtin_abort ();
+  fn2 (65535U, 65535U, 0); if (cnt != 4) __builtin_abort ();
+  fn2 (0, 65535U, 0); if (cnt != 4) __builtin_abort ();
+  fn2 (-1, 65535U, 0); if (cnt != 5) __builtin_abort ();
+  fn2 (0, 0, 0); if (cnt != 6) __builtin_abort ();
+  fn3 (-1, 65534U); if (cnt != 7) __builtin_abort ();
+  fn3 (0, 65534U); if (cnt != 7) __builtin_abort ();
+  fn3 (65534U, 65534U); if (cnt != 7) __builtin_abort ();
+  fn3 (65535U, 65534U); if (cnt != 8) __builtin_abort ();
+  fn3 (0, 0); if (cnt != 8) __builtin_abort ();
+  fn3 (1, 0); if (cnt != 9) __builtin_abort ();
+  fn4 (-1, 65534U); if (cnt != 9) __builtin_abort ();
+  fn4 (0, 65534U); if (cnt != 10) __builtin_abort ();
+  fn4 (65534U, 65534U); if (cnt != 11) __builtin_abort ();
+  fn4 (65535U, 65534U); if (cnt != 11) __builtin_abort ();
+  fn4 (0, 0); if (cnt != 12) __builtin_abort ();
+  fn4 (1, 0); if (cnt != 12) __builtin_abort ();
+  fn5 (-1, 65534U); if (cnt != 13) __builtin_abort ();
+  fn5 (0, 65534U); if (cnt != 13) __builtin_abort ();
+  fn5 (65534U, 65534U); if (cnt != 13) __builtin_abort ();
+  fn5 (65535U, 65534U); if (cnt != 14) __builtin_abort ();
+  fn5 (0, 0); if (cnt != 14) __builtin_abort ();
+  fn5 (1, 0); if (cnt != 15) __builtin_abort ();
+  fn6 (-1, 65534U); if (cnt != 15) __builtin_abort ();
+  fn6 (0, 65534U); if (cnt != 16) __builtin_abort ();
+  fn6 (65534U, 65534U); if (cnt != 17) __builtin_abort ();
+  fn6 (65535U, 65534U); if (cnt != 17) __builtin_abort ();
+  fn6 (0, 0); if (cnt != 18) __builtin_abort ();
+  fn6 (1, 0); if (cnt != 18) __builtin_abort ();
+  return 0;
+}