Patchwork fold_range_test like optimization on GIMPLE (PR tree-optimization/46309)

login
register
mail settings
Submitter Jakub Jelinek
Date Sept. 29, 2011, 9:15 p.m.
Message ID <20110929211552.GW2687@tyan-ft48-01.lab.bos.redhat.com>
Download mbox | patch
Permalink /patch/116999/
State New
Headers show

Comments

Jakub Jelinek - Sept. 29, 2011, 9:15 p.m.
Hi!

This patch implements a fold_range_test like optimization on GIMPLE, inside
tree-ssa-reassoc and tweaks fold-const.c so that most of the code can be
shared in between the two.
The advantage of the reassoc optimization is that it doesn't attempt to
merge just 2 ranges at a time, instead it sorts the ranges for the same
SSA_NAME and thus can optimize even cases where source code doesn't have
the numbers in a range test in increasing or decreasing order (and also
can optimize things that were in multiple statements in the source).
Additionally, it optimizes cases like
x == 1 || x == 3
into (x & ~2) == 1.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2011-09-27  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/46309
	* fold-const.c (make_range, merge_ranges): Remove prototypes.
	(range_binop): Likewise, no longer static.
	(make_range_step): New function.
	(make_range): Use it.
	* tree.h (make_range_step, range_binop): New prototypes.
	* Makefile.in (tree-ssa-reassoc.o): Depend on $(DIAGNOSTIC_CORE_H).
	* tree-ssa-reassoc.c: Include diagnostic-core.h.
	(struct range_entry): New type.
	(init_range_entry, range_entry_cmp, update_range_test,
	optimize_range_tests): New functions.
	(reassociate_bb): Call optimize_range_tests.

	* gcc.dg/pr46309.c: New test.


	Jakub
Richard Guenther - Sept. 30, 2011, 10:33 a.m.
On Thu, Sep 29, 2011 at 11:15 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> This patch implements a fold_range_test like optimization on GIMPLE, inside
> tree-ssa-reassoc and tweaks fold-const.c so that most of the code can be
> shared in between the two.
> The advantage of the reassoc optimization is that it doesn't attempt to
> merge just 2 ranges at a time, instead it sorts the ranges for the same
> SSA_NAME and thus can optimize even cases where source code doesn't have
> the numbers in a range test in increasing or decreasing order (and also
> can optimize things that were in multiple statements in the source).
> Additionally, it optimizes cases like
> x == 1 || x == 3
> into (x & ~2) == 1.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2011-09-27  Jakub Jelinek  <jakub@redhat.com>
>
>        PR tree-optimization/46309
>        * fold-const.c (make_range, merge_ranges): Remove prototypes.
>        (range_binop): Likewise, no longer static.
>        (make_range_step): New function.
>        (make_range): Use it.
>        * tree.h (make_range_step, range_binop): New prototypes.
>        * Makefile.in (tree-ssa-reassoc.o): Depend on $(DIAGNOSTIC_CORE_H).
>        * tree-ssa-reassoc.c: Include diagnostic-core.h.
>        (struct range_entry): New type.
>        (init_range_entry, range_entry_cmp, update_range_test,
>        optimize_range_tests): New functions.
>        (reassociate_bb): Call optimize_range_tests.
>
>        * gcc.dg/pr46309.c: New test.
>
> --- gcc/fold-const.c.jj 2011-09-05 12:28:53.000000000 +0200
> +++ gcc/fold-const.c    2011-09-27 16:39:09.000000000 +0200
> @@ -112,12 +112,8 @@ static tree decode_field_reference (loca
>  static int all_ones_mask_p (const_tree, int);
>  static tree sign_bit_p (tree, const_tree);
>  static int simple_operand_p (const_tree);
> -static tree range_binop (enum tree_code, tree, tree, int, tree, int);
>  static tree range_predecessor (tree);
>  static tree range_successor (tree);
> -extern tree make_range (tree, int *, tree *, tree *, bool *);
> -extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int,
> -                         tree, tree);
>  static tree fold_range_test (location_t, enum tree_code, tree, tree, tree);
>  static tree fold_cond_expr_with_comparison (location_t, tree, tree, tree, tree);
>  static tree unextend (tree, int, int, tree);
> @@ -3731,7 +3727,7 @@ simple_operand_p (const_tree exp)
>    must be specified for a comparison.  ARG1 will be converted to ARG0's
>    type if both are specified.  */
>
> -static tree
> +tree
>  range_binop (enum tree_code code, tree type, tree arg0, int upper0_p,
>             tree arg1, int upper1_p)
>  {
> @@ -3790,6 +3786,255 @@ range_binop (enum tree_code code, tree t
>   return constant_boolean_node (result, type);
>  }
>
> +/* Helper routine for make_range.  Perform one step for it, return
> +   new expression if the loop should continue or NULL_TREE if it should
> +   stop.  */
> +
> +tree
> +make_range_step (location_t loc, enum tree_code code, tree arg0, tree arg1,
> +                tree exp_type, tree *p_low, tree *p_high, int *p_in_p,
> +                bool *strict_overflow_p)
> +{
> +  tree arg0_type = TREE_TYPE (arg0);
> +  tree n_low, n_high, low = *p_low, high = *p_high;
> +  int in_p = *p_in_p, n_in_p;
> +
> +  switch (code)
> +    {
> +    case TRUTH_NOT_EXPR:
> +      *p_in_p = ! in_p;
> +      return arg0;
> +
> +    case EQ_EXPR: case NE_EXPR:
> +    case LT_EXPR: case LE_EXPR: case GE_EXPR: case GT_EXPR:
> +      /* We can only do something if the range is testing for zero
> +        and if the second operand is an integer constant.  Note that
> +        saying something is "in" the range we make is done by
> +        complementing IN_P since it will set in the initial case of
> +        being not equal to zero; "out" is leaving it alone.  */
> +      if (low == NULL_TREE || high == NULL_TREE
> +         || ! integer_zerop (low) || ! integer_zerop (high)
> +         || TREE_CODE (arg1) != INTEGER_CST)
> +       return NULL_TREE;
> +
> +      switch (code)
> +       {
> +       case NE_EXPR:  /* - [c, c]  */
> +         low = high = arg1;
> +         break;
> +       case EQ_EXPR:  /* + [c, c]  */
> +         in_p = ! in_p, low = high = arg1;
> +         break;
> +       case GT_EXPR:  /* - [-, c] */
> +         low = 0, high = arg1;
> +         break;
> +       case GE_EXPR:  /* + [c, -] */
> +         in_p = ! in_p, low = arg1, high = 0;
> +         break;
> +       case LT_EXPR:  /* - [c, -] */
> +         low = arg1, high = 0;
> +         break;
> +       case LE_EXPR:  /* + [-, c] */
> +         in_p = ! in_p, low = 0, high = arg1;
> +         break;
> +       default:
> +         gcc_unreachable ();
> +       }
> +
> +      /* If this is an unsigned comparison, we also know that EXP is
> +        greater than or equal to zero.  We base the range tests we make
> +        on that fact, so we record it here so we can parse existing
> +        range tests.  We test arg0_type since often the return type
> +        of, e.g. EQ_EXPR, is boolean.  */
> +      if (TYPE_UNSIGNED (arg0_type) && (low == 0 || high == 0))
> +       {
> +         if (! merge_ranges (&n_in_p, &n_low, &n_high,
> +                             in_p, low, high, 1,
> +                             build_int_cst (arg0_type, 0),
> +                             NULL_TREE))
> +           return NULL_TREE;
> +
> +         in_p = n_in_p, low = n_low, high = n_high;
> +
> +         /* If the high bound is missing, but we have a nonzero low
> +            bound, reverse the range so it goes from zero to the low bound
> +            minus 1.  */
> +         if (high == 0 && low && ! integer_zerop (low))
> +           {
> +             in_p = ! in_p;
> +             high = range_binop (MINUS_EXPR, NULL_TREE, low, 0,
> +                                 integer_one_node, 0);
> +             low = build_int_cst (arg0_type, 0);
> +           }
> +       }
> +
> +      *p_low = low;
> +      *p_high = high;
> +      *p_in_p = in_p;
> +      return arg0;
> +
> +    case NEGATE_EXPR:
> +      /* (-x) IN [a,b] -> x in [-b, -a]  */
> +      n_low = range_binop (MINUS_EXPR, exp_type,
> +                          build_int_cst (exp_type, 0),
> +                          0, high, 1);
> +      n_high = range_binop (MINUS_EXPR, exp_type,
> +                           build_int_cst (exp_type, 0),
> +                           0, low, 0);
> +      if (n_high != 0 && TREE_OVERFLOW (n_high))
> +       return NULL_TREE;
> +      goto normalize;
> +
> +    case BIT_NOT_EXPR:
> +      /* ~ X -> -X - 1  */
> +      return build2_loc (loc, MINUS_EXPR, exp_type, negate_expr (arg0),
> +                        build_int_cst (exp_type, 1));
> +
> +    case PLUS_EXPR:
> +    case MINUS_EXPR:
> +      if (TREE_CODE (arg1) != INTEGER_CST)
> +       return NULL_TREE;
> +
> +      /* If flag_wrapv and ARG0_TYPE is signed, then we cannot
> +        move a constant to the other side.  */
> +      if (!TYPE_UNSIGNED (arg0_type)
> +         && !TYPE_OVERFLOW_UNDEFINED (arg0_type))
> +       return NULL_TREE;
> +
> +      /* If EXP is signed, any overflow in the computation is undefined,
> +        so we don't worry about it so long as our computations on
> +        the bounds don't overflow.  For unsigned, overflow is defined
> +        and this is exactly the right thing.  */
> +      n_low = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR,
> +                          arg0_type, low, 0, arg1, 0);
> +      n_high = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR,
> +                           arg0_type, high, 1, arg1, 0);
> +      if ((n_low != 0 && TREE_OVERFLOW (n_low))
> +         || (n_high != 0 && TREE_OVERFLOW (n_high)))
> +       return NULL_TREE;
> +
> +      if (TYPE_OVERFLOW_UNDEFINED (arg0_type))
> +       *strict_overflow_p = true;
> +
> +      normalize:
> +       /* Check for an unsigned range which has wrapped around the maximum
> +          value thus making n_high < n_low, and normalize it.  */
> +       if (n_low && n_high && tree_int_cst_lt (n_high, n_low))
> +         {
> +           low = range_binop (PLUS_EXPR, arg0_type, n_high, 0,
> +                              integer_one_node, 0);
> +           high = range_binop (MINUS_EXPR, arg0_type, n_low, 0,
> +                               integer_one_node, 0);
> +
> +           /* If the range is of the form +/- [ x+1, x ], we won't
> +              be able to normalize it.  But then, it represents the
> +              whole range or the empty set, so make it
> +              +/- [ -, - ].  */
> +           if (tree_int_cst_equal (n_low, low)
> +               && tree_int_cst_equal (n_high, high))
> +             low = high = 0;
> +           else
> +             in_p = ! in_p;
> +         }
> +       else
> +         low = n_low, high = n_high;
> +
> +       *p_low = low;
> +       *p_high = high;
> +       *p_in_p = in_p;
> +       return arg0;
> +
> +    CASE_CONVERT:
> +    case NON_LVALUE_EXPR:
> +      if (TYPE_PRECISION (arg0_type) > TYPE_PRECISION (exp_type))
> +       return NULL_TREE;
> +
> +      if (! INTEGRAL_TYPE_P (arg0_type)
> +         || (low != 0 && ! int_fits_type_p (low, arg0_type))
> +         || (high != 0 && ! int_fits_type_p (high, arg0_type)))
> +       return NULL_TREE;
> +
> +      n_low = low, n_high = high;
> +
> +      if (n_low != 0)
> +       n_low = fold_convert_loc (loc, arg0_type, n_low);
> +
> +      if (n_high != 0)
> +       n_high = fold_convert_loc (loc, arg0_type, n_high);
> +
> +      /* If we're converting arg0 from an unsigned type, to exp,
> +        a signed type,  we will be doing the comparison as unsigned.
> +        The tests above have already verified that LOW and HIGH
> +        are both positive.
> +
> +        So we have to ensure that we will handle large unsigned
> +        values the same way that the current signed bounds treat
> +        negative values.  */
> +
> +      if (!TYPE_UNSIGNED (exp_type) && TYPE_UNSIGNED (arg0_type))
> +       {
> +         tree high_positive;
> +         tree equiv_type;
> +         /* For fixed-point modes, we need to pass the saturating flag
> +            as the 2nd parameter.  */
> +         if (ALL_FIXED_POINT_MODE_P (TYPE_MODE (arg0_type)))
> +           equiv_type
> +             = lang_hooks.types.type_for_mode (TYPE_MODE (arg0_type),
> +                                               TYPE_SATURATING (arg0_type));
> +         else
> +           equiv_type
> +             = lang_hooks.types.type_for_mode (TYPE_MODE (arg0_type), 1);
> +
> +         /* A range without an upper bound is, naturally, unbounded.
> +            Since convert would have cropped a very large value, use
> +            the max value for the destination type.  */
> +         high_positive
> +           = TYPE_MAX_VALUE (equiv_type) ? TYPE_MAX_VALUE (equiv_type)
> +             : TYPE_MAX_VALUE (arg0_type);
> +
> +         if (TYPE_PRECISION (exp_type) == TYPE_PRECISION (arg0_type))
> +           high_positive = fold_build2_loc (loc, RSHIFT_EXPR, arg0_type,
> +                                            fold_convert_loc (loc, arg0_type,
> +                                                              high_positive),
> +                                            build_int_cst (arg0_type, 1));
> +
> +         /* If the low bound is specified, "and" the range with the
> +            range for which the original unsigned value will be
> +            positive.  */
> +         if (low != 0)
> +           {
> +             if (! merge_ranges (&n_in_p, &n_low, &n_high, 1, n_low, n_high,
> +                                 1, fold_convert_loc (loc, arg0_type,
> +                                                      integer_zero_node),
> +                                 high_positive))
> +               return NULL_TREE;
> +
> +             in_p = (n_in_p == in_p);
> +           }
> +         else
> +           {
> +             /* Otherwise, "or" the range with the range of the input
> +                that will be interpreted as negative.  */
> +             if (! merge_ranges (&n_in_p, &n_low, &n_high, 0, n_low, n_high,
> +                                 1, fold_convert_loc (loc, arg0_type,
> +                                                      integer_zero_node),
> +                                 high_positive))
> +               return NULL_TREE;
> +
> +             in_p = (in_p != n_in_p);
> +           }
> +       }
> +
> +      *p_low = n_low;
> +      *p_high = n_high;
> +      *p_in_p = in_p;
> +      return arg0;
> +
> +    default:
> +      return NULL_TREE;
> +    }
> +}
> +
>  /* Given EXP, a logical expression, set the range it is testing into
>    variables denoted by PIN_P, PLOW, and PHIGH.  Return the expression
>    actually being tested.  *PLOW and *PHIGH will be made of the same
> @@ -3804,10 +4049,10 @@ make_range (tree exp, int *pin_p, tree *
>            bool *strict_overflow_p)
>  {
>   enum tree_code code;
> -  tree arg0 = NULL_TREE, arg1 = NULL_TREE;
> -  tree exp_type = NULL_TREE, arg0_type = NULL_TREE;
> -  int in_p, n_in_p;
> -  tree low, high, n_low, n_high;
> +  tree arg0, arg1 = NULL_TREE;
> +  tree exp_type, nexp;
> +  int in_p;
> +  tree low, high;
>   location_t loc = EXPR_LOCATION (exp);
>
>   /* Start with simply saying "EXP != 0" and then look at the code of EXP
> @@ -3823,255 +4068,26 @@ make_range (tree exp, int *pin_p, tree *
>     {
>       code = TREE_CODE (exp);
>       exp_type = TREE_TYPE (exp);
> +      arg0 = NULL_TREE;
>
>       if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
>        {
>          if (TREE_OPERAND_LENGTH (exp) > 0)
>            arg0 = TREE_OPERAND (exp, 0);
> -         if (TREE_CODE_CLASS (code) == tcc_comparison
> -             || TREE_CODE_CLASS (code) == tcc_unary
> -             || TREE_CODE_CLASS (code) == tcc_binary)
> -           arg0_type = TREE_TYPE (arg0);
>          if (TREE_CODE_CLASS (code) == tcc_binary
>              || TREE_CODE_CLASS (code) == tcc_comparison
>              || (TREE_CODE_CLASS (code) == tcc_expression
>                  && TREE_OPERAND_LENGTH (exp) > 1))
>            arg1 = TREE_OPERAND (exp, 1);
>        }
> +      if (arg0 == NULL_TREE)
> +       break;
>
> -      switch (code)
> -       {
> -       case TRUTH_NOT_EXPR:
> -         in_p = ! in_p, exp = arg0;
> -         continue;
> -
> -       case EQ_EXPR: case NE_EXPR:
> -       case LT_EXPR: case LE_EXPR: case GE_EXPR: case GT_EXPR:
> -         /* We can only do something if the range is testing for zero
> -            and if the second operand is an integer constant.  Note that
> -            saying something is "in" the range we make is done by
> -            complementing IN_P since it will set in the initial case of
> -            being not equal to zero; "out" is leaving it alone.  */
> -         if (low == 0 || high == 0
> -             || ! integer_zerop (low) || ! integer_zerop (high)
> -             || TREE_CODE (arg1) != INTEGER_CST)
> -           break;
> -
> -         switch (code)
> -           {
> -           case NE_EXPR:  /* - [c, c]  */
> -             low = high = arg1;
> -             break;
> -           case EQ_EXPR:  /* + [c, c]  */
> -             in_p = ! in_p, low = high = arg1;
> -             break;
> -           case GT_EXPR:  /* - [-, c] */
> -             low = 0, high = arg1;
> -             break;
> -           case GE_EXPR:  /* + [c, -] */
> -             in_p = ! in_p, low = arg1, high = 0;
> -             break;
> -           case LT_EXPR:  /* - [c, -] */
> -             low = arg1, high = 0;
> -             break;
> -           case LE_EXPR:  /* + [-, c] */
> -             in_p = ! in_p, low = 0, high = arg1;
> -             break;
> -           default:
> -             gcc_unreachable ();
> -           }
> -
> -         /* If this is an unsigned comparison, we also know that EXP is
> -            greater than or equal to zero.  We base the range tests we make
> -            on that fact, so we record it here so we can parse existing
> -            range tests.  We test arg0_type since often the return type
> -            of, e.g. EQ_EXPR, is boolean.  */
> -         if (TYPE_UNSIGNED (arg0_type) && (low == 0 || high == 0))
> -           {
> -             if (! merge_ranges (&n_in_p, &n_low, &n_high,
> -                                 in_p, low, high, 1,
> -                                 build_int_cst (arg0_type, 0),
> -                                 NULL_TREE))
> -               break;
> -
> -             in_p = n_in_p, low = n_low, high = n_high;
> -
> -             /* If the high bound is missing, but we have a nonzero low
> -                bound, reverse the range so it goes from zero to the low bound
> -                minus 1.  */
> -             if (high == 0 && low && ! integer_zerop (low))
> -               {
> -                 in_p = ! in_p;
> -                 high = range_binop (MINUS_EXPR, NULL_TREE, low, 0,
> -                                     integer_one_node, 0);
> -                 low = build_int_cst (arg0_type, 0);
> -               }
> -           }
> -
> -         exp = arg0;
> -         continue;
> -
> -       case NEGATE_EXPR:
> -         /* (-x) IN [a,b] -> x in [-b, -a]  */
> -         n_low = range_binop (MINUS_EXPR, exp_type,
> -                              build_int_cst (exp_type, 0),
> -                              0, high, 1);
> -         n_high = range_binop (MINUS_EXPR, exp_type,
> -                               build_int_cst (exp_type, 0),
> -                               0, low, 0);
> -         if (n_high != 0 && TREE_OVERFLOW (n_high))
> -           break;
> -         goto normalize;
> -
> -       case BIT_NOT_EXPR:
> -         /* ~ X -> -X - 1  */
> -         exp = build2_loc (loc, MINUS_EXPR, exp_type, negate_expr (arg0),
> -                           build_int_cst (exp_type, 1));
> -         continue;
> -
> -       case PLUS_EXPR:  case MINUS_EXPR:
> -         if (TREE_CODE (arg1) != INTEGER_CST)
> -           break;
> -
> -         /* If flag_wrapv and ARG0_TYPE is signed, then we cannot
> -            move a constant to the other side.  */
> -         if (!TYPE_UNSIGNED (arg0_type)
> -             && !TYPE_OVERFLOW_UNDEFINED (arg0_type))
> -           break;
> -
> -         /* If EXP is signed, any overflow in the computation is undefined,
> -            so we don't worry about it so long as our computations on
> -            the bounds don't overflow.  For unsigned, overflow is defined
> -            and this is exactly the right thing.  */
> -         n_low = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR,
> -                              arg0_type, low, 0, arg1, 0);
> -         n_high = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR,
> -                               arg0_type, high, 1, arg1, 0);
> -         if ((n_low != 0 && TREE_OVERFLOW (n_low))
> -             || (n_high != 0 && TREE_OVERFLOW (n_high)))
> -           break;
> -
> -         if (TYPE_OVERFLOW_UNDEFINED (arg0_type))
> -           *strict_overflow_p = true;
> -
> -       normalize:
> -         /* Check for an unsigned range which has wrapped around the maximum
> -            value thus making n_high < n_low, and normalize it.  */
> -         if (n_low && n_high && tree_int_cst_lt (n_high, n_low))
> -           {
> -             low = range_binop (PLUS_EXPR, arg0_type, n_high, 0,
> -                                integer_one_node, 0);
> -             high = range_binop (MINUS_EXPR, arg0_type, n_low, 0,
> -                                 integer_one_node, 0);
> -
> -             /* If the range is of the form +/- [ x+1, x ], we won't
> -                be able to normalize it.  But then, it represents the
> -                whole range or the empty set, so make it
> -                +/- [ -, - ].  */
> -             if (tree_int_cst_equal (n_low, low)
> -                 && tree_int_cst_equal (n_high, high))
> -               low = high = 0;
> -             else
> -               in_p = ! in_p;
> -           }
> -         else
> -           low = n_low, high = n_high;
> -
> -         exp = arg0;
> -         continue;
> -
> -       CASE_CONVERT: case NON_LVALUE_EXPR:
> -         if (TYPE_PRECISION (arg0_type) > TYPE_PRECISION (exp_type))
> -           break;
> -
> -         if (! INTEGRAL_TYPE_P (arg0_type)
> -             || (low != 0 && ! int_fits_type_p (low, arg0_type))
> -             || (high != 0 && ! int_fits_type_p (high, arg0_type)))
> -           break;
> -
> -         n_low = low, n_high = high;
> -
> -         if (n_low != 0)
> -           n_low = fold_convert_loc (loc, arg0_type, n_low);
> -
> -         if (n_high != 0)
> -           n_high = fold_convert_loc (loc, arg0_type, n_high);
> -
> -
> -         /* If we're converting arg0 from an unsigned type, to exp,
> -            a signed type,  we will be doing the comparison as unsigned.
> -            The tests above have already verified that LOW and HIGH
> -            are both positive.
> -
> -            So we have to ensure that we will handle large unsigned
> -            values the same way that the current signed bounds treat
> -            negative values.  */
> -
> -         if (!TYPE_UNSIGNED (exp_type) && TYPE_UNSIGNED (arg0_type))
> -           {
> -             tree high_positive;
> -             tree equiv_type;
> -             /* For fixed-point modes, we need to pass the saturating flag
> -                as the 2nd parameter.  */
> -             if (ALL_FIXED_POINT_MODE_P (TYPE_MODE (arg0_type)))
> -               equiv_type = lang_hooks.types.type_for_mode
> -                            (TYPE_MODE (arg0_type),
> -                             TYPE_SATURATING (arg0_type));
> -             else
> -               equiv_type = lang_hooks.types.type_for_mode
> -                            (TYPE_MODE (arg0_type), 1);
> -
> -             /* A range without an upper bound is, naturally, unbounded.
> -                Since convert would have cropped a very large value, use
> -                the max value for the destination type.  */
> -             high_positive
> -               = TYPE_MAX_VALUE (equiv_type) ? TYPE_MAX_VALUE (equiv_type)
> -               : TYPE_MAX_VALUE (arg0_type);
> -
> -             if (TYPE_PRECISION (exp_type) == TYPE_PRECISION (arg0_type))
> -               high_positive = fold_build2_loc (loc, RSHIFT_EXPR, arg0_type,
> -                                            fold_convert_loc (loc, arg0_type,
> -                                                              high_positive),
> -                                            build_int_cst (arg0_type, 1));
> -
> -             /* If the low bound is specified, "and" the range with the
> -                range for which the original unsigned value will be
> -                positive.  */
> -             if (low != 0)
> -               {
> -                 if (! merge_ranges (&n_in_p, &n_low, &n_high,
> -                                     1, n_low, n_high, 1,
> -                                     fold_convert_loc (loc, arg0_type,
> -                                                       integer_zero_node),
> -                                     high_positive))
> -                   break;
> -
> -                 in_p = (n_in_p == in_p);
> -               }
> -             else
> -               {
> -                 /* Otherwise, "or" the range with the range of the input
> -                    that will be interpreted as negative.  */
> -                 if (! merge_ranges (&n_in_p, &n_low, &n_high,
> -                                     0, n_low, n_high, 1,
> -                                     fold_convert_loc (loc, arg0_type,
> -                                                       integer_zero_node),
> -                                     high_positive))
> -                   break;
> -
> -                 in_p = (in_p != n_in_p);
> -               }
> -           }
> -
> -         exp = arg0;
> -         low = n_low, high = n_high;
> -         continue;
> -
> -       default:
> -         break;
> -       }
> -
> -      break;
> +      nexp = make_range_step (loc, code, arg0, arg1, exp_type, &low,
> +                             &high, &in_p, strict_overflow_p);
> +      if (nexp == NULL_TREE)
> +       break;
> +      exp = nexp;
>     }
>
>   /* If EXP is a constant, we can evaluate whether this is true or false.  */
> --- gcc/tree.h.jj       2011-09-20 21:43:07.000000000 +0200
> +++ gcc/tree.h  2011-09-27 16:38:58.000000000 +0200
> @@ -5384,7 +5384,10 @@ extern unsigned int get_pointer_alignmen
>  extern tree fold_call_stmt (gimple, bool);
>  extern tree gimple_fold_builtin_snprintf_chk (gimple, tree, enum built_in_function);
>  extern tree make_range (tree, int *, tree *, tree *, bool *);
> +extern tree make_range_step (location_t, enum tree_code, tree, tree, tree,
> +                            tree *, tree *, int *, bool *);
>  extern tree build_range_check (location_t, tree, tree, int, tree, tree);
> +extern tree range_binop (enum tree_code, tree, tree, int, tree, int);
>  extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int,
>                          tree, tree);
>  extern void set_builtin_user_assembler_name (tree decl, const char *asmspec);
> --- gcc/Makefile.in.jj  2011-09-18 21:20:04.000000000 +0200
> +++ gcc/Makefile.in     2011-09-29 14:09:42.000000000 +0200
> @@ -2641,7 +2641,7 @@ tree-ssa-reassoc.o : tree-ssa-reassoc.c
>    $(TM_H) coretypes.h $(TREE_DUMP_H) $(TREE_PASS_H) $(FLAGS_H) \
>    tree-iterator.h $(BASIC_BLOCK_H) $(GIMPLE_H) $(TREE_INLINE_H) \
>    $(VEC_H) langhooks.h alloc-pool.h pointer-set.h $(CFGLOOP_H) \
> -   tree-pretty-print.h gimple-pretty-print.h
> +   tree-pretty-print.h gimple-pretty-print.h $(DIAGNOSTIC_CORE_H)
>  tree-optimize.o : tree-optimize.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
>    $(TREE_H) $(TM_P_H) $(GGC_H) output.h \
>    $(DIAGNOSTIC_H) $(BASIC_BLOCK_H) $(FLAGS_H) $(TIMEVAR_H) $(TM_H) \
> --- gcc/tree-ssa-reassoc.c.jj   2011-09-08 11:21:10.000000000 +0200
> +++ gcc/tree-ssa-reassoc.c      2011-09-29 13:33:00.000000000 +0200
> @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3.
>  #include "flags.h"
>  #include "target.h"
>  #include "params.h"
> +#include "diagnostic-core.h"
>
>  /*  This is a simple global reassociation pass.  It is, in part, based
>     on the LLVM pass of the same name (They do some things more/less
> @@ -1568,6 +1569,422 @@ optimize_ops_list (enum tree_code opcode
>     optimize_ops_list (opcode, ops);
>  }
>
> +/* The following functions are subroutines to optimize_range_tests and allow
> +   it to try to change a logical combination of comparisons into a range
> +   test.
> +
> +   For example, both
> +       X == 2 || X == 5 || X == 3 || X == 4
> +   and
> +       X >= 2 && X <= 5
> +   are converted to
> +       (unsigned) (X - 2) <= 3
> +
> +   For more information see comments above fold_test_range in fold-const.c,
> +   this implementation is for GIMPLE.  */
> +
> +struct range_entry
> +{
> +  tree exp;
> +  tree low;
> +  tree high;
> +  bool in_p;
> +  bool strict_overflow_p;
> +  unsigned int idx, next;
> +};
> +
> +/* This is similar to make_range in fold-const.c, but on top of
> +   GIMPLE instead of trees.  */
> +
> +static void
> +init_range_entry (struct range_entry *r, tree exp)
> +{
> +  int in_p;
> +  tree low, high;
> +  bool is_bool, strict_overflow_p;
> +
> +  r->exp = NULL_TREE;
> +  r->in_p = false;
> +  r->strict_overflow_p = false;
> +  r->low = NULL_TREE;
> +  r->high = NULL_TREE;
> +  if (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))
> +    return;
> +
> +  /* Start with simply saying "EXP != 0" and then look at the code of EXP
> +     and see if we can refine the range.  Some of the cases below may not
> +     happen, but it doesn't seem worth worrying about this.  We "continue"
> +     the outer loop when we've changed something; otherwise we "break"
> +     the switch, which will "break" the while.  */
> +  low = build_int_cst (TREE_TYPE (exp), 0);
> +  high = low;
> +  in_p = 0;
> +  strict_overflow_p = false;
> +  is_bool = TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE;

Effective boolean are also TYPE_PRECISION () == 1 types.  Remember
we don't preserve conversions from BOOLEAN_TYPE to such types.

> +
> +  while (1)
> +    {
> +      gimple stmt;
> +      enum tree_code code;
> +      tree arg0, arg1, exp_type;
> +      tree nexp;
> +      location_t loc;
> +
> +      if (TREE_CODE (exp) != SSA_NAME)
> +       break;
> +
> +      stmt = SSA_NAME_DEF_STMT (exp);
> +      if (!is_gimple_assign (stmt))
> +       break;
> +
> +      code = gimple_assign_rhs_code (stmt);
> +      arg0 = gimple_assign_rhs1 (stmt);
> +      arg1 = gimple_assign_rhs2 (stmt);
> +      exp_type = TREE_TYPE (exp);
> +      loc = gimple_location (stmt);
> +      switch (code)
> +       {
> +       case BIT_NOT_EXPR:
> +         if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)

See above.

> +           {
> +             in_p = !in_p;
> +             exp = arg0;
> +             continue;
> +           }
> +         break;
> +       case SSA_NAME:
> +         exp = arg0;
> +         continue;
> +       CASE_CONVERT:
> +         is_bool |= TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE;

Likewise.  Though I wonder why !=?  Does it matter whether the extension
sign- or zero-extends?

> +         goto do_default;
> +       case EQ_EXPR:
> +       case NE_EXPR:
> +       case LT_EXPR:
> +       case LE_EXPR:
> +       case GE_EXPR:
> +       case GT_EXPR:
> +         is_bool = true;
> +         /* FALLTHRU */
> +       do_default:
> +       default:
> +         if (!is_bool)
> +           return;
> +         nexp = make_range_step (loc, code, arg0, arg1, exp_type,
> +                                 &low, &high, &in_p,
> +                                 &strict_overflow_p);
> +         if (nexp != NULL_TREE)
> +           {
> +             exp = nexp;
> +             gcc_assert (TREE_CODE (exp) == SSA_NAME);
> +             continue;
> +           }
> +         break;
> +       }
> +      break;
> +    }
> +  if (is_bool)
> +    {
> +      r->exp = exp;
> +      r->in_p = in_p;
> +      r->low = low;
> +      r->high = high;
> +      r->strict_overflow_p = strict_overflow_p;
> +    }
> +}
> +
> +/* Comparison function for qsort.  Sort entries
> +   without SSA_NAME exp first, then with SSA_NAMEs sorted
> +   by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
> +   by increasing ->low and if ->low is the same, by increasing
> +   ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
> +   maximum.  */
> +
> +static int
> +range_entry_cmp (const void *a, const void *b)
> +{
> +  const struct range_entry *p = (const struct range_entry *) a;
> +  const struct range_entry *q = (const struct range_entry *) b;
> +
> +  if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
> +    {
> +      if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
> +       {
> +         /* Group range_entries for the same SSA_NAME together.  */
> +         if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
> +           return -1;
> +         else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
> +           return 1;
> +         /* If ->low is different, NULL low goes first, then by
> +            ascending low.  */
> +         if (p->low != NULL_TREE)
> +           {
> +             if (q->low != NULL_TREE)
> +               {
> +                 if (integer_onep (range_binop (LT_EXPR, integer_type_node,
> +                                                p->low, 0, q->low, 0)))

that's just

tem = fold_binary (LT_EXPR, boolean_type_node, p->low, q->low);
if (tem && integer_onep (tem))
  return -1;

which avoids building the LT_EXPR tree if it doesn't fold.  Similar below.
(ISTR some integer_onep () variant that handles a NULL argument ...)

> +                   return -1;
> +                 if (integer_onep (range_binop (GT_EXPR, integer_type_node,
> +                                                p->low, 0, q->low, 0)))
> +                   return 1;
> +               }
> +             else
> +               return 1;
> +           }
> +         else if (q->low != NULL_TREE)
> +           return -1;
> +         /* If ->high is different, NULL high goes last, before that by
> +            ascending high.  */
> +         if (p->high != NULL_TREE)
> +           {
> +             if (q->high != NULL_TREE)
> +               {
> +                 if (integer_onep (range_binop (LT_EXPR, integer_type_node,
> +                                                p->high, 0, q->high, 0)))
> +                   return -1;
> +                 if (integer_onep (range_binop (GT_EXPR, integer_type_node,
> +                                                p->high, 0, q->high, 0)))
> +                   return 1;
> +               }
> +             else
> +               return -1;
> +           }
> +         else if (p->high != NULL_TREE)
> +           return 1;
> +         /* If both ranges are the same, sort below by ascending idx.  */
> +       }
> +      else
> +       return 1;
> +    }
> +  else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
> +    return -1;
> +
> +  if (p->idx < q->idx)
> +    return -1;
> +  else
> +    {
> +      gcc_checking_assert (p->idx > q->idx);
> +      return 1;
> +    }
> +}
> +
> +/* Helper routine of optimize_range_test.
> +   [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
> +   RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
> +   OPCODE and OPS are arguments of optimize_range_tests.  Return
> +   true if the range merge has been successful.  */
> +
> +static bool
> +update_range_test (struct range_entry *range, struct range_entry *otherrange,
> +                  unsigned int count, enum tree_code opcode,
> +                  VEC (operand_entry_t, heap) **ops, tree exp, bool in_p,
> +                  tree low, tree high, bool strict_overflow_p)
> +{
> +  tree op = VEC_index (operand_entry_t, *ops, range->idx)->op;
> +  location_t loc = gimple_location (SSA_NAME_DEF_STMT (op));
> +  tree tem = build_range_check (loc, TREE_TYPE (op), exp, in_p, low, high);
> +  enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
> +  gimple_stmt_iterator gsi;
> +
> +  if (tem == NULL_TREE)
> +    return false;
> +
> +  if (strict_overflow_p && issue_strict_overflow_warning (wc))
> +    warning_at (loc, OPT_Wstrict_overflow,
> +               "assuming signed overflow does not occur "
> +               "when simplifying range test");
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      struct range_entry *r;
> +      fprintf (dump_file, "Optimizing range tests ");
> +      print_generic_expr (dump_file, range->exp, 0);
> +      fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
> +      print_generic_expr (dump_file, range->low, 0);
> +      fprintf (dump_file, ", ");
> +      print_generic_expr (dump_file, range->high, 0);
> +      fprintf (dump_file, "]");
> +      for (r = otherrange; r < otherrange + count; r++)
> +       {
> +         fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
> +         print_generic_expr (dump_file, r->low, 0);
> +         fprintf (dump_file, ", ");
> +         print_generic_expr (dump_file, r->high, 0);
> +         fprintf (dump_file, "]");
> +       }
> +      fprintf (dump_file, "\n into ");
> +      print_generic_expr (dump_file, tem, 0);
> +      fprintf (dump_file, "\n");
> +    }
> +
> +  if (opcode == BIT_IOR_EXPR)
> +    tem = invert_truthvalue_loc (loc, tem);
> +
> +  tem = fold_convert_loc (loc, TREE_TYPE (op), tem);
> +  gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (op));
> +  tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
> +                                 GSI_SAME_STMT);
> +
> +  VEC_index (operand_entry_t, *ops, range->idx)->op = tem;
> +  range->exp = exp;
> +  range->low = low;
> +  range->high = high;
> +  range->in_p = in_p;
> +  range->strict_overflow_p = false;
> +
> +  for (range = otherrange; range < otherrange + count; range++)
> +    {
> +      VEC_index (operand_entry_t, *ops, range->idx)->op = error_mark_node;
> +      range->exp = NULL_TREE;
> +    }
> +  return true;
> +}
> +
> +/* Optimize range tests, similarly how fold_range_test optimizes
> +   it on trees.  The tree code for the binary
> +   operation between all the operands is OPCODE.  */
> +
> +static void
> +optimize_range_tests (enum tree_code opcode,
> +                     VEC (operand_entry_t, heap) **ops)
> +{
> +  unsigned int length = VEC_length (operand_entry_t, *ops), i, j, first;
> +  operand_entry_t oe;
> +  struct range_entry *ranges;
> +  bool any_changes = false;
> +
> +  if (length == 1)
> +    return;
> +
> +  ranges = XNEWVEC (struct range_entry, length);
> +  for (i = 0; i < length; i++)
> +    {
> +      ranges[i].idx = i;
> +      init_range_entry (ranges + i, VEC_index (operand_entry_t, *ops, i)->op);
> +      /* For | invert it now, we will invert it again before emitting
> +        the optimized expression.  */
> +      if (opcode == BIT_IOR_EXPR)
> +       ranges[i].in_p = !ranges[i].in_p;
> +    }
> +
> +  qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
> +  for (i = 0; i < length; i++)
> +    if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
> +      break;
> +
> +  /* Try to merge ranges.  */
> +  for (first = i; i < length; i++)
> +    {
> +      tree low = ranges[i].low;
> +      tree high = ranges[i].high;
> +      int in_p = ranges[i].in_p;
> +      bool strict_overflow_p = ranges[i].strict_overflow_p;
> +
> +      for (j = i + 1; j < length; j++)
> +       {

That looks quadratic - do we want to limit this with a --param, simply
partitioning the array into quadratic chunks?

> +         if (ranges[i].exp != ranges[j].exp)
> +           break;

Or isn't it too bad because of this check?

> +         if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
> +                            ranges[j].in_p, ranges[j].low, ranges[j].high))
> +           break;

And this early out?  I suppose some comment on why together with
the sorting this is of limited complexity would help.

> +         strict_overflow_p |= ranges[j].strict_overflow_p;
> +       }
> +
> +      if (j > i + 1
> +         && update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
> +                               ops, ranges[i].exp, in_p, low, high,
> +                               strict_overflow_p))
> +       {
> +         i = j - 1;
> +         any_changes = true;
> +       }
> +    }
> +
> +  /* Optimize X == CST1 || X == CST2
> +     if popcount (CST1 ^ CST2) == 1 into
> +     (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
> +     Similarly for ranges.  E.g.
> +     X != 2 && X != 3 && X != 10 && X != 11
> +     will be transformed by the above loop into
> +     (X - 2U) <= 1U && (X - 10U) <= 1U
> +     and this loop can transform that into
> +     ((X & ~8) - 2U) <= 1U.  */
> +  for (i = first; i < length; i++)
> +    {
> +      tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
> +
> +      if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
> +       continue;
> +      type = TREE_TYPE (ranges[i].exp);
> +      if (!INTEGRAL_TYPE_P (type))
> +       continue;
> +      lowi = ranges[i].low;
> +      if (lowi == NULL_TREE)
> +       lowi = TYPE_MIN_VALUE (type);
> +      highi = ranges[i].high;
> +      if (highi == NULL_TREE)
> +       continue;
> +      for (j = i + 1; j < length && j < i + 64; j++)
> +       {
> +         if (ranges[j].exp == NULL_TREE)
> +           continue;
> +         if (ranges[i].exp != ranges[j].exp)
> +           break;
> +         if (ranges[j].in_p)
> +           continue;
> +         lowj = ranges[j].low;
> +         if (lowj == NULL_TREE)
> +           continue;
> +         highj = ranges[j].high;
> +         if (highj == NULL_TREE)
> +           highj = TYPE_MAX_VALUE (type);
> +         if (!integer_onep (range_binop (GT_EXPR, integer_type_node,
> +                                         lowj, 0, highi, 0)))
> +           continue;

See above.

> +         lowxor = range_binop (BIT_XOR_EXPR, NULL_TREE, lowi, 0, lowj, 0);
> +         if (lowxor == NULL_TREE)
> +           continue;

Likewise.

> +         gcc_checking_assert (!integer_zerop (lowxor));
> +         tem = range_binop (MINUS_EXPR, NULL_TREE, lowxor, 0,
> +                            build_int_cst (type, 1), 0);
> +         tem = range_binop (BIT_AND_EXPR, NULL_TREE, lowxor, 0, tem, 0);
> +         if (!integer_zerop (tem))
> +           continue;
> +         highxor = range_binop (BIT_XOR_EXPR, NULL_TREE, highi, 0, highj, 0);
> +         if (!tree_int_cst_equal (lowxor, highxor))
> +           continue;
> +         tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
> +         exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
> +         lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
> +         highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
> +         if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
> +                                ranges[i].in_p, lowj, highj,
> +                                ranges[i].strict_overflow_p
> +                                || ranges[j].strict_overflow_p))
> +           {
> +             any_changes = true;
> +             break;
> +           }
> +       }
> +    }
> +
> +  if (any_changes)
> +    {
> +      j = 0;
> +      FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe)
> +       {
> +         if (oe->op == error_mark_node)
> +           continue;
> +         else if (i != j)
> +           VEC_replace (operand_entry_t, *ops, j, oe);
> +         j++;
> +       }
> +      VEC_truncate (operand_entry_t, *ops, j);
> +    }
> +
> +  XDELETEVEC (ranges);
> +}
> +
>  /* Return true if OPERAND is defined by a PHI node which uses the LHS
>    of STMT in it's operands.  This is also known as a "destructive
>    update" operation.  */
> @@ -2447,6 +2864,9 @@ reassociate_bb (basic_block bb)
>                  optimize_ops_list (rhs_code, &ops);
>                }
>
> +             if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)

I think we want to check that the type is boolean here, so we don't setup
the machinery needlessly?

Otherwise it looks like a nice improvement.

Thanks,
Richard.

> +               optimize_range_tests (rhs_code, &ops);
> +
>              if (VEC_length (operand_entry_t, ops) == 1)
>                {
>                  if (dump_file && (dump_flags & TDF_DETAILS))
> --- gcc/testsuite/gcc.dg/pr46309.c.jj   2011-09-29 14:03:28.000000000 +0200
> +++ gcc/testsuite/gcc.dg/pr46309.c      2011-09-29 14:08:32.000000000 +0200
> @@ -0,0 +1,66 @@
> +/* PR tree-optimization/46309 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-reassoc-details" } */
> +
> +int
> +f1 (int a)
> +{
> +  int v1 = (a == 3);
> +  int v2 = (a == 1);
> +  int v3 = (a == 4);
> +  int v4 = (a == 2);
> +  return v1 || v2 || v3 || v4;
> +}
> +
> +int
> +f2 (int a)
> +{
> +  int v1 = (a == 1);
> +  int v2 = (a == 2);
> +  int v3 = (a == 3);
> +  int v4 = (a == 4);
> +  return v1 || v2 || v3 || v4;
> +}
> +
> +int
> +f3 (int a)
> +{
> +  int v1 = (a == 3);
> +  int v2 = (a == 1);
> +  return v1 || v2;
> +}
> +
> +int
> +f4 (int a)
> +{
> +  int v1 = (a == 1);
> +  int v2 = (a == 2);
> +  return v1 || v2;
> +}
> +
> +int
> +f5 (unsigned int a)
> +{
> +  int v1 = (a <= 31);
> +  int v2 = (a >= 64 && a <= 95);
> +  return v1 || v2;
> +}
> +
> +int
> +f6 (unsigned int a)
> +{
> +  int v1 = (a <= 31);
> +  int v2 = (a >= 64 && a <= 95);
> +  int v3 = (a >= 128 && a <= 159);
> +  int v4 = (a >= 192 && a <= 223);
> +  return v1 || v2 || v3 || v4;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.1, 1. and -.2, 2. and -.3, 3. and -.4, 4.\[\n\r\]* into" 2 "reassoc1" } } */
> +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.1, 1. and -.3, 3.\[\n\r\]* into" 1 "reassoc1" } } */
> +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.1, 1. and -.2, 2.\[\n\r\]* into" 1 "reassoc1" } } */
> +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.0, 31. and -.64, 95.\[\n\r\]* into" 2 "reassoc1" } } */
> +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.128, 159. and -.192, 223.\[\n\r\]* into" 1 "reassoc1" } } */
> +/* { dg-final { scan-tree-dump-times "Optimizing range tests D.\[0-9\]*_\[0-9\]* -.0, 31. and -.128, 159.\[\n\r\]* into" 1 "reassoc2" } } */
> +/* { dg-final { cleanup-tree-dump "reassoc1" } } */
> +/* { dg-final { cleanup-tree-dump "reassoc2" } } */
>
>        Jakub
>
Jakub Jelinek - Sept. 30, 2011, 11:11 a.m.
On Fri, Sep 30, 2011 at 12:33:07PM +0200, Richard Guenther wrote:
> > +  low = build_int_cst (TREE_TYPE (exp), 0);
> > +  high = low;
> > +  in_p = 0;
> > +  strict_overflow_p = false;
> > +  is_bool = TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE;
> 
> Effective boolean are also TYPE_PRECISION () == 1 types.  Remember
> we don't preserve conversions from BOOLEAN_TYPE to such types.

I can replace these with TYPE_PRECISION (TREE_TYPE (exp)) == 1;
checks if you prefer, though maybe it would need to also do
&& TYPE_UNSIGNED (TREE_TYPE (exp)), at least if different operands of
the | have different inner 1-bit signedness we could not merge them.

> > +       CASE_CONVERT:
> > +         is_bool |= TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE;
> 
> Likewise.  Though I wonder why !=?  Does it matter whether the extension
> sign- or zero-extends?

I think the |= is needed, this loop follows through not just casts, but
then also through the comparisons and again through casts.
It doesn't matter if the types or casts inside of the comparison argument
are bool or not (and likely they will not be), yet we don't want to stop
iterating because of that.
It wants to reconstruct ranges from what e.g. fold in fold_range_test
already created before, so stuff like
(int) (((unsigned) x - 64U) <= 31U)
for + [64, 95] range.

> > +                 if (integer_onep (range_binop (LT_EXPR, integer_type_node,
> > +                                                p->low, 0, q->low, 0)))
> 
> that's just
> 
> tem = fold_binary (LT_EXPR, boolean_type_node, p->low, q->low);
> if (tem && integer_onep (tem))
>   return -1;

Ok, can change that.

> which avoids building the LT_EXPR tree if it doesn't fold.  Similar below.
> (ISTR some integer_onep () variant that handles a NULL argument ...)

Couldn't find any.  integer_onep needs non-NULL, compare_tree_int too.

> > +  /* Try to merge ranges.  */
> > +  for (first = i; i < length; i++)
> > +    {
> > +      tree low = ranges[i].low;
> > +      tree high = ranges[i].high;
> > +      int in_p = ranges[i].in_p;
> > +      bool strict_overflow_p = ranges[i].strict_overflow_p;
> > +
> > +      for (j = i + 1; j < length; j++)
> > +       {
> 
> That looks quadratic - do we want to limit this with a --param, simply
> partitioning the array into quadratic chunks?

This isn't quadratic (except in the hypothetical case
where all merge_ranges calls would succeed, but then
build_range_test would fail).  In the likely case where if range merges
succeed then update_range_test succeeds too this is just linear (plus the
qsort before that which isn't linear though).
So perhaps:
+      if (j > i + 1
+         && update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
+                               ops, ranges[i].exp, in_p, low, high,
+                               strict_overflow_p))
+       {
+         i = j - 1;
+         any_changes = true;
+       }
could be
+      if (j > i + 1)
+	{
+         if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
+                               ops, ranges[i].exp, in_p, low, high,
+                               strict_overflow_p))
+	    any_changes = true;
+	  i = j - 1;
+	}
(then it isn't quadratic), or could try a few times before giving up:
+      if (j > i + 1)
+	{
+         if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
+                               ops, ranges[i].exp, in_p, low, high,
+                               strict_overflow_p))
+	    {
+	      any_changes = true;
+	      i = j - 1;
+	    }
+	  else if (update_fail_count == 64)
+	    i = j - 1;
+	  else
+	    update_fail_count = 0;
+	}
where int update_fail_count = 0; would be after the
      bool strict_overflow_p = ranges[i].strict_overflow_p;
line in the outer loop.

> > +         if (ranges[i].exp != ranges[j].exp)
> > +           break;
> 
> Or isn't it too bad because of this check?

The above limits the chunks to a particular SSA_NAME.  Within each
chunk for the same SSA_NAME, the ranges are sorted in a way that
merge_ranges will likely succeed, so it isn't quadratic unless
update_range_test fails (see above).

BTW, the second loop (the one that attempts to optimize
x == 1 || x == 3 into (x & ~2) == 1 etc. is quadratic, which is why
there is
  for (j = i + 1; j < length && j < i + 64; j++)
don't think it is a limit people will often run into and thus I don't
think it is worth adding a --param= for that.

> > +         if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
> > +                            ranges[j].in_p, ranges[j].low, ranges[j].high))
> > +           break;
> 
> And this early out?  I suppose some comment on why together with
> the sorting this is of limited complexity would help.
> 
> > @@ -2447,6 +2864,9 @@ reassociate_bb (basic_block bb)
> >                  optimize_ops_list (rhs_code, &ops);
> >                }
> >
> > +             if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
> 
> I think we want to check that the type is boolean here, so we don't setup
> the machinery needlessly?

It is boolean only in some testcases, the is_bool stuff discussed at the
beginning above was originally just an early return
  if (TREE_CODE (TREE_TYPE (exp)) != BOOLEAN_TYPE)
    return;
before the loop, but it turned out that often the type of the | operands
is integer, with either bool casted to integer, or with the type of EQ_EXPR
etc. being integer instead of bool.

	Jakub
Richard Guenther - Sept. 30, 2011, 12:26 p.m.
On Fri, Sep 30, 2011 at 1:11 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Fri, Sep 30, 2011 at 12:33:07PM +0200, Richard Guenther wrote:
>> > +  low = build_int_cst (TREE_TYPE (exp), 0);
>> > +  high = low;
>> > +  in_p = 0;
>> > +  strict_overflow_p = false;
>> > +  is_bool = TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE;
>>
>> Effective boolean are also TYPE_PRECISION () == 1 types.  Remember
>> we don't preserve conversions from BOOLEAN_TYPE to such types.
>
> I can replace these with TYPE_PRECISION (TREE_TYPE (exp)) == 1;
> checks if you prefer, though maybe it would need to also do
> && TYPE_UNSIGNED (TREE_TYPE (exp)), at least if different operands of
> the | have different inner 1-bit signedness we could not merge them.

The canonical test for boolean-kind types is now

  TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
  || TYPE_PRECISION (TREE_TYPE (exp)) == 1

Ada for example has non-1-precision BOOLEAN_TYPEs.  But, see
very below.

>> > +       CASE_CONVERT:
>> > +         is_bool |= TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE;
>>
>> Likewise.  Though I wonder why !=?  Does it matter whether the extension
>> sign- or zero-extends?
>
> I think the |= is needed, this loop follows through not just casts, but
> then also through the comparisons and again through casts.
> It doesn't matter if the types or casts inside of the comparison argument
> are bool or not (and likely they will not be), yet we don't want to stop
> iterating because of that.
> It wants to reconstruct ranges from what e.g. fold in fold_range_test
> already created before, so stuff like
> (int) (((unsigned) x - 64U) <= 31U)
> for + [64, 95] range.
>
>> > +                 if (integer_onep (range_binop (LT_EXPR, integer_type_node,
>> > +                                                p->low, 0, q->low, 0)))
>>
>> that's just
>>
>> tem = fold_binary (LT_EXPR, boolean_type_node, p->low, q->low);
>> if (tem && integer_onep (tem))
>>   return -1;
>
> Ok, can change that.
>
>> which avoids building the LT_EXPR tree if it doesn't fold.  Similar below.
>> (ISTR some integer_onep () variant that handles a NULL argument ...)
>
> Couldn't find any.  integer_onep needs non-NULL, compare_tree_int too.
>
>> > +  /* Try to merge ranges.  */
>> > +  for (first = i; i < length; i++)
>> > +    {
>> > +      tree low = ranges[i].low;
>> > +      tree high = ranges[i].high;
>> > +      int in_p = ranges[i].in_p;
>> > +      bool strict_overflow_p = ranges[i].strict_overflow_p;
>> > +
>> > +      for (j = i + 1; j < length; j++)
>> > +       {
>>
>> That looks quadratic - do we want to limit this with a --param, simply
>> partitioning the array into quadratic chunks?
>
> This isn't quadratic (except in the hypothetical case
> where all merge_ranges calls would succeed, but then
> build_range_test would fail).  In the likely case where if range merges
> succeed then update_range_test succeeds too this is just linear (plus the
> qsort before that which isn't linear though).
> So perhaps:
> +      if (j > i + 1
> +         && update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
> +                               ops, ranges[i].exp, in_p, low, high,
> +                               strict_overflow_p))
> +       {
> +         i = j - 1;
> +         any_changes = true;
> +       }
> could be
> +      if (j > i + 1)
> +       {
> +         if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
> +                               ops, ranges[i].exp, in_p, low, high,
> +                               strict_overflow_p))
> +           any_changes = true;
> +         i = j - 1;
> +       }
> (then it isn't quadratic), or could try a few times before giving up:
> +      if (j > i + 1)
> +       {
> +         if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
> +                               ops, ranges[i].exp, in_p, low, high,
> +                               strict_overflow_p))
> +           {
> +             any_changes = true;
> +             i = j - 1;
> +           }
> +         else if (update_fail_count == 64)
> +           i = j - 1;
> +         else
> +           update_fail_count = 0;
> +       }
> where int update_fail_count = 0; would be after the
>      bool strict_overflow_p = ranges[i].strict_overflow_p;
> line in the outer loop.
>
>> > +         if (ranges[i].exp != ranges[j].exp)
>> > +           break;
>>
>> Or isn't it too bad because of this check?
>
> The above limits the chunks to a particular SSA_NAME.  Within each
> chunk for the same SSA_NAME, the ranges are sorted in a way that
> merge_ranges will likely succeed, so it isn't quadratic unless
> update_range_test fails (see above).
>
> BTW, the second loop (the one that attempts to optimize
> x == 1 || x == 3 into (x & ~2) == 1 etc. is quadratic, which is why
> there is
>  for (j = i + 1; j < length && j < i + 64; j++)
> don't think it is a limit people will often run into and thus I don't
> think it is worth adding a --param= for that.

Ok.

>> > +         if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
>> > +                            ranges[j].in_p, ranges[j].low, ranges[j].high))
>> > +           break;
>>
>> And this early out?  I suppose some comment on why together with
>> the sorting this is of limited complexity would help.
>>
>> > @@ -2447,6 +2864,9 @@ reassociate_bb (basic_block bb)
>> >                  optimize_ops_list (rhs_code, &ops);
>> >                }
>> >
>> > +             if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
>>
>> I think we want to check that the type is boolean here, so we don't setup
>> the machinery needlessly?
>
> It is boolean only in some testcases, the is_bool stuff discussed at the
> beginning above was originally just an early return
>  if (TREE_CODE (TREE_TYPE (exp)) != BOOLEAN_TYPE)
>    return;
> before the loop, but it turned out that often the type of the | operands
> is integer, with either bool casted to integer, or with the type of EQ_EXPR
> etc. being integer instead of bool.

Really?  The type of EQ_EXPR should be always either BOOLEAN_TYPE
or INTEGRAL_TYPE_P with TYPE_PRECISION == 1.  That's what
the gimple verifier checks.  Or do you mean that fold introduces these
kind of types during range-test simplification?

Richard.

>
>        Jakub
>
Jakub Jelinek - Sept. 30, 2011, 12:44 p.m.
On Fri, Sep 30, 2011 at 02:26:40PM +0200, Richard Guenther wrote:
> > It is boolean only in some testcases, the is_bool stuff discussed at the
> > beginning above was originally just an early return
> >  if (TREE_CODE (TREE_TYPE (exp)) != BOOLEAN_TYPE)
> >    return;
> > before the loop, but it turned out that often the type of the | operands
> > is integer, with either bool casted to integer, or with the type of EQ_EXPR
> > etc. being integer instead of bool.
> 
> Really?  The type of EQ_EXPR should be always either BOOLEAN_TYPE
> or INTEGRAL_TYPE_P with TYPE_PRECISION == 1.  That's what
> the gimple verifier checks.  Or do you mean that fold introduces these
> kind of types during range-test simplification?

Consider:

int
f1 (int a, int b)
{
  int v1 = (a <= 64);
  int v2 = (a == 66);
  int v3 = (a == 67);
  int v4 = (a == 65);
  return b || v1 || v2 || v3 || v4;
}

int
f2 (int a, int b)
{
  int v1 = (a <= 64);
  int v2 = (a == 66);
  int v3 = (a == 67);
  int v4 = (a == 65);
  return b | v1 | v2 | v3 | v4;
}

in *.dse1 f1 is:
  D.2744_2 = a_1(D) <= 64;
  v1_3 = (int) D.2744_2;
  D.2745_4 = a_1(D) == 66;
  v2_5 = (int) D.2745_4;
  D.2746_6 = a_1(D) == 67;
  v3_7 = (int) D.2746_6;
  D.2747_8 = a_1(D) == 65;
  v4_9 = (int) D.2747_8;
  D.2749_11 = b_10(D) | v1_3;
  D.2750_12 = D.2749_11 | v2_5;
  D.2751_13 = D.2750_12 | v3_7;
  D.2752_14 = D.2751_13 | v4_9;
  D.2753_15 = D.2752_14 != 0;
  D.2748_16 = (int) D.2753_15;
  return D.2748_16;
and f2 is:
  D.2735_2 = a_1(D) <= 64;
  v1_3 = (int) D.2735_2;
  D.2736_4 = a_1(D) == 66;
  v2_5 = (int) D.2736_4;
  D.2737_6 = a_1(D) == 67;
  v3_7 = (int) D.2737_6;
  D.2738_8 = a_1(D) == 65;
  v4_9 = (int) D.2738_8;
  D.2740_11 = b_10(D) | v1_3;
  D.2741_12 = D.2740_11 | v2_5;
  D.2742_13 = D.2741_12 | v3_7;
  D.2739_14 = D.2742_13 | v4_9;
  return D.2739_14;
In both cases, the arguments of BIT_IOR_EXPR are ints
and init_range_entry needs to go through the casts to reach the
comparison (on which it figures out that the value is really 0/1,
well, in this case already on the rhs of the cast, as it is _Bool).

	Jakub
Richard Guenther - Sept. 30, 2011, 1:14 p.m.
On Fri, Sep 30, 2011 at 2:44 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Fri, Sep 30, 2011 at 02:26:40PM +0200, Richard Guenther wrote:
>> > It is boolean only in some testcases, the is_bool stuff discussed at the
>> > beginning above was originally just an early return
>> >  if (TREE_CODE (TREE_TYPE (exp)) != BOOLEAN_TYPE)
>> >    return;
>> > before the loop, but it turned out that often the type of the | operands
>> > is integer, with either bool casted to integer, or with the type of EQ_EXPR
>> > etc. being integer instead of bool.
>>
>> Really?  The type of EQ_EXPR should be always either BOOLEAN_TYPE
>> or INTEGRAL_TYPE_P with TYPE_PRECISION == 1.  That's what
>> the gimple verifier checks.  Or do you mean that fold introduces these
>> kind of types during range-test simplification?
>
> Consider:
>
> int
> f1 (int a, int b)
> {
>  int v1 = (a <= 64);
>  int v2 = (a == 66);
>  int v3 = (a == 67);
>  int v4 = (a == 65);
>  return b || v1 || v2 || v3 || v4;
> }
>
> int
> f2 (int a, int b)
> {
>  int v1 = (a <= 64);
>  int v2 = (a == 66);
>  int v3 = (a == 67);
>  int v4 = (a == 65);
>  return b | v1 | v2 | v3 | v4;
> }
>
> in *.dse1 f1 is:
>  D.2744_2 = a_1(D) <= 64;
>  v1_3 = (int) D.2744_2;
>  D.2745_4 = a_1(D) == 66;
>  v2_5 = (int) D.2745_4;
>  D.2746_6 = a_1(D) == 67;
>  v3_7 = (int) D.2746_6;
>  D.2747_8 = a_1(D) == 65;
>  v4_9 = (int) D.2747_8;
>  D.2749_11 = b_10(D) | v1_3;
>  D.2750_12 = D.2749_11 | v2_5;
>  D.2751_13 = D.2750_12 | v3_7;
>  D.2752_14 = D.2751_13 | v4_9;
>  D.2753_15 = D.2752_14 != 0;
>  D.2748_16 = (int) D.2753_15;
>  return D.2748_16;
> and f2 is:
>  D.2735_2 = a_1(D) <= 64;
>  v1_3 = (int) D.2735_2;
>  D.2736_4 = a_1(D) == 66;
>  v2_5 = (int) D.2736_4;
>  D.2737_6 = a_1(D) == 67;
>  v3_7 = (int) D.2737_6;
>  D.2738_8 = a_1(D) == 65;
>  v4_9 = (int) D.2738_8;
>  D.2740_11 = b_10(D) | v1_3;
>  D.2741_12 = D.2740_11 | v2_5;
>  D.2742_13 = D.2741_12 | v3_7;
>  D.2739_14 = D.2742_13 | v4_9;
>  return D.2739_14;
> In both cases, the arguments of BIT_IOR_EXPR are ints
> and init_range_entry needs to go through the casts to reach the
> comparison (on which it figures out that the value is really 0/1,
> well, in this case already on the rhs of the cast, as it is _Bool).

Ah, indeed.  I'll have a look at the updated patch.

Richard.

>        Jakub
>

Patch

--- gcc/fold-const.c.jj	2011-09-05 12:28:53.000000000 +0200
+++ gcc/fold-const.c	2011-09-27 16:39:09.000000000 +0200
@@ -112,12 +112,8 @@  static tree decode_field_reference (loca
 static int all_ones_mask_p (const_tree, int);
 static tree sign_bit_p (tree, const_tree);
 static int simple_operand_p (const_tree);
-static tree range_binop (enum tree_code, tree, tree, int, tree, int);
 static tree range_predecessor (tree);
 static tree range_successor (tree);
-extern tree make_range (tree, int *, tree *, tree *, bool *);
-extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int,
-			  tree, tree);
 static tree fold_range_test (location_t, enum tree_code, tree, tree, tree);
 static tree fold_cond_expr_with_comparison (location_t, tree, tree, tree, tree);
 static tree unextend (tree, int, int, tree);
@@ -3731,7 +3727,7 @@  simple_operand_p (const_tree exp)
    must be specified for a comparison.  ARG1 will be converted to ARG0's
    type if both are specified.  */
 
-static tree
+tree
 range_binop (enum tree_code code, tree type, tree arg0, int upper0_p,
 	     tree arg1, int upper1_p)
 {
@@ -3790,6 +3786,255 @@  range_binop (enum tree_code code, tree t
   return constant_boolean_node (result, type);
 }
 
+/* Helper routine for make_range.  Perform one step for it, return
+   new expression if the loop should continue or NULL_TREE if it should
+   stop.  */
+
+tree
+make_range_step (location_t loc, enum tree_code code, tree arg0, tree arg1,
+		 tree exp_type, tree *p_low, tree *p_high, int *p_in_p,
+		 bool *strict_overflow_p)
+{
+  tree arg0_type = TREE_TYPE (arg0);
+  tree n_low, n_high, low = *p_low, high = *p_high;
+  int in_p = *p_in_p, n_in_p;
+
+  switch (code)
+    {
+    case TRUTH_NOT_EXPR:
+      *p_in_p = ! in_p;
+      return arg0;
+
+    case EQ_EXPR: case NE_EXPR:
+    case LT_EXPR: case LE_EXPR: case GE_EXPR: case GT_EXPR:
+      /* We can only do something if the range is testing for zero
+	 and if the second operand is an integer constant.  Note that
+	 saying something is "in" the range we make is done by
+	 complementing IN_P since it will set in the initial case of
+	 being not equal to zero; "out" is leaving it alone.  */
+      if (low == NULL_TREE || high == NULL_TREE
+	  || ! integer_zerop (low) || ! integer_zerop (high)
+	  || TREE_CODE (arg1) != INTEGER_CST)
+	return NULL_TREE;
+
+      switch (code)
+	{
+	case NE_EXPR:  /* - [c, c]  */
+	  low = high = arg1;
+	  break;
+	case EQ_EXPR:  /* + [c, c]  */
+	  in_p = ! in_p, low = high = arg1;
+	  break;
+	case GT_EXPR:  /* - [-, c] */
+	  low = 0, high = arg1;
+	  break;
+	case GE_EXPR:  /* + [c, -] */
+	  in_p = ! in_p, low = arg1, high = 0;
+	  break;
+	case LT_EXPR:  /* - [c, -] */
+	  low = arg1, high = 0;
+	  break;
+	case LE_EXPR:  /* + [-, c] */
+	  in_p = ! in_p, low = 0, high = arg1;
+	  break;
+	default:
+	  gcc_unreachable ();
+	}
+
+      /* If this is an unsigned comparison, we also know that EXP is
+	 greater than or equal to zero.  We base the range tests we make
+	 on that fact, so we record it here so we can parse existing
+	 range tests.  We test arg0_type since often the return type
+	 of, e.g. EQ_EXPR, is boolean.  */
+      if (TYPE_UNSIGNED (arg0_type) && (low == 0 || high == 0))
+	{
+	  if (! merge_ranges (&n_in_p, &n_low, &n_high,
+			      in_p, low, high, 1,
+			      build_int_cst (arg0_type, 0),
+			      NULL_TREE))
+	    return NULL_TREE;
+
+	  in_p = n_in_p, low = n_low, high = n_high;
+
+	  /* If the high bound is missing, but we have a nonzero low
+	     bound, reverse the range so it goes from zero to the low bound
+	     minus 1.  */
+	  if (high == 0 && low && ! integer_zerop (low))
+	    {
+	      in_p = ! in_p;
+	      high = range_binop (MINUS_EXPR, NULL_TREE, low, 0,
+				  integer_one_node, 0);
+	      low = build_int_cst (arg0_type, 0);
+	    }
+	}
+
+      *p_low = low;
+      *p_high = high;
+      *p_in_p = in_p;
+      return arg0;
+
+    case NEGATE_EXPR:
+      /* (-x) IN [a,b] -> x in [-b, -a]  */
+      n_low = range_binop (MINUS_EXPR, exp_type,
+			   build_int_cst (exp_type, 0),
+			   0, high, 1);
+      n_high = range_binop (MINUS_EXPR, exp_type,
+			    build_int_cst (exp_type, 0),
+			    0, low, 0);
+      if (n_high != 0 && TREE_OVERFLOW (n_high))
+	return NULL_TREE;
+      goto normalize;
+
+    case BIT_NOT_EXPR:
+      /* ~ X -> -X - 1  */
+      return build2_loc (loc, MINUS_EXPR, exp_type, negate_expr (arg0),
+			 build_int_cst (exp_type, 1));
+
+    case PLUS_EXPR:
+    case MINUS_EXPR:
+      if (TREE_CODE (arg1) != INTEGER_CST)
+	return NULL_TREE;
+
+      /* If flag_wrapv and ARG0_TYPE is signed, then we cannot
+	 move a constant to the other side.  */
+      if (!TYPE_UNSIGNED (arg0_type)
+	  && !TYPE_OVERFLOW_UNDEFINED (arg0_type))
+	return NULL_TREE;
+
+      /* If EXP is signed, any overflow in the computation is undefined,
+	 so we don't worry about it so long as our computations on
+	 the bounds don't overflow.  For unsigned, overflow is defined
+	 and this is exactly the right thing.  */
+      n_low = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR,
+			   arg0_type, low, 0, arg1, 0);
+      n_high = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR,
+			    arg0_type, high, 1, arg1, 0);
+      if ((n_low != 0 && TREE_OVERFLOW (n_low))
+	  || (n_high != 0 && TREE_OVERFLOW (n_high)))
+	return NULL_TREE;
+
+      if (TYPE_OVERFLOW_UNDEFINED (arg0_type))
+	*strict_overflow_p = true;
+
+      normalize:
+	/* Check for an unsigned range which has wrapped around the maximum
+	   value thus making n_high < n_low, and normalize it.  */
+	if (n_low && n_high && tree_int_cst_lt (n_high, n_low))
+	  {
+	    low = range_binop (PLUS_EXPR, arg0_type, n_high, 0,
+			       integer_one_node, 0);
+	    high = range_binop (MINUS_EXPR, arg0_type, n_low, 0,
+				integer_one_node, 0);
+
+	    /* If the range is of the form +/- [ x+1, x ], we won't
+	       be able to normalize it.  But then, it represents the
+	       whole range or the empty set, so make it
+	       +/- [ -, - ].  */
+	    if (tree_int_cst_equal (n_low, low)
+		&& tree_int_cst_equal (n_high, high))
+	      low = high = 0;
+	    else
+	      in_p = ! in_p;
+	  }
+	else
+	  low = n_low, high = n_high;
+
+	*p_low = low;
+	*p_high = high;
+	*p_in_p = in_p;
+	return arg0;
+
+    CASE_CONVERT:
+    case NON_LVALUE_EXPR:
+      if (TYPE_PRECISION (arg0_type) > TYPE_PRECISION (exp_type))
+	return NULL_TREE;
+
+      if (! INTEGRAL_TYPE_P (arg0_type)
+	  || (low != 0 && ! int_fits_type_p (low, arg0_type))
+	  || (high != 0 && ! int_fits_type_p (high, arg0_type)))
+	return NULL_TREE;
+
+      n_low = low, n_high = high;
+
+      if (n_low != 0)
+	n_low = fold_convert_loc (loc, arg0_type, n_low);
+
+      if (n_high != 0)
+	n_high = fold_convert_loc (loc, arg0_type, n_high);
+
+      /* If we're converting arg0 from an unsigned type, to exp,
+	 a signed type,  we will be doing the comparison as unsigned.
+	 The tests above have already verified that LOW and HIGH
+	 are both positive.
+
+	 So we have to ensure that we will handle large unsigned
+	 values the same way that the current signed bounds treat
+	 negative values.  */
+
+      if (!TYPE_UNSIGNED (exp_type) && TYPE_UNSIGNED (arg0_type))
+	{
+	  tree high_positive;
+	  tree equiv_type;
+	  /* For fixed-point modes, we need to pass the saturating flag
+	     as the 2nd parameter.  */
+	  if (ALL_FIXED_POINT_MODE_P (TYPE_MODE (arg0_type)))
+	    equiv_type
+	      = lang_hooks.types.type_for_mode (TYPE_MODE (arg0_type),
+						TYPE_SATURATING (arg0_type));
+	  else
+	    equiv_type
+	      = lang_hooks.types.type_for_mode (TYPE_MODE (arg0_type), 1);
+
+	  /* A range without an upper bound is, naturally, unbounded.
+	     Since convert would have cropped a very large value, use
+	     the max value for the destination type.  */
+	  high_positive
+	    = TYPE_MAX_VALUE (equiv_type) ? TYPE_MAX_VALUE (equiv_type)
+	      : TYPE_MAX_VALUE (arg0_type);
+
+	  if (TYPE_PRECISION (exp_type) == TYPE_PRECISION (arg0_type))
+	    high_positive = fold_build2_loc (loc, RSHIFT_EXPR, arg0_type,
+					     fold_convert_loc (loc, arg0_type,
+							       high_positive),
+					     build_int_cst (arg0_type, 1));
+
+	  /* If the low bound is specified, "and" the range with the
+	     range for which the original unsigned value will be
+	     positive.  */
+	  if (low != 0)
+	    {
+	      if (! merge_ranges (&n_in_p, &n_low, &n_high, 1, n_low, n_high,
+				  1, fold_convert_loc (loc, arg0_type,
+						       integer_zero_node),
+				  high_positive))
+		return NULL_TREE;
+
+	      in_p = (n_in_p == in_p);
+	    }
+	  else
+	    {
+	      /* Otherwise, "or" the range with the range of the input
+		 that will be interpreted as negative.  */
+	      if (! merge_ranges (&n_in_p, &n_low, &n_high, 0, n_low, n_high,
+				  1, fold_convert_loc (loc, arg0_type,
+						       integer_zero_node),
+				  high_positive))
+		return NULL_TREE;
+
+	      in_p = (in_p != n_in_p);
+	    }
+	}
+
+      *p_low = n_low;
+      *p_high = n_high;
+      *p_in_p = in_p;
+      return arg0;
+
+    default:
+      return NULL_TREE;
+    }
+}
+
 /* Given EXP, a logical expression, set the range it is testing into
    variables denoted by PIN_P, PLOW, and PHIGH.  Return the expression
    actually being tested.  *PLOW and *PHIGH will be made of the same
@@ -3804,10 +4049,10 @@  make_range (tree exp, int *pin_p, tree *
 	    bool *strict_overflow_p)
 {
   enum tree_code code;
-  tree arg0 = NULL_TREE, arg1 = NULL_TREE;
-  tree exp_type = NULL_TREE, arg0_type = NULL_TREE;
-  int in_p, n_in_p;
-  tree low, high, n_low, n_high;
+  tree arg0, arg1 = NULL_TREE;
+  tree exp_type, nexp;
+  int in_p;
+  tree low, high;
   location_t loc = EXPR_LOCATION (exp);
 
   /* Start with simply saying "EXP != 0" and then look at the code of EXP
@@ -3823,255 +4068,26 @@  make_range (tree exp, int *pin_p, tree *
     {
       code = TREE_CODE (exp);
       exp_type = TREE_TYPE (exp);
+      arg0 = NULL_TREE;
 
       if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
 	{
 	  if (TREE_OPERAND_LENGTH (exp) > 0)
 	    arg0 = TREE_OPERAND (exp, 0);
-	  if (TREE_CODE_CLASS (code) == tcc_comparison
-	      || TREE_CODE_CLASS (code) == tcc_unary
-	      || TREE_CODE_CLASS (code) == tcc_binary)
-	    arg0_type = TREE_TYPE (arg0);
 	  if (TREE_CODE_CLASS (code) == tcc_binary
 	      || TREE_CODE_CLASS (code) == tcc_comparison
 	      || (TREE_CODE_CLASS (code) == tcc_expression
 		  && TREE_OPERAND_LENGTH (exp) > 1))
 	    arg1 = TREE_OPERAND (exp, 1);
 	}
+      if (arg0 == NULL_TREE)
+	break;
 
-      switch (code)
-	{
-	case TRUTH_NOT_EXPR:
-	  in_p = ! in_p, exp = arg0;
-	  continue;
-
-	case EQ_EXPR: case NE_EXPR:
-	case LT_EXPR: case LE_EXPR: case GE_EXPR: case GT_EXPR:
-	  /* We can only do something if the range is testing for zero
-	     and if the second operand is an integer constant.  Note that
-	     saying something is "in" the range we make is done by
-	     complementing IN_P since it will set in the initial case of
-	     being not equal to zero; "out" is leaving it alone.  */
-	  if (low == 0 || high == 0
-	      || ! integer_zerop (low) || ! integer_zerop (high)
-	      || TREE_CODE (arg1) != INTEGER_CST)
-	    break;
-
-	  switch (code)
-	    {
-	    case NE_EXPR:  /* - [c, c]  */
-	      low = high = arg1;
-	      break;
-	    case EQ_EXPR:  /* + [c, c]  */
-	      in_p = ! in_p, low = high = arg1;
-	      break;
-	    case GT_EXPR:  /* - [-, c] */
-	      low = 0, high = arg1;
-	      break;
-	    case GE_EXPR:  /* + [c, -] */
-	      in_p = ! in_p, low = arg1, high = 0;
-	      break;
-	    case LT_EXPR:  /* - [c, -] */
-	      low = arg1, high = 0;
-	      break;
-	    case LE_EXPR:  /* + [-, c] */
-	      in_p = ! in_p, low = 0, high = arg1;
-	      break;
-	    default:
-	      gcc_unreachable ();
-	    }
-
-	  /* If this is an unsigned comparison, we also know that EXP is
-	     greater than or equal to zero.  We base the range tests we make
-	     on that fact, so we record it here so we can parse existing
-	     range tests.  We test arg0_type since often the return type
-	     of, e.g. EQ_EXPR, is boolean.  */
-	  if (TYPE_UNSIGNED (arg0_type) && (low == 0 || high == 0))
-	    {
-	      if (! merge_ranges (&n_in_p, &n_low, &n_high,
-				  in_p, low, high, 1,
-				  build_int_cst (arg0_type, 0),
-				  NULL_TREE))
-		break;
-
-	      in_p = n_in_p, low = n_low, high = n_high;
-
-	      /* If the high bound is missing, but we have a nonzero low
-		 bound, reverse the range so it goes from zero to the low bound
-		 minus 1.  */
-	      if (high == 0 && low && ! integer_zerop (low))
-		{
-		  in_p = ! in_p;
-		  high = range_binop (MINUS_EXPR, NULL_TREE, low, 0,
-				      integer_one_node, 0);
-		  low = build_int_cst (arg0_type, 0);
-		}
-	    }
-
-	  exp = arg0;
-	  continue;
-
-	case NEGATE_EXPR:
-	  /* (-x) IN [a,b] -> x in [-b, -a]  */
-	  n_low = range_binop (MINUS_EXPR, exp_type,
-			       build_int_cst (exp_type, 0),
-			       0, high, 1);
-	  n_high = range_binop (MINUS_EXPR, exp_type,
-				build_int_cst (exp_type, 0),
-				0, low, 0);
-	  if (n_high != 0 && TREE_OVERFLOW (n_high))
-	    break;
-	  goto normalize;
-
-	case BIT_NOT_EXPR:
-	  /* ~ X -> -X - 1  */
-	  exp = build2_loc (loc, MINUS_EXPR, exp_type, negate_expr (arg0),
-			    build_int_cst (exp_type, 1));
-	  continue;
-
-	case PLUS_EXPR:  case MINUS_EXPR:
-	  if (TREE_CODE (arg1) != INTEGER_CST)
-	    break;
-
-	  /* If flag_wrapv and ARG0_TYPE is signed, then we cannot
-	     move a constant to the other side.  */
-	  if (!TYPE_UNSIGNED (arg0_type)
-	      && !TYPE_OVERFLOW_UNDEFINED (arg0_type))
-	    break;
-
-	  /* If EXP is signed, any overflow in the computation is undefined,
-	     so we don't worry about it so long as our computations on
-	     the bounds don't overflow.  For unsigned, overflow is defined
-	     and this is exactly the right thing.  */
-	  n_low = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR,
-			       arg0_type, low, 0, arg1, 0);
-	  n_high = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR,
-				arg0_type, high, 1, arg1, 0);
-	  if ((n_low != 0 && TREE_OVERFLOW (n_low))
-	      || (n_high != 0 && TREE_OVERFLOW (n_high)))
-	    break;
-
-	  if (TYPE_OVERFLOW_UNDEFINED (arg0_type))
-	    *strict_overflow_p = true;
-
-	normalize:
-	  /* Check for an unsigned range which has wrapped around the maximum
-	     value thus making n_high < n_low, and normalize it.  */
-	  if (n_low && n_high && tree_int_cst_lt (n_high, n_low))
-	    {
-	      low = range_binop (PLUS_EXPR, arg0_type, n_high, 0,
-				 integer_one_node, 0);
-	      high = range_binop (MINUS_EXPR, arg0_type, n_low, 0,
-				  integer_one_node, 0);
-
-	      /* If the range is of the form +/- [ x+1, x ], we won't
-		 be able to normalize it.  But then, it represents the
-		 whole range or the empty set, so make it
-		 +/- [ -, - ].  */
-	      if (tree_int_cst_equal (n_low, low)
-		  && tree_int_cst_equal (n_high, high))
-		low = high = 0;
-	      else
-		in_p = ! in_p;
-	    }
-	  else
-	    low = n_low, high = n_high;
-
-	  exp = arg0;
-	  continue;
-
-	CASE_CONVERT: case NON_LVALUE_EXPR:
-	  if (TYPE_PRECISION (arg0_type) > TYPE_PRECISION (exp_type))
-	    break;
-
-	  if (! INTEGRAL_TYPE_P (arg0_type)
-	      || (low != 0 && ! int_fits_type_p (low, arg0_type))
-	      || (high != 0 && ! int_fits_type_p (high, arg0_type)))
-	    break;
-
-	  n_low = low, n_high = high;
-
-	  if (n_low != 0)
-	    n_low = fold_convert_loc (loc, arg0_type, n_low);
-
-	  if (n_high != 0)
-	    n_high = fold_convert_loc (loc, arg0_type, n_high);
-
-
-	  /* If we're converting arg0 from an unsigned type, to exp,
-	     a signed type,  we will be doing the comparison as unsigned.
-	     The tests above have already verified that LOW and HIGH
-	     are both positive.
-
-	     So we have to ensure that we will handle large unsigned
-	     values the same way that the current signed bounds treat
-	     negative values.  */
-
-	  if (!TYPE_UNSIGNED (exp_type) && TYPE_UNSIGNED (arg0_type))
-	    {
-	      tree high_positive;
-	      tree equiv_type;
-	      /* For fixed-point modes, we need to pass the saturating flag
-		 as the 2nd parameter.  */
-	      if (ALL_FIXED_POINT_MODE_P (TYPE_MODE (arg0_type)))
-		equiv_type = lang_hooks.types.type_for_mode
-			     (TYPE_MODE (arg0_type),
-			      TYPE_SATURATING (arg0_type));
-	      else
-		equiv_type = lang_hooks.types.type_for_mode
-			     (TYPE_MODE (arg0_type), 1);
-
-	      /* A range without an upper bound is, naturally, unbounded.
-		 Since convert would have cropped a very large value, use
-		 the max value for the destination type.  */
-	      high_positive
-		= TYPE_MAX_VALUE (equiv_type) ? TYPE_MAX_VALUE (equiv_type)
-		: TYPE_MAX_VALUE (arg0_type);
-
-	      if (TYPE_PRECISION (exp_type) == TYPE_PRECISION (arg0_type))
-		high_positive = fold_build2_loc (loc, RSHIFT_EXPR, arg0_type,
-					     fold_convert_loc (loc, arg0_type,
-							       high_positive),
-					     build_int_cst (arg0_type, 1));
-
-	      /* If the low bound is specified, "and" the range with the
-		 range for which the original unsigned value will be
-		 positive.  */
-	      if (low != 0)
-		{
-		  if (! merge_ranges (&n_in_p, &n_low, &n_high,
-				      1, n_low, n_high, 1,
-				      fold_convert_loc (loc, arg0_type,
-							integer_zero_node),
-				      high_positive))
-		    break;
-
-		  in_p = (n_in_p == in_p);
-		}
-	      else
-		{
-		  /* Otherwise, "or" the range with the range of the input
-		     that will be interpreted as negative.  */
-		  if (! merge_ranges (&n_in_p, &n_low, &n_high,
-				      0, n_low, n_high, 1,
-				      fold_convert_loc (loc, arg0_type,
-							integer_zero_node),
-				      high_positive))
-		    break;
-
-		  in_p = (in_p != n_in_p);
-		}
-	    }
-
-	  exp = arg0;
-	  low = n_low, high = n_high;
-	  continue;
-
-	default:
-	  break;
-	}
-
-      break;
+      nexp = make_range_step (loc, code, arg0, arg1, exp_type, &low,
+			      &high, &in_p, strict_overflow_p);
+      if (nexp == NULL_TREE)
+	break;
+      exp = nexp;
     }
 
   /* If EXP is a constant, we can evaluate whether this is true or false.  */
--- gcc/tree.h.jj	2011-09-20 21:43:07.000000000 +0200
+++ gcc/tree.h	2011-09-27 16:38:58.000000000 +0200
@@ -5384,7 +5384,10 @@  extern unsigned int get_pointer_alignmen
 extern tree fold_call_stmt (gimple, bool);
 extern tree gimple_fold_builtin_snprintf_chk (gimple, tree, enum built_in_function);
 extern tree make_range (tree, int *, tree *, tree *, bool *);
+extern tree make_range_step (location_t, enum tree_code, tree, tree, tree,
+			     tree *, tree *, int *, bool *);
 extern tree build_range_check (location_t, tree, tree, int, tree, tree);
+extern tree range_binop (enum tree_code, tree, tree, int, tree, int);
 extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int,
 			  tree, tree);
 extern void set_builtin_user_assembler_name (tree decl, const char *asmspec);
--- gcc/Makefile.in.jj	2011-09-18 21:20:04.000000000 +0200
+++ gcc/Makefile.in	2011-09-29 14:09:42.000000000 +0200
@@ -2641,7 +2641,7 @@  tree-ssa-reassoc.o : tree-ssa-reassoc.c 
    $(TM_H) coretypes.h $(TREE_DUMP_H) $(TREE_PASS_H) $(FLAGS_H) \
    tree-iterator.h $(BASIC_BLOCK_H) $(GIMPLE_H) $(TREE_INLINE_H) \
    $(VEC_H) langhooks.h alloc-pool.h pointer-set.h $(CFGLOOP_H) \
-   tree-pretty-print.h gimple-pretty-print.h
+   tree-pretty-print.h gimple-pretty-print.h $(DIAGNOSTIC_CORE_H)
 tree-optimize.o : tree-optimize.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
    $(TREE_H) $(TM_P_H) $(GGC_H) output.h \
    $(DIAGNOSTIC_H) $(BASIC_BLOCK_H) $(FLAGS_H) $(TIMEVAR_H) $(TM_H) \
--- gcc/tree-ssa-reassoc.c.jj	2011-09-08 11:21:10.000000000 +0200
+++ gcc/tree-ssa-reassoc.c	2011-09-29 13:33:00.000000000 +0200
@@ -42,6 +42,7 @@  along with GCC; see the file COPYING3.  
 #include "flags.h"
 #include "target.h"
 #include "params.h"
+#include "diagnostic-core.h"
 
 /*  This is a simple global reassociation pass.  It is, in part, based
     on the LLVM pass of the same name (They do some things more/less
@@ -1568,6 +1569,422 @@  optimize_ops_list (enum tree_code opcode
     optimize_ops_list (opcode, ops);
 }
 
+/* The following functions are subroutines to optimize_range_tests and allow
+   it to try to change a logical combination of comparisons into a range
+   test.
+
+   For example, both
+	X == 2 || X == 5 || X == 3 || X == 4
+   and
+	X >= 2 && X <= 5
+   are converted to
+	(unsigned) (X - 2) <= 3
+
+   For more information see comments above fold_test_range in fold-const.c,
+   this implementation is for GIMPLE.  */
+
+struct range_entry
+{
+  tree exp;
+  tree low;
+  tree high;
+  bool in_p;
+  bool strict_overflow_p;
+  unsigned int idx, next;
+};
+
+/* This is similar to make_range in fold-const.c, but on top of
+   GIMPLE instead of trees.  */
+
+static void
+init_range_entry (struct range_entry *r, tree exp)
+{
+  int in_p;
+  tree low, high;
+  bool is_bool, strict_overflow_p;
+
+  r->exp = NULL_TREE;
+  r->in_p = false;
+  r->strict_overflow_p = false;
+  r->low = NULL_TREE;
+  r->high = NULL_TREE;
+  if (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))
+    return;
+
+  /* Start with simply saying "EXP != 0" and then look at the code of EXP
+     and see if we can refine the range.  Some of the cases below may not
+     happen, but it doesn't seem worth worrying about this.  We "continue"
+     the outer loop when we've changed something; otherwise we "break"
+     the switch, which will "break" the while.  */
+  low = build_int_cst (TREE_TYPE (exp), 0);
+  high = low;
+  in_p = 0;
+  strict_overflow_p = false;
+  is_bool = TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE;
+
+  while (1)
+    {
+      gimple stmt;
+      enum tree_code code;
+      tree arg0, arg1, exp_type;
+      tree nexp;
+      location_t loc;
+
+      if (TREE_CODE (exp) != SSA_NAME)
+	break;
+
+      stmt = SSA_NAME_DEF_STMT (exp);
+      if (!is_gimple_assign (stmt))
+	break;
+
+      code = gimple_assign_rhs_code (stmt);
+      arg0 = gimple_assign_rhs1 (stmt);
+      arg1 = gimple_assign_rhs2 (stmt);
+      exp_type = TREE_TYPE (exp);
+      loc = gimple_location (stmt);
+      switch (code)
+	{
+	case BIT_NOT_EXPR:
+	  if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
+	    {
+	      in_p = !in_p;
+	      exp = arg0;
+	      continue;
+	    }
+	  break;
+	case SSA_NAME:
+	  exp = arg0;
+	  continue;
+	CASE_CONVERT:
+	  is_bool |= TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE;
+	  goto do_default;
+	case EQ_EXPR:
+	case NE_EXPR:
+	case LT_EXPR:
+	case LE_EXPR:
+	case GE_EXPR:
+	case GT_EXPR:
+	  is_bool = true;
+	  /* FALLTHRU */
+	do_default:
+	default:
+	  if (!is_bool)
+	    return;
+	  nexp = make_range_step (loc, code, arg0, arg1, exp_type,
+				  &low, &high, &in_p,
+				  &strict_overflow_p);
+	  if (nexp != NULL_TREE)
+	    {
+	      exp = nexp;
+	      gcc_assert (TREE_CODE (exp) == SSA_NAME);
+	      continue;
+	    }
+	  break;
+	}
+      break;
+    }
+  if (is_bool)
+    {
+      r->exp = exp;
+      r->in_p = in_p;
+      r->low = low;
+      r->high = high;
+      r->strict_overflow_p = strict_overflow_p;
+    }
+}
+
+/* Comparison function for qsort.  Sort entries
+   without SSA_NAME exp first, then with SSA_NAMEs sorted
+   by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
+   by increasing ->low and if ->low is the same, by increasing
+   ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
+   maximum.  */
+
+static int
+range_entry_cmp (const void *a, const void *b)
+{
+  const struct range_entry *p = (const struct range_entry *) a;
+  const struct range_entry *q = (const struct range_entry *) b;
+
+  if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
+    {
+      if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
+	{
+	  /* Group range_entries for the same SSA_NAME together.  */
+	  if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
+	    return -1;
+	  else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
+	    return 1;
+	  /* If ->low is different, NULL low goes first, then by
+	     ascending low.  */
+	  if (p->low != NULL_TREE)
+	    {
+	      if (q->low != NULL_TREE)
+		{
+		  if (integer_onep (range_binop (LT_EXPR, integer_type_node,
+						 p->low, 0, q->low, 0)))
+		    return -1;
+		  if (integer_onep (range_binop (GT_EXPR, integer_type_node,
+						 p->low, 0, q->low, 0)))
+		    return 1;
+		}
+	      else
+		return 1;
+	    }
+	  else if (q->low != NULL_TREE)
+	    return -1;
+	  /* If ->high is different, NULL high goes last, before that by
+	     ascending high.  */
+	  if (p->high != NULL_TREE)
+	    {
+	      if (q->high != NULL_TREE)
+		{
+		  if (integer_onep (range_binop (LT_EXPR, integer_type_node,
+						 p->high, 0, q->high, 0)))
+		    return -1;
+		  if (integer_onep (range_binop (GT_EXPR, integer_type_node,
+						 p->high, 0, q->high, 0)))
+		    return 1;
+		}
+	      else
+		return -1;
+	    }
+	  else if (p->high != NULL_TREE)
+	    return 1;
+	  /* If both ranges are the same, sort below by ascending idx.  */
+	}
+      else
+	return 1;
+    }
+  else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
+    return -1;
+
+  if (p->idx < q->idx)
+    return -1;
+  else
+    {
+      gcc_checking_assert (p->idx > q->idx);
+      return 1;
+    }
+}
+
+/* Helper routine of optimize_range_test.
+   [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
+   RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
+   OPCODE and OPS are arguments of optimize_range_tests.  Return
+   true if the range merge has been successful.  */
+
+static bool
+update_range_test (struct range_entry *range, struct range_entry *otherrange,
+		   unsigned int count, enum tree_code opcode,
+		   VEC (operand_entry_t, heap) **ops, tree exp, bool in_p,
+		   tree low, tree high, bool strict_overflow_p)
+{
+  tree op = VEC_index (operand_entry_t, *ops, range->idx)->op;
+  location_t loc = gimple_location (SSA_NAME_DEF_STMT (op));
+  tree tem = build_range_check (loc, TREE_TYPE (op), exp, in_p, low, high);
+  enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
+  gimple_stmt_iterator gsi;
+
+  if (tem == NULL_TREE)
+    return false;
+
+  if (strict_overflow_p && issue_strict_overflow_warning (wc))
+    warning_at (loc, OPT_Wstrict_overflow,
+		"assuming signed overflow does not occur "
+		"when simplifying range test");
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      struct range_entry *r;
+      fprintf (dump_file, "Optimizing range tests ");
+      print_generic_expr (dump_file, range->exp, 0);
+      fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
+      print_generic_expr (dump_file, range->low, 0);
+      fprintf (dump_file, ", ");
+      print_generic_expr (dump_file, range->high, 0);
+      fprintf (dump_file, "]");
+      for (r = otherrange; r < otherrange + count; r++)
+	{
+	  fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
+	  print_generic_expr (dump_file, r->low, 0);
+	  fprintf (dump_file, ", ");
+	  print_generic_expr (dump_file, r->high, 0);
+	  fprintf (dump_file, "]");
+	}
+      fprintf (dump_file, "\n into ");
+      print_generic_expr (dump_file, tem, 0);
+      fprintf (dump_file, "\n");
+    }
+
+  if (opcode == BIT_IOR_EXPR)
+    tem = invert_truthvalue_loc (loc, tem);
+
+  tem = fold_convert_loc (loc, TREE_TYPE (op), tem);
+  gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (op));
+  tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
+				  GSI_SAME_STMT);
+
+  VEC_index (operand_entry_t, *ops, range->idx)->op = tem;
+  range->exp = exp;
+  range->low = low;
+  range->high = high;
+  range->in_p = in_p;
+  range->strict_overflow_p = false;
+
+  for (range = otherrange; range < otherrange + count; range++)
+    {
+      VEC_index (operand_entry_t, *ops, range->idx)->op = error_mark_node;
+      range->exp = NULL_TREE;
+    }
+  return true;
+}
+
+/* Optimize range tests, similarly how fold_range_test optimizes
+   it on trees.  The tree code for the binary
+   operation between all the operands is OPCODE.  */
+
+static void
+optimize_range_tests (enum tree_code opcode,
+		      VEC (operand_entry_t, heap) **ops)
+{
+  unsigned int length = VEC_length (operand_entry_t, *ops), i, j, first;
+  operand_entry_t oe;
+  struct range_entry *ranges;
+  bool any_changes = false;
+
+  if (length == 1)
+    return;
+
+  ranges = XNEWVEC (struct range_entry, length);
+  for (i = 0; i < length; i++)
+    {
+      ranges[i].idx = i;
+      init_range_entry (ranges + i, VEC_index (operand_entry_t, *ops, i)->op);
+      /* For | invert it now, we will invert it again before emitting
+	 the optimized expression.  */
+      if (opcode == BIT_IOR_EXPR)
+	ranges[i].in_p = !ranges[i].in_p;
+    }
+
+  qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
+  for (i = 0; i < length; i++)
+    if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
+      break;
+
+  /* Try to merge ranges.  */
+  for (first = i; i < length; i++)
+    {
+      tree low = ranges[i].low;
+      tree high = ranges[i].high;
+      int in_p = ranges[i].in_p;
+      bool strict_overflow_p = ranges[i].strict_overflow_p;
+
+      for (j = i + 1; j < length; j++)
+	{
+	  if (ranges[i].exp != ranges[j].exp)
+	    break;
+	  if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
+			     ranges[j].in_p, ranges[j].low, ranges[j].high))
+	    break;
+	  strict_overflow_p |= ranges[j].strict_overflow_p;
+	}
+
+      if (j > i + 1
+	  && update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
+				ops, ranges[i].exp, in_p, low, high,
+				strict_overflow_p))
+	{
+	  i = j - 1;
+	  any_changes = true;
+	}
+    }
+
+  /* Optimize X == CST1 || X == CST2
+     if popcount (CST1 ^ CST2) == 1 into
+     (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
+     Similarly for ranges.  E.g.
+     X != 2 && X != 3 && X != 10 && X != 11
+     will be transformed by the above loop into
+     (X - 2U) <= 1U && (X - 10U) <= 1U
+     and this loop can transform that into
+     ((X & ~8) - 2U) <= 1U.  */
+  for (i = first; i < length; i++)
+    {
+      tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
+
+      if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
+	continue;
+      type = TREE_TYPE (ranges[i].exp);
+      if (!INTEGRAL_TYPE_P (type))
+	continue;
+      lowi = ranges[i].low;
+      if (lowi == NULL_TREE)
+	lowi = TYPE_MIN_VALUE (type);
+      highi = ranges[i].high;
+      if (highi == NULL_TREE)
+	continue;
+      for (j = i + 1; j < length && j < i + 64; j++)
+	{
+	  if (ranges[j].exp == NULL_TREE)
+	    continue;
+	  if (ranges[i].exp != ranges[j].exp)
+	    break;
+	  if (ranges[j].in_p)
+	    continue;
+	  lowj = ranges[j].low;
+	  if (lowj == NULL_TREE)
+	    continue;
+	  highj = ranges[j].high;
+	  if (highj == NULL_TREE)
+	    highj = TYPE_MAX_VALUE (type);
+	  if (!integer_onep (range_binop (GT_EXPR, integer_type_node,
+					  lowj, 0, highi, 0)))
+	    continue;
+	  lowxor = range_binop (BIT_XOR_EXPR, NULL_TREE, lowi, 0, lowj, 0);
+	  if (lowxor == NULL_TREE)
+	    continue;
+	  gcc_checking_assert (!integer_zerop (lowxor));
+	  tem = range_binop (MINUS_EXPR, NULL_TREE, lowxor, 0,
+			     build_int_cst (type, 1), 0);
+	  tem = range_binop (BIT_AND_EXPR, NULL_TREE, lowxor, 0, tem, 0);
+	  if (!integer_zerop (tem))
+	    continue;
+	  highxor = range_binop (BIT_XOR_EXPR, NULL_TREE, highi, 0, highj, 0);
+	  if (!tree_int_cst_equal (lowxor, highxor))
+	    continue;
+	  tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
+	  exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
+	  lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
+	  highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
+	  if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
+				 ranges[i].in_p, lowj, highj,
+				 ranges[i].strict_overflow_p
+				 || ranges[j].strict_overflow_p))
+	    {
+	      any_changes = true;
+	      break;
+	    }
+	}
+    }
+
+  if (any_changes)
+    {
+      j = 0;
+      FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe)
+	{
+	  if (oe->op == error_mark_node)
+	    continue;
+	  else if (i != j)
+	    VEC_replace (operand_entry_t, *ops, j, oe);
+	  j++;
+	}
+      VEC_truncate (operand_entry_t, *ops, j);
+    }
+
+  XDELETEVEC (ranges);
+}
+
 /* Return true if OPERAND is defined by a PHI node which uses the LHS
    of STMT in it's operands.  This is also known as a "destructive
    update" operation.  */
@@ -2447,6 +2864,9 @@  reassociate_bb (basic_block bb)
 		  optimize_ops_list (rhs_code, &ops);
 		}
 
+	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
+		optimize_range_tests (rhs_code, &ops);
+
 	      if (VEC_length (operand_entry_t, ops) == 1)
 		{
 		  if (dump_file && (dump_flags & TDF_DETAILS))
--- gcc/testsuite/gcc.dg/pr46309.c.jj	2011-09-29 14:03:28.000000000 +0200
+++ gcc/testsuite/gcc.dg/pr46309.c	2011-09-29 14:08:32.000000000 +0200
@@ -0,0 +1,66 @@ 
+/* PR tree-optimization/46309 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-reassoc-details" } */
+
+int
+f1 (int a)
+{
+  int v1 = (a == 3);
+  int v2 = (a == 1);
+  int v3 = (a == 4);
+  int v4 = (a == 2);
+  return v1 || v2 || v3 || v4;
+}
+
+int
+f2 (int a)
+{
+  int v1 = (a == 1);
+  int v2 = (a == 2);
+  int v3 = (a == 3);
+  int v4 = (a == 4);
+  return v1 || v2 || v3 || v4;
+}
+
+int
+f3 (int a)
+{
+  int v1 = (a == 3);
+  int v2 = (a == 1);
+  return v1 || v2;
+}
+
+int
+f4 (int a)
+{
+  int v1 = (a == 1);
+  int v2 = (a == 2);
+  return v1 || v2;
+}
+
+int
+f5 (unsigned int a)
+{
+  int v1 = (a <= 31);
+  int v2 = (a >= 64 && a <= 95);
+  return v1 || v2;
+}
+
+int
+f6 (unsigned int a)
+{
+  int v1 = (a <= 31);
+  int v2 = (a >= 64 && a <= 95);
+  int v3 = (a >= 128 && a <= 159);
+  int v4 = (a >= 192 && a <= 223);
+  return v1 || v2 || v3 || v4;
+}
+
+/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.1, 1. and -.2, 2. and -.3, 3. and -.4, 4.\[\n\r\]* into" 2 "reassoc1" } } */
+/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.1, 1. and -.3, 3.\[\n\r\]* into" 1 "reassoc1" } } */
+/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.1, 1. and -.2, 2.\[\n\r\]* into" 1 "reassoc1" } } */
+/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.0, 31. and -.64, 95.\[\n\r\]* into" 2 "reassoc1" } } */
+/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.128, 159. and -.192, 223.\[\n\r\]* into" 1 "reassoc1" } } */
+/* { dg-final { scan-tree-dump-times "Optimizing range tests D.\[0-9\]*_\[0-9\]* -.0, 31. and -.128, 159.\[\n\r\]* into" 1 "reassoc2" } } */
+/* { dg-final { cleanup-tree-dump "reassoc1" } } */
+/* { dg-final { cleanup-tree-dump "reassoc2" } } */