diff mbox

tree-switch-conversion fixes (PR tree-optimization/45830)

Message ID 20101119193159.GJ29412@tyan-ft48-01.lab.bos.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Nov. 19, 2010, 7:31 p.m. UTC
Hi!

This patch fixes a bunch of issues in switchconv pass:
1) if the switch would be expanded using bit tests, switch conversion
   optimization actually produces worse code (especially for -Os, but
   for other levels as well)
2) the pass can sometimes create completely unnecessarily huge
   tables when the values would fit in much smaller integral data type
   - for -Os I guess it is a win as soon as the table has two or more
   entries, for -O2 I think the cut-off can be larger, because the sign or
   zero extension can eat a cycle or two, on the other side
   if it makes the .rodata table 2, 4 or 8 times smaller, it decreases
   D-cache footprint
3) create_temp_arrays/free_temp_arrays wasn't using libiberty macros
   and even made a mistake in the types (fortunately usually all pointers
   have the same size), plus it is IMHO wasteful to do 4 allocations
   when just 2 (or, if we ignored the types, just 1) is enough

On the included testcases on x86_64 the changes are:
   text    data     bss     dec     hex filename
    181       0       0     181      b5 before/gcc.target/i386/pr45830.o
     83       0       0      83      53 after/gcc.target/i386/pr45830.o
   3229     160       0    3389     d3d before/gcc.c-torture/execute/pr45830.o
   1233     160       0    1393     571 after/gcc.c-torture/execute/pr45830.o

before/gcc.target/i386/pr45830.o
  [ 1] .text             PROGBITS        0000000000000000 000040 000015 00  AX  0   0 16
  [ 5] .rodata           PROGBITS        0000000000000000 000060 000070 00   A  0   0 32
after/gcc.target/i386/pr45830.o
  [ 1] .text             PROGBITS        0000000000000000 000040 000023 00  AX  0   0 16
before/gcc.c-torture/execute/pr45830.o
  [ 1] .text             PROGBITS        0000000000000000 000040 0001a5 00  AX  0   0 16
  [ 5] .rodata           PROGBITS        0000000000000000 0002a0 000a98 00   A  0   0 32
after/gcc.c-torture/execute/pr45830.o
  [ 1] .text             PROGBITS        0000000000000000 000040 0001a5 00  AX  0   0 16
  [ 5] .rodata           PROGBITS        0000000000000000 0002a0 0002cc 00   A  0   0 32
As you can see, on the second testcase .text size is even identical and
.rodata is 3.8 times smaller, on the first testcase .text size grew by 14 bytes,
but 112 bytes disappeared from .rodata section (but the code no longer needs to wait
for the memory read).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2010-11-19  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/45830
	* stmt.c (expand_switch_using_bit_tests_p): New function.
	(expand_case): Use it.
	* tree.h (expand_switch_using_bit_tests_p): New prototype.
	* tree-switch-conversion.c (struct switch_conv_info): Add
	bit_test_uniq, bit_test_count and bit_test_bb fields.
	(check_range): Fix a comment.
	(check_process_case): Compute bit_test_uniq and bit_test_count.
	(create_temp_arrays): Use XCNEWVEC, merge 3 arrays into one
	allocation.
	(free_temp_arrays): Use XDELETEVEC, adjust for the 3 arrays merging.
	(constructor_contains_same_values_p): Use FOR_EACH_VEC_ELT.
	(array_value_type): New function.
	(build_one_array): Use it, if it returned different type,
	fold_convert all constructor fields and convert back to the
	wider type in the generated code.
	(process_switch): Initialize bit_test_uniq, bit_test_count and
	bit_test_bb fields.  Don't optimize if expand_switch_using_bit_tests_p
	returned true.

	* gcc.target/i386/pr45830.c: New test.
	* gcc.c-torture/execute/pr45830.c: New test.
	

	Jakub

Comments

Richard Biener Nov. 19, 2010, 10:43 p.m. UTC | #1
On Fri, 19 Nov 2010, Jakub Jelinek wrote:

> Hi!
> 
> This patch fixes a bunch of issues in switchconv pass:
> 1) if the switch would be expanded using bit tests, switch conversion
>    optimization actually produces worse code (especially for -Os, but
>    for other levels as well)
> 2) the pass can sometimes create completely unnecessarily huge
>    tables when the values would fit in much smaller integral data type
>    - for -Os I guess it is a win as soon as the table has two or more
>    entries, for -O2 I think the cut-off can be larger, because the sign or
>    zero extension can eat a cycle or two, on the other side
>    if it makes the .rodata table 2, 4 or 8 times smaller, it decreases
>    D-cache footprint
> 3) create_temp_arrays/free_temp_arrays wasn't using libiberty macros
>    and even made a mistake in the types (fortunately usually all pointers
>    have the same size), plus it is IMHO wasteful to do 4 allocations
>    when just 2 (or, if we ignored the types, just 1) is enough
> 
> On the included testcases on x86_64 the changes are:
>    text    data     bss     dec     hex filename
>     181       0       0     181      b5 before/gcc.target/i386/pr45830.o
>      83       0       0      83      53 after/gcc.target/i386/pr45830.o
>    3229     160       0    3389     d3d before/gcc.c-torture/execute/pr45830.o
>    1233     160       0    1393     571 after/gcc.c-torture/execute/pr45830.o
> 
> before/gcc.target/i386/pr45830.o
>   [ 1] .text             PROGBITS        0000000000000000 000040 000015 00  AX  0   0 16
>   [ 5] .rodata           PROGBITS        0000000000000000 000060 000070 00   A  0   0 32
> after/gcc.target/i386/pr45830.o
>   [ 1] .text             PROGBITS        0000000000000000 000040 000023 00  AX  0   0 16
> before/gcc.c-torture/execute/pr45830.o
>   [ 1] .text             PROGBITS        0000000000000000 000040 0001a5 00  AX  0   0 16
>   [ 5] .rodata           PROGBITS        0000000000000000 0002a0 000a98 00   A  0   0 32
> after/gcc.c-torture/execute/pr45830.o
>   [ 1] .text             PROGBITS        0000000000000000 000040 0001a5 00  AX  0   0 16
>   [ 5] .rodata           PROGBITS        0000000000000000 0002a0 0002cc 00   A  0   0 32
> As you can see, on the second testcase .text size is even identical and
> .rodata is 3.8 times smaller, on the first testcase .text size grew by 14 bytes,
> but 112 bytes disappeared from .rodata section (but the code no longer needs to wait
> for the memory read).
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Thanks,
Richard.

> 2010-11-19  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/45830
> 	* stmt.c (expand_switch_using_bit_tests_p): New function.
> 	(expand_case): Use it.
> 	* tree.h (expand_switch_using_bit_tests_p): New prototype.
> 	* tree-switch-conversion.c (struct switch_conv_info): Add
> 	bit_test_uniq, bit_test_count and bit_test_bb fields.
> 	(check_range): Fix a comment.
> 	(check_process_case): Compute bit_test_uniq and bit_test_count.
> 	(create_temp_arrays): Use XCNEWVEC, merge 3 arrays into one
> 	allocation.
> 	(free_temp_arrays): Use XDELETEVEC, adjust for the 3 arrays merging.
> 	(constructor_contains_same_values_p): Use FOR_EACH_VEC_ELT.
> 	(array_value_type): New function.
> 	(build_one_array): Use it, if it returned different type,
> 	fold_convert all constructor fields and convert back to the
> 	wider type in the generated code.
> 	(process_switch): Initialize bit_test_uniq, bit_test_count and
> 	bit_test_bb fields.  Don't optimize if expand_switch_using_bit_tests_p
> 	returned true.
> 
> 	* gcc.target/i386/pr45830.c: New test.
> 	* gcc.c-torture/execute/pr45830.c: New test.
> 	
> --- gcc/stmt.c.jj	2010-11-15 18:53:41.000000000 +0100
> +++ gcc/stmt.c	2010-11-19 13:26:50.457354826 +0100
> @@ -2250,6 +2250,25 @@ emit_case_bit_tests (tree index_type, tr
>  #define HAVE_tablejump 0
>  #endif
>  
> +/* Return true if a switch should be expanded as a bit test.
> +   INDEX_EXPR is the index expression, RANGE is the difference between
> +   highest and lowest case, UNIQ is number of unique case node targets
> +   not counting the default case and COUNT is the number of comparisons
> +   needed, not counting the default case.  */
> +bool
> +expand_switch_using_bit_tests_p (tree index_expr, tree range,
> +				 unsigned int uniq, unsigned int count)
> +{
> +  return (CASE_USE_BIT_TESTS
> +	  && ! TREE_CONSTANT (index_expr)
> +	  && compare_tree_int (range, GET_MODE_BITSIZE (word_mode)) < 0
> +	  && compare_tree_int (range, 0) > 0
> +	  && lshift_cheap_p ()
> +	  && ((uniq == 1 && count >= 3)
> +	      || (uniq == 2 && count >= 5)
> +	      || (uniq == 3 && count >= 6)));
> +}
> +
>  /* Terminate a case (Pascal/Ada) or switch (C) statement
>     in which ORIG_INDEX is the expression to be tested.
>     If ORIG_TYPE is not NULL, it is the original ORIG_INDEX
> @@ -2384,14 +2403,7 @@ expand_case (gimple stmt)
>        /* Try implementing this switch statement by a short sequence of
>  	 bit-wise comparisons.  However, we let the binary-tree case
>  	 below handle constant index expressions.  */
> -      if (CASE_USE_BIT_TESTS
> -	  && ! TREE_CONSTANT (index_expr)
> -	  && compare_tree_int (range, GET_MODE_BITSIZE (word_mode)) < 0
> -	  && compare_tree_int (range, 0) > 0
> -	  && lshift_cheap_p ()
> -	  && ((uniq == 1 && count >= 3)
> -	      || (uniq == 2 && count >= 5)
> -	      || (uniq == 3 && count >= 6)))
> +      if (expand_switch_using_bit_tests_p (index_expr, range, uniq, count))
>  	{
>  	  /* Optimize the case where all the case values fit in a
>  	     word without having to subtract MINVAL.  In this case,
> --- gcc/tree.h.jj	2010-11-15 23:28:02.000000000 +0100
> +++ gcc/tree.h	2010-11-15 23:28:02.000000000 +0100
> @@ -5314,6 +5314,8 @@ extern bool parse_input_constraint (cons
>  				    const char * const *, bool *, bool *);
>  extern void expand_asm_stmt (gimple);
>  extern tree resolve_asm_operand_names (tree, tree, tree, tree);
> +extern bool expand_switch_using_bit_tests_p (tree, tree, unsigned int,
> +					     unsigned int);
>  extern void expand_case (gimple);
>  extern void expand_decl (tree);
>  #ifdef HARD_CONST
> --- gcc/tree-switch-conversion.c.jj	2010-09-06 11:46:47.000000000 +0200
> +++ gcc/tree-switch-conversion.c	2010-11-19 17:01:15.698511106 +0100
> @@ -156,6 +156,12 @@ struct switch_conv_info
>    /* String reason why the case wasn't a good candidate that is written to the
>       dump file, if there is one.  */
>    const char *reason;
> +
> +  /* Parameters for expand_switch_using_bit_tests.  Should be computed
> +     the same way as in expand_case.  */
> +  unsigned int bit_test_uniq;
> +  unsigned int bit_test_count;
> +  basic_block bit_test_bb[2];
>  };
>  
>  /* Global pass info.  */
> @@ -174,7 +180,7 @@ check_range (gimple swtch)
>    tree range_max;
>  
>    /* The gimplifier has already sorted the cases by CASE_LOW and ensured there
> -     is a default label which is the last in the vector.  */
> +     is a default label which is the first in the vector.  */
>  
>    min_case = gimple_switch_label (swtch, 1);
>    info.range_min = CASE_LOW (min_case);
> @@ -234,7 +240,26 @@ check_process_case (tree cs)
>        info.default_count = e->count;
>      }
>    else
> -    info.other_count += e->count;
> +    {
> +      int i;
> +      info.other_count += e->count;
> +      for (i = 0; i < 2; i++)
> +	if (info.bit_test_bb[i] == label_bb)
> +	  break;
> +	else if (info.bit_test_bb[i] == NULL)
> +	  {
> +	    info.bit_test_bb[i] = label_bb;
> +	    info.bit_test_uniq++;
> +	    break;
> +	  }
> +      if (i == 2)
> +	info.bit_test_uniq = 3;
> +      if (CASE_HIGH (cs) != NULL_TREE
> +	  && ! tree_int_cst_equal (CASE_LOW (cs), CASE_HIGH (cs)))
> +	info.bit_test_count += 2;
> +      else
> +	info.bit_test_count++;
> +    }
>  
>    if (!label_bb)
>      {
> @@ -336,13 +361,10 @@ create_temp_arrays (void)
>  {
>    int i;
>  
> -  info.default_values = (tree *) xcalloc (info.phi_count, sizeof (tree));
> -  info.constructors = (VEC (constructor_elt, gc) **) xcalloc (info.phi_count,
> -							      sizeof (tree));
> -  info.target_inbound_names = (tree *) xcalloc (info.phi_count, sizeof (tree));
> -  info.target_outbound_names = (tree *) xcalloc (info.phi_count,
> -						 sizeof (tree));
> -
> +  info.default_values = XCNEWVEC (tree, info.phi_count * 3);
> +  info.constructors = XCNEWVEC (VEC (constructor_elt, gc) *, info.phi_count);
> +  info.target_inbound_names = info.default_values + info.phi_count;
> +  info.target_outbound_names = info.target_inbound_names + info.phi_count;
>    for (i = 0; i < info.phi_count; i++)
>      info.constructors[i]
>        = VEC_alloc (constructor_elt, gc, tree_low_cst (info.range_size, 1) + 1);
> @@ -355,10 +377,8 @@ create_temp_arrays (void)
>  static void
>  free_temp_arrays (void)
>  {
> -  free (info.constructors);
> -  free (info.default_values);
> -  free (info.target_inbound_names);
> -  free (info.target_outbound_names);
> +  XDELETEVEC (info.constructors);
> +  XDELETEVEC (info.default_values);
>  }
>  
>  /* Populate the array of default values in the order of phi nodes.
> @@ -468,13 +488,12 @@ build_constructors (gimple swtch)
>  static tree
>  constructor_contains_same_values_p (VEC (constructor_elt, gc) *vec)
>  {
> -  int i, len = VEC_length (constructor_elt, vec);
> +  unsigned int i;
>    tree prev = NULL_TREE;
> +  constructor_elt *elt;
>  
> -  for (i = 0; i < len; i++)
> +  FOR_EACH_VEC_ELT (constructor_elt, vec, i, elt)
>      {
> -      constructor_elt *elt = VEC_index (constructor_elt, vec, i);
> -
>        if (!prev)
>  	prev = elt->value;
>        else if (!operand_equal_p (elt->value, prev, OEP_ONLY_CONST))
> @@ -483,6 +502,79 @@ constructor_contains_same_values_p (VEC 
>    return prev;
>  }
>  
> +/* Return type which should be used for array elements, either TYPE,
> +   or for integral type some smaller integral type that can still hold
> +   all the constants.  */
> +
> +static tree
> +array_value_type (gimple swtch, tree type, int num)
> +{
> +  unsigned int i, len = VEC_length (constructor_elt, info.constructors[num]);
> +  constructor_elt *elt;
> +  enum machine_mode mode;
> +  int sign = 0;
> +  tree smaller_type;
> +
> +  if (!INTEGRAL_TYPE_P (type))
> +    return type;
> +
> +  mode = GET_CLASS_NARROWEST_MODE (GET_MODE_CLASS (TYPE_MODE (type)));
> +  if (GET_MODE_SIZE (TYPE_MODE (type)) <= GET_MODE_SIZE (mode))
> +    return type;
> +
> +  if (len < (optimize_bb_for_size_p (gimple_bb (swtch)) ? 2 : 32))
> +    return type;
> +
> +  FOR_EACH_VEC_ELT (constructor_elt, info.constructors[num], i, elt)
> +    {
> +      double_int cst;
> +
> +      if (TREE_CODE (elt->value) != INTEGER_CST)
> +	return type;
> +
> +      cst = TREE_INT_CST (elt->value);
> +      while (1)
> +	{
> +	  unsigned int prec = GET_MODE_BITSIZE (mode);
> +	  if (prec > HOST_BITS_PER_WIDE_INT)
> +	    return type;
> +
> +	  if (sign >= 0
> +	      && double_int_equal_p (cst, double_int_zext (cst, prec)))
> +	    {
> +	      if (sign == 0
> +		  && double_int_equal_p (cst, double_int_sext (cst, prec)))
> +		break;
> +	      sign = 1;
> +	      break;
> +	    }
> +	  if (sign <= 0
> +	      && double_int_equal_p (cst, double_int_sext (cst, prec)))
> +	    {
> +	      sign = -1;
> +	      break;
> +	    }
> +
> +	  if (sign == 1)
> +	    sign = 0;
> +
> +	  mode = GET_MODE_WIDER_MODE (mode);
> +	  if (mode == VOIDmode
> +	      || GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TYPE_MODE (type)))
> +	    return type;
> +	}
> +    }
> +
> +  if (sign == 0)
> +    sign = TYPE_UNSIGNED (type) ? 1 : -1;
> +  smaller_type = lang_hooks.types.type_for_mode (mode, sign >= 0);
> +  if (GET_MODE_SIZE (TYPE_MODE (type))
> +      <= GET_MODE_SIZE (TYPE_MODE (smaller_type)))
> +    return type;
> +
> +  return smaller_type;
> +}
> +
>  /* Create an appropriate array type and declaration and assemble a static array
>     variable.  Also create a load statement that initializes the variable in
>     question with a value from the static array.  SWTCH is the switch statement
> @@ -512,10 +604,19 @@ build_one_array (gimple swtch, int num, 
>      load = gimple_build_assign (name, cst);
>    else
>      {
> -      tree array_type, ctor, decl, value_type, fetch;
> +      tree array_type, ctor, decl, value_type, fetch, default_type;
>  
> -      value_type = TREE_TYPE (info.default_values[num]);
> +      default_type = TREE_TYPE (info.default_values[num]);
> +      value_type = array_value_type (swtch, default_type, num);
>        array_type = build_array_type (value_type, arr_index_type);
> +      if (default_type != value_type)
> +	{
> +	  unsigned int i;
> +	  constructor_elt *elt;
> +
> +	  FOR_EACH_VEC_ELT (constructor_elt, info.constructors[num], i, elt)
> +	    elt->value = fold_convert (value_type, elt->value);
> +	}
>        ctor = build_constructor (array_type, info.constructors[num]);
>        TREE_CONSTANT (ctor) = true;
>        TREE_STATIC (ctor) = true;
> @@ -534,6 +635,12 @@ build_one_array (gimple swtch, int num, 
>  
>        fetch = build4 (ARRAY_REF, value_type, decl, tidx, NULL_TREE,
>  		      NULL_TREE);
> +      if (default_type != value_type)
> +	{
> +	  fetch = fold_convert (default_type, fetch);
> +	  fetch = force_gimple_operand_gsi (&gsi, fetch, true, NULL_TREE,
> +					    true, GSI_SAME_STMT);
> +	}
>        load = gimple_build_assign (name, fetch);
>      }
>  
> @@ -818,6 +925,10 @@ process_switch (gimple swtch)
>    info.default_prob = 0;
>    info.default_count = 0;
>    info.other_count = 0;
> +  info.bit_test_uniq = 0;
> +  info.bit_test_count = 0;
> +  info.bit_test_bb[0] = NULL;
> +  info.bit_test_bb[1] = NULL;
>  
>    /* An ERROR_MARK occurs for various reasons including invalid data type.
>       (comment from stmt.c) */
> @@ -841,6 +952,18 @@ process_switch (gimple swtch)
>  	return false;
>        }
>  
> +  if (info.bit_test_uniq <= 2)
> +    {
> +      rtl_profile_for_bb (gimple_bb (swtch));
> +      if (expand_switch_using_bit_tests_p (gimple_switch_index (swtch),
> +					   info.range_size, info.bit_test_uniq,
> +					   info.bit_test_count))
> +	{
> +	  info.reason = "  Expanding as bit test is preferable\n";
> +	  return false;
> +	}
> +    }
> +
>    if (!check_final_bb ())
>      return false;
>  
> --- gcc/testsuite/gcc.target/i386/pr45830.c.jj	2010-11-19 16:58:08.345372833 +0100
> +++ gcc/testsuite/gcc.target/i386/pr45830.c	2010-11-19 17:00:54.425260908 +0100
> @@ -0,0 +1,31 @@
> +/* PR target/45830 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-switchconv-all -mtune=generic" } */
> +
> +int
> +foo (int *a)
> +{
> +  switch (*a)
> +    {
> +    case 0:
> +    case 3:
> +    case 1:
> +    case 2:
> +    case 4:
> +    case 23:
> +    case 26:
> +    case 19:
> +    case 5:
> +    case 21:
> +    case 20:
> +    case 22:
> +    case 27:
> +      return 1;
> +    default:
> +      return 0;
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump "Expanding as bit test is preferable" "switchconv" } } */
> +/* { dg-final { scan-assembler-not "CSWTCH" } } */
> +/* { dg-final { cleanup-tree-dump "switchconv" } } */
> --- gcc/testsuite/gcc.c-torture/execute/pr45830.c.jj	2010-11-19 16:56:39.214313262 +0100
> +++ gcc/testsuite/gcc.c-torture/execute/pr45830.c	2010-11-19 16:55:39.000000000 +0100
> @@ -0,0 +1,97 @@
> +/* PR tree-optimization/45830 */
> +
> +extern void abort (void);
> +
> +long long va, vb, vc, vd, ve;
> +
> +__attribute__((noinline)) int
> +foo (int x)
> +{
> +  long long a, b, c, d, e;
> +  switch (x)
> +    {
> +    case 0:
> +    case 3:
> +    case 1:
> +    case 2:
> +    case 4:
> +      a = 1;
> +      b = 129;
> +      c = -12;
> +      d = -4;
> +      e = 128;
> +      break;
> +    case 23:
> +    case 26:
> +    case 19:
> +    case 65:
> +    case 5:
> +      a = 2;
> +      b = 138;
> +      c = 115;
> +      d = 128;
> +      e = -16;
> +      break;
> +    case 21:
> +    case 20:
> +    case 22:
> +    case 38:
> +    case 27:
> +    case 66:
> +    case 45:
> +    case 47:
> +      a = 3;
> +      b = 6;
> +      c = 127;
> +      d = 25;
> +      e = 257;
> +      break;
> +    default:
> +      a = 0;
> +      b = 18;
> +      c = 0;
> +      d = 64;
> +      e = 32768L;
> +      break;
> +    }
> +  va = a;
> +  vb = b;
> +  vc = c;
> +  vd = d;
> +  ve = e;
> +}
> +
> +int
> +bar (int x)
> +{
> +  if (x < 0)
> +    return 3;
> +  if (x < 5)
> +    return 0;
> +  if (x == 5 || x == 19 || x == 23 | x == 26 || x == 65)
> +    return 1;
> +  if ((x >= 20 && x <= 22) || x == 27 || x == 38
> +      || x == 45 || x == 47 || x == 66)
> +    return 2;
> +  return 3;
> +}
> +
> +long long expected[] =
> +{ 1, 129, -12, -4, 128, 2, 138, 115, 128, -16,
> +  3, 6, 127, 25, 257, 0, 18, 0, 64, 32768L };
> +
> +int
> +main (void)
> +{
> +  int i, v;
> +  for (i = -4; i < 70; i++)
> +    {
> +      foo (i);
> +      v = bar (i);
> +      if (va != expected[5 * v] || vb != expected[5 * v + 1]
> +	  || vc != expected[5 * v + 2] || vd != expected[5 * v + 3]
> +	  || ve != expected[5 * v + 4])
> +	abort ();
> +    }
> +  return 0;
> +}
> 
> 	Jakub
> 
>
diff mbox

Patch

--- gcc/stmt.c.jj	2010-11-15 18:53:41.000000000 +0100
+++ gcc/stmt.c	2010-11-19 13:26:50.457354826 +0100
@@ -2250,6 +2250,25 @@  emit_case_bit_tests (tree index_type, tr
 #define HAVE_tablejump 0
 #endif
 
+/* Return true if a switch should be expanded as a bit test.
+   INDEX_EXPR is the index expression, RANGE is the difference between
+   highest and lowest case, UNIQ is number of unique case node targets
+   not counting the default case and COUNT is the number of comparisons
+   needed, not counting the default case.  */
+bool
+expand_switch_using_bit_tests_p (tree index_expr, tree range,
+				 unsigned int uniq, unsigned int count)
+{
+  return (CASE_USE_BIT_TESTS
+	  && ! TREE_CONSTANT (index_expr)
+	  && compare_tree_int (range, GET_MODE_BITSIZE (word_mode)) < 0
+	  && compare_tree_int (range, 0) > 0
+	  && lshift_cheap_p ()
+	  && ((uniq == 1 && count >= 3)
+	      || (uniq == 2 && count >= 5)
+	      || (uniq == 3 && count >= 6)));
+}
+
 /* Terminate a case (Pascal/Ada) or switch (C) statement
    in which ORIG_INDEX is the expression to be tested.
    If ORIG_TYPE is not NULL, it is the original ORIG_INDEX
@@ -2384,14 +2403,7 @@  expand_case (gimple stmt)
       /* Try implementing this switch statement by a short sequence of
 	 bit-wise comparisons.  However, we let the binary-tree case
 	 below handle constant index expressions.  */
-      if (CASE_USE_BIT_TESTS
-	  && ! TREE_CONSTANT (index_expr)
-	  && compare_tree_int (range, GET_MODE_BITSIZE (word_mode)) < 0
-	  && compare_tree_int (range, 0) > 0
-	  && lshift_cheap_p ()
-	  && ((uniq == 1 && count >= 3)
-	      || (uniq == 2 && count >= 5)
-	      || (uniq == 3 && count >= 6)))
+      if (expand_switch_using_bit_tests_p (index_expr, range, uniq, count))
 	{
 	  /* Optimize the case where all the case values fit in a
 	     word without having to subtract MINVAL.  In this case,
--- gcc/tree.h.jj	2010-11-15 23:28:02.000000000 +0100
+++ gcc/tree.h	2010-11-15 23:28:02.000000000 +0100
@@ -5314,6 +5314,8 @@  extern bool parse_input_constraint (cons
 				    const char * const *, bool *, bool *);
 extern void expand_asm_stmt (gimple);
 extern tree resolve_asm_operand_names (tree, tree, tree, tree);
+extern bool expand_switch_using_bit_tests_p (tree, tree, unsigned int,
+					     unsigned int);
 extern void expand_case (gimple);
 extern void expand_decl (tree);
 #ifdef HARD_CONST
--- gcc/tree-switch-conversion.c.jj	2010-09-06 11:46:47.000000000 +0200
+++ gcc/tree-switch-conversion.c	2010-11-19 17:01:15.698511106 +0100
@@ -156,6 +156,12 @@  struct switch_conv_info
   /* String reason why the case wasn't a good candidate that is written to the
      dump file, if there is one.  */
   const char *reason;
+
+  /* Parameters for expand_switch_using_bit_tests.  Should be computed
+     the same way as in expand_case.  */
+  unsigned int bit_test_uniq;
+  unsigned int bit_test_count;
+  basic_block bit_test_bb[2];
 };
 
 /* Global pass info.  */
@@ -174,7 +180,7 @@  check_range (gimple swtch)
   tree range_max;
 
   /* The gimplifier has already sorted the cases by CASE_LOW and ensured there
-     is a default label which is the last in the vector.  */
+     is a default label which is the first in the vector.  */
 
   min_case = gimple_switch_label (swtch, 1);
   info.range_min = CASE_LOW (min_case);
@@ -234,7 +240,26 @@  check_process_case (tree cs)
       info.default_count = e->count;
     }
   else
-    info.other_count += e->count;
+    {
+      int i;
+      info.other_count += e->count;
+      for (i = 0; i < 2; i++)
+	if (info.bit_test_bb[i] == label_bb)
+	  break;
+	else if (info.bit_test_bb[i] == NULL)
+	  {
+	    info.bit_test_bb[i] = label_bb;
+	    info.bit_test_uniq++;
+	    break;
+	  }
+      if (i == 2)
+	info.bit_test_uniq = 3;
+      if (CASE_HIGH (cs) != NULL_TREE
+	  && ! tree_int_cst_equal (CASE_LOW (cs), CASE_HIGH (cs)))
+	info.bit_test_count += 2;
+      else
+	info.bit_test_count++;
+    }
 
   if (!label_bb)
     {
@@ -336,13 +361,10 @@  create_temp_arrays (void)
 {
   int i;
 
-  info.default_values = (tree *) xcalloc (info.phi_count, sizeof (tree));
-  info.constructors = (VEC (constructor_elt, gc) **) xcalloc (info.phi_count,
-							      sizeof (tree));
-  info.target_inbound_names = (tree *) xcalloc (info.phi_count, sizeof (tree));
-  info.target_outbound_names = (tree *) xcalloc (info.phi_count,
-						 sizeof (tree));
-
+  info.default_values = XCNEWVEC (tree, info.phi_count * 3);
+  info.constructors = XCNEWVEC (VEC (constructor_elt, gc) *, info.phi_count);
+  info.target_inbound_names = info.default_values + info.phi_count;
+  info.target_outbound_names = info.target_inbound_names + info.phi_count;
   for (i = 0; i < info.phi_count; i++)
     info.constructors[i]
       = VEC_alloc (constructor_elt, gc, tree_low_cst (info.range_size, 1) + 1);
@@ -355,10 +377,8 @@  create_temp_arrays (void)
 static void
 free_temp_arrays (void)
 {
-  free (info.constructors);
-  free (info.default_values);
-  free (info.target_inbound_names);
-  free (info.target_outbound_names);
+  XDELETEVEC (info.constructors);
+  XDELETEVEC (info.default_values);
 }
 
 /* Populate the array of default values in the order of phi nodes.
@@ -468,13 +488,12 @@  build_constructors (gimple swtch)
 static tree
 constructor_contains_same_values_p (VEC (constructor_elt, gc) *vec)
 {
-  int i, len = VEC_length (constructor_elt, vec);
+  unsigned int i;
   tree prev = NULL_TREE;
+  constructor_elt *elt;
 
-  for (i = 0; i < len; i++)
+  FOR_EACH_VEC_ELT (constructor_elt, vec, i, elt)
     {
-      constructor_elt *elt = VEC_index (constructor_elt, vec, i);
-
       if (!prev)
 	prev = elt->value;
       else if (!operand_equal_p (elt->value, prev, OEP_ONLY_CONST))
@@ -483,6 +502,79 @@  constructor_contains_same_values_p (VEC 
   return prev;
 }
 
+/* Return type which should be used for array elements, either TYPE,
+   or for integral type some smaller integral type that can still hold
+   all the constants.  */
+
+static tree
+array_value_type (gimple swtch, tree type, int num)
+{
+  unsigned int i, len = VEC_length (constructor_elt, info.constructors[num]);
+  constructor_elt *elt;
+  enum machine_mode mode;
+  int sign = 0;
+  tree smaller_type;
+
+  if (!INTEGRAL_TYPE_P (type))
+    return type;
+
+  mode = GET_CLASS_NARROWEST_MODE (GET_MODE_CLASS (TYPE_MODE (type)));
+  if (GET_MODE_SIZE (TYPE_MODE (type)) <= GET_MODE_SIZE (mode))
+    return type;
+
+  if (len < (optimize_bb_for_size_p (gimple_bb (swtch)) ? 2 : 32))
+    return type;
+
+  FOR_EACH_VEC_ELT (constructor_elt, info.constructors[num], i, elt)
+    {
+      double_int cst;
+
+      if (TREE_CODE (elt->value) != INTEGER_CST)
+	return type;
+
+      cst = TREE_INT_CST (elt->value);
+      while (1)
+	{
+	  unsigned int prec = GET_MODE_BITSIZE (mode);
+	  if (prec > HOST_BITS_PER_WIDE_INT)
+	    return type;
+
+	  if (sign >= 0
+	      && double_int_equal_p (cst, double_int_zext (cst, prec)))
+	    {
+	      if (sign == 0
+		  && double_int_equal_p (cst, double_int_sext (cst, prec)))
+		break;
+	      sign = 1;
+	      break;
+	    }
+	  if (sign <= 0
+	      && double_int_equal_p (cst, double_int_sext (cst, prec)))
+	    {
+	      sign = -1;
+	      break;
+	    }
+
+	  if (sign == 1)
+	    sign = 0;
+
+	  mode = GET_MODE_WIDER_MODE (mode);
+	  if (mode == VOIDmode
+	      || GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TYPE_MODE (type)))
+	    return type;
+	}
+    }
+
+  if (sign == 0)
+    sign = TYPE_UNSIGNED (type) ? 1 : -1;
+  smaller_type = lang_hooks.types.type_for_mode (mode, sign >= 0);
+  if (GET_MODE_SIZE (TYPE_MODE (type))
+      <= GET_MODE_SIZE (TYPE_MODE (smaller_type)))
+    return type;
+
+  return smaller_type;
+}
+
 /* Create an appropriate array type and declaration and assemble a static array
    variable.  Also create a load statement that initializes the variable in
    question with a value from the static array.  SWTCH is the switch statement
@@ -512,10 +604,19 @@  build_one_array (gimple swtch, int num, 
     load = gimple_build_assign (name, cst);
   else
     {
-      tree array_type, ctor, decl, value_type, fetch;
+      tree array_type, ctor, decl, value_type, fetch, default_type;
 
-      value_type = TREE_TYPE (info.default_values[num]);
+      default_type = TREE_TYPE (info.default_values[num]);
+      value_type = array_value_type (swtch, default_type, num);
       array_type = build_array_type (value_type, arr_index_type);
+      if (default_type != value_type)
+	{
+	  unsigned int i;
+	  constructor_elt *elt;
+
+	  FOR_EACH_VEC_ELT (constructor_elt, info.constructors[num], i, elt)
+	    elt->value = fold_convert (value_type, elt->value);
+	}
       ctor = build_constructor (array_type, info.constructors[num]);
       TREE_CONSTANT (ctor) = true;
       TREE_STATIC (ctor) = true;
@@ -534,6 +635,12 @@  build_one_array (gimple swtch, int num, 
 
       fetch = build4 (ARRAY_REF, value_type, decl, tidx, NULL_TREE,
 		      NULL_TREE);
+      if (default_type != value_type)
+	{
+	  fetch = fold_convert (default_type, fetch);
+	  fetch = force_gimple_operand_gsi (&gsi, fetch, true, NULL_TREE,
+					    true, GSI_SAME_STMT);
+	}
       load = gimple_build_assign (name, fetch);
     }
 
@@ -818,6 +925,10 @@  process_switch (gimple swtch)
   info.default_prob = 0;
   info.default_count = 0;
   info.other_count = 0;
+  info.bit_test_uniq = 0;
+  info.bit_test_count = 0;
+  info.bit_test_bb[0] = NULL;
+  info.bit_test_bb[1] = NULL;
 
   /* An ERROR_MARK occurs for various reasons including invalid data type.
      (comment from stmt.c) */
@@ -841,6 +952,18 @@  process_switch (gimple swtch)
 	return false;
       }
 
+  if (info.bit_test_uniq <= 2)
+    {
+      rtl_profile_for_bb (gimple_bb (swtch));
+      if (expand_switch_using_bit_tests_p (gimple_switch_index (swtch),
+					   info.range_size, info.bit_test_uniq,
+					   info.bit_test_count))
+	{
+	  info.reason = "  Expanding as bit test is preferable\n";
+	  return false;
+	}
+    }
+
   if (!check_final_bb ())
     return false;
 
--- gcc/testsuite/gcc.target/i386/pr45830.c.jj	2010-11-19 16:58:08.345372833 +0100
+++ gcc/testsuite/gcc.target/i386/pr45830.c	2010-11-19 17:00:54.425260908 +0100
@@ -0,0 +1,31 @@ 
+/* PR target/45830 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-switchconv-all -mtune=generic" } */
+
+int
+foo (int *a)
+{
+  switch (*a)
+    {
+    case 0:
+    case 3:
+    case 1:
+    case 2:
+    case 4:
+    case 23:
+    case 26:
+    case 19:
+    case 5:
+    case 21:
+    case 20:
+    case 22:
+    case 27:
+      return 1;
+    default:
+      return 0;
+    }
+}
+
+/* { dg-final { scan-tree-dump "Expanding as bit test is preferable" "switchconv" } } */
+/* { dg-final { scan-assembler-not "CSWTCH" } } */
+/* { dg-final { cleanup-tree-dump "switchconv" } } */
--- gcc/testsuite/gcc.c-torture/execute/pr45830.c.jj	2010-11-19 16:56:39.214313262 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr45830.c	2010-11-19 16:55:39.000000000 +0100
@@ -0,0 +1,97 @@ 
+/* PR tree-optimization/45830 */
+
+extern void abort (void);
+
+long long va, vb, vc, vd, ve;
+
+__attribute__((noinline)) int
+foo (int x)
+{
+  long long a, b, c, d, e;
+  switch (x)
+    {
+    case 0:
+    case 3:
+    case 1:
+    case 2:
+    case 4:
+      a = 1;
+      b = 129;
+      c = -12;
+      d = -4;
+      e = 128;
+      break;
+    case 23:
+    case 26:
+    case 19:
+    case 65:
+    case 5:
+      a = 2;
+      b = 138;
+      c = 115;
+      d = 128;
+      e = -16;
+      break;
+    case 21:
+    case 20:
+    case 22:
+    case 38:
+    case 27:
+    case 66:
+    case 45:
+    case 47:
+      a = 3;
+      b = 6;
+      c = 127;
+      d = 25;
+      e = 257;
+      break;
+    default:
+      a = 0;
+      b = 18;
+      c = 0;
+      d = 64;
+      e = 32768L;
+      break;
+    }
+  va = a;
+  vb = b;
+  vc = c;
+  vd = d;
+  ve = e;
+}
+
+int
+bar (int x)
+{
+  if (x < 0)
+    return 3;
+  if (x < 5)
+    return 0;
+  if (x == 5 || x == 19 || x == 23 | x == 26 || x == 65)
+    return 1;
+  if ((x >= 20 && x <= 22) || x == 27 || x == 38
+      || x == 45 || x == 47 || x == 66)
+    return 2;
+  return 3;
+}
+
+long long expected[] =
+{ 1, 129, -12, -4, 128, 2, 138, 115, 128, -16,
+  3, 6, 127, 25, 257, 0, 18, 0, 64, 32768L };
+
+int
+main (void)
+{
+  int i, v;
+  for (i = -4; i < 70; i++)
+    {
+      foo (i);
+      v = bar (i);
+      if (va != expected[5 * v] || vb != expected[5 * v + 1]
+	  || vc != expected[5 * v + 2] || vd != expected[5 * v + 3]
+	  || ve != expected[5 * v + 4])
+	abort ();
+    }
+  return 0;
+}