Patchwork [tree-optimization,1/2] : Branch-cost optimizations

login
register
mail settings
Submitter Kai Tietz
Date Nov. 8, 2011, 3:30 p.m.
Message ID <0a48a15d-ab75-4254-8e04-685893442a73@zmail14.collab.prod.int.phx2.redhat.com>
Download mbox | patch
Permalink /patch/124390/
State New
Headers show

Comments

Kai Tietz - Nov. 8, 2011, 3:30 p.m.
Hi,

with not much hope that this patch gets into the 4.7 version, I resend a revised version of the first part of the patch.  I updated the patch according to comments I got from matz on IRC yesterday.
The patch now uses a vector for collecting the truth &/| conditional chain.  Additionally it now checks consistently for SA.values before trying to access DEF_STMT.

Bootstrapped and regression tested for all languages.  Ok for apply?

Regards,
Kai

Patch

Index: gcc-trunk/gcc/cfgexpand.c
===================================================================
--- gcc-trunk.orig/gcc/cfgexpand.c
+++ gcc-trunk/gcc/cfgexpand.c
@@ -1650,6 +1650,695 @@  maybe_cleanup_end_of_block (edge e, rtx 
     }
 }
 
+/* Check if a statement can be considered as a "simple" one.  Simple ones are:
+   - minimal invariant
+   - any non-SSA_NAME variant
+   - any SSA_NAME variant without a definition statement
+   - any SSA_NAME with default definition.
+   - an assignment of kind ~X, if X is minimal invariant, or has no
+     definition statement.  We exclude here floating point types for X
+     and Y, as ~ (X cmp Y) can have special meaning on floats.
+   - an assignment of kind X ^ ~0, if X is minimal invariant, or has no
+     definition statement.  */
+
+static bool
+is_bool_op_p (tree op, bool *is_not)
+{
+  gimple s;
+  enum tree_code code;
+
+  *is_not = false;
+
+  /* Reject result types not of boolean kind.  */
+  if (TREE_CODE (TREE_TYPE (op)) != BOOLEAN_TYPE)
+    return false;
+
+  if (is_gimple_min_invariant (op)
+      || TREE_CODE (op) != SSA_NAME)
+    return true;
+
+  if ((!SA.values || !bitmap_bit_p (SA.values, SSA_NAME_VERSION (op)))
+      && SSA_NAME_DEF_STMT (op)
+      && !SSA_NAME_IS_DEFAULT_DEF (op))
+    return false;
+  if (SSA_NAME_IS_DEFAULT_DEF (op)
+      || (s = SSA_NAME_DEF_STMT (op)) == NULL)
+    return true;
+  /* Reject statement which isn't of kind assign.  */
+  if (!is_gimple_assign (s))
+    return false;
+
+  code = gimple_assign_rhs_code (s);
+
+  /* See if we have a "simple" logical not.  */
+  if (code == BIT_NOT_EXPR)
+    *is_not = true;
+  else if (code == BIT_XOR_EXPR
+           && integer_all_onesp (gimple_assign_rhs2 (s)))
+    *is_not = true;
+  else
+    return false;
+
+  op = gimple_assign_rhs1 (s);
+
+  if (TREE_CODE (op) != SSA_NAME)
+    return false;
+
+  /* Can the inner statement be considered as
+     simple.  */
+  if (is_gimple_min_invariant (op)
+      || SSA_NAME_IS_DEFAULT_DEF (op)
+      || SSA_NAME_DEF_STMT (op) == NULL)
+    return true;
+
+  *is_not = false;
+  return false;
+}
+
+/* This helper routine normalizes comparisons on boolean-typed operands
+   for OP1 ==/!= CST.
+   Folded patterns are:
+    X ==/!= 1 -> X !=/== 0
+    ~(X cmp Y) !=/== 0 -> (X cmp Y) ==/!= 0, if X and Y aren't floats.
+    ~(X & Y) !=/== 0 -> (X & Y) ==/!= 0
+    ~(X | Y) !=/== 0 -> (X | Y) ==/!= 0
+    (X cmp Y) != 0 -> (X cmp Y)
+    (X cmp Y) == 0 -> (X cmp-invert Y), if X and Y aren't floats.
+   Note: We reject here operations on floats as pattern ~(float-compare)
+   can have side-effects.  */
+
+static void
+normalize_truth_condition (enum tree_code *cmp, tree *op1, tree *op2)
+{
+  gimple stmt, s;
+  tree nop0, nop1, op;
+  tree type = TREE_TYPE (*op1);
+  enum tree_code code2;
+
+  if (TREE_CODE (type) != BOOLEAN_TYPE
+      || (*cmp != EQ_EXPR && *cmp != NE_EXPR)
+      || TREE_CODE (*op2) != INTEGER_CST)
+    return;
+
+  if (!SA.values
+      || TREE_CODE (*op1) != SSA_NAME
+      || (!bitmap_bit_p (SA.values, SSA_NAME_VERSION (*op1))
+          && SSA_NAME_DEF_STMT (*op1)))
+   return;
+
+  if (integer_onep (*op2))
+    {
+      *op2 = build_int_cst (type, 0);
+      *cmp = (*cmp == EQ_EXPR ? NE_EXPR : EQ_EXPR);
+    }
+
+  for (;;)
+    {
+      if (!SA.values
+	  || TREE_CODE (*op1) != SSA_NAME
+	  || !bitmap_bit_p (SA.values, SSA_NAME_VERSION (*op1)))
+       return;
+
+      stmt = SSA_NAME_DEF_STMT (*op1);
+      if (gimple_code (stmt) != GIMPLE_ASSIGN)
+	return;
+
+      code2 = gimple_assign_rhs_code (stmt);
+      if (code2 != BIT_NOT_EXPR)
+        break;
+      op = gimple_assign_rhs1 (stmt);
+
+      if (!SA.values
+	  || TREE_CODE (op) != SSA_NAME
+	  || !bitmap_bit_p (SA.values, SSA_NAME_VERSION (op)))
+       return;
+
+      s = SSA_NAME_DEF_STMT (op);
+      if (!s || gimple_code (s) != GIMPLE_ASSIGN)
+	return;
+
+      code2 = gimple_assign_rhs_code (s);
+
+      if (TREE_CODE_CLASS (code2) == tcc_comparison)
+        {
+	  tree t = TREE_TYPE (gimple_assign_rhs1 (s));
+
+	  /* Don't fold ~ (X cmp Y) ==/!= 0 to (X cmp Y) ==/!= 0,
+	     if operands of comparison are floating-points.  */
+	  if (FLOAT_TYPE_P (t))
+	    return;
+	}
+      else if (code2 != BIT_AND_EXPR && code2 != BIT_IOR_EXPR)
+        return;
+      *op1 = op;
+      *cmp = (*cmp == EQ_EXPR ? NE_EXPR : EQ_EXPR);
+    }
+
+  if (TREE_CODE_CLASS (code2) != tcc_comparison)
+    return;
+
+  nop0 = gimple_assign_rhs1 (stmt);
+  nop1 = gimple_assign_rhs2 (stmt);
+
+  /* Case (X cmp Y) != 0 -> X cmp Y.  */
+  if (*cmp == NE_EXPR)
+    {
+      *cmp = code2;
+      *op1 = nop0;
+      *op2 = nop1;
+      return;
+    }
+
+  /* Case (X cmp Y) == 0 */
+
+  type = TREE_TYPE (nop0);
+  if (FLOAT_TYPE_P (type))
+    return;
+  code2 = invert_tree_comparison (*cmp,
+				  HONOR_NANS (TYPE_MODE (type)));
+  if (code2 == ERROR_MARK)
+    return;
+  *cmp = code2;
+  *op1 = nop0;
+  *op2 = nop1;
+}
+
+/* Structure to keep some attributes of operands
+   for bitwise-and/or chain within a condition.  */
+
+typedef struct {
+  tree leaf;
+  enum tree_code code;
+  tree op2;
+  tree type;
+  bool sa_valued;
+  bool is_bool;
+  bool is_trap;
+  bool has_sideeffect;
+  bool is_bool_not;
+  bool is_or;
+  bool is_and;
+  bool joined;
+} cond_assoc_t;
+
+DEF_VEC_O(cond_assoc_t);
+DEF_VEC_ALLOC_O(cond_assoc_t, heap);
+
+/* Build up operand list in OP for bitwise operand chain of kind CODE and
+   store each element in *PCONDS, if non-NULL.
+   CMP can be either EQ_EXPR or NE_EXPR, and op2 is integral zero.
+   Function returns the count of found elements in OP.  */
+static int
+collect_cond_chain (tree op, enum tree_code code,
+		    VEC (cond_assoc_t, heap) **pconds,
+		    enum tree_code cmp, tree op2)
+{
+  gimple s = NULL;
+  int ret = 0;
+  bool sa_valued;
+
+  sa_valued = (SA.values
+	       && TREE_CODE (op) == SSA_NAME
+	       && (bitmap_bit_p (SA.values,
+				 SSA_NAME_VERSION (op))
+		   || SSA_NAME_DEF_STMT (op) == NULL));
+  if (TREE_CODE (op) != SSA_NAME
+      || !sa_valued
+      || (s = SSA_NAME_DEF_STMT (op)) == NULL
+      || !is_gimple_assign (s)
+      || gimple_assign_rhs_code (s) != code)
+    {
+      enum tree_code ncode = cmp;
+      tree nop2 = op2;
+      cond_assoc_t tmp;
+
+      memset (&tmp, 0, sizeof (cond_assoc_t));
+      normalize_truth_condition (&ncode, &op, &nop2);
+
+      tmp.leaf = op;
+      tmp.code = ncode;
+      tmp.op2 = nop2;
+      tmp.type = boolean_type_node;
+      tmp.sa_valued = sa_valued;
+
+      s = NULL;
+      if (TREE_CODE (op) != SSA_NAME
+	  || !sa_valued
+	  || (s = SSA_NAME_DEF_STMT (op)) == NULL
+	  || !is_gimple_assign (s))
+	s = NULL;
+
+      if (s)
+	tmp.is_trap = gimple_could_trap_p (s);
+      if (s)
+	tmp.has_sideeffect = gimple_has_side_effects (s);
+
+      if ((ncode == EQ_EXPR || ncode == NE_EXPR)
+	  && TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
+	  && integer_zerop (nop2))
+	{
+	  tmp.is_bool_not = false;
+	  tmp.is_bool = is_bool_op_p (op, &tmp.is_bool_not);
+
+	  if (!tmp.is_bool || tmp.is_trap || tmp.has_sideeffect)
+	    {
+	      tmp.is_bool = false;
+	      tmp.is_bool_not = false;
+	    }
+	  code = ERROR_MARK;
+	  if (s && is_gimple_assign (s)
+	      && TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE)
+	    code = gimple_assign_rhs_code (s);
+	  tmp.is_or = (code == BIT_IOR_EXPR);
+	  tmp.is_and = (code == BIT_AND_EXPR);
+	}
+
+      VEC_safe_push (cond_assoc_t, heap, *pconds, &tmp);
+      return 1;
+    }
+  ret = collect_cond_chain (gimple_assign_rhs1 (s), code,
+			    pconds, cmp, op2);
+  ret += collect_cond_chain (gimple_assign_rhs2 (s), code,
+			     pconds, cmp, op2);
+
+  return ret;
+}
+
+/* This routine collects the bitwise operand chain of CODE in OP.
+   The found amount of elements is stored in *PCNT.
+   The function returns a pointer to the allocated buffer containing
+   *PCNT elements.  */
+
+static VEC (cond_assoc_t, heap) *
+get_condition_list_for (tree op, enum tree_code code, int *pcnt,
+			enum tree_code cmp, tree cst)
+{
+  int cnt;
+  VEC (cond_assoc_t, heap) *conds = NULL;
+  cnt = collect_cond_chain (op, code, &conds, cmp, cst);
+
+  *pcnt = cnt;
+
+  return conds;
+}
+
+/* Helper function to build a binary tree.
+   If OP0 is boolean-typed, CODE is equal to NE_EXPR,
+   and OP1 is zero constant, then return OP0.  Otherwise
+   the result of build2 is returned.  */
+
+static tree
+build_cond_expr (enum tree_code code, tree type, tree op0, tree op1)
+{
+  if (code == NE_EXPR && integer_zerop (op1)
+      && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE)
+    return op0;
+  return build2 (code, type, op0, op1);
+}
+
+/* Decide if we can spare branch costs on combining
+   L and R operands for bitwise-and/or dependent on BRANCH_COST.
+   Function returns TRUE, if combining of L and R might be profitable,
+   otherwise FALSE is returned.  */
+
+static bool
+is_bitwise_binary_simple_combine (cond_assoc_t *l, cond_assoc_t *r)
+{
+  bool op0_has_not = false, op1_has_not = false;
+  int bcosts = BRANCH_COST (optimize_insn_for_speed_p (), false);
+  int costs = 1;
+
+  /* If jumps aren't cheap, avoid turning some more code into
+     jumpy sequences.  */
+  if (bcosts > 4)
+    return true;
+
+  /* We would spare one branch, but increase the count of executed
+     instructions.  */
+  if (bcosts <= 0)
+    return false;
+
+  if (l->is_bool == false || r->is_bool == false)
+    return false;
+
+  op0_has_not = l->is_bool_not;
+  op1_has_not = r->is_bool_not;
+
+  /* If L and R have inner bit-not, then there
+     are no additional costs for them.  */
+  if (op0_has_not && op1_has_not)
+    op0_has_not = op1_has_not = false;
+
+  /* If comparison codes of L and R aren't equal, then
+     it might be more costly to combine them, unless
+     one arm has an inner logical-not.  */
+  if (l->code != r->code)
+    {
+      if (op0_has_not)
+        op0_has_not = false;
+      else if (op1_has_not)
+        op1_has_not = false;
+      else
+        op0_has_not = true;
+    }
+  /* If leaf of L or R isn't boolean-typed, then we have at least
+     two operations to obtain result.
+     Each logical-not costs one more operation.  */
+
+  if (TREE_CODE (TREE_TYPE (l->leaf)) != BOOLEAN_TYPE)
+    costs += 2;
+  else if (op0_has_not)
+    costs++;
+
+  if (TREE_CODE (TREE_TYPE (r->leaf)) != BOOLEAN_TYPE)
+    costs += 2;
+  else if (op1_has_not)
+    costs++;
+
+  if (bcosts < costs)
+    return false;
+
+  return true;
+}
+
+/* Obtain some characteristics on comparison of integral typed
+   arguments and determine if we have here a simple combinable
+   operand.
+   In PREFIX_NOT the value TRUE is stored, if operand CA contains
+   a value-invert operation.
+   In CMP_ZERO the value TRUE is stored, if operand CA is a comparison
+   to integral zero.
+   This function returns TRUE, if a possible candidate was matched,
+   otherwise it returns FALSE.  */
+
+static bool
+preeval_cond_integral (const cond_assoc_t *ca, bool *prefix_not, bool *cmp_zero)
+{
+  gimple s = NULL;
+  tree o;
+
+  if (ca->joined == true
+      || ca->is_bool
+      || ca->is_trap || ca->has_sideeffect
+      || ca->sa_valued == false
+      || ca->is_and || ca->is_or)
+    return false;
+
+  /* Reject anything different than X !=/== 0 and X !=/== ~0.  */
+  if ((ca->code != EQ_EXPR && ca->code != NE_EXPR)
+      || !INTEGRAL_TYPE_P (TREE_TYPE (ca->leaf))
+      || TREE_CODE (TREE_TYPE (ca->leaf)) == BOOLEAN_TYPE
+      || (!integer_zerop (ca->op2) && !integer_all_onesp (ca->op2)))
+    return false;
+
+  if (TREE_CODE (ca->leaf) != SSA_NAME)
+    return false;
+  *prefix_not = false;
+  *cmp_zero = integer_zerop (ca->op2);
+
+  s = SSA_NAME_DEF_STMT (ca->leaf);
+  if (!s || SSA_NAME_IS_DEFAULT_DEF (ca->leaf))
+    return true;
+
+  /* See if we have a valid ~X pattern in left-hand comparison
+     operand.  */
+  if (!is_gimple_assign (s)
+      || gimple_assign_rhs_code (s) != BIT_NOT_EXPR)
+    return false;
+
+  o = gimple_assign_rhs1 (s);
+
+  if (TREE_CODE (o) != SSA_NAME
+      || !SA.values
+      || (!bitmap_bit_p (SA.values, SSA_NAME_VERSION (o))
+          && SSA_NAME_DEF_STMT (o) != NULL))
+    return false;
+
+  *prefix_not = true;
+  s = SSA_NAME_DEF_STMT (o);
+  if (!s || SSA_NAME_IS_DEFAULT_DEF (o))
+    return true;
+  *prefix_not = false;
+
+  return false;
+}
+
+/* Helper routine to do branch-cost optimization for operand-list
+   in CA with count CA_COUNT.  The bitwise-binary operation BITCODE needs
+   to be toggled between and/or, if INVERTED is TRUE.
+   In PCODE the final binary result-code is stored. In POP0 and POP1
+   its final operands.  */
+
+static void
+combine_conds (VEC (cond_assoc_t, heap) *ca, int ca_count, bool inverted,
+	       enum tree_code bitcode, enum tree_code *pcode, tree *pop0, tree *pop1)
+{
+  VEC (cond_assoc_t, heap) *sub_ca;
+  int sub_ca_count, i, l;
+  tree collect = NULL_TREE, tem;
+  enum tree_code chain_bitwise = bitcode;
+  enum tree_code chain_logical;
+  cond_assoc_t *ca_i, *ca_l;
+
+  if (inverted)
+    chain_bitwise = (chain_bitwise == BIT_AND_EXPR ? BIT_IOR_EXPR
+						   : BIT_AND_EXPR);
+  chain_logical = (chain_bitwise == BIT_AND_EXPR ? TRUTH_ANDIF_EXPR
+						 : TRUTH_ORIF_EXPR);
+
+  /* First try to combine the comparisons on integral-typed arguments, which
+     aren't boolean-typed.  */
+  for (i = 0; i < ca_count; i++)
+    {
+      tree o1, o2;
+      bool op1_not = false, op2_not = false;
+      bool op1_cmp_zero = false, op2_cmp_zero = false;
+      int bcosts = BRANCH_COST (optimize_insn_for_speed_p (), false);
+
+      if (bcosts < 2)
+        break;
+      ca_i = VEC_index (cond_assoc_t, ca, i);
+      if (!preeval_cond_integral (ca_i, &op1_not, &op1_cmp_zero))
+        continue;
+
+      if ((chain_bitwise == BIT_IOR_EXPR && ca_i->code != NE_EXPR)
+          || (chain_bitwise == BIT_AND_EXPR && ca_i->code != EQ_EXPR))
+        continue;
+
+      for (l = i + 1; l < ca_count; l++)
+        {
+	  tree p1;
+
+	  ca_l = VEC_index (cond_assoc_t, ca, l);
+	  if (!preeval_cond_integral (ca_l, &op2_not, &op2_cmp_zero))
+	    continue;
+	  if (ca_i->code != ca_l->code)
+	    continue;
+
+	  if (TREE_TYPE (ca_i->leaf) != TREE_TYPE (ca_l->leaf))
+	    continue;
+
+	  o1 = ca_i->leaf;
+	  o2 = ca_l->leaf;
+	  p1 = ca_i->op2;
+
+	  if (op1_cmp_zero == op2_cmp_zero)
+	    {
+	      if (op2_not && op1_not)
+	        {
+		  o1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (o1));
+		  p1 = fold_build1 (BIT_NOT_EXPR, TREE_TYPE (p1), p1);
+		  o2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (o2));
+		  op1_cmp_zero = !op1_cmp_zero;
+		}
+	      else if ((op1_not || op2_not) && bcosts <= 2)
+	        continue;
+	    }
+	  else if (!op1_not && !op2_not)
+	    continue;
+	  else if (op1_not)
+	    {
+	      if (op2_not && bcosts <= 2)
+	        continue;
+	      o1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (o1));
+	      p1 = fold_build1 (BIT_NOT_EXPR, TREE_TYPE (p1), p1);
+	      op1_cmp_zero = !op1_cmp_zero;
+	    }
+	  else
+	    o2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (o2));
+
+	  if (op1_cmp_zero)
+	    ca_i->leaf = build2 (BIT_IOR_EXPR, TREE_TYPE (o1), o1, o2);
+	  else
+	    ca_i->leaf = build2 (BIT_AND_EXPR, TREE_TYPE (o1), o1, o2);
+	  ca_i->op2 = p1;
+	  ca_l->joined = true;
+	  break;
+	}
+    }
+
+  /* Now try to combine comparisons on boolean-typed arguments and do
+     operations for inner bitwise and/or chains.  */
+  for (i = 0; i < ca_count; i++)
+    {
+      ca_i = VEC_index (cond_assoc_t, ca, i);
+      if (ca_i->joined == true)
+        continue;
+
+      if (ca_i->is_and || ca_i->is_or)
+        {
+	  enum tree_code sub_code;
+	  tree op0;
+	  tree cst = ca_i->op2;
+	  enum tree_code ao_code = ca_i->code;
+
+	  sub_ca = get_condition_list_for (ca_i->leaf,
+	  				   (ca_i->is_and ? BIT_AND_EXPR : BIT_IOR_EXPR),
+	  				   &sub_ca_count,
+	  				   ao_code, cst);
+	  combine_conds (sub_ca, sub_ca_count, (ao_code == EQ_EXPR),
+	  		 (ca_i->is_and ? BIT_AND_EXPR : BIT_IOR_EXPR),
+	  		 &sub_code, &op0, &cst);
+	  VEC_free (cond_assoc_t, heap, sub_ca);
+	  sub_ca = NULL;
+	  tem = build_cond_expr (sub_code, ca_i->type, op0, cst);
+	  ca_i->joined = true;
+	}
+      else
+        {
+	  ca_i->joined = true;
+	  tem = NULL_TREE;
+	  if (ca_i->is_bool)
+	    {
+	      for (l = i + 1; l < ca_count; l++)
+		{
+		  ca_l = VEC_index (cond_assoc_t, ca, l);
+		  if (ca_l->joined == true || ca_l->is_bool == false)
+		    continue;
+		  if (is_bitwise_binary_simple_combine (ca_i, ca_l))
+		    break;
+		}
+	      if (l < ca_count)
+		{
+		  tree tem2;
+		  enum tree_code bitw = chain_bitwise;
+
+		  ca_l = VEC_index (cond_assoc_t, ca, l);
+		  /* ~X == 0 -> X != 0, ~X != 0 -> X == 0 */
+		  if (ca_i->is_bool_not && ca_l->is_bool_not)
+		    {
+		      gimple s = SSA_NAME_DEF_STMT (ca_i->leaf);
+		      ca_i->leaf = gimple_assign_rhs1 (s);
+		      ca_i->code = (ca_i->code == EQ_EXPR ? NE_EXPR : EQ_EXPR);
+		      s = SSA_NAME_DEF_STMT (ca_l->leaf);
+		      ca_l->leaf = gimple_assign_rhs1 (s);
+		      ca_l->code = (ca_l->code == EQ_EXPR ? NE_EXPR : EQ_EXPR);
+		    }
+		  if (ca_i->code != ca_l->code)
+		    {
+		      gimple s;
+
+		      if (ca_i->is_bool_not)
+		        {
+			  s = SSA_NAME_DEF_STMT (ca_i->leaf);
+			  ca_i->leaf = gimple_assign_rhs1 (s);
+			  ca_i->code = (ca_i->code == EQ_EXPR ? NE_EXPR : EQ_EXPR);
+			}
+		      else
+		        {
+			  s = SSA_NAME_DEF_STMT (ca_l->leaf);
+			  ca_l->leaf = gimple_assign_rhs1 (s);
+			  ca_l->code = (ca_l->code == EQ_EXPR ? NE_EXPR : EQ_EXPR);
+			}
+		    }
+		  /* (X == 0 op Y == 0) != 0 -> (X op-inv Y) == 0.  */
+		  if (ca_i->code == EQ_EXPR && ca_l->code == EQ_EXPR)
+		    {
+		      bitw = (bitw == BIT_AND_EXPR ? BIT_IOR_EXPR
+						   : BIT_AND_EXPR);
+		      tem = build_cond_expr (NE_EXPR, ca_i->type,
+					     ca_i->leaf, ca_i->op2);
+		      tem2 = build_cond_expr (NE_EXPR, ca_l->type,
+					      ca_l->leaf, ca_l->op2);
+		      tem = build_cond_expr (bitw,
+					     TREE_TYPE (tem), tem, tem2);
+		      tem = build_cond_expr (EQ_EXPR, ca_i->type, tem,
+					     ca_i->op2);
+		    }
+		  else
+		    {
+		      tem = build_cond_expr (ca_i->code, ca_i->type,
+					     ca_i->leaf, ca_i->op2);
+		      tem2 = build_cond_expr (ca_l->code, ca_l->type,
+					      ca_l->leaf, ca_l->op2);
+		      tem = build_cond_expr (bitw,
+					     TREE_TYPE (tem), tem, tem2);
+		    }
+		  ca_l->joined = true;
+		}
+	    }
+
+	  if (!tem)
+	    tem = build_cond_expr (ca_i->code, ca_i->type, ca_i->leaf, ca_i->op2);
+	}
+
+      if (!collect)
+	collect = tem;
+      else
+	collect = build2 (chain_logical, TREE_TYPE (collect), collect, tem);
+    }
+
+  *pcode = TREE_CODE (collect);
+  *pop0 = TREE_OPERAND (collect, 0);
+  *pop1 = TREE_OPERAND (collect, 1);
+}
+
+/* Helper routine to do branch-cost optimization on binary expression
+   *POP0 *PCODE *POP1.  */
+
+static void
+branchcost_optimization_on_conditions (enum tree_code *pcode, tree *pop0,
+				       tree *pop1)
+{
+  tree op0 = *pop0;
+  tree op1 = *pop1;
+  tree type = TREE_TYPE (op0);
+  enum tree_code code = *pcode;
+  gimple stmt;
+  bool invert = false;
+  VEC (cond_assoc_t, heap) *ca = NULL;
+  int ca_count;
+
+  if (TREE_CODE (type) != BOOLEAN_TYPE
+      || !integer_zerop (op1)
+      || (code != EQ_EXPR && code != NE_EXPR))
+    return;
+
+  if (!SA.values
+      || TREE_CODE (op0) != SSA_NAME
+      || !bitmap_bit_p (SA.values, SSA_NAME_VERSION (op0)))
+   return;
+
+  invert = (code == EQ_EXPR);
+  stmt = SSA_NAME_DEF_STMT (op0);
+  if (gimple_code (stmt) != GIMPLE_ASSIGN)
+    return;
+  switch (gimple_assign_rhs_code (stmt))
+    {
+    case BIT_AND_EXPR:
+    case BIT_IOR_EXPR:
+      break;
+    default:
+      return;
+    }
+
+  ca = get_condition_list_for (op0, gimple_assign_rhs_code (stmt),
+			       &ca_count, code, op1);
+  combine_conds (ca, ca_count, invert, gimple_assign_rhs_code (stmt),
+		 pcode, pop0, pop1);
+  VEC_free (cond_assoc_t, heap, ca);
+}
+
 /* A subroutine of expand_gimple_basic_block.  Expand one GIMPLE_COND.
    Returns a new basic block if we've terminated the current basic
    block and created a new one.  */
@@ -1668,6 +2357,8 @@  expand_gimple_cond (basic_block bb, gimp
   code = gimple_cond_code (stmt);
   op0 = gimple_cond_lhs (stmt);
   op1 = gimple_cond_rhs (stmt);
+
+  normalize_truth_condition (&code, &op0, &op1);
   /* We're sometimes presented with such code:
        D.123_1 = x < y;
        if (D.123_1 != 0)
@@ -1676,44 +2367,16 @@  expand_gimple_cond (basic_block bb, gimp
      be cleaned up by combine.  But some pattern matchers like if-conversion
      work better when there's only one compare, so make up for this
      here as special exception if TER would have made the same change.  */
-  if (gimple_cond_single_var_p (stmt)
-      && SA.values
-      && TREE_CODE (op0) == SSA_NAME
-      && bitmap_bit_p (SA.values, SSA_NAME_VERSION (op0)))
-    {
-      gimple second = SSA_NAME_DEF_STMT (op0);
-      if (gimple_code (second) == GIMPLE_ASSIGN)
-	{
-	  enum tree_code code2 = gimple_assign_rhs_code (second);
-	  if (TREE_CODE_CLASS (code2) == tcc_comparison)
-	    {
-	      code = code2;
-	      op0 = gimple_assign_rhs1 (second);
-	      op1 = gimple_assign_rhs2 (second);
-	    }
-	  /* If jumps are cheap turn some more codes into
-	     jumpy sequences.  */
-	  else if (BRANCH_COST (optimize_insn_for_speed_p (), false) < 4)
-	    {
-	      if ((code2 == BIT_AND_EXPR
-		   && TYPE_PRECISION (TREE_TYPE (op0)) == 1
-		   && TREE_CODE (gimple_assign_rhs2 (second)) != INTEGER_CST)
-		  || code2 == TRUTH_AND_EXPR)
-		{
-		  code = TRUTH_ANDIF_EXPR;
-		  op0 = gimple_assign_rhs1 (second);
-		  op1 = gimple_assign_rhs2 (second);
-		}
-	      else if (code2 == BIT_IOR_EXPR || code2 == TRUTH_OR_EXPR)
-		{
-		  code = TRUTH_ORIF_EXPR;
-		  op0 = gimple_assign_rhs1 (second);
-		  op1 = gimple_assign_rhs2 (second);
-		}
-	    }
-	}
-    }
+  branchcost_optimization_on_conditions (&code, &op0, &op1);
 
+  /* Take care that final statement is a comparison, if we end
+     up with an AND or OR associative statement.  */
+  if (code == BIT_AND_EXPR || code == BIT_IOR_EXPR)
+    {
+      op0 = build2 (code, TREE_TYPE (op0), op0, op1);
+      code = NE_EXPR;
+      op1 = build_int_cst (TREE_TYPE (op0), 0);
+    }
   last2 = last = get_last_insn ();
 
   extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
Index: gcc-trunk/gcc/dojump.c
===================================================================
--- gcc-trunk.orig/gcc/dojump.c
+++ gcc-trunk/gcc/dojump.c
@@ -579,13 +579,12 @@  do_jump (tree exp, rtx if_false_label, r
 	goto normal;
 
       /* Boolean comparisons can be compiled as TRUTH_AND_EXPR.  */
-
     case TRUTH_AND_EXPR:
       /* High branch cost, expand as the bitwise AND of the conditions.
 	 Do the same if the RHS has side effects, because we're effectively
 	 turning a TRUTH_AND_EXPR into a TRUTH_ANDIF_EXPR.  */
       if (BRANCH_COST (optimize_insn_for_speed_p (),
-		       false) >= 4
+		       false) >= 1
 	  || TREE_SIDE_EFFECTS (TREE_OPERAND (exp, 1)))
 	goto normal;
       code = TRUTH_ANDIF_EXPR;
@@ -596,7 +595,7 @@  do_jump (tree exp, rtx if_false_label, r
       /* High branch cost, expand as the bitwise OR of the conditions.
 	 Do the same if the RHS has side effects, because we're effectively
 	 turning a TRUTH_OR_EXPR into a TRUTH_ORIF_EXPR.  */
-      if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 4
+      if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 1
 	  || TREE_SIDE_EFFECTS (TREE_OPERAND (exp, 1)))
 	goto normal;
       code = TRUTH_ORIF_EXPR;
Index: gcc-trunk/gcc/fold-const.c
===================================================================
--- gcc-trunk.orig/gcc/fold-const.c
+++ gcc-trunk/gcc/fold-const.c
@@ -3711,13 +3711,26 @@  simple_operand_p_2 (tree exp)
 
   code = TREE_CODE (exp);
 
-  if (TREE_CODE_CLASS (code) == tcc_comparison)
-    return (simple_operand_p (TREE_OPERAND (exp, 0))
-	    && simple_operand_p (TREE_OPERAND (exp, 1)));
+  if (code == TRUTH_ANDIF_EXPR || code == TRUTH_ORIF_EXPR)
+    return false;
+
+  /* We need to recurse the tree here for two reasons.  First is that
+     simple_operand_p would reject too many binary/unary/expression-kind
+     expressions.  Secondly, tree_could_trap_p doesn't inspect all kinds
+     of binary/expression/unary-expressions and so misses some side-effects.  */
+
+  if (TREE_CODE_CLASS (code) == tcc_binary
+      || TREE_CODE_CLASS (code) == tcc_comparison
+      || code == TRUTH_AND_EXPR || code == TRUTH_OR_EXPR
+      || code == TRUTH_XOR_EXPR)
+    return (simple_operand_p_2 (TREE_OPERAND (exp, 0))
+	    && simple_operand_p_2 (TREE_OPERAND (exp, 1)));
 
-  if (code == TRUTH_NOT_EXPR)
+  if (TREE_CODE_CLASS (code) == tcc_unary
+      || code == TRUTH_NOT_EXPR)
       return simple_operand_p_2 (TREE_OPERAND (exp, 0));
 
+  /* This function is in some cases too strict.  */
   return simple_operand_p (exp);
 }
 
@@ -4875,45 +4888,6 @@  fold_range_test (location_t loc, enum tr
       return or_op ? invert_truthvalue_loc (loc, tem) : tem;
     }
 
-  /* On machines where the branch cost is expensive, if this is a
-     short-circuited branch and the underlying object on both sides
-     is the same, make a non-short-circuit operation.  */
-  else if (LOGICAL_OP_NON_SHORT_CIRCUIT
-	   && lhs != 0 && rhs != 0
-	   && (code == TRUTH_ANDIF_EXPR
-	       || code == TRUTH_ORIF_EXPR)
-	   && operand_equal_p (lhs, rhs, 0))
-    {
-      /* If simple enough, just rewrite.  Otherwise, make a SAVE_EXPR
-	 unless we are at top level or LHS contains a PLACEHOLDER_EXPR, in
-	 which cases we can't do this.  */
-      if (simple_operand_p (lhs))
-	return build2_loc (loc, code == TRUTH_ANDIF_EXPR
-			   ? TRUTH_AND_EXPR : TRUTH_OR_EXPR,
-			   type, op0, op1);
-
-      else if (!lang_hooks.decls.global_bindings_p ()
-	       && !CONTAINS_PLACEHOLDER_P (lhs))
-	{
-	  tree common = save_expr (lhs);
-
-	  if (0 != (lhs = build_range_check (loc, type, common,
-					     or_op ? ! in0_p : in0_p,
-					     low0, high0))
-	      && (0 != (rhs = build_range_check (loc, type, common,
-						 or_op ? ! in1_p : in1_p,
-						 low1, high1))))
-	    {
-	      if (strict_overflow_p)
-		fold_overflow_warning (warnmsg,
-				       WARN_STRICT_OVERFLOW_COMPARISON);
-	      return build2_loc (loc, code == TRUTH_ANDIF_EXPR
-				 ? TRUTH_AND_EXPR : TRUTH_OR_EXPR,
-				 type, lhs, rhs);
-	    }
-	}
-    }
-
   return 0;
 }
 
@@ -5143,40 +5117,6 @@  fold_truth_andor_1 (location_t loc, enum
   code = ((code == TRUTH_AND_EXPR || code == TRUTH_ANDIF_EXPR)
 	  ? TRUTH_AND_EXPR : TRUTH_OR_EXPR);
 
-  /* If the RHS can be evaluated unconditionally and its operands are
-     simple, it wins to evaluate the RHS unconditionally on machines
-     with expensive branches.  In this case, this isn't a comparison
-     that can be merged.  */
-
-  if (BRANCH_COST (optimize_function_for_speed_p (cfun),
-		   false) >= 2
-      && ! FLOAT_TYPE_P (TREE_TYPE (rl_arg))
-      && simple_operand_p (rl_arg)
-      && simple_operand_p (rr_arg))
-    {
-      /* Convert (a != 0) || (b != 0) into (a | b) != 0.  */
-      if (code == TRUTH_OR_EXPR
-	  && lcode == NE_EXPR && integer_zerop (lr_arg)
-	  && rcode == NE_EXPR && integer_zerop (rr_arg)
-	  && TREE_TYPE (ll_arg) == TREE_TYPE (rl_arg)
-	  && INTEGRAL_TYPE_P (TREE_TYPE (ll_arg)))
-	return build2_loc (loc, NE_EXPR, truth_type,
-			   build2 (BIT_IOR_EXPR, TREE_TYPE (ll_arg),
-				   ll_arg, rl_arg),
-			   build_int_cst (TREE_TYPE (ll_arg), 0));
-
-      /* Convert (a == 0) && (b == 0) into (a | b) == 0.  */
-      if (code == TRUTH_AND_EXPR
-	  && lcode == EQ_EXPR && integer_zerop (lr_arg)
-	  && rcode == EQ_EXPR && integer_zerop (rr_arg)
-	  && TREE_TYPE (ll_arg) == TREE_TYPE (rl_arg)
-	  && INTEGRAL_TYPE_P (TREE_TYPE (ll_arg)))
-	return build2_loc (loc, EQ_EXPR, truth_type,
-			   build2 (BIT_IOR_EXPR, TREE_TYPE (ll_arg),
-				   ll_arg, rl_arg),
-			   build_int_cst (TREE_TYPE (ll_arg), 0));
-    }
-
   /* See if the comparisons can be merged.  Then get all the parameters for
      each side.  */
 
@@ -8406,13 +8346,12 @@  fold_truth_andor (location_t loc, enum t
   if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1)) != 0)
     return tem;
 
-  if ((BRANCH_COST (optimize_function_for_speed_p (cfun),
-		    false) >= 2)
-      && LOGICAL_OP_NON_SHORT_CIRCUIT
-      && (code == TRUTH_AND_EXPR
-          || code == TRUTH_ANDIF_EXPR
-          || code == TRUTH_OR_EXPR
-          || code == TRUTH_ORIF_EXPR))
+  /* Try to optimize TRUTH_AND/OR[IF]_EXPRs to associative TRUTH_AND/OR_EXPRs
+     chains.  */
+  if (code == TRUTH_AND_EXPR
+      || code == TRUTH_ANDIF_EXPR
+      || code == TRUTH_OR_EXPR
+      || code == TRUTH_ORIF_EXPR)
     {
       enum tree_code ncode, icode;
 
@@ -8440,7 +8379,7 @@  fold_truth_andor (location_t loc, enum t
 	  return fold_build2_loc (loc, icode, type, TREE_OPERAND (arg0, 0),
 				  tem);
 	}
-	/* Same as abouve but for (A AND[-IF] (B AND-IF C)) -> ((A AND B) AND-IF C),
+	/* Same as above but for (A AND[-IF] (B AND-IF C)) -> ((A AND B) AND-IF C),
 	   or (A OR[-IF] (B OR-IF C) -> ((A OR B) OR-IF C).  */
       else if (TREE_CODE (arg1) == icode
 	  && simple_operand_p_2 (arg0)
Index: gcc-trunk/gcc/testsuite/gcc.dg/pr46909.c
===================================================================
--- gcc-trunk.orig/gcc/testsuite/gcc.dg/pr46909.c
+++ gcc-trunk/gcc/testsuite/gcc.dg/pr46909.c
@@ -13,7 +13,7 @@  foo (unsigned int x)
   return -1;
 }
 
-/* { dg-final { scan-tree-dump-times "x_\[0-9\]+\\(D\\) != 4" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "x_\[0-9\]+\\(D\\) != 4" 0 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "x_\[0-9\]+\\(D\\) != 6" 0 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "x_\[0-9\]+\\(D\\) == 2" 0 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "x_\[0-9\]+\\(D\\) == 6" 0 "optimized" } } */
Index: gcc-trunk/gcc/testsuite/gcc.dg/tree-ssa/vrp33.c
===================================================================
--- gcc-trunk.orig/gcc/testsuite/gcc.dg/tree-ssa/vrp33.c
+++ gcc-trunk/gcc/testsuite/gcc.dg/tree-ssa/vrp33.c
@@ -3,7 +3,14 @@ 
 
 /* This is from PR14052.  */
 
-int f2(int x) { return x == 1 || x == 3 || x == 1; }
+int f2(int x)
+{
+  if (x == 1 || x == 3)
+    return 1;
+  if (x == 1)
+    return 1;
+  return 0;
+}
 
 /* { dg-final { scan-tree-dump "Folding predicate.*== 1 to 0" "vrp1" } } */
 /* { dg-final { cleanup-tree-dump "vrp1" } } */
Index: gcc-trunk/gcc/testsuite/gcc.target/i386/branch-cost1.c
===================================================================
--- gcc-trunk.orig/gcc/testsuite/gcc.target/i386/branch-cost1.c
+++ gcc-trunk/gcc/testsuite/gcc.target/i386/branch-cost1.c
@@ -11,6 +11,7 @@  foo (int a, int b)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "if " 2 "gimple" } } */
-/* { dg-final { scan-tree-dump-not " & " "gimple" } } */
+/* { dg-final { scan-tree-dump-times "if " 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times " & " 1 "gimple" } } */
+/* { dg-final { scan-assembler-times "jne|je" 2 } } */
 /* { dg-final { cleanup-tree-dump "gimple" } } */
Index: gcc-trunk/gcc/testsuite/gcc.target/i386/branch-cost2.c
===================================================================
--- gcc-trunk.orig/gcc/testsuite/gcc.target/i386/branch-cost2.c
+++ gcc-trunk/gcc/testsuite/gcc.target/i386/branch-cost2.c
@@ -13,4 +13,5 @@  foo (int a, int b)
 
 /* { dg-final { scan-tree-dump-times "if " 1 "gimple" } } */
 /* { dg-final { scan-tree-dump-times " & " 1 "gimple" } } */
+/* { dg-final { scan-assembler-times "jne|je" 1 } } */
 /* { dg-final { cleanup-tree-dump "gimple" } } */
Index: gcc-trunk/gcc/testsuite/gcc.target/i386/branch-cost3.c
===================================================================
--- gcc-trunk.orig/gcc/testsuite/gcc.target/i386/branch-cost3.c
+++ gcc-trunk/gcc/testsuite/gcc.target/i386/branch-cost3.c
@@ -13,4 +13,5 @@  foo (_Bool a, _Bool b)
 
 /* { dg-final { scan-tree-dump-times "if " 1 "gimple" } } */
 /* { dg-final { scan-tree-dump-times " & " 1 "gimple" } } */
+/* { dg-final { scan-assembler-times "jne|je" 1 } } */
 /* { dg-final { cleanup-tree-dump "gimple" } } */
Index: gcc-trunk/gcc/testsuite/gcc.target/i386/branch-cost4.c
===================================================================
--- gcc-trunk.orig/gcc/testsuite/gcc.target/i386/branch-cost4.c
+++ gcc-trunk/gcc/testsuite/gcc.target/i386/branch-cost4.c
@@ -11,6 +11,7 @@  foo (_Bool a, _Bool b)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "if " 2 "gimple" } } */
-/* { dg-final { scan-tree-dump-not " & " "gimple" } } */
+/* { dg-final { scan-tree-dump-times "if " 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times " & " 1 "gimple" } } */
+/* { dg-final { scan-assembler-times "jne|je" 2 } } */
 /* { dg-final { cleanup-tree-dump "gimple" } } */