From patchwork Tue Nov 8 15:30:48 2011
X-Patchwork-Submitter: Kai Tietz
X-Patchwork-Id: 124390
Date: Tue, 08 Nov 2011 10:30:48 -0500 (EST)
From: Kai Tietz
To: Richard Guenther
Cc: gcc-patches@gcc.gnu.org, Jeff Law
Subject: Re: [patch tree-optimization 1/2]: Branch-cost optimizations
Message-ID: <0a48a15d-ab75-4254-8e04-685893442a73@zmail14.collab.prod.int.phx2.redhat.com>

Hi,

with not much hope that this patch will make it into 4.7, here is a resent,
revised version of the first part of the patch.  I updated the patch
according to comments I got from matz on IRC yesterday.  The patch now uses
a vector for collecting the conditional chain of truth-AND/OR operations.
Additionally, it now checks SA.values consistently before trying to access
DEF_STMT.
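To illustrate the intended effect (this example and its names are mine, not
part of the patch): for integral operands compared against zero, a
short-circuit chain should now be combinable at expand time into a single
bitwise test when branches are expensive enough (BRANCH_COST >= 2), e.g.

/* Illustration only; function and variable names are made up.  With the
   patch, the condition below can be expanded as the single test
   (a | b) == 0 instead of two compare-and-branch sequences, which is
   what the adjusted branch-cost testcases check for.  */
int
both_zero (int a, int b)
{
  if (a == 0 && b == 0)
    return 1;
  return 0;
}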
Bootstrapped and regression-tested for all languages.  OK to apply?

Regards,
Kai

Index: gcc-trunk/gcc/cfgexpand.c
===================================================================
--- gcc-trunk.orig/gcc/cfgexpand.c
+++ gcc-trunk/gcc/cfgexpand.c
@@ -1650,6 +1650,695 @@ maybe_cleanup_end_of_block (edge e, rtx
     }
 }
 
+/* Check if a statement can be considered "simple".  Simple statements are:
+   - minimal invariants
+   - any non-SSA_NAME variant
+   - any SSA_NAME variant without a definition statement
+   - any SSA_NAME with a default definition
+   - an assignment of the form ~X, if X is minimal invariant or has no
+     definition statement.  We exclude floating-point types for X and Y
+     here, as ~(X cmp Y) can have special meaning on floats.
+   - an assignment of the form X ^ ~0, if X is minimal invariant or has
+     no definition statement.  */
+
+static bool
+is_bool_op_p (tree op, bool *is_not)
+{
+  gimple s;
+  enum tree_code code;
+
+  *is_not = false;
+
+  /* Reject result types not of boolean kind.  */
+  if (TREE_CODE (TREE_TYPE (op)) != BOOLEAN_TYPE)
+    return false;
+
+  if (is_gimple_min_invariant (op)
+      || TREE_CODE (op) != SSA_NAME)
+    return true;
+
+  if ((!SA.values || !bitmap_bit_p (SA.values, SSA_NAME_VERSION (op)))
+      && SSA_NAME_DEF_STMT (op)
+      && !SSA_NAME_IS_DEFAULT_DEF (op))
+    return false;
+  if (SSA_NAME_IS_DEFAULT_DEF (op)
+      || (s = SSA_NAME_DEF_STMT (op)) == NULL)
+    return true;
+
+  /* Reject statements which aren't assignments.  */
+  if (!is_gimple_assign (s))
+    return false;
+
+  code = gimple_assign_rhs_code (s);
+
+  /* See if we have a "simple" logical not.  */
+  if (code == BIT_NOT_EXPR)
+    *is_not = true;
+  else if (code == BIT_XOR_EXPR
+           && integer_all_onesp (gimple_assign_rhs2 (s)))
+    *is_not = true;
+  else
+    return false;
+
+  op = gimple_assign_rhs1 (s);
+
+  if (TREE_CODE (op) != SSA_NAME)
+    return false;
+
+  /* Can the inner statement be considered simple?  */
+  if (is_gimple_min_invariant (op)
+      || SSA_NAME_IS_DEFAULT_DEF (op)
+      || SSA_NAME_DEF_STMT (op) == NULL)
+    return true;
+
+  *is_not = false;
+  return false;
+}
+
+/* This helper routine normalizes comparisons on boolean-typed operands
+   for OP1 ==/!= CST.
+   Folded patterns are:
+   X ==/!= 1 -> X !=/== 0
+   ~(X cmp Y) !=/== 0 -> (X cmp Y) ==/!= 0, if X and Y aren't floats.
+   ~(X & Y) !=/== 0 -> (X & Y) ==/!= 0
+   ~(X | Y) !=/== 0 -> (X | Y) ==/!= 0
+   (X cmp Y) != 0 -> (X cmp Y)
+   (X cmp Y) == 0 -> (X cmp-invert Y), if X and Y aren't floats.
+   Note: We reject operations on floats here, as the pattern
+   ~(float-compare) can have side-effects.  */
+
+static void
+normalize_truth_condition (enum tree_code *cmp, tree *op1, tree *op2)
+{
+  gimple stmt, s;
+  tree nop0, nop1, op;
+  tree type = TREE_TYPE (*op1);
+  enum tree_code code2;
+
+  if (TREE_CODE (type) != BOOLEAN_TYPE
+      || (*cmp != EQ_EXPR && *cmp != NE_EXPR)
+      || TREE_CODE (*op2) != INTEGER_CST)
+    return;
+
+  if (!SA.values
+      || TREE_CODE (*op1) != SSA_NAME
+      || (!bitmap_bit_p (SA.values, SSA_NAME_VERSION (*op1))
+          && SSA_NAME_DEF_STMT (*op1)))
+    return;
+
+  if (integer_onep (*op2))
+    {
+      *op2 = build_int_cst (type, 0);
+      *cmp = (*cmp == EQ_EXPR ? NE_EXPR : EQ_EXPR);
+    }
+
+  for (;;)
+    {
+      if (!SA.values
+          || TREE_CODE (*op1) != SSA_NAME
+          || !bitmap_bit_p (SA.values, SSA_NAME_VERSION (*op1)))
+        return;
+
+      stmt = SSA_NAME_DEF_STMT (*op1);
+      if (gimple_code (stmt) != GIMPLE_ASSIGN)
+        return;
+
+      code2 = gimple_assign_rhs_code (stmt);
+      if (code2 != BIT_NOT_EXPR)
+        break;
+
+      op = gimple_assign_rhs1 (stmt);
+
+      if (!SA.values
+          || TREE_CODE (op) != SSA_NAME
+          || !bitmap_bit_p (SA.values, SSA_NAME_VERSION (op)))
+        return;
+
+      s = SSA_NAME_DEF_STMT (op);
+      if (!s || gimple_code (s) != GIMPLE_ASSIGN)
+        return;
+
+      code2 = gimple_assign_rhs_code (s);
+
+      if (TREE_CODE_CLASS (code2) == tcc_comparison)
+        {
+          tree t = TREE_TYPE (gimple_assign_rhs1 (s));
+
+          /* Don't fold ~(X cmp Y) !=/== 0 to (X cmp Y) ==/!= 0, if the
+             operands of the comparison are floating-point.  */
+          if (FLOAT_TYPE_P (t))
+            return;
+        }
+      else if (code2 != BIT_AND_EXPR && code2 != BIT_IOR_EXPR)
+        return;
+
+      *op1 = op;
+      *cmp = (*cmp == EQ_EXPR ? NE_EXPR : EQ_EXPR);
+    }
+
+  if (TREE_CODE_CLASS (code2) != tcc_comparison)
+    return;
+
+  nop0 = gimple_assign_rhs1 (stmt);
+  nop1 = gimple_assign_rhs2 (stmt);
+
+  /* Case (X cmp Y) != 0 -> X cmp Y.  */
+  if (*cmp == NE_EXPR)
+    {
+      *cmp = code2;
+      *op1 = nop0;
+      *op2 = nop1;
+      return;
+    }
+
+  /* Case (X cmp Y) == 0 -> X cmp-invert Y.  */
+  type = TREE_TYPE (nop0);
+  if (FLOAT_TYPE_P (type))
+    return;
+  code2 = invert_tree_comparison (code2,
+                                  HONOR_NANS (TYPE_MODE (type)));
+  if (code2 == ERROR_MARK)
+    return;
+  *cmp = code2;
+  *op1 = nop0;
+  *op2 = nop1;
+}
+
+/* Structure to keep some attributes of operands for a bitwise-and/or
+   chain within a condition.  */
+
+typedef struct {
+  tree leaf;
+  enum tree_code code;
+  tree op2;
+  tree type;
+  bool sa_valued;
+  bool is_bool;
+  bool is_trap;
+  bool has_sideeffect;
+  bool is_bool_not;
+  bool is_or;
+  bool is_and;
+  bool joined;
+} cond_assoc_t;
+
+DEF_VEC_O(cond_assoc_t);
+DEF_VEC_ALLOC_O(cond_assoc_t, heap);
+
+/* Build up the operand list in OP for a bitwise operand chain of kind
+   CODE and store each element in *PCONDS, if non-NULL.  CMP can be
+   either EQ_EXPR or NE_EXPR, and OP2 is integral zero.  The function
+   returns the number of elements found in OP.  */
+
+static int
+collect_cond_chain (tree op, enum tree_code code,
+                    VEC (cond_assoc_t, heap) **pconds,
+                    enum tree_code cmp, tree op2)
+{
+  gimple s = NULL;
+  int ret = 0;
+  bool sa_valued;
+
+  sa_valued = (SA.values
+               && TREE_CODE (op) == SSA_NAME
+               && (bitmap_bit_p (SA.values,
+                                 SSA_NAME_VERSION (op))
+                   || SSA_NAME_DEF_STMT (op) == NULL));
+
+  if (TREE_CODE (op) != SSA_NAME
+      || !sa_valued
+      || (s = SSA_NAME_DEF_STMT (op)) == NULL
+      || !is_gimple_assign (s)
+      || gimple_assign_rhs_code (s) != code)
+    {
+      enum tree_code ncode = cmp;
+      tree nop2 = op2;
+      cond_assoc_t tmp;
+
+      memset (&tmp, 0, sizeof (cond_assoc_t));
+      normalize_truth_condition (&ncode, &op, &nop2);
+
+      tmp.leaf = op;
+      tmp.code = ncode;
+      tmp.op2 = nop2;
+      tmp.type = boolean_type_node;
+      tmp.sa_valued = sa_valued;
+
+      s = NULL;
+      if (TREE_CODE (op) != SSA_NAME
+          || !sa_valued
+          || (s = SSA_NAME_DEF_STMT (op)) == NULL
+          || !is_gimple_assign (s))
+        s = NULL;
+
+      if (s)
+        tmp.is_trap = gimple_could_trap_p (s);
+      if (s)
+        tmp.has_sideeffect = gimple_has_side_effects (s);
+
+      if ((ncode == EQ_EXPR || ncode == NE_EXPR)
+          && TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
+          && integer_zerop (nop2))
+        {
+          tmp.is_bool_not = false;
+          tmp.is_bool = is_bool_op_p (op, &tmp.is_bool_not);
+
+          if (!tmp.is_bool || tmp.is_trap || tmp.has_sideeffect)
+            {
+              tmp.is_bool = false;
+              tmp.is_bool_not = false;
+            }
+
+          code = ERROR_MARK;
+          if (s && is_gimple_assign (s)
+              && TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE)
+            code = gimple_assign_rhs_code (s);
+          tmp.is_or = (code == BIT_IOR_EXPR);
+          tmp.is_and = (code == BIT_AND_EXPR);
+        }
+
+      VEC_safe_push (cond_assoc_t, heap, *pconds, &tmp);
+      return 1;
+    }
+
+  ret = collect_cond_chain (gimple_assign_rhs1 (s), code,
+                            pconds, cmp, op2);
+  ret += collect_cond_chain (gimple_assign_rhs2 (s), code,
+                             pconds, cmp, op2);
+
+  return ret;
+}
+
+/* This routine collects the bitwise operand chain of CODE in OP.
+   The number of elements found is stored in *PCNT.  The function
+   returns the allocated vector containing *PCNT elements.  */
+
+static VEC (cond_assoc_t, heap) *
+get_condition_list_for (tree op, enum tree_code code, int *pcnt,
+                        enum tree_code cmp, tree cst)
+{
+  int cnt;
+  VEC (cond_assoc_t, heap) *conds = NULL;
+
+  cnt = collect_cond_chain (op, code, &conds, cmp, cst);
+
+  *pcnt = cnt;
+
+  return conds;
+}
+
+/* Helper function to build a binary tree.
+   If OP0 is boolean-typed, CODE is equal to NE_EXPR, and OP1 is a zero
+   constant, then OP0 is returned.  Otherwise the result of build2 is
+   returned.  */
+
+static tree
+build_cond_expr (enum tree_code code, tree type, tree op0, tree op1)
+{
+  if (code == NE_EXPR && integer_zerop (op1)
+      && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE)
+    return op0;
+  return build2 (code, type, op0, op1);
+}
+
+/* Decide whether we can save branch costs by combining the L and R
+   operands with bitwise-and/or, depending on BRANCH_COST.  The function
+   returns TRUE if combining L and R might be profitable, otherwise
+   FALSE.  */
+
+static bool
+is_bitwise_binary_simple_combine (cond_assoc_t *l, cond_assoc_t *r)
+{
+  bool op0_has_not = false, op1_has_not = false;
+  int bcosts = BRANCH_COST (optimize_insn_for_speed_p (), false);
+  int costs = 1;
+
+  /* If jumps aren't cheap, avoid turning the code into jumpy
+     sequences; combining is always worthwhile.  */
+  if (bcosts > 4)
+    return true;
+
+  /* If branches are free, combining would save one branch but
+     increase the number of executed instructions.  */
+  if (bcosts <= 0)
+    return false;
+
+  if (l->is_bool == false || r->is_bool == false)
+    return false;
+
+  op0_has_not = l->is_bool_not;
+  op1_has_not = r->is_bool_not;
+
+  /* If L and R both have an inner bit-not, then there are no
+     additional costs for them.  */
+  if (op0_has_not && op1_has_not)
+    op0_has_not = op1_has_not = false;
+
+  /* If the comparison codes of L and R aren't equal, then combining
+     them might be more costly, unless one arm has an inner
+     logical-not.  */
+  if (l->code != r->code)
+    {
+      if (op0_has_not)
+        op0_has_not = false;
+      else if (op1_has_not)
+        op1_has_not = false;
+      else
+        op0_has_not = true;
+    }
+
+  /* If the leaf of L or R isn't boolean-typed, then we need at least
+     two operations to obtain the result.  Each logical-not costs one
+     more operation.  */
+  if (TREE_CODE (TREE_TYPE (l->leaf)) != BOOLEAN_TYPE)
+    costs += 2;
+  else if (op0_has_not)
+    costs++;
+
+  if (TREE_CODE (TREE_TYPE (r->leaf)) != BOOLEAN_TYPE)
+    costs += 2;
+  else if (op1_has_not)
+    costs++;
+
+  if (bcosts < costs)
+    return false;
+
+  return true;
+}
+
+/* Obtain some characteristics of a comparison on integral-typed
+   arguments and determine whether it is a simply combinable operand.
+   TRUE is stored in *PREFIX_NOT if operand CA contains a value-invert
+   operation, and TRUE is stored in *CMP_ZERO if operand CA is a
+   comparison against integral zero.  This function returns TRUE if a
+   possible candidate was matched, otherwise FALSE.  */
+
+static bool
+preeval_cond_integral (const cond_assoc_t *ca, bool *prefix_not, bool *cmp_zero)
+{
+  gimple s = NULL;
+  tree o;
+
+  if (ca->joined == true
+      || ca->is_bool
+      || ca->is_trap || ca->has_sideeffect
+      || ca->sa_valued == false
+      || ca->is_and || ca->is_or)
+    return false;
+
+  /* Reject anything different from X !=/== 0 and X !=/== ~0.  */
+  if ((ca->code != EQ_EXPR && ca->code != NE_EXPR)
+      || !INTEGRAL_TYPE_P (TREE_TYPE (ca->leaf))
+      || TREE_CODE (TREE_TYPE (ca->leaf)) == BOOLEAN_TYPE
+      || (!integer_zerop (ca->op2) && !integer_all_onesp (ca->op2)))
+    return false;
+
+  if (TREE_CODE (ca->leaf) != SSA_NAME)
+    return false;
+
+  *prefix_not = false;
+  *cmp_zero = integer_zerop (ca->op2);
+
+  s = SSA_NAME_DEF_STMT (ca->leaf);
+  if (!s || SSA_NAME_IS_DEFAULT_DEF (ca->leaf))
+    return true;
+
+  /* See if we have a valid ~X pattern in the left-hand comparison
+     operand.  */
+  if (!is_gimple_assign (s)
+      || gimple_assign_rhs_code (s) != BIT_NOT_EXPR)
+    return false;
+
+  o = gimple_assign_rhs1 (s);
+
+  if (TREE_CODE (o) != SSA_NAME
+      || !SA.values
+      || (!bitmap_bit_p (SA.values, SSA_NAME_VERSION (o))
+          && SSA_NAME_DEF_STMT (o) != NULL))
+    return false;
+
+  *prefix_not = true;
+  s = SSA_NAME_DEF_STMT (o);
+  if (!s || SSA_NAME_IS_DEFAULT_DEF (o))
+    return true;
+  *prefix_not = false;
+
+  return false;
+}
+
+/* Helper routine to do branch-cost optimization on the operand list in
+   CA with CA_COUNT elements.  The bitwise-binary operation BITCODE is
+   toggled between and/or if INVERTED is TRUE.  The final binary result
+   code is stored in *PCODE, and its final operands in *POP0 and
+   *POP1.  */
+
+static void
+combine_conds (VEC (cond_assoc_t, heap) *ca, int ca_count, bool inverted,
+               enum tree_code bitcode, enum tree_code *pcode, tree *pop0,
+               tree *pop1)
+{
+  VEC (cond_assoc_t, heap) *sub_ca;
+  int sub_ca_count, i, l;
+  tree collect = NULL_TREE, tem;
+  enum tree_code chain_bitwise = bitcode;
+  enum tree_code chain_logical;
+  cond_assoc_t *ca_i, *ca_l;
+
+  if (inverted)
+    chain_bitwise = (chain_bitwise == BIT_AND_EXPR ? BIT_IOR_EXPR
+                     : BIT_AND_EXPR);
+  chain_logical = (chain_bitwise == BIT_AND_EXPR ? TRUTH_ANDIF_EXPR
+                   : TRUTH_ORIF_EXPR);
+
+  /* First try to combine the comparisons on integral-typed arguments
+     which aren't boolean-typed.  */
+  for (i = 0; i < ca_count; i++)
+    {
+      tree o1, o2;
+      bool op1_not = false, op2_not = false;
+      bool op1_cmp_zero = false, op2_cmp_zero = false;
+      int bcosts = BRANCH_COST (optimize_insn_for_speed_p (), false);
+
+      if (bcosts < 2)
+        break;
+
+      ca_i = VEC_index (cond_assoc_t, ca, i);
+      if (!preeval_cond_integral (ca_i, &op1_not, &op1_cmp_zero))
+        continue;
+
+      if ((chain_bitwise == BIT_IOR_EXPR && ca_i->code != NE_EXPR)
+          || (chain_bitwise == BIT_AND_EXPR && ca_i->code != EQ_EXPR))
+        continue;
+
+      for (l = i + 1; l < ca_count; l++)
+        {
+          tree p1;
+
+          ca_l = VEC_index (cond_assoc_t, ca, l);
+          if (!preeval_cond_integral (ca_l, &op2_not, &op2_cmp_zero))
+            continue;
+          if (ca_i->code != ca_l->code)
+            continue;
+          if (TREE_TYPE (ca_i->leaf) != TREE_TYPE (ca_l->leaf))
+            continue;
+
+          o1 = ca_i->leaf;
+          o2 = ca_l->leaf;
+          p1 = ca_i->op2;
+
+          if (op1_cmp_zero == op2_cmp_zero)
+            {
+              if (op2_not && op1_not)
+                {
+                  o1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (o1));
+                  p1 = fold_build1 (BIT_NOT_EXPR, TREE_TYPE (p1), p1);
+                  o2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (o2));
+                  op1_cmp_zero = !op1_cmp_zero;
+                }
+              else if ((op1_not || op2_not) && bcosts <= 2)
+                continue;
+            }
+          else if (!op1_not && !op2_not)
+            continue;
+          else if (op1_not)
+            {
+              if (op2_not && bcosts <= 2)
+                continue;
+              o1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (o1));
+              p1 = fold_build1 (BIT_NOT_EXPR, TREE_TYPE (p1), p1);
+              op1_cmp_zero = !op1_cmp_zero;
+            }
+          else
+            o2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (o2));
+
+          if (op1_cmp_zero)
+            ca_i->leaf = build2 (BIT_IOR_EXPR, TREE_TYPE (o1), o1, o2);
+          else
+            ca_i->leaf = build2 (BIT_AND_EXPR, TREE_TYPE (o1), o1, o2);
+          ca_i->op2 = p1;
+          ca_l->joined = true;
+          break;
+        }
+    }
+
+  /* Now try to combine comparisons on boolean-typed arguments and do
+     the operations for inner bitwise and/or chains.  */
+  for (i = 0; i < ca_count; i++)
+    {
+      ca_i = VEC_index (cond_assoc_t, ca, i);
+      if (ca_i->joined == true)
+        continue;
+
+      if (ca_i->is_and || ca_i->is_or)
+        {
+          enum tree_code sub_code;
+          tree op0;
+          tree cst = ca_i->op2;
+          enum tree_code ao_code = ca_i->code;
+
+          sub_ca = get_condition_list_for (ca_i->leaf,
+                                           (ca_i->is_and ? BIT_AND_EXPR
+                                            : BIT_IOR_EXPR),
+                                           &sub_ca_count,
+                                           ao_code, cst);
+          combine_conds (sub_ca, sub_ca_count, (ao_code == EQ_EXPR),
+                         (ca_i->is_and ? BIT_AND_EXPR : BIT_IOR_EXPR),
+                         &sub_code, &op0, &cst);
+          VEC_free (cond_assoc_t, heap, sub_ca);
+          sub_ca = NULL;
+          tem = build_cond_expr (sub_code, ca_i->type, op0, cst);
+          ca_i->joined = true;
+        }
+      else
+        {
+          ca_i->joined = true;
+          tem = NULL_TREE;
+          if (ca_i->is_bool)
+            {
+              for (l = i + 1; l < ca_count; l++)
+                {
+                  ca_l = VEC_index (cond_assoc_t, ca, l);
+                  if (ca_l->joined == true || ca_l->is_bool == false)
+                    continue;
+                  if (is_bitwise_binary_simple_combine (ca_i, ca_l))
+                    break;
+                }
+              if (l < ca_count)
+                {
+                  tree tem2;
+                  enum tree_code bitw = chain_bitwise;
+
+                  ca_l = VEC_index (cond_assoc_t, ca, l);
+
+                  /* ~X == 0 -> X != 0, ~X != 0 -> X == 0.  */
+                  if (ca_i->is_bool_not && ca_l->is_bool_not)
+                    {
+                      gimple s = SSA_NAME_DEF_STMT (ca_i->leaf);
+
+                      ca_i->leaf = gimple_assign_rhs1 (s);
+                      ca_i->code = (ca_i->code == EQ_EXPR ? NE_EXPR
+                                    : EQ_EXPR);
+                      s = SSA_NAME_DEF_STMT (ca_l->leaf);
+                      ca_l->leaf = gimple_assign_rhs1 (s);
+                      ca_l->code = (ca_l->code == EQ_EXPR ? NE_EXPR
+                                    : EQ_EXPR);
+                    }
+                  if (ca_i->code != ca_l->code)
+                    {
+                      gimple s;
+
+                      if (ca_i->is_bool_not)
+                        {
+                          s = SSA_NAME_DEF_STMT (ca_i->leaf);
+                          ca_i->leaf = gimple_assign_rhs1 (s);
+                          ca_i->code = (ca_i->code == EQ_EXPR ? NE_EXPR
+                                        : EQ_EXPR);
+                        }
+                      else
+                        {
+                          s = SSA_NAME_DEF_STMT (ca_l->leaf);
+                          ca_l->leaf = gimple_assign_rhs1 (s);
+                          ca_l->code = (ca_l->code == EQ_EXPR ? NE_EXPR
+                                        : EQ_EXPR);
+                        }
+                    }
+
+                  /* (X == 0 op Y == 0) != 0 -> (X op-inv Y) == 0.  */
+                  if (ca_i->code == EQ_EXPR && ca_l->code == EQ_EXPR)
+                    {
+                      bitw = (bitw == BIT_AND_EXPR ? BIT_IOR_EXPR
+                              : BIT_AND_EXPR);
+                      tem = build_cond_expr (NE_EXPR, ca_i->type,
+                                             ca_i->leaf, ca_i->op2);
+                      tem2 = build_cond_expr (NE_EXPR, ca_l->type,
+                                              ca_l->leaf, ca_l->op2);
+                      tem = build_cond_expr (bitw,
+                                             TREE_TYPE (tem), tem, tem2);
+                      tem = build_cond_expr (EQ_EXPR, ca_i->type, tem,
+                                             ca_i->op2);
+                    }
+                  else
+                    {
+                      tem = build_cond_expr (ca_i->code, ca_i->type,
+                                             ca_i->leaf, ca_i->op2);
+                      tem2 = build_cond_expr (ca_l->code, ca_l->type,
+                                              ca_l->leaf, ca_l->op2);
+                      tem = build_cond_expr (bitw,
+                                             TREE_TYPE (tem), tem, tem2);
+                    }
+                  ca_l->joined = true;
+                }
+            }
+
+          if (!tem)
+            tem = build_cond_expr (ca_i->code, ca_i->type, ca_i->leaf,
+                                   ca_i->op2);
+        }
+
+      if (!collect)
+        collect = tem;
+      else
+        collect = build2 (chain_logical, TREE_TYPE (collect), collect,
+                          tem);
+    }
+
+  *pcode = TREE_CODE (collect);
+  *pop0 = TREE_OPERAND (collect, 0);
+  *pop1 = TREE_OPERAND (collect, 1);
+}
+
+/* Helper routine to do branch-cost optimization on the binary
+   expression *POP0 *PCODE *POP1.  */
+
+static void
+branchcost_optimization_on_conditions (enum tree_code *pcode, tree *pop0,
+                                       tree *pop1)
+{
+  tree op0 = *pop0;
+  tree op1 = *pop1;
+  tree type = TREE_TYPE (op0);
+  enum tree_code code = *pcode;
+  gimple stmt;
+  bool invert = false;
+  VEC (cond_assoc_t, heap) *ca = NULL;
+  int ca_count;
+
+  if (TREE_CODE (type) != BOOLEAN_TYPE
+      || !integer_zerop (op1)
+      || (code != EQ_EXPR && code != NE_EXPR))
+    return;
+
+  if (!SA.values
+      || TREE_CODE (op0) != SSA_NAME
+      || !bitmap_bit_p (SA.values, SSA_NAME_VERSION (op0)))
+    return;
+
+  invert = (code == EQ_EXPR);
+  stmt = SSA_NAME_DEF_STMT (op0);
+  if (gimple_code (stmt) != GIMPLE_ASSIGN)
+    return;
+
+  switch (gimple_assign_rhs_code (stmt))
+    {
+    case BIT_AND_EXPR:
+    case BIT_IOR_EXPR:
+      break;
+    default:
+      return;
+    }
+
+  ca = get_condition_list_for (op0, gimple_assign_rhs_code (stmt),
+                               &ca_count, code, op1);
+  combine_conds (ca, ca_count, invert, gimple_assign_rhs_code (stmt),
+                 pcode, pop0, pop1);
+  VEC_free (cond_assoc_t, heap, ca);
+}
+
 /* A subroutine of expand_gimple_basic_block.  Expand one GIMPLE_COND.
    Returns a new basic block if we've terminated the current basic
    block and created a new one.  */
 
@@ -1668,6 +2357,8 @@ expand_gimple_cond (basic_block bb, gimp
   code = gimple_cond_code (stmt);
   op0 = gimple_cond_lhs (stmt);
   op1 = gimple_cond_rhs (stmt);
+
+  normalize_truth_condition (&code, &op0, &op1);
   /* We're sometimes presented with such code:
        D.123_1 = x < y;
        if (D.123_1 != 0)
@@ -1676,44 +2367,16 @@ expand_gimple_cond (basic_block bb, gimp
      be cleaned up by combine.  But some pattern matchers like if-conversion
      work better when there's only one compare, so make up for this
      here as special exception if TER would have made the same change.  */
-  if (gimple_cond_single_var_p (stmt)
-      && SA.values
-      && TREE_CODE (op0) == SSA_NAME
-      && bitmap_bit_p (SA.values, SSA_NAME_VERSION (op0)))
-    {
-      gimple second = SSA_NAME_DEF_STMT (op0);
-      if (gimple_code (second) == GIMPLE_ASSIGN)
-        {
-          enum tree_code code2 = gimple_assign_rhs_code (second);
-          if (TREE_CODE_CLASS (code2) == tcc_comparison)
-            {
-              code = code2;
-              op0 = gimple_assign_rhs1 (second);
-              op1 = gimple_assign_rhs2 (second);
-            }
-          /* If jumps are cheap turn some more codes into
-             jumpy sequences.  */
-          else if (BRANCH_COST (optimize_insn_for_speed_p (), false) < 4)
-            {
-              if ((code2 == BIT_AND_EXPR
-                   && TYPE_PRECISION (TREE_TYPE (op0)) == 1
-                   && TREE_CODE (gimple_assign_rhs2 (second)) != INTEGER_CST)
-                  || code2 == TRUTH_AND_EXPR)
-                {
-                  code = TRUTH_ANDIF_EXPR;
-                  op0 = gimple_assign_rhs1 (second);
-                  op1 = gimple_assign_rhs2 (second);
-                }
-              else if (code2 == BIT_IOR_EXPR || code2 == TRUTH_OR_EXPR)
-                {
-                  code = TRUTH_ORIF_EXPR;
-                  op0 = gimple_assign_rhs1 (second);
-                  op1 = gimple_assign_rhs2 (second);
-                }
-            }
-        }
-    }
+  branchcost_optimization_on_conditions (&code, &op0, &op1);
+  /* Take care that the final statement is a comparison, if we end
+     up with an AND or OR associative statement.  */
+  if (code == BIT_AND_EXPR || code == BIT_IOR_EXPR)
+    {
+      op0 = build2 (code, TREE_TYPE (op0), op0, op1);
+      code = NE_EXPR;
+      op1 = build_int_cst (TREE_TYPE (op0), 0);
+    }
 
   last2 = last = get_last_insn ();
 
   extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
Index: gcc-trunk/gcc/dojump.c
===================================================================
--- gcc-trunk.orig/gcc/dojump.c
+++ gcc-trunk/gcc/dojump.c
@@ -579,13 +579,12 @@ do_jump (tree exp, rtx if_false_label, r
       goto normal;
 
     /* Boolean comparisons can be compiled as TRUTH_AND_EXPR.  */
-
    case TRUTH_AND_EXPR:
      /* High branch cost, expand as the bitwise AND of the conditions.
         Do the same if the RHS has side effects, because we're effectively
         turning a TRUTH_AND_EXPR into a TRUTH_ANDIF_EXPR.  */
      if (BRANCH_COST (optimize_insn_for_speed_p (),
-                      false) >= 4
+                      false) >= 1
          || TREE_SIDE_EFFECTS (TREE_OPERAND (exp, 1)))
        goto normal;
      code = TRUTH_ANDIF_EXPR;
@@ -596,7 +595,7 @@ do_jump (tree exp, rtx if_false_label, r
      /* High branch cost, expand as the bitwise OR of the conditions.
         Do the same if the RHS has side effects, because we're effectively
         turning a TRUTH_OR_EXPR into a TRUTH_ORIF_EXPR.  */
-      if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 4
+      if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 1
          || TREE_SIDE_EFFECTS (TREE_OPERAND (exp, 1)))
        goto normal;
      code = TRUTH_ORIF_EXPR;
Index: gcc-trunk/gcc/fold-const.c
===================================================================
--- gcc-trunk.orig/gcc/fold-const.c
+++ gcc-trunk/gcc/fold-const.c
@@ -3711,13 +3711,26 @@ simple_operand_p_2 (tree exp)
 
   code = TREE_CODE (exp);
 
-  if (TREE_CODE_CLASS (code) == tcc_comparison)
-    return (simple_operand_p (TREE_OPERAND (exp, 0))
-            && simple_operand_p (TREE_OPERAND (exp, 1)));
+  if (code == TRUTH_ANDIF_EXPR || code == TRUTH_ORIF_EXPR)
+    return false;
+
+  /* We need to recurse into the tree here for two reasons.  First,
+     simple_operand_p would reject too many binary/unary/expression-kind
+     expressions.  Second, tree_could_trap_p doesn't inspect all kinds
+     of binary/unary expressions and so misses some side-effects.  */
+
+  if (TREE_CODE_CLASS (code) == tcc_binary
+      || TREE_CODE_CLASS (code) == tcc_comparison
+      || code == TRUTH_AND_EXPR || code == TRUTH_OR_EXPR
+      || code == TRUTH_XOR_EXPR)
+    return (simple_operand_p_2 (TREE_OPERAND (exp, 0))
+            && simple_operand_p_2 (TREE_OPERAND (exp, 1)));
 
-  if (code == TRUTH_NOT_EXPR)
+  if (TREE_CODE_CLASS (code) == tcc_unary
+      || code == TRUTH_NOT_EXPR)
     return simple_operand_p_2 (TREE_OPERAND (exp, 0));
 
+  /* This function is in some cases too strict.  */
   return simple_operand_p (exp);
 }
@@ -4875,45 +4888,6 @@ fold_range_test (location_t loc, enum tr
       return or_op ? invert_truthvalue_loc (loc, tem) : tem;
     }
 
-  /* On machines where the branch cost is expensive, if this is a
-     short-circuited branch and the underlying object on both sides
-     is the same, make a non-short-circuit operation.  */
-  else if (LOGICAL_OP_NON_SHORT_CIRCUIT
-           && lhs != 0 && rhs != 0
-           && (code == TRUTH_ANDIF_EXPR
-               || code == TRUTH_ORIF_EXPR)
-           && operand_equal_p (lhs, rhs, 0))
-    {
-      /* If simple enough, just rewrite.  Otherwise, make a SAVE_EXPR
-         unless we are at top level or LHS contains a PLACEHOLDER_EXPR, in
-         which cases we can't do this.  */
-      if (simple_operand_p (lhs))
-        return build2_loc (loc, code == TRUTH_ANDIF_EXPR
-                           ? TRUTH_AND_EXPR : TRUTH_OR_EXPR,
-                           type, op0, op1);
-
-      else if (!lang_hooks.decls.global_bindings_p ()
-               && !CONTAINS_PLACEHOLDER_P (lhs))
-        {
-          tree common = save_expr (lhs);
-
-          if (0 != (lhs = build_range_check (loc, type, common,
-                                             or_op ? ! in0_p : in0_p,
-                                             low0, high0))
-              && (0 != (rhs = build_range_check (loc, type, common,
-                                                 or_op ? ! in1_p : in1_p,
-                                                 low1, high1))))
-            {
-              if (strict_overflow_p)
-                fold_overflow_warning (warnmsg,
-                                       WARN_STRICT_OVERFLOW_COMPARISON);
-              return build2_loc (loc, code == TRUTH_ANDIF_EXPR
-                                 ? TRUTH_AND_EXPR : TRUTH_OR_EXPR,
-                                 type, lhs, rhs);
-            }
-        }
-    }
-
   return 0;
 }
@@ -5143,40 +5117,6 @@ fold_truth_andor_1 (location_t loc, enum
   code = ((code == TRUTH_AND_EXPR || code == TRUTH_ANDIF_EXPR)
          ? TRUTH_AND_EXPR : TRUTH_OR_EXPR);
 
-  /* If the RHS can be evaluated unconditionally and its operands are
-     simple, it wins to evaluate the RHS unconditionally on machines
-     with expensive branches.  In this case, this isn't a comparison
-     that can be merged.  */
-
-  if (BRANCH_COST (optimize_function_for_speed_p (cfun),
-                   false) >= 2
-      && ! FLOAT_TYPE_P (TREE_TYPE (rl_arg))
-      && simple_operand_p (rl_arg)
-      && simple_operand_p (rr_arg))
-    {
-      /* Convert (a != 0) || (b != 0) into (a | b) != 0.  */
-      if (code == TRUTH_OR_EXPR
-          && lcode == NE_EXPR && integer_zerop (lr_arg)
-          && rcode == NE_EXPR && integer_zerop (rr_arg)
-          && TREE_TYPE (ll_arg) == TREE_TYPE (rl_arg)
-          && INTEGRAL_TYPE_P (TREE_TYPE (ll_arg)))
-        return build2_loc (loc, NE_EXPR, truth_type,
-                           build2 (BIT_IOR_EXPR, TREE_TYPE (ll_arg),
-                                   ll_arg, rl_arg),
-                           build_int_cst (TREE_TYPE (ll_arg), 0));
-
-      /* Convert (a == 0) && (b == 0) into (a | b) == 0.  */
-      if (code == TRUTH_AND_EXPR
-          && lcode == EQ_EXPR && integer_zerop (lr_arg)
-          && rcode == EQ_EXPR && integer_zerop (rr_arg)
-          && TREE_TYPE (ll_arg) == TREE_TYPE (rl_arg)
-          && INTEGRAL_TYPE_P (TREE_TYPE (ll_arg)))
-        return build2_loc (loc, EQ_EXPR, truth_type,
-                           build2 (BIT_IOR_EXPR, TREE_TYPE (ll_arg),
-                                   ll_arg, rl_arg),
-                           build_int_cst (TREE_TYPE (ll_arg), 0));
-    }
-
   /* See if the comparisons can be merged.  Then get all the parameters for
      each side.  */
 
@@ -8406,13 +8346,12 @@ fold_truth_andor (location_t loc, enum t
   if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1)) != 0)
     return tem;
 
-  if ((BRANCH_COST (optimize_function_for_speed_p (cfun),
-                    false) >= 2)
-      && LOGICAL_OP_NON_SHORT_CIRCUIT
-      && (code == TRUTH_AND_EXPR
-          || code == TRUTH_ANDIF_EXPR
-          || code == TRUTH_OR_EXPR
-          || code == TRUTH_ORIF_EXPR))
+  /* Try to optimize TRUTH_AND/OR[IF]_EXPRs into associative
+     TRUTH_AND/OR_EXPR chains.  */
+  if (code == TRUTH_AND_EXPR
+      || code == TRUTH_ANDIF_EXPR
+      || code == TRUTH_OR_EXPR
+      || code == TRUTH_ORIF_EXPR)
     {
       enum tree_code ncode, icode;
 
@@ -8440,7 +8379,7 @@ fold_truth_andor (location_t loc, enum t
          return fold_build2_loc (loc, icode, type, TREE_OPERAND (arg0, 0),
                                  tem);
        }
-      /* Same as abouve but for (A AND[-IF] (B AND-IF C)) -> ((A AND B) AND-IF C),
+      /* Same as above but for (A AND[-IF] (B AND-IF C)) -> ((A AND B) AND-IF C),
         or (A OR[-IF] (B OR-IF C) -> ((A OR B) OR-IF C).  */
       else if (TREE_CODE (arg1) == icode
               && simple_operand_p_2 (arg0)
Index: gcc-trunk/gcc/testsuite/gcc.dg/pr46909.c
===================================================================
--- gcc-trunk.orig/gcc/testsuite/gcc.dg/pr46909.c
+++ gcc-trunk/gcc/testsuite/gcc.dg/pr46909.c
@@ -13,7 +13,7 @@ foo (unsigned int x)
     return -1;
 }
 
-/* { dg-final { scan-tree-dump-times "x_\[0-9\]+\\(D\\) != 4" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "x_\[0-9\]+\\(D\\) != 4" 0 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "x_\[0-9\]+\\(D\\) != 6" 0 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "x_\[0-9\]+\\(D\\) == 2" 0 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "x_\[0-9\]+\\(D\\) == 6" 0 "optimized" } } */
Index: gcc-trunk/gcc/testsuite/gcc.dg/tree-ssa/vrp33.c
===================================================================
--- gcc-trunk.orig/gcc/testsuite/gcc.dg/tree-ssa/vrp33.c
+++ gcc-trunk/gcc/testsuite/gcc.dg/tree-ssa/vrp33.c
@@ -3,7 +3,14 @@
 
 /* This is from PR14052.  */
 
-int f2(int x) { return x == 1 || x == 3 || x == 1; }
+int f2(int x)
+{
+  if (x == 1 || x == 3)
+    return 1;
+  if (x == 1)
+    return 1;
+  return 0;
+}
 
 /* { dg-final { scan-tree-dump "Folding predicate.*== 1 to 0" "vrp1" } } */
 /* { dg-final { cleanup-tree-dump "vrp1" } } */
Index: gcc-trunk/gcc/testsuite/gcc.target/i386/branch-cost1.c
===================================================================
--- gcc-trunk.orig/gcc/testsuite/gcc.target/i386/branch-cost1.c
+++ gcc-trunk/gcc/testsuite/gcc.target/i386/branch-cost1.c
@@ -11,6 +11,7 @@ foo (int a, int b)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "if " 2 "gimple" } } */
-/* { dg-final { scan-tree-dump-not " & " "gimple" } } */
+/* { dg-final { scan-tree-dump-times "if " 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times " & " 1 "gimple" } } */
+/* { dg-final { scan-assembler-times "jne|je" 2 } } */
 /* { dg-final { cleanup-tree-dump "gimple" } } */
Index: gcc-trunk/gcc/testsuite/gcc.target/i386/branch-cost2.c
===================================================================
--- gcc-trunk.orig/gcc/testsuite/gcc.target/i386/branch-cost2.c
+++ gcc-trunk/gcc/testsuite/gcc.target/i386/branch-cost2.c
@@ -13,4 +13,5 @@ foo (int a, int b)
 
 /* { dg-final { scan-tree-dump-times "if " 1 "gimple" } } */
 /* { dg-final { scan-tree-dump-times " & " 1 "gimple" } } */
+/* { dg-final { scan-assembler-times "jne|je" 1 } } */
 /* { dg-final { cleanup-tree-dump "gimple" } } */
Index: gcc-trunk/gcc/testsuite/gcc.target/i386/branch-cost3.c
===================================================================
--- gcc-trunk.orig/gcc/testsuite/gcc.target/i386/branch-cost3.c
+++ gcc-trunk/gcc/testsuite/gcc.target/i386/branch-cost3.c
@@ -13,4 +13,5 @@ foo (_Bool a, _Bool b)
 
 /* { dg-final { scan-tree-dump-times "if " 1 "gimple" } } */
 /* { dg-final { scan-tree-dump-times " & " 1 "gimple" } } */
+/* { dg-final { scan-assembler-times "jne|je" 1 } } */
 /* { dg-final { cleanup-tree-dump "gimple" } } */
Index: gcc-trunk/gcc/testsuite/gcc.target/i386/branch-cost4.c
===================================================================
--- gcc-trunk.orig/gcc/testsuite/gcc.target/i386/branch-cost4.c
+++ gcc-trunk/gcc/testsuite/gcc.target/i386/branch-cost4.c
@@ -11,6 +11,7 @@ foo (_Bool a, _Bool b)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "if " 2 "gimple" } } */
-/* { dg-final { scan-tree-dump-not " & " "gimple" } } */
+/* { dg-final { scan-tree-dump-times "if " 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times " & " 1 "gimple" } } */
+/* { dg-final { scan-assembler-times "jne|je" 2 } } */
 /* { dg-final { cleanup-tree-dump "gimple" } } */