[SH] PR 51244 - Fix defects introduced in 4.8

Message ID	1380877594.4778.23.camel@yam-132-YW-E178-FTW
State	New
Headers	show Return-Path: <gcc-patches-return-350460-incoming=patchwork.ozlabs.org@gcc.gnu.org> DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :message-id:subject:from:to:date:content-type:mime-version; q= dns; s=default; b=w3nolon54/pDE40qqKnW6oFDnHubXW3psb7eLBqs+xZOwl SojAM3Dz6FkBPxBs+U2Y5JKiqPWjd+tHWgZOwu+godUnHOTUth+Z/roCsIvJvR01 MGnCm+jpv2JCvWaIc1CqkNtuHo/dF1wrgOHIPTJFncOCY5QJO3zA3cHh7U+LM= Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org Message-ID: <1380877594.4778.23.camel@yam-132-YW-E178-FTW> Subject: [SH] PR 51244 - Fix defects introduced in 4.8 From: Oleg Endo <oleg.endo@t-online.de> To: gcc-patches <gcc-patches@gcc.gnu.org> Date: Fri, 04 Oct 2013 11:06:34 +0200 Content-Type: multipart/mixed; boundary="=-6K4YBDQ8tJ1ryv9LzjTh" Mime-Version: 1.0

Index: gcc/config.gcc =================================================================== --- gcc/config.gcc (revision 202876) +++ gcc/config.gcc (working copy) @@ -462,6 +462,7 @@ cpu_type=sh need_64bit_hwint=yes extra_options="${extra_options} fused-madd.opt" + extra_objs="${extra_objs} ifcvt_sh.o" ;; v850*-*-*) cpu_type=v850 Index: gcc/config/sh/sh.c =================================================================== --- gcc/config/sh/sh.c (revision 202876) +++ gcc/config/sh/sh.c (working copy) @@ -53,6 +53,9 @@ #include "alloc-pool.h" #include "tm-constrs.h" #include "opts.h" +#include "tree-pass.h" +#include "pass_manager.h" +#include "context.h" #include <sstream> #include <vector> @@ -311,6 +314,7 @@ static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool); static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&, enum machine_mode, bool); +static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2); static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED; @@ -587,6 +591,9 @@ #undef TARGET_CANONICALIZE_COMPARISON #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison +#undef TARGET_FIXED_CONDITION_CODE_REGS +#define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs + /* Machine-specific symbol_ref flags. */ #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0) @@ -710,6 +717,34 @@ #undef err_ret } +/* Register SH specific RTL passes. */ +extern opt_pass* make_pass_ifcvt_sh (gcc::context* ctx, bool split_insns, + const char* name); +static void +register_sh_passes (void) +{ + if (!TARGET_SH1) + return; + +/* Running the ifcvt_sh pass after ce1 generates better code when + comparisons are combined and reg-reg moves are introduced, because + reg-reg moves will be eliminated afterwards. However, there are quite + some cases where combine will be unable to fold comparison related insns, + thus for now don't do it. + register_pass (make_pass_ifcvt_sh (g, false, "ifcvt1_sh"), + PASS_POS_INSERT_AFTER, "ce1", 1); +*/ + + /* Run ifcvt_sh pass after combine but before register allocation. */ + register_pass (make_pass_ifcvt_sh (g, true, "ifcvt2_sh"), + PASS_POS_INSERT_AFTER, "split1", 1); + + /* Run ifcvt_sh pass after register allocation and basic block reordering + as this sometimes creates new opportunities. */ + register_pass (make_pass_ifcvt_sh (g, true, "ifcvt3_sh"), + PASS_POS_INSERT_AFTER, "split4", 1); +} + /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override various options, and do some machine dependent initialization. */ static void @@ -1022,6 +1057,8 @@ target CPU. */ selected_atomic_model_ = parse_validate_atomic_model_option (sh_atomic_model_str); + + register_sh_passes (); } /* Print the operand address in x to the stream. */ @@ -1908,7 +1945,7 @@ static void sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1, enum machine_mode mode, - bool op0_preserve_value ATTRIBUTE_UNUSED) + bool op0_preserve_value) { /* When invoked from within the combine pass the mode is not specified, so try to get it from one of the operands. */ @@ -1928,6 +1965,9 @@ // Make sure that the constant operand is the second operand. if (CONST_INT_P (op0) && !CONST_INT_P (op1)) { + if (op0_preserve_value) + return; + std::swap (op0, op1); cmp = swap_condition (cmp); } @@ -2016,6 +2056,14 @@ *code = (int)tmp_code; } +bool +sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2) +{ + *p1 = T_REG; + *p2 = INVALID_REGNUM; + return true; +} + enum rtx_code prepare_cbranch_operands (rtx *operands, enum machine_mode mode, enum rtx_code comparison) Index: gcc/config/sh/sh.md =================================================================== --- gcc/config/sh/sh.md (revision 202876) +++ gcc/config/sh/sh.md (working copy) @@ -8419,89 +8419,32 @@ return output_branch (sh_eval_treg_value (operands[1]), insn, operands); } "&& 1" - [(set (pc) (if_then_else (eq (reg:SI T_REG) (match_dup 2)) - (label_ref (match_dup 0)) - (pc)))] + [(const_int 0)] { - /* Try to find missed test and branch combine opportunities which result - in redundant T bit tests before conditional branches. - This is done not only after combine (and before reload) but in every - split pass, because some opportunities are formed also after combine. - FIXME: Probably this would not be needed if CCmode was used - together with TARGET_FIXED_CONDITION_CODE_REGS. */ + /* Try to canonicalize the branch condition if it is not one of: + (ne (reg:SI T_REG) (const_int 0)) + (eq (reg:SI T_REG) (const_int 0)) - const int treg_value = sh_eval_treg_value (operands[1]); - operands[2] = NULL_RTX; + Instead of splitting out a new insn, we modify the current insn's + operands as needed. This preserves things such as REG_DEAD notes. */ - /* Scan the insns backwards for an insn that sets the T bit by testing a - reg against zero like: - (set (reg T_REG) (eq (reg) (const_int 0))) */ - rtx testing_insn = NULL_RTX; - rtx tested_reg = NULL_RTX; + if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE) + && REG_P (XEXP (operands[1], 0)) && REGNO (XEXP (operands[1], 0)) == T_REG + && XEXP (operands[1], 1) == const0_rtx) + DONE; - set_of_reg s0 = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn, - prev_nonnote_insn_bb); - if (s0.set_src != NULL_RTX - && GET_CODE (s0.set_src) == EQ - && REG_P (XEXP (s0.set_src, 0)) - && satisfies_constraint_Z (XEXP (s0.set_src, 1))) - { - testing_insn = s0.insn; - tested_reg = XEXP (s0.set_src, 0); - } - else - FAIL; + int branch_cond = sh_eval_treg_value (operands[1]); + rtx new_cond_rtx = NULL_RTX; - /* Continue scanning the insns backwards and try to find the insn that - sets the tested reg which we found above. If the reg is set by storing - the T bit or the negated T bit we can eliminate the test insn before - the branch. Notice that the branch condition has to be inverted if the - test is eliminated. */ + if (branch_cond == 0) + new_cond_rtx = gen_rtx_EQ (VOIDmode, get_t_reg_rtx (), const0_rtx); + else if (branch_cond == 1) + new_cond_rtx = gen_rtx_NE (VOIDmode, get_t_reg_rtx (), const0_rtx); - /* If the T bit is used between the testing insn and the brach insn - leave it alone. */ - if (reg_used_between_p (get_t_reg_rtx (), testing_insn, curr_insn)) - FAIL; - - while (true) - { - /* It's not safe to go beyond the current basic block after reload. */ - set_of_reg s1 = sh_find_set_of_reg (tested_reg, s0.insn, - reload_completed - ? prev_nonnote_insn_bb - : prev_nonnote_insn); - if (s1.set_src == NULL_RTX) - break; - - if (t_reg_operand (s1.set_src, VOIDmode)) - operands[2] = GEN_INT (treg_value ^ 1); - else if (negt_reg_operand (s1.set_src, VOIDmode)) - operands[2] = GEN_INT (treg_value); - else if (REG_P (s1.set_src)) - { - /* If it's a reg-reg copy follow the copied reg. This can - happen e.g. when T bit store zero-extensions are - eliminated. */ - tested_reg = s1.set_src; - s0.insn = s1.insn; - continue; - } - - /* It's only safe to remove the testing insn if the T bit is not - modified between the testing insn and the insn that stores the - T bit. Notice that some T bit stores such as negc also modify - the T bit. */ - if (modified_between_p (get_t_reg_rtx (), s1.insn, testing_insn) - || modified_in_p (get_t_reg_rtx (), s1.insn)) - operands[2] = NULL_RTX; - - break; - } - - if (operands[2] == NULL_RTX) - FAIL; - - set_insn_deleted (testing_insn); + if (new_cond_rtx != NULL_RTX) + validate_change (curr_insn, &XEXP (XEXP (PATTERN (curr_insn), 1), 0), + new_cond_rtx, false); + DONE; } [(set_attr "type" "cbranch")]) @@ -11480,10 +11423,12 @@ ;; multiple insns like: ;; movt Rn ;; tst Rn,Rn +;; This requires an additional pseudo. The SH specific ifcvt_sh pass will +;; look for this insn. Disallow using it if pseudos can't be created. (define_insn_and_split "nott" [(set (reg:SI T_REG) - (xor:SI (match_operand:SI 0 "t_reg_operand" "") (const_int 1)))] - "TARGET_SH1" + (xor:SI (match_operand:SI 0 "t_reg_operand") (const_int 1)))] + "TARGET_SH2A || (TARGET_SH1 && can_create_pseudo_p ())" { gcc_assert (TARGET_SH2A); return "nott"; Index: gcc/config/sh/t-sh =================================================================== --- gcc/config/sh/t-sh (revision 202876) +++ gcc/config/sh/t-sh (working copy) @@ -21,6 +21,10 @@ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/sh/sh-c.c +ifcvt_sh.o: $(srcdir)/config/sh/ifcvt_sh.cc \ + $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(TM_H) $(TM_P_H) coretypes.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + DEFAULT_ENDIAN = $(word 1,$(TM_ENDIAN_CONFIG)) OTHER_ENDIAN = $(word 2,$(TM_ENDIAN_CONFIG)) Index: gcc/config/sh/ifcvt_sh.cc =================================================================== --- gcc/config/sh/ifcvt_sh.cc (revision 0) +++ gcc/config/sh/ifcvt_sh.cc (revision 0) @@ -0,0 +1,1489 @@ +/* An SH specific if-conversion RTL pass that tries to combine comparisons + and redundant condition code register stores across multiple basic blocks. + Copyright (C) 2013 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "machmode.h" +#include "basic-block.h" +#include "df.h" +#include "rtl.h" +#include "insn-config.h" +#include "insn-codes.h" +#include "emit-rtl.h" +#include "recog.h" +#include "tree-pass.h" +#include "target.h" +#include "expr.h" + +#include <algorithm> +#include <list> +#include <vector> + +/* +This pass tries to optimize for example this: + mov.l @(4,r4),r1 + tst r1,r1 + movt r1 + tst r1,r1 + bt/s .L5 + +into something simpler: + mov.l @(4,r4),r1 + tst r1,r1 + bf/s .L5 + +Such sequences can be identified by looking for conditional branches and +checking whether the ccreg is set before the conditional branch +by testing another register for != 0, which was set by a ccreg store. +This can be optimized by eliminating the redundant comparison and +inverting the branch condition. There can be multiple comparisons in +different basic blocks that all end up in the redunant test insn before the +conditional branch. Some example RTL ... + +Example 1) +---------- + +[bb 3] +(set (reg:SI 147 t) (eq:SI (reg:SI 173) (const_int 0))) +(set (reg:SI 167) (xor:SI (reg:SI 147 t) (const_int 1))) +-> bb 5 + +[bb 4] +(set (reg:SI 147 t) (eq:SI (reg:SI 177) (const_int 0))) +(set (reg:SI 167) (reg:SI 147 t)) +-> bb 5 + +[bb 5] +(set (reg:SI 147 t) (eq:SI (reg:SI 167) (const_int 0))) +(set (pc) (if_then_else (ne (reg:SI 147 t) (const_int 0)) + (label_ref:SI 50) (pc))) + +In [bb 4] elimination of the comparison would require inversion of the branch +condition and compensation of other BBs. +Instead an inverting reg-move can be used: + +[bb 3] +(set (reg:SI 167) (reg:SI 173)) +-> bb 5 + +[BB 4] +(set (reg:SI 167) (not:SI (reg:SI 177))) +-> bb 5 + +[bb 5] +(set (reg:SI 147 t) (eq:SI (reg:SI 167) (const_int 0))) +(set (pc) (if_then_else (ne (reg:SI 147 t) (const_int 0))) + (label_ref:SI 50) (pc))) + + +Example 2) +---------- + +[bb 3] +(set (reg:SI 147 t) (gt:SI (reg:SI 173) (reg:SI 175))) +(set (reg:SI 167) (reg:SI 147 t)) +-> bb 5 + +[bb 4] +(set (reg:SI 147 t) (gt:SI (reg:SI 177) (reg:SI 179))) +(set (reg:SI 167) (reg:SI 147 t)) +-> bb 5 + +[bb 5] +(set (reg:SI 147 t) (eq:SI (reg:SI 167) (const_int 0))) +(set (pc) (if_then_else (ne (reg:SI 147 t) (const_int 0)) + (label_ref:SI 51) (pc))) + +The common comparison is factored out and the branch condition is inverted: + +[bb 3] +(set (reg:SI 167) (reg:SI 173)) +(set (reg:SI 200) (reg:SI 175)) +-> bb 5 + +[bb 4] +(set (reg:SI 167) (reg:SI 177)) +(set (reg:SI 200) (reg:SI 179)) +-> bb 5 + +[bb 5] +(set (reg:SI 147 t) (gt:SI (reg:SI 167) (reg:SI 200))) +(set (pc) (if_then_else (eq (reg:SI 147 t) (const_int 0)) + (label_ref:SI 51) (pc))) + + +Example 3) +---------- + +[bb 3] +(set (reg:SI 147 t) (gt:SI (reg:SI 173) (reg:SI 175))) +(set (reg:SI 167) (reg:SI 147 t)) +-> bb 5 + +[bb 4] +(set (reg:SI 147 t) (ge:SI (reg:SI 179) (reg:SI 177))) +(set (reg:SI 167) (reg:SI 147 t)) +-> bb 5 + +[bb 5] +(set (reg:SI 147 t) (eq:SI (reg:SI 167) (const_int 0))) +(set (pc) (if_then_else (ne (reg:SI 147 t) (const_int 0)) + (label_ref:SI 51) (pc))) + +The T bit lifetime is extended and the branch condition is inverted: + +[bb 3] +(set (reg:SI 147 t) (gt:SI (reg:SI 173) (reg:SI 175))) +-> bb 5 + +[bb 4] +(set (reg:SI 147 t) (ge:SI (reg:SI 179) (reg:SI 177))) +-> bb 5 + +[bb 5] +(set (pc) (if_then_else (eq (reg:SI 147 t) (const_int 0)) + (label_ref:SI 51) (pc))) + + +Example 4) +---------- + +[bb 3] +(set (reg:SI 147 t) (eq:SI (reg:SI 173) (const_int 5))) +(set (reg:SI 167) (reg:SI 147 t)) +-> bb 5 + +[bb 4] +(set (reg:SI 147 t) (eq:SI (reg:SI 176) (const_int 5))) +(set (reg:SI 167) (xor:SI (reg:SI 147 t) (const_int 1))) +-> bb 5 + +[bb 5] +(set (reg:SI 147 t) (eq:SI (reg:SI 167) (const_int 0))) +(set (pc) (if_then_else (ne (reg:SI 147 t) (const_int 0)) + (label_ref:SI 50) (pc))) + +In this case the comparisons are the same and could be combined, but the +branch condition is different for [bb 3] and [bb 5]. Since the comparison +is not a zero comparison, we can't negate one of the operands. The best thing +we can do here is to eliminate the comparison before the cbranch and invert +the ccreg in one of the BBs. On SH2A this will utilize the 'nott' instruction. + +[bb 3] +(set (reg:SI 147 t) (eq:SI (reg:SI 173) (const_int 5))) +-> bb 5 + +[bb 4] +(set (reg:SI 147 t) (eq:SI (reg:SI 176) (const_int 5))) +(set (reg:SI 147 t) (xor:SI (reg:SI 147 t) (const_int 1))) +-> bb 5 + +[bb 5] +(set (pc) (if_then_else (eq (reg:SI 147 t) (const_int 0)) // inverted + (label_ref:SI 50) (pc))) + + +In order to handle cases such as above the RTL pass does the following: + +- Find the ccreg sets (comparisons) and ccreg stores + (inverting and non-inverting) in all related BBs. + +- If the comparison types in the BBs are all the same, try to combine the + comparisons in the BBs and replace the zero comparison before the cbranch + with the common comparison. + + - If the cstores are the same, move the comparison before the cbranch + and replace the comparisons in the BBs with reg-reg copies to get the + operands in place (create new pseudo regs). + + - If the cstores differ, try to apply the special case + (eq (reg) (const_int 0)) -> inverted = (not (reg)). + for the subordinate cstore types and eliminate the dominating ones. + +- If the comparison types in the BBs are not the same, or the first approach + doesn't work out for some reason, try to eliminate the comparison before the + cbranch by extending the lifetime of the ccreg by leaving the individual + comparisons but eliminating the cstores. + If the cstores are all the same this is straight forward. + If they're not, try to reverse the ccreg for the subordinate cstore type + and eliminate the dominating one. +*/ + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +// Helper functions + +#define log_msg(...)\ + do { if (dump_file != NULL) fprintf (dump_file, __VA_ARGS__); } while (0) + +#define log_insn(i)\ + do { if (dump_file != NULL) print_rtl_single (dump_file, (const_rtx)i); } while (0) + +#define log_rtx(r)\ + do { if (dump_file != NULL) print_rtl (dump_file, (const_rtx)r); } while (0) + +#define log_return(retval, ...)\ + do { if (dump_file != NULL) fprintf (dump_file, __VA_ARGS__); \ + return retval; } while (0) + +#define log_return_void(...)\ + do { if (dump_file != NULL) fprintf (dump_file, __VA_ARGS__); \ + return; } while (0) + +struct set_of_reg +{ + // The insn where the search stopped or NULL_RTX. + rtx insn; + + // The set rtx of the specified reg if found, NULL_RTX otherwise. + // Notice that the set rtx can also be in a parallel. + const_rtx set_rtx; + + // The set source operand rtx if found, NULL_RTX otherwise. + rtx + set_src (void) const + { + return set_rtx == NULL_RTX ? NULL_RTX : XEXP (set_rtx, 1); + } + + // The set destination operand rtx if found, NULL_RTX otherwise. + rtx + set_dst (void) const + { + return set_rtx == NULL_RTX ? NULL_RTX : XEXP (set_rtx, 0); + } + + bool + empty (void) const + { + return insn == NULL_RTX || set_rtx == NULL_RTX; + } +}; + +// Given a reg rtx and a start insn find the insn (in the same basic block) +// that sets the reg. +static set_of_reg +find_set_of_reg_bb (rtx reg, rtx insn) +{ + set_of_reg result = { insn, NULL_RTX }; + + if (!REG_P (reg) || insn == NULL_RTX) + return result; + + for (result.insn = insn; result.insn != NULL_RTX; + result.insn = prev_nonnote_insn_bb (result.insn)) + { + if (BARRIER_P (result.insn)) + return result; + if (!NONJUMP_INSN_P (result.insn)) + continue; + if (reg_set_p (reg, result.insn)) + { + result.set_rtx = set_of (reg, result.insn); + if (result.set_rtx == NULL_RTX || GET_CODE (result.set_rtx) != SET) + result.set_rtx = NULL_RTX; + return result; + } + } + + return result; +} + +static bool +reg_dead_after_insn (const_rtx reg, const_rtx insn) +{ + return find_regno_note (insn, REG_DEAD, REGNO (reg)) != NULL_RTX; +} + +static bool +reg_unused_after_insn (const_rtx reg, const_rtx insn) +{ + return find_regno_note (insn, REG_UNUSED, REGNO (reg)) != NULL_RTX; +} + +// Check whether the two specified basic blocks are adjacent, i.e. there's no +// other basic block in between them. +static bool +is_adjacent_bb (basic_block a, basic_block b) +{ + basic_block bb0[] = { a, b }; + basic_block bb1[] = { b, a }; + + for (int i = 0; i < 2; ++i) + for (edge_iterator ei = ei_start (bb0[i]->succs); + !ei_end_p (ei); ei_next (&ei)) + if (ei_edge (ei)->dest == bb1[i]) + return true; + + return false; +} + +// Internal function of trace_reg_uses. +static void +trace_reg_uses_1 (rtx reg, rtx start_insn, basic_block bb, int& count, + std::vector<basic_block>& visited_bb, rtx abort_at_insn) +{ + if (bb == NULL) + return; + + if (std::find (visited_bb.begin (), visited_bb.end (), bb) + != visited_bb.end ()) + log_return_void ("[bb %d] already visited\n", bb->index); + + visited_bb.push_back (bb); + + if (BB_END (bb) == NULL_RTX) + log_return_void ("[bb %d] BB_END is null\n", bb->index); + + if (start_insn == NULL_RTX) + log_return_void ("[bb %d] start_insn is null\n", bb->index); + + rtx end_insn = NEXT_INSN (BB_END (bb)); + if (end_insn == NULL_RTX) + log_return_void ("[bb %d] end_insn is null\n", bb->index); + + for (rtx i = NEXT_INSN (start_insn); i != end_insn; i = NEXT_INSN (i)) + { + if (INSN_P (i)) + { + if (NONDEBUG_INSN_P (i) + && (reg_overlap_mentioned_p (reg, PATTERN (i)) + || (CALL_P (i) && find_reg_fusage (i, USE, reg)))) + { + log_msg ("found use in [bb %d] at insn:\n", bb->index); + log_insn (i); + log_msg ("\n"); + count += 1; + } + + // Stop following this BB if the reg is set or dies along the way. + if (reg_set_p (reg, i) || reg_dead_after_insn (reg, i)) + return; + } + + if (abort_at_insn != NULL_RTX && abort_at_insn == i) + return; + } + + for (edge_iterator ei = ei_start (bb->succs); !ei_end_p (ei); ei_next (&ei)) + { + basic_block succ_bb = ei_edge (ei)->dest; + trace_reg_uses_1 (reg, BB_HEAD (succ_bb), succ_bb, count, visited_bb, + abort_at_insn); + } +} + +// Trace uses of the specified reg in all basic blocks that are reachable from +// the specified insn. If 'abort_at_insn' is not null, abort the trace at +// that insn. If the insn 'abort_at_insn' uses the specified reg, it is also +// counted. +static int +trace_reg_uses (rtx reg, rtx start_insn, rtx abort_at_insn) +{ + log_msg ("\ntrace_reg_uses\nreg = "); + log_rtx (reg); + log_msg ("\nstart_insn = "); + log_insn (start_insn); + + int count = 0; + std::vector<basic_block> visited_bb; + visited_bb.reserve (32); + + trace_reg_uses_1 (reg, start_insn, BLOCK_FOR_INSN (start_insn), + count, visited_bb, abort_at_insn); + return count; +} + +// FIXME: Remove dependency on SH predicate function somehow. +extern int t_reg_operand (rtx, machine_mode); +extern int negt_reg_operand (rtx, machine_mode); + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +// RTL pass class + +class ifcvt_sh : public rtl_opt_pass +{ +public: + ifcvt_sh (gcc::context* ctx, bool split_insns, const char* name); + virtual ~ifcvt_sh (void); + virtual bool gate (void); + virtual unsigned int execute (void); + +private: + // Type of ccreg store that is supported. + enum cstore_type_t + { + cstore_normal = 0, + cstore_inverted = 1, + cstore_unknown = -1 + }; + + // Type of branch condition that is supported. + enum branch_condition_type_t + { + branch_if_true = 1, + branch_if_false = 0, + unknown_branch_condition = -1 + }; + + // For each basic block there can be a trace entry which consists of an + // insn that sets the ccreg (usually a comparison) and a ccreg store. + struct bb_entry + { + basic_block bb; + set_of_reg setcc; + set_of_reg cstore; + cstore_type_t cstore_type; + std::vector<set_of_reg> cstore_reg_reg_copies; + + bb_entry (basic_block b) + : bb (b), setcc (), cstore (), cstore_type (cstore_unknown) { } + + rtx comparison_rtx (void) const { return setcc.set_src (); } + }; + + // A ccreg trace for a conditional branch. + struct cbranch_trace + { + rtx cbranch_insn; + branch_condition_type_t cbranch_type; + + // The comparison against zero right before the conditional branch. + set_of_reg setcc; + + // All BBs that are related to the cbranch. The last BB in the list is + // the BB of the cbranch itself and might be empty. + std::list<bb_entry> bb_entries; + + cbranch_trace (rtx insn) + : cbranch_insn (insn), + cbranch_type (unknown_branch_condition), + setcc () + { + } + + basic_block bb (void) const { return BLOCK_FOR_INSN (cbranch_insn); } + + rtx + branch_condition_rtx (void) const + { + rtx x = pc_set (cbranch_insn); + return x == NULL_RTX ? NULL_RTX : XEXP (XEXP (x, 1), 0); + } + + bool + can_invert_condition (void) const + { + // The branch condition can be inverted safely only if the condition + // reg is dead after the cbranch. + return reg_dead_after_insn (XEXP (branch_condition_rtx (), 0), + cbranch_insn); + } + }; + + static const pass_data default_pass_data; + + // Tells whether modified or newly added insns are to be split at the end + // of the pass. + const bool split_insns_; + + // Captured dump_file != NULL. + bool en_logging_; + + // rtx of the ccreg that is obtained from the target. + rtx ccreg_; + + // Newly added or modified insns. + std::vector<rtx> touched_insns_; + + // Given an rtx determine whether it's a comparison with a constant zero. + static bool is_cmp_eq_zero (const_rtx i); + + // Update the stored mode of the ccreg from the given branch condition rtx. + void update_ccreg_mode (const_rtx cond); + + // Given an rtx, figure out the branch condition, assuming that it is + // in canonical form: + // (ne (reg) (const_int 0)) + // (eq (reg) (const_int 0)) + branch_condition_type_t branch_condition_type (const_rtx cond) const; + + // Return true if the specified rtx is either a normal ccreg or + // a negated form of the ccreg. + bool is_normal_ccreg (const_rtx x) const; + bool is_inverted_ccreg (const_rtx x) const; + + // Given a reg rtx and a start insn rtx, try to find the insn in the same + // basic block that sets the specified reg. + // Return how the search ended and the insn where it stopped or NULL_RTX. + enum record_return_t + { + set_found, + set_not_found, + other_set_found + }; + record_return_t record_set_of_reg (rtx reg, rtx start_insn, bb_entry& e); + + // Tells whether the cbranch insn of the specified bb_entry can be removed + // safely without triggering any side effects. + bool can_remove_cstore (const bb_entry& e, + const cbranch_trace& trace) const; + + // Tells whether the setcc insn of the specified bb_entry can be removed + // safely without triggering any side effects. + bool can_remove_comparison (const bb_entry& e, + const cbranch_trace& trace) const; + + // Tells whether the two specified comparison rtx can be combined into a + // single comparison. + bool can_combine_comparisons (const_rtx x, const_rtx y) const; + + // Tells whether the ccreg usage can be extended from the bb_entry on until + // the final cbranch of the trace. + bool can_extend_ccreg_usage (const bb_entry& e, + const cbranch_trace& trace) const; + + // Create an insn rtx that is a negating reg move (not operation). + rtx make_not_reg_insn (rtx dst_reg, rtx src_reg) const; + + // Create an insn rtx that inverts the ccreg. + rtx make_inv_ccreg_insn (void) const; + + // Adds the specified insn to the set of modified or newly added insns that + // might need splitting at the end of the pass. + rtx touched_insn (rtx i); + + // Try to invert the branch condition of the specified trace. + bool try_invert_branch_condition (cbranch_trace& trace); + + // Try to optimize a cbranch trace by combining comparisons in BBs and + // eliminate the cstores. + bool try_combine_comparisons (cbranch_trace& trace, + int cstore_count, int inv_cstore_count, + cstore_type_t dominating_cstore); + + // Try to optimize a cbranch trace by eliminating the cstores in BBs only. + bool try_eliminate_cstores (cbranch_trace& trace, + int cstore_count, int inv_cstore_count, + cstore_type_t dominating_cstore); + + // Given a branch insn, try to optimize its branch condition. + // If any insns are modified or added they are added to 'touched_insns_'. + void try_optimize_cbranch (rtx i); +}; + + +const pass_data ifcvt_sh::default_pass_data = +{ + RTL_PASS, // type + "", // name (overwritten by the constructor) + OPTGROUP_NONE, // optinfo_flags + true, // has_gate + true, // has_execute + TV_OPTIMIZE, // tv_id + 0, // properties_required + 0, // properties_provided + 0, // properties_destroyed + 0, // todo_flags_start + TODO_df_finish | TODO_df_verify // todo_flags_finish + | TODO_verify_rtl_sharing +}; + +ifcvt_sh::ifcvt_sh (gcc::context* ctx, bool split_insns, const char* name) +: rtl_opt_pass (default_pass_data, ctx), + split_insns_ (split_insns), + en_logging_ (false), + ccreg_ (NULL_RTX) +{ + // Overwrite default name in pass_data base class. + this->name = name; +} + +ifcvt_sh::~ifcvt_sh (void) +{ +} + +void ifcvt_sh::update_ccreg_mode (const_rtx cond) +{ + if (REG_P (XEXP (cond, 0)) && REGNO (XEXP (cond, 0)) != REGNO (ccreg_)) + return; + + machine_mode m = GET_MODE (XEXP (cond, 0)); + if (m == GET_MODE (ccreg_)) + return; + + PUT_MODE (ccreg_, m); + log_msg ("updated ccreg mode: "); + log_rtx (ccreg_); + log_msg ("\n"); +} + +bool +ifcvt_sh::is_cmp_eq_zero (const_rtx i) +{ + return i != NULL_RTX && GET_CODE (i) == EQ + && REG_P (XEXP (i, 0)) && XEXP (i, 1) == const0_rtx; +} + +ifcvt_sh::branch_condition_type_t +ifcvt_sh::branch_condition_type (const_rtx cond) const +{ + if (cond == NULL_RTX) + return unknown_branch_condition; + + if (GET_CODE (cond) == NE + && REG_P (XEXP (cond, 0)) && REGNO (XEXP (cond, 0)) == REGNO (ccreg_) + && XEXP (cond, 1) == const0_rtx) + return branch_if_true; + + else if (GET_CODE (cond) == EQ + && REG_P (XEXP (cond, 0)) && REGNO (XEXP (cond, 0)) == REGNO (ccreg_) + && XEXP (cond, 1) == const0_rtx) + return branch_if_false; + + else + return unknown_branch_condition; +} + +bool +ifcvt_sh::is_normal_ccreg (const_rtx x) const +{ + return t_reg_operand (const_cast<rtx> (x), VOIDmode); +} + +bool +ifcvt_sh::is_inverted_ccreg (const_rtx x) const +{ + return negt_reg_operand (const_cast<rtx> (x), VOIDmode); +} + +ifcvt_sh::record_return_t +ifcvt_sh::record_set_of_reg (rtx reg, rtx start_insn, bb_entry& new_entry) +{ + log_msg ("\n[bb %d]\n", new_entry.bb->index); + + if (start_insn == NULL_RTX) + log_return (set_not_found, "set of reg not found. empty BB?\n"); + + new_entry.cstore_type = cstore_unknown; + + for (rtx i = start_insn; i != NULL_RTX; ) + { + new_entry.cstore = find_set_of_reg_bb (reg, i); + + if (new_entry.cstore.set_src () == NULL_RTX) + log_return (set_not_found, "set of reg not found (cstore)\n"); + + log_insn (new_entry.cstore.insn); + log_msg ("\n"); + + if (is_normal_ccreg (new_entry.cstore.set_src ())) + { + log_msg ("normal condition store\n"); + new_entry.cstore_type = cstore_normal; + } + else if (is_inverted_ccreg (new_entry.cstore.set_src ())) + { + log_msg ("inverted condition store\n"); + new_entry.cstore_type = cstore_inverted; + } + else if (REG_P (new_entry.cstore.set_src ())) + { + // If it's a reg-reg copy follow the copied reg. + new_entry.cstore_reg_reg_copies.push_back (new_entry.cstore); + reg = new_entry.cstore.set_src (); + i = new_entry.cstore.insn; + + log_msg ("reg-reg copy. tracing "); + log_rtx (reg); + log_msg ("\n"); + continue; + } + else + log_return (other_set_found, "not a condition store\n"); + + gcc_assert (new_entry.cstore_type != cstore_unknown); + + // Now see how the ccreg was set. + // For now it must be in the same BB. + log_msg ("tracing ccreg\n"); + new_entry.setcc = + find_set_of_reg_bb (ccreg_, + prev_nonnote_insn_bb (new_entry.cstore.insn)); + + // If cstore was found but setcc was not found continue anyway, as + // for some of the optimization types the setcc is irrelevant. + if (new_entry.setcc.set_src () == NULL_RTX) + log_return (set_found, "set of ccreg not found\n"); + + else if (GET_CODE (new_entry.setcc.set_rtx) == SET) + { + // Also allow insns that set the ccreg, but are not true comparison + // insns, as long as they are sets and not e.g. clobbers. + log_insn (new_entry.setcc.insn); + log_msg ("\n"); + return set_found; + } + else + // If cstore was found but setcc was not found continue anyway, as + // for some of the optimization types the setcc is irrelevant. + log_return (set_found, "unknown set of ccreg\n"); + } + + log_return (set_not_found, "set of reg not found\n"); +} + +bool +ifcvt_sh::can_remove_cstore (const bb_entry& e, + const cbranch_trace& trace) const +{ + if (volatile_insn_p (PATTERN (e.cstore.insn))) + { + log_msg ("can't remove insn\n"); + log_insn (e.cstore.insn); + log_return (false, "\nbecause it's volatile\n"); + } + + // On SH there are parallel patterns which store the ccreg multiple times. + // In this case it's not safe. + rtx cstore_pat = PATTERN (e.cstore.insn); + if (GET_CODE (cstore_pat) == PARALLEL) + for (int i = 0; i < XVECLEN (cstore_pat, 0); ++i) + { + rtx x = XVECEXP (cstore_pat, 0, i); + + // It's the cstore set that we're referring to, ignore that one. + if (x != e.cstore.set_rtx + && GET_CODE (x) == SET && reg_referenced_p (ccreg_, x)) + { + log_msg ("can't remove insn\n"); + log_insn (e.cstore.insn); + log_return (false, "\nbecause it's a multiple ccreg store\n"); + } + } + + // If the cstore sets the ccreg (e.g. negc) and the ccreg is used afterwards + // it's not safe. + if (modified_in_p (ccreg_, e.cstore.insn) + && !(reg_dead_after_insn (ccreg_, e.cstore.insn) + || reg_unused_after_insn (ccreg_, e.cstore.insn))) + { + log_msg ("can't remove insn\n"); + log_insn (e.cstore.insn); + log_return (false, "\nbecause it sets the ccreg\n"); + } + + // If the cstore destination reg is copied around check the reg-reg + // copies. At every reg-reg copy the copied reg must be dead. + // Otherwise we assume that the result of the cstore is used in some + // other way. + for (std::vector<set_of_reg>::const_reverse_iterator i = + e.cstore_reg_reg_copies.rbegin (); + i != e.cstore_reg_reg_copies.rend (); ++i) + { + if (!reg_dead_after_insn (i->set_src (), i->insn)) + { + log_msg ("can't remove insn\n"); + log_insn (i->insn); + log_return (false, "\nbecause source of reg-reg copy doesn't die\n"); + } + } + + // Check that the destination reg of the cstore is not used otherwise. + + // If the cstore destination reg is copied around check the effective + // destination reg of the cstore. The reg-reg copies are recorded in + // reverse order, i.e. the most recent reg-reg copy in the insn list + // comes first. + rtx cstore_dst = e.cstore_reg_reg_copies.empty () + ? e.cstore.set_dst () + : e.cstore_reg_reg_copies.front ().set_dst (); + + // The cstore_dst reg must die after the test before the cbranch. + if (!reg_dead_after_insn (cstore_dst, trace.setcc.insn)) + { + log_msg ("can't remove insn\n"); + log_insn (e.cstore.insn); + log_return (false, "\nbecause its effective target reg %d doesn't die " + "after trace.setcc.insn\n", REGNO (cstore_dst)); + } + + // Even if the cstore_dst reg dies after the test before the cbranch we + // have to check whether it is used in other BBs if the cstore is not + // in the same BB where the cstore_dst dies. + if (e.bb == trace.bb ()) + { + if (reg_used_between_p (cstore_dst, e.cstore.insn, trace.setcc.insn)) + { + log_msg ("can't remove insn\n"); + log_insn (e.cstore.insn); + log_return (false, "\nbecause its effective target reg %d is used " + "otherwise\n", REGNO (cstore_dst)); + } + } + else + { + // Count the uses of cstore_dst reg in all BBs that can be reached from + // bb_entry's BB. If we get more than '1' we assume that it's used + // somewhere else and is not safe to be removed. + int cstore_dst_use_count = trace_reg_uses (cstore_dst, e.cstore.insn, + trace.setcc.insn); + if (cstore_dst_use_count > 1) + { + log_msg ("can't remove insn\n"); + log_insn (e.cstore.insn); + log_return (false, "\nbecause its effective target reg %d is used " + "in %d other places\n", REGNO (cstore_dst), + cstore_dst_use_count - 1); + } + } + + return true; +} + +bool +ifcvt_sh::can_remove_comparison (const bb_entry& e, + const cbranch_trace&/* trace*/) const +{ + // If the ccreg is used otherwise between the comparison and the cstore, + // it's not safe. + if (reg_used_between_p (ccreg_, e.setcc.insn, e.cstore.insn)) + { + log_msg ("can't remove insn\n"); + log_insn (e.setcc.insn); + log_return (false, "\nbecause the ccreg is used otherwise\n"); + } + + if (!reg_dead_after_insn (ccreg_, e.cstore.insn) + && !reg_unused_after_insn (ccreg_, e.cstore.insn)) + { + log_msg ("can't remove insn\n"); + log_insn (e.cstore.insn); + log_return (false, "\nbecause ccreg is not dead or unused afterwards\n"); + } + + // On SH there are also multiple set patterns that can be used for + // comparisons, such as "shll". It's not safe to remove those. + if (multiple_sets (e.setcc.insn)) + { + log_msg ("can't remove insn\n"); + log_insn (e.cstore.insn); + log_return (false, "\nbecause it's a multiple set\n"); + } + + return true; +} + +rtx +ifcvt_sh::make_not_reg_insn (rtx dst_reg, rtx src_reg) const +{ + // This will to go through expanders and may output multiple insns + // for multi-word regs. + start_sequence (); + expand_simple_unop (GET_MODE (dst_reg), NOT, src_reg, dst_reg, 0); + rtx i = get_insns (); + end_sequence (); + return i; +} + +rtx +ifcvt_sh::make_inv_ccreg_insn (void) const +{ + start_sequence (); + rtx i = emit_insn (gen_rtx_SET (VOIDmode, ccreg_, + gen_rtx_fmt_ee (XOR, GET_MODE (ccreg_), + ccreg_, const1_rtx))); + end_sequence (); + return i; +} + +rtx +ifcvt_sh::touched_insn (rtx i) +{ + touched_insns_.push_back (i); + return i; +} + +bool +ifcvt_sh::can_combine_comparisons (const_rtx x, const_rtx y) const +{ + if (GET_CODE (x) != GET_CODE (y)) + return false; + + rtx x_op0 = XEXP (x, 0); + rtx x_op1 = XEXP (x, 1); + + rtx y_op0 = XEXP (y, 0); + rtx y_op1 = XEXP (y, 1); + + if (!REG_P (x_op0) || !REG_P (y_op0)) + return false; + + if (GET_MODE (x_op0) != GET_MODE (y_op0)) + return false; + + // rtx_equal_p also compares the reg numbers which we do not care about + // here, as long as both are regs and the modes are the same. + if (REG_P (x_op1)) + return REG_P (y_op1) && GET_MODE (x_op1) == GET_MODE (y_op1); + + return rtx_equal_p (x_op1, y_op1); +} + +bool +ifcvt_sh::can_extend_ccreg_usage (const bb_entry& e, + const cbranch_trace& trace) const +{ + // Check if the ccreg is not modified by other insins in the BB path until + // the final cbranch of the trace. + // Start checking after the cstore that follows the setcc, assuming that + // the cstore will be removed. + + // The assumption here is that the specified bb_entry's BB is a direct + // predecessor of the trace.cbranch_insn's BB. + if (e.bb != trace.bb () && !is_adjacent_bb (e.bb, trace.bb ())) + log_return (false, + "can't extend ccreg usage -- [bb %d] and [bb %d] are not adjacent\n", + e.bb->index, trace.bb ()->index); + + if (e.cstore.empty ()) + log_return (false, "can't extend ccreg usage -- no cstore\n"); + + // The entry's cstore is in the same BB as the final cbranch. + if (e.bb == trace.bb ()) + { + if (reg_set_between_p (ccreg_, e.cstore.insn, trace.setcc.insn)) + log_return (false, + "can't extend ccreg usage -- it's modified between e.cstore.insn " + "and trace.setcc.insn"); + else + return true; + } + + // The entry's cstore and the final cbranch are in different BBs. + if (reg_set_between_p (ccreg_, e.cstore.insn, NEXT_INSN (BB_END (e.bb)))) + log_return (false, + "can't extend ccreg usage -- it's modified in [bb %d]", e.bb->index); + + if (reg_set_between_p (ccreg_, PREV_INSN (BB_HEAD (trace.bb ())), + trace.setcc.insn)) + log_return (false, + "can't extend ccreg usage -- it's modified in [bb %d]", + trace.bb ()->index); + + return true; +} + +bool +ifcvt_sh::try_invert_branch_condition (cbranch_trace& trace) +{ + log_msg ("inverting branch condition\n"); + + if (!invert_jump_1 (trace.cbranch_insn, JUMP_LABEL (trace.cbranch_insn))) + log_return (false, "invert_jump_1 failed\n"); + + if (verify_changes (num_validated_changes ())) + confirm_change_group (); + else + log_return (false, "verify_changed failed\n"); + + touched_insn (trace.cbranch_insn); + return true; +} + +bool +ifcvt_sh::try_combine_comparisons (cbranch_trace& trace, + int cstore_count, int inv_cstore_count, + cstore_type_t dominating_cstore) +{ + log_msg ("\ntry_combine_comparisons\n"); + + // This function will always try to create new pseudos. + if (!can_create_pseudo_p ()) + log_return (false, "can't create pseudos\n"); + + // Check that all ccset insns are comparisons and all comparison types in + // all BBs are the same and could be combined into one single comparison. + rtx comp = NULL_RTX; + rtx comp_insn = NULL_RTX; + + for (std::list<bb_entry>::const_iterator i = trace.bb_entries.begin (); + i != trace.bb_entries.end (); ++i) + { + int i_empty_count = i->setcc.empty () + i->cstore.empty (); + + // A completly empty entry is OK (could be the BB of the cbranch). + if (i_empty_count == 2) + continue; + + // Otherwise we need both, the setcc and the cstore. + if (i_empty_count != 0) + log_return (false, "bb entry is not a setcc cstore pair\n"); + + rtx other_comp = i->comparison_rtx (); + + if (!COMPARISON_P (other_comp)) + { + log_msg ("setcc is not a comparison:\n"); + log_rtx (other_comp); + log_return (false, "\n"); + } + + if (comp_insn == NULL_RTX) + { + comp = other_comp; + comp_insn = i->setcc.insn; + } + else if (!can_combine_comparisons (comp, other_comp)) + return false; + + // The goal here is to eliminate all cstores and comparisons in the BBs. + // Thus check if every cstore can actually be removed safely. + if (!can_remove_cstore (*i, trace) || !can_remove_comparison (*i, trace)) + return false; + } + + // FIXME: The first operand of the comparison must be a simple reg. + // This effectively prohibits combining div0s comparisons such as + // (lt:SI (xor:SI (reg:SI) (reg:SI))) + if (!REG_P (XEXP (comp, 0))) + { + log_msg ("comparison operand 0\n"); + log_rtx (XEXP (comp, 0)); + log_return (false, "\nis not a reg\n"); + } + + rtx comp_op0 = gen_reg_rtx (GET_MODE (XEXP (comp, 0))); + rtx comp_op1 = REG_P (XEXP (comp, 1)) + ? gen_reg_rtx (GET_MODE (XEXP (comp, 1))) + : XEXP (comp, 1); + + // If there are both, inverting and non-inverting cstores, they can only + // be eliminated if the comparison can be inverted. We assume that the + // comparison insns that we find are already minimal and canonicalized. + // There is one special case though, where an integer comparison + // (eq (reg) (const_int 0)) + // can be inverted with a sequence + // (eq (not (reg)) (const_int 0)) + if (inv_cstore_count != 0 && cstore_count != 0) + { + if (make_not_reg_insn (comp_op0, comp_op0) == NULL_RTX) + log_return (false, "make_not_reg_insn failed.\n"); + + for (std::list<bb_entry>::const_iterator i = trace.bb_entries.begin (); + i != trace.bb_entries.end (); ++i) + { + if (i->setcc.empty () || i->cstore.empty ()) + continue; + + if (i->cstore_type != dominating_cstore + && !is_cmp_eq_zero (i->comparison_rtx ())) + { + log_msg ("can't invert comparison in insn\n"); + log_insn (i->setcc.insn); + log_return (false, + "\nbecause it's not a (eq (reg) (const_int 0))\n"); + } + } + } + + if (dominating_cstore == cstore_normal + && !try_invert_branch_condition (trace)) + return false; + + // Replace the test insn before the cbranch with the common comparison. + // Instead of creating a new insn from scratch we copy the common comparison + // pattern. This simplifies handling parallel comparison patterns, such as + // FP comparisons on SH, which have an extra use on FPSCR. + log_msg ("installing common comparison in [bb %d]\n", trace.bb ()->index); + + rtx common_comp_pat = copy_rtx (PATTERN (comp_insn)); + rtx common_comp = const_cast<rtx> (set_of (ccreg_, common_comp_pat)); + + gcc_assert (common_comp != NULL_RTX); + + XEXP (XEXP (common_comp, 1), 0) = comp_op0; + XEXP (XEXP (common_comp, 1), 1) = comp_op1; + + log_rtx (common_comp_pat); + log_msg ("\n"); + + touched_insn (emit_insn_after (common_comp_pat, trace.setcc.insn)); + delete_insn (trace.setcc.insn); + + // Replace comparison and cstore insns with reg-reg moves in all BBs. + for (std::list<bb_entry>::const_iterator i = trace.bb_entries.begin (); + i != trace.bb_entries.end (); ++i) + { + if (i->setcc.empty () || i->cstore.empty ()) + continue; + + if (i->cstore_type == dominating_cstore) + { + log_msg ("replacing comparison and cstore with reg move " + "in [bb %d]\n", i->bb->index); + + touched_insn (emit_insn_after ( + gen_move_insn (comp_op0, XEXP (i->comparison_rtx (), 0)), + i->setcc.insn)); + + // If the second operand is a reg, have to emit a move insn. + // Otherwise assume it's a const_int and just reference it. + if (REG_P (comp_op1)) + touched_insn (emit_insn_after ( + gen_move_insn (comp_op1, XEXP (i->comparison_rtx (), 1)), + i->setcc.insn)); + } + else + { + log_msg ("replacing comparison and cstore with inverting reg move " + "in [bb %d]\n", i->bb->index); + + rtx new_i = make_not_reg_insn (comp_op0, + XEXP (i->comparison_rtx (), 0)); + touched_insn (emit_insn_after (new_i, i->setcc.insn)); + } + + delete_insn (i->cstore.insn); + delete_insn (i->setcc.insn); + } + + return true; +} + +bool +ifcvt_sh::try_eliminate_cstores (cbranch_trace& trace, + int cstore_count, int inv_cstore_count, + cstore_type_t dominating_cstore) +{ + log_msg ("\ntry_eliminate_cstores\n"); + + for (std::list<bb_entry>::const_iterator i = trace.bb_entries.begin (); + i != trace.bb_entries.end (); ++i) + { + // A completly empty entry is OK (could be the BB of the cbranch). + if (i->setcc.empty () && i->cstore.empty ()) + continue; + + // We're going to eliminate cstores, but for that they have to be + // there. We don't care about the setcc in this case. + if (i->cstore.empty ()) + log_return (false, "bb entry cstore empty -- aborting\n"); + + // The goal here is to eliminate all cstores in the BBs and extend the + // ccreg usage. + if (!can_extend_ccreg_usage (*i, trace)) + return false; + + // If the cstore can't be removed we can keep it around as long as + // it doesn't modify the ccreg. + if (!can_remove_cstore (*i, trace) + && modified_in_p (ccreg_, i->cstore.insn)) + log_return (false, "cstore sets ccreg -- aborting\n"); + } + + // If there are both, inverting and non-inverting cstores, we'll have to + // invert the ccreg as a replacement for one of them. + if (cstore_count != 0 && inv_cstore_count != 0) + { + rtx i = make_inv_ccreg_insn (); + if (recog_memoized (i) < 0) + { + log_msg ("failed to match ccreg inversion insn:\n"); + log_rtx (PATTERN (i)); + log_return (false, "\naborting\n"); + } + } + + if (dominating_cstore == cstore_normal + && !try_invert_branch_condition (trace)) + return false; + + // Eliminate cstores in all BBs. + for (std::list<bb_entry>::const_iterator i = trace.bb_entries.begin (); + i != trace.bb_entries.end (); ++i) + { + if (i->cstore.empty ()) + continue; + + if (i->cstore_type == dominating_cstore) + log_msg ("removing cstore in [bb %d]\n", i->bb->index); + else + { + log_msg ("replacing cstore with ccreg inversion in [bb %d]\n", + i->bb->index); + + touched_insn ( + emit_insn_after (make_inv_ccreg_insn (), i->cstore.insn)); + } + + if (can_remove_cstore (*i, trace)) + delete_insn (i->cstore.insn); + } + + log_msg ("removing test insn before cbranch\n"); + delete_insn (trace.setcc.insn); + return true; +} + +void +ifcvt_sh::try_optimize_cbranch (rtx insn) +{ + cbranch_trace trace (insn); + + log_msg ("\n\n--------------------------------------\n"); + log_msg ("found cbranch insn in [bb %d]:\n", trace.bb ()->index); + log_insn (insn); + + trace.cbranch_type = branch_condition_type (trace.branch_condition_rtx ()); + + if (trace.cbranch_type == branch_if_true) + log_msg ("condition: branch if true\n"); + else if (trace.cbranch_type == branch_if_false) + log_msg ("condition: branch if false\n"); + else + { + log_msg ("unknown branch condition\n"); + log_rtx (trace.branch_condition_rtx ()); + log_return_void ("\n"); + } + + update_ccreg_mode (trace.branch_condition_rtx ()); + + // Scan the insns backwards for an insn that sets the ccreg by testing a + // reg against zero like + // (set (reg ccreg) (eq (reg) (const_int 0))) + // The testing insn could also be outside of the current basic block, but + // for now we limit the search to the current basic block. + trace.setcc = find_set_of_reg_bb (ccreg_, prev_nonnote_insn_bb (insn)); + + if (!is_cmp_eq_zero (trace.setcc.set_src ())) + log_return_void ("could not find set of ccreg in current BB\n"); + + rtx trace_reg = XEXP (trace.setcc.set_src (), 0); + + log_msg ("set of ccreg:\n"); + log_insn (trace.setcc.insn); + + // See if we can remove the trace.setcc insn safely. + if (reg_used_between_p (ccreg_, trace.setcc.insn, trace.cbranch_insn)) + log_return_void ("ccreg used between testing insn and branch insn\n"); + + if (volatile_insn_p (PATTERN (trace.setcc.insn))) + { + log_msg ("can't remove insn\n"); + log_insn (trace.setcc.insn); + log_return_void ("\nbecause it's volatile\n"); + } + + // Now that we have an insn which tests some reg and sets the condition + // reg before the conditional branch, try to figure out how that tested + // reg was formed, i.e. find all the insns that set the tested reg in + // some way. + // The tested reg might be set in multiple basic blocks so we need to + // check all basic blocks which can reach this current basic block. + // If the set of reg is an inverting or non-inverting store of the condition + // register, check how the ccreg value was obtained. + log_msg ("\ntracing "); + log_rtx (trace_reg); + log_msg ("\n"); + + + // First check the basic block where the conditional branch is in. + // If we find it here there's no point in checking other BBs. + trace.bb_entries.push_front (bb_entry (trace.bb ())); + + record_return_t res = + record_set_of_reg (trace_reg, prev_nonnote_insn_bb (trace.setcc.insn), + trace.bb_entries.front ()); + + if (res == other_set_found) + log_return_void ("other set found - aborting trace\n"); + else if (res == set_not_found) + { + // It seems the initial search in the BB of the conditional branch + // didn't find anything. Now look in all predecessor BBs. + for (edge_iterator ei = ei_start (trace.bb ()->preds); + !ei_end_p (ei); ei_next (&ei)) + { + edge e = ei_edge (ei); + trace.bb_entries.push_front (bb_entry (e->src)); + + res = record_set_of_reg (trace_reg, BB_END (e->src), + trace.bb_entries.front ()); + if (res != set_found) + log_return_void ("set not found - aborting trace\n"); + } + } + + if (en_logging_) + { + log_msg ("\ncbranch trace summary:\n"); + for (std::list<bb_entry>::const_iterator i = trace.bb_entries.begin (); + i != trace.bb_entries.end (); ++i) + { + log_msg ("\n[bb %d]\n", i->bb->index); + if (!i->setcc.empty ()) + { + log_rtx (i->setcc.set_rtx); + log_msg ("\n"); + } + if (!i->cstore.empty ()) + { + log_rtx (i->cstore.set_rtx); + log_msg ("\n"); + } + + for (std::vector<set_of_reg>::const_reverse_iterator j = + i->cstore_reg_reg_copies.rbegin (); + j != i->cstore_reg_reg_copies.rend (); ++j) + { + log_rtx (j->set_rtx); + log_msg ("\n"); + } + } + + log_rtx (trace.setcc.set_rtx); + log_msg ("\n"); + log_rtx (PATTERN (trace.cbranch_insn)); + log_msg ("\n"); + } + + // Check that we don't have any empty BBs. + // Only the BB with the cbranch may be empty. + for (std::list<bb_entry>::const_iterator i = trace.bb_entries.begin (); + i != trace.bb_entries.end (); ++i) + if (i->setcc.empty () && i->cstore.empty () && i->bb != trace.bb ()) + log_return_void ("\n[bb %d] is empty - aborting.\n", i->bb->index); + + // Determine the dominating cstore type + // FIXME: Try to take the probabilities of the BBs into account somehow. + int cstore_count = 0; + int inv_cstore_count = 0; + + for (std::list<bb_entry>::const_iterator i = trace.bb_entries.begin (); + i != trace.bb_entries.end (); ++i) + { + if (i->cstore_type == cstore_normal) + cstore_count += 1; + else if (i->cstore_type == cstore_inverted) + inv_cstore_count += 1; + } + + log_msg ("cstore count = %d inverted cstore count = %d\n", + cstore_count, inv_cstore_count); + + // This puts a priority on inverting cstores. + cstore_type_t dominating_cstore = inv_cstore_count >= cstore_count + ? cstore_inverted + : cstore_normal; + + if (dominating_cstore == cstore_inverted) + log_msg ("will try to eliminate inverted cstore\n"); + else if (dominating_cstore == cstore_normal) + { + log_msg ("will try to eliminate normal cstore\n"); + if (!trace.can_invert_condition ()) + log_return_void ("branch condition can't be inverted - aborting\n"); + } + else + gcc_unreachable (); + + if (try_combine_comparisons (trace, cstore_count, inv_cstore_count, + dominating_cstore)) + return; + + try_eliminate_cstores (trace, cstore_count, inv_cstore_count, + dominating_cstore); +} + +bool +ifcvt_sh::gate (void) +{ + return optimize > 0; +} + +unsigned int +ifcvt_sh::execute (void) +{ + en_logging_ = dump_file != NULL; + + unsigned int ccr0 = INVALID_REGNUM; + unsigned int ccr1 = INVALID_REGNUM; + + if (targetm.fixed_condition_code_regs (&ccr0, &ccr1) + && ccr0 != INVALID_REGNUM) + { + // Initially create a reg rtx with VOIDmode. + // When the first conditional branch is discovered, the mode is changed + // to the mode that is actually used by the target. + ccreg_ = gen_rtx_REG (VOIDmode, ccr0); + } + + if (ccreg_ == NULL_RTX) + log_return (0, "no ccreg.\n\n"); + + if (STORE_FLAG_VALUE != 1) + log_return (0, "unsupported STORE_FLAG_VALUE %d", STORE_FLAG_VALUE); + + log_msg ("ccreg: "); + log_rtx (ccreg_); + log_msg (" STORE_FLAG_VALUE = %d\n", STORE_FLAG_VALUE); + + // Look for basic blocks that end with a conditional branch and try to + // optimize them. + basic_block bb; + FOR_EACH_BB (bb) + { + rtx i = BB_END (bb); + if (any_condjump_p (i) && onlyjump_p (i)) + try_optimize_cbranch (i); + } + + log_msg ("\n\n"); + + // If new insns are created and this pass is executed after all insns + // have been split already, we must split the insns we've changed or added + // ourselves here. + // FIXME: Multi-word operations (which emit multiple insns) are not handled + // properly here, since only one insn will end up in 'touched_insns_'. + // On SH this is not a problem though. + if (split_insns_) + for (std::vector<rtx>::const_iterator i = touched_insns_.begin (); + i != touched_insns_.end (); ++i) + { + log_msg ("trying to split insn:\n"); + log_insn (*i); + log_msg ("\n"); + try_split (PATTERN (*i), *i, 0); + } + + touched_insns_.clear (); + log_return (0, "\n\n"); +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +// This allows instantiating the pass somewhere else without having to pull +// in a header file. +opt_pass* +make_pass_ifcvt_sh (gcc::context* ctx, bool split_insns, const char* name) +{ + return new ifcvt_sh (ctx, split_insns, name); +}

[SH] PR 51244 - Fix defects introduced in 4.8

Commit Message

Comments

Patch