diff mbox

[3/3] shrink-wrap: Remove complicated simple_return manipulations

Message ID 01b8913890f998c3c3c46d770fc90fb8c1236099.1462256245.git.segher@kernel.crashing.org
State New
Headers show

Commit Message

Segher Boessenkool May 3, 2016, 6:59 a.m. UTC
Now that cfgcleanup knows how to optimize with return statements, the
epilogue insertion code doesn't have to deal with it itself anymore.


2016-05-03  Segher Boessenkool  <segher@kernel.crashing.org>

	* function.c (emit_use_return_register_into_block): Delete.
	(gen_return_pattern): Delete.
	(emit_return_into_block): Delete.
	(active_insn_between): Delete.
	(convert_jumps_to_returns): Delete.
	(emit_return_for_exit): Delete.
	(thread_prologue_and_epilogue_insns): Delete all code dealing with
	simple_return for shrink-wrapped blocks.
	* shrink-wrap.c (try_shrink_wrapping): Insert simple_return at the
	end of blocks that need one.
	(get_unconverted_simple_return): Delete.
	(convert_to_simple_return): Delete.
	* shrink-wrap.c (get_unconverted_simple_return): Delete declaration.
	(convert_to_simple_return): Ditto.

---
 gcc/function.c    | 213 +-----------------------------------------------------
 gcc/shrink-wrap.c | 171 ++++---------------------------------------
 gcc/shrink-wrap.h |   6 --
 3 files changed, 16 insertions(+), 374 deletions(-)

Comments

Christophe Lyon May 9, 2016, 1:54 p.m. UTC | #1
On 3 May 2016 at 08:59, Segher Boessenkool <segher@kernel.crashing.org> wrote:
> Now that cfgcleanup knows how to optimize with return statements, the
> epilogue insertion code doesn't have to deal with it itself anymore.
>
>
> 2016-05-03  Segher Boessenkool  <segher@kernel.crashing.org>
>
>         * function.c (emit_use_return_register_into_block): Delete.
>         (gen_return_pattern): Delete.
>         (emit_return_into_block): Delete.
>         (active_insn_between): Delete.
>         (convert_jumps_to_returns): Delete.
>         (emit_return_for_exit): Delete.
>         (thread_prologue_and_epilogue_insns): Delete all code dealing with
>         simple_return for shrink-wrapped blocks.
>         * shrink-wrap.c (try_shrink_wrapping): Insert simple_return at the
>         end of blocks that need one.
>         (get_unconverted_simple_return): Delete.
>         (convert_to_simple_return): Delete.
>         * shrink-wrap.c (get_unconverted_simple_return): Delete declaration.
>         (convert_to_simple_return): Ditto.
>

Hi,

After this patch, I've noticed that
gcc.target/arm/pr43920-2.c
now fails at:
/* { dg-final { scan-assembler-times "pop" 2 } } */

Before the patch, the generated code was:
[...]
        pop     {r3, r4, r5, r6, r7, pc}
.L4:
        mov     r0, #-1
.L1:
        pop     {r3, r4, r5, r6, r7, pc}

it is now:
[...]
.L1:
        pop     {r3, r4, r5, r6, r7, pc}
.L4:
        mov     r0, #-1
        b       .L1

The new version does not seem better, as it adds a branch on the path
and it is not smaller.

Christophe.


> ---
>  gcc/function.c    | 213 +-----------------------------------------------------
>  gcc/shrink-wrap.c | 171 ++++---------------------------------------
>  gcc/shrink-wrap.h |   6 --
>  3 files changed, 16 insertions(+), 374 deletions(-)
>
> diff --git a/gcc/function.c b/gcc/function.c
> index f6eb56c..b9a6338 100644
> --- a/gcc/function.c
> +++ b/gcc/function.c
> @@ -5753,49 +5753,6 @@ prologue_epilogue_contains (const_rtx insn)
>    return 0;
>  }
>
> -/* Insert use of return register before the end of BB.  */
> -
> -static void
> -emit_use_return_register_into_block (basic_block bb)
> -{
> -  start_sequence ();
> -  use_return_register ();
> -  rtx_insn *seq = get_insns ();
> -  end_sequence ();
> -  rtx_insn *insn = BB_END (bb);
> -  if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, PATTERN (insn)))
> -    insn = prev_cc0_setter (insn);
> -
> -  emit_insn_before (seq, insn);
> -}
> -
> -
> -/* Create a return pattern, either simple_return or return, depending on
> -   simple_p.  */
> -
> -static rtx_insn *
> -gen_return_pattern (bool simple_p)
> -{
> -  return (simple_p
> -         ? targetm.gen_simple_return ()
> -         : targetm.gen_return ());
> -}
> -
> -/* Insert an appropriate return pattern at the end of block BB.  This
> -   also means updating block_for_insn appropriately.  SIMPLE_P is
> -   the same as in gen_return_pattern and passed to it.  */
> -
> -void
> -emit_return_into_block (bool simple_p, basic_block bb)
> -{
> -  rtx_jump_insn *jump = emit_jump_insn_after (gen_return_pattern (simple_p),
> -                                             BB_END (bb));
> -  rtx pat = PATTERN (jump);
> -  if (GET_CODE (pat) == PARALLEL)
> -    pat = XVECEXP (pat, 0, 0);
> -  gcc_assert (ANY_RETURN_P (pat));
> -  JUMP_LABEL (jump) = pat;
> -}
>
>  /* Set JUMP_LABEL for a return insn.  */
>
> @@ -5811,135 +5768,6 @@ set_return_jump_label (rtx_insn *returnjump)
>      JUMP_LABEL (returnjump) = ret_rtx;
>  }
>
> -/* Return true if there are any active insns between HEAD and TAIL.  */
> -bool
> -active_insn_between (rtx_insn *head, rtx_insn *tail)
> -{
> -  while (tail)
> -    {
> -      if (active_insn_p (tail))
> -       return true;
> -      if (tail == head)
> -       return false;
> -      tail = PREV_INSN (tail);
> -    }
> -  return false;
> -}
> -
> -/* LAST_BB is a block that exits, and empty of active instructions.
> -   Examine its predecessors for jumps that can be converted to
> -   (conditional) returns.  */
> -vec<edge>
> -convert_jumps_to_returns (basic_block last_bb, bool simple_p,
> -                         vec<edge> unconverted ATTRIBUTE_UNUSED)
> -{
> -  int i;
> -  basic_block bb;
> -  edge_iterator ei;
> -  edge e;
> -  auto_vec<basic_block> src_bbs (EDGE_COUNT (last_bb->preds));
> -
> -  FOR_EACH_EDGE (e, ei, last_bb->preds)
> -    if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun))
> -      src_bbs.quick_push (e->src);
> -
> -  rtx_insn *label = BB_HEAD (last_bb);
> -
> -  FOR_EACH_VEC_ELT (src_bbs, i, bb)
> -    {
> -      rtx_insn *jump = BB_END (bb);
> -
> -      if (!JUMP_P (jump) || JUMP_LABEL (jump) != label)
> -       continue;
> -
> -      e = find_edge (bb, last_bb);
> -
> -      /* If we have an unconditional jump, we can replace that
> -        with a simple return instruction.  */
> -      if (simplejump_p (jump))
> -       {
> -         /* The use of the return register might be present in the exit
> -            fallthru block.  Either:
> -            - removing the use is safe, and we should remove the use in
> -            the exit fallthru block, or
> -            - removing the use is not safe, and we should add it here.
> -            For now, we conservatively choose the latter.  Either of the
> -            2 helps in crossjumping.  */
> -         emit_use_return_register_into_block (bb);
> -
> -         emit_return_into_block (simple_p, bb);
> -         delete_insn (jump);
> -       }
> -
> -      /* If we have a conditional jump branching to the last
> -        block, we can try to replace that with a conditional
> -        return instruction.  */
> -      else if (condjump_p (jump))
> -       {
> -         rtx dest;
> -
> -         if (simple_p)
> -           dest = simple_return_rtx;
> -         else
> -           dest = ret_rtx;
> -         if (!redirect_jump (as_a <rtx_jump_insn *> (jump), dest, 0))
> -           {
> -             if (targetm.have_simple_return () && simple_p)
> -               {
> -                 if (dump_file)
> -                   fprintf (dump_file,
> -                            "Failed to redirect bb %d branch.\n", bb->index);
> -                 unconverted.safe_push (e);
> -               }
> -             continue;
> -           }
> -
> -         /* See comment in simplejump_p case above.  */
> -         emit_use_return_register_into_block (bb);
> -
> -         /* If this block has only one successor, it both jumps
> -            and falls through to the fallthru block, so we can't
> -            delete the edge.  */
> -         if (single_succ_p (bb))
> -           continue;
> -       }
> -      else
> -       {
> -         if (targetm.have_simple_return () && simple_p)
> -           {
> -             if (dump_file)
> -               fprintf (dump_file,
> -                        "Failed to redirect bb %d branch.\n", bb->index);
> -             unconverted.safe_push (e);
> -           }
> -         continue;
> -       }
> -
> -      /* Fix up the CFG for the successful change we just made.  */
> -      redirect_edge_succ (e, EXIT_BLOCK_PTR_FOR_FN (cfun));
> -      e->flags &= ~EDGE_CROSSING;
> -    }
> -  src_bbs.release ();
> -  return unconverted;
> -}
> -
> -/* Emit a return insn for the exit fallthru block.  */
> -basic_block
> -emit_return_for_exit (edge exit_fallthru_edge, bool simple_p)
> -{
> -  basic_block last_bb = exit_fallthru_edge->src;
> -
> -  if (JUMP_P (BB_END (last_bb)))
> -    {
> -      last_bb = split_edge (exit_fallthru_edge);
> -      exit_fallthru_edge = single_succ_edge (last_bb);
> -    }
> -  emit_barrier_after (BB_END (last_bb));
> -  emit_return_into_block (simple_p, last_bb);
> -  exit_fallthru_edge->flags &= ~EDGE_FALLTHRU;
> -  return last_bb;
> -}
> -
>
>  /* Generate the prologue and epilogue RTL if the machine supports it.  Thread
>     this into place with notes indicating where the prologue ends and where
> @@ -5993,7 +5821,6 @@ void
>  thread_prologue_and_epilogue_insns (void)
>  {
>    bool inserted;
> -  vec<edge> unconverted_simple_returns = vNULL;
>    bitmap_head bb_flags;
>    rtx_insn *returnjump;
>    rtx_insn *epilogue_end ATTRIBUTE_UNUSED;
> @@ -6088,40 +5915,8 @@ thread_prologue_and_epilogue_insns (void)
>
>    exit_fallthru_edge = find_fallthru_edge (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds);
>
> -  if (targetm.have_simple_return () && entry_edge != orig_entry_edge)
> -    exit_fallthru_edge
> -       = get_unconverted_simple_return (exit_fallthru_edge, bb_flags,
> -                                        &unconverted_simple_returns,
> -                                        &returnjump);
> -  if (targetm.have_return ())
> -    {
> -      if (exit_fallthru_edge == NULL)
> -       goto epilogue_done;
> -
> -      if (optimize)
> -       {
> -         basic_block last_bb = exit_fallthru_edge->src;
> -
> -         if (LABEL_P (BB_HEAD (last_bb))
> -             && !active_insn_between (BB_HEAD (last_bb), BB_END (last_bb)))
> -           convert_jumps_to_returns (last_bb, false, vNULL);
> -
> -         if (EDGE_COUNT (last_bb->preds) != 0
> -             && single_succ_p (last_bb))
> -           {
> -             last_bb = emit_return_for_exit (exit_fallthru_edge, false);
> -             epilogue_end = returnjump = BB_END (last_bb);
> -
> -             /* Emitting the return may add a basic block.
> -                Fix bb_flags for the added block.  */
> -             if (targetm.have_simple_return ()
> -                 && last_bb != exit_fallthru_edge->src)
> -               bitmap_set_bit (&bb_flags, last_bb->index);
> -
> -             goto epilogue_done;
> -           }
> -       }
> -    }
> +  if (targetm.have_return () && exit_fallthru_edge == NULL)
> +    goto epilogue_done;
>
>    /* A small fib -- epilogue is not yet completed, but we wish to re-use
>       this marker for the splits of EH_RETURN patterns, and nothing else
> @@ -6229,10 +6024,6 @@ epilogue_done:
>         }
>      }
>
> -  if (targetm.have_simple_return ())
> -    convert_to_simple_return (entry_edge, orig_entry_edge, bb_flags,
> -                             returnjump, unconverted_simple_returns);
> -
>    /* Emit sibling epilogues before any sibling call sites.  */
>    for (ei = ei_start (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds); (e =
>                                                              ei_safe_edge (ei));
> diff --git a/gcc/shrink-wrap.c b/gcc/shrink-wrap.c
> index fe79519..0ba1fed 100644
> --- a/gcc/shrink-wrap.c
> +++ b/gcc/shrink-wrap.c
> @@ -946,10 +946,9 @@ try_shrink_wrapping (edge *entry_edge, bitmap_head *bb_with,
>         redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
>        }
>
> -  /* Change all the exits that should get a simple_return to FAKE.
> -     They will be converted later.  */
> +  /* Make a simple_return for those exits that run without prologue.  */
>
> -  FOR_EACH_BB_FN (bb, cfun)
> +  FOR_EACH_BB_REVERSE_FN (bb, cfun)
>      if (!bitmap_bit_p (bb_with, bb->index))
>        FOR_EACH_EDGE (e, ei, bb->succs)
>         if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
> @@ -958,7 +957,18 @@ try_shrink_wrapping (edge *entry_edge, bitmap_head *bb_with,
>
>             e->flags &= ~EDGE_FALLTHRU;
>             if (!(e->flags & EDGE_SIBCALL))
> -             e->flags |= EDGE_FAKE;
> +             {
> +               rtx_insn *ret = targetm.gen_simple_return ();
> +               rtx_insn *end = BB_END (e->src);
> +               rtx_jump_insn *start = emit_jump_insn_after (ret, end);
> +               JUMP_LABEL (start) = simple_return_rtx;
> +               e->flags &= ~EDGE_FAKE;
> +
> +               if (dump_file)
> +                 fprintf (dump_file,
> +                          "Made simple_return with UID %d in bb %d\n",
> +                          INSN_UID (start), e->src->index);
> +             }
>
>             emit_barrier_after_bb (e->src);
>           }
> @@ -996,156 +1006,3 @@ try_shrink_wrapping (edge *entry_edge, bitmap_head *bb_with,
>
>    free_dominance_info (CDI_DOMINATORS);
>  }
> -
> -/* If we're allowed to generate a simple return instruction, then by
> -   definition we don't need a full epilogue.  If the last basic
> -   block before the exit block does not contain active instructions,
> -   examine its predecessors and try to emit (conditional) return
> -   instructions.  */
> -
> -edge
> -get_unconverted_simple_return (edge exit_fallthru_edge, bitmap_head bb_flags,
> -                              vec<edge> *unconverted_simple_returns,
> -                              rtx_insn **returnjump)
> -{
> -  if (optimize)
> -    {
> -      unsigned i, last;
> -
> -      /* convert_jumps_to_returns may add to preds of the exit block
> -         (but won't remove).  Stop at end of current preds.  */
> -      last = EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds);
> -      for (i = 0; i < last; i++)
> -       {
> -         edge e = EDGE_I (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds, i);
> -         if (LABEL_P (BB_HEAD (e->src))
> -             && !bitmap_bit_p (&bb_flags, e->src->index)
> -             && !active_insn_between (BB_HEAD (e->src), BB_END (e->src)))
> -           *unconverted_simple_returns
> -                 = convert_jumps_to_returns (e->src, true,
> -                                             *unconverted_simple_returns);
> -       }
> -    }
> -
> -  if (exit_fallthru_edge != NULL
> -      && EDGE_COUNT (exit_fallthru_edge->src->preds) != 0
> -      && !bitmap_bit_p (&bb_flags, exit_fallthru_edge->src->index))
> -    {
> -      basic_block last_bb;
> -
> -      last_bb = emit_return_for_exit (exit_fallthru_edge, true);
> -      *returnjump = BB_END (last_bb);
> -      exit_fallthru_edge = NULL;
> -    }
> -  return exit_fallthru_edge;
> -}
> -
> -/* If there were branches to an empty LAST_BB which we tried to
> -   convert to conditional simple_returns, but couldn't for some
> -   reason, create a block to hold a simple_return insn and redirect
> -   those remaining edges.  */
> -
> -void
> -convert_to_simple_return (edge entry_edge, edge orig_entry_edge,
> -                         bitmap_head bb_flags, rtx_insn *returnjump,
> -                         vec<edge> unconverted_simple_returns)
> -{
> -  edge e;
> -  edge_iterator ei;
> -
> -  if (!unconverted_simple_returns.is_empty ())
> -    {
> -      basic_block simple_return_block_hot = NULL;
> -      basic_block simple_return_block_cold = NULL;
> -      edge pending_edge_hot = NULL;
> -      edge pending_edge_cold = NULL;
> -      basic_block exit_pred;
> -      int i;
> -
> -      gcc_assert (entry_edge != orig_entry_edge);
> -
> -      /* See if we can reuse the last insn that was emitted for the
> -        epilogue.  */
> -      if (returnjump != NULL_RTX
> -         && JUMP_LABEL (returnjump) == simple_return_rtx)
> -       {
> -         e = split_block (BLOCK_FOR_INSN (returnjump), PREV_INSN (returnjump));
> -         if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
> -           simple_return_block_hot = e->dest;
> -         else
> -           simple_return_block_cold = e->dest;
> -       }
> -
> -      /* Also check returns we might need to add to tail blocks.  */
> -      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
> -       if (EDGE_COUNT (e->src->preds) != 0
> -           && (e->flags & EDGE_FAKE) != 0
> -           && !bitmap_bit_p (&bb_flags, e->src->index))
> -         {
> -           if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
> -             pending_edge_hot = e;
> -           else
> -             pending_edge_cold = e;
> -         }
> -
> -      /* Save a pointer to the exit's predecessor BB for use in
> -         inserting new BBs at the end of the function.  Do this
> -         after the call to split_block above which may split
> -         the original exit pred.  */
> -      exit_pred = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
> -
> -      FOR_EACH_VEC_ELT (unconverted_simple_returns, i, e)
> -       {
> -         basic_block *pdest_bb;
> -         edge pending;
> -
> -         if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
> -           {
> -             pdest_bb = &simple_return_block_hot;
> -             pending = pending_edge_hot;
> -           }
> -         else
> -           {
> -             pdest_bb = &simple_return_block_cold;
> -             pending = pending_edge_cold;
> -           }
> -
> -         if (*pdest_bb == NULL && pending != NULL)
> -           {
> -             emit_return_into_block (true, pending->src);
> -             pending->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
> -             *pdest_bb = pending->src;
> -           }
> -         else if (*pdest_bb == NULL)
> -           {
> -             basic_block bb;
> -
> -             bb = create_basic_block (NULL, NULL, exit_pred);
> -             BB_COPY_PARTITION (bb, e->src);
> -             rtx_insn *ret = targetm.gen_simple_return ();
> -             rtx_jump_insn *start = emit_jump_insn_after (ret, BB_END (bb));
> -             JUMP_LABEL (start) = simple_return_rtx;
> -             emit_barrier_after (start);
> -
> -             *pdest_bb = bb;
> -             make_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
> -           }
> -         redirect_edge_and_branch_force (e, *pdest_bb);
> -       }
> -      unconverted_simple_returns.release ();
> -    }
> -
> -  if (entry_edge != orig_entry_edge)
> -    {
> -      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
> -       if (EDGE_COUNT (e->src->preds) != 0
> -           && (e->flags & EDGE_FAKE) != 0
> -           && !bitmap_bit_p (&bb_flags, e->src->index))
> -         {
> -           e = fix_fake_fallthrough_edge (e);
> -
> -           emit_return_into_block (true, e->src);
> -           e->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
> -         }
> -    }
> -}
> diff --git a/gcc/shrink-wrap.h b/gcc/shrink-wrap.h
> index 6e7a4f7..4d821d7 100644
> --- a/gcc/shrink-wrap.h
> +++ b/gcc/shrink-wrap.h
> @@ -26,12 +26,6 @@ along with GCC; see the file COPYING3.  If not see
>  extern bool requires_stack_frame_p (rtx_insn *, HARD_REG_SET, HARD_REG_SET);
>  extern void try_shrink_wrapping (edge *entry_edge, bitmap_head *bb_flags,
>                                  rtx_insn *prologue_seq);
> -extern edge get_unconverted_simple_return (edge, bitmap_head,
> -                                          vec<edge> *, rtx_insn **);
> -extern void convert_to_simple_return (edge entry_edge, edge orig_entry_edge,
> -                                     bitmap_head bb_flags,
> -                                     rtx_insn *returnjump,
> -                                     vec<edge> unconverted_simple_returns);
>  #define SHRINK_WRAPPING_ENABLED \
>    (flag_shrink_wrap && targetm.have_simple_return ())
>
> --
> 1.9.3
>
Segher Boessenkool May 9, 2016, 3:08 p.m. UTC | #2
Hi Christophe,

On Mon, May 09, 2016 at 03:54:26PM +0200, Christophe Lyon wrote:
> After this patch, I've noticed that
> gcc.target/arm/pr43920-2.c
> now fails at:
> /* { dg-final { scan-assembler-times "pop" 2 } } */
> 
> Before the patch, the generated code was:
> [...]
>         pop     {r3, r4, r5, r6, r7, pc}
> .L4:
>         mov     r0, #-1
> .L1:
>         pop     {r3, r4, r5, r6, r7, pc}
> 
> it is now:
> [...]
> .L1:
>         pop     {r3, r4, r5, r6, r7, pc}
> .L4:
>         mov     r0, #-1
>         b       .L1
> 
> The new version does not seem better, as it adds a branch on the path
> and it is not smaller.

That looks like bb-reorder isn't doing its job?  Maybe it thinks that
pop is too expensive to copy?


Segher
Jiong Wang May 11, 2016, 11:52 a.m. UTC | #3
On 09/05/16 16:08, Segher Boessenkool wrote:
> Hi Christophe,
>
> On Mon, May 09, 2016 at 03:54:26PM +0200, Christophe Lyon wrote:
>> After this patch, I've noticed that
>> gcc.target/arm/pr43920-2.c
>> now fails at:
>> /* { dg-final { scan-assembler-times "pop" 2 } } */
>>
>> Before the patch, the generated code was:
>> [...]
>>          pop     {r3, r4, r5, r6, r7, pc}
>> .L4:
>>          mov     r0, #-1
>> .L1:
>>          pop     {r3, r4, r5, r6, r7, pc}
>>
>> it is now:
>> [...]
>> .L1:
>>          pop     {r3, r4, r5, r6, r7, pc}
>> .L4:
>>          mov     r0, #-1
>>          b       .L1
>>
>> The new version does not seem better, as it adds a branch on the path
>> and it is not smaller.
> That looks like bb-reorder isn't doing its job?  Maybe it thinks that
> pop is too expensive to copy?
I think so. Filed PR71061

ARM backend is not setting the length attribute correctly, that the bb
failed copy_bb_p check.

Unfortunately I am afraid even we fixed the backend length issue, this 
testcase
will keep failing, because it's specify "-Os" that some bb copy won't be 
triggerd.
diff mbox

Patch

diff --git a/gcc/function.c b/gcc/function.c
index f6eb56c..b9a6338 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -5753,49 +5753,6 @@  prologue_epilogue_contains (const_rtx insn)
   return 0;
 }
 
-/* Insert use of return register before the end of BB.  */
-
-static void
-emit_use_return_register_into_block (basic_block bb)
-{
-  start_sequence ();
-  use_return_register ();
-  rtx_insn *seq = get_insns ();
-  end_sequence ();
-  rtx_insn *insn = BB_END (bb);
-  if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, PATTERN (insn)))
-    insn = prev_cc0_setter (insn);
-
-  emit_insn_before (seq, insn);
-}
-
-
-/* Create a return pattern, either simple_return or return, depending on
-   simple_p.  */
-
-static rtx_insn *
-gen_return_pattern (bool simple_p)
-{
-  return (simple_p
-	  ? targetm.gen_simple_return ()
-	  : targetm.gen_return ());
-}
-
-/* Insert an appropriate return pattern at the end of block BB.  This
-   also means updating block_for_insn appropriately.  SIMPLE_P is
-   the same as in gen_return_pattern and passed to it.  */
-
-void
-emit_return_into_block (bool simple_p, basic_block bb)
-{
-  rtx_jump_insn *jump = emit_jump_insn_after (gen_return_pattern (simple_p),
-					      BB_END (bb));
-  rtx pat = PATTERN (jump);
-  if (GET_CODE (pat) == PARALLEL)
-    pat = XVECEXP (pat, 0, 0);
-  gcc_assert (ANY_RETURN_P (pat));
-  JUMP_LABEL (jump) = pat;
-}
 
 /* Set JUMP_LABEL for a return insn.  */
 
@@ -5811,135 +5768,6 @@  set_return_jump_label (rtx_insn *returnjump)
     JUMP_LABEL (returnjump) = ret_rtx;
 }
 
-/* Return true if there are any active insns between HEAD and TAIL.  */
-bool
-active_insn_between (rtx_insn *head, rtx_insn *tail)
-{
-  while (tail)
-    {
-      if (active_insn_p (tail))
-	return true;
-      if (tail == head)
-	return false;
-      tail = PREV_INSN (tail);
-    }
-  return false;
-}
-
-/* LAST_BB is a block that exits, and empty of active instructions.
-   Examine its predecessors for jumps that can be converted to
-   (conditional) returns.  */
-vec<edge>
-convert_jumps_to_returns (basic_block last_bb, bool simple_p,
-			  vec<edge> unconverted ATTRIBUTE_UNUSED)
-{
-  int i;
-  basic_block bb;
-  edge_iterator ei;
-  edge e;
-  auto_vec<basic_block> src_bbs (EDGE_COUNT (last_bb->preds));
-
-  FOR_EACH_EDGE (e, ei, last_bb->preds)
-    if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun))
-      src_bbs.quick_push (e->src);
-
-  rtx_insn *label = BB_HEAD (last_bb);
-
-  FOR_EACH_VEC_ELT (src_bbs, i, bb)
-    {
-      rtx_insn *jump = BB_END (bb);
-
-      if (!JUMP_P (jump) || JUMP_LABEL (jump) != label)
-	continue;
-
-      e = find_edge (bb, last_bb);
-
-      /* If we have an unconditional jump, we can replace that
-	 with a simple return instruction.  */
-      if (simplejump_p (jump))
-	{
-	  /* The use of the return register might be present in the exit
-	     fallthru block.  Either:
-	     - removing the use is safe, and we should remove the use in
-	     the exit fallthru block, or
-	     - removing the use is not safe, and we should add it here.
-	     For now, we conservatively choose the latter.  Either of the
-	     2 helps in crossjumping.  */
-	  emit_use_return_register_into_block (bb);
-
-	  emit_return_into_block (simple_p, bb);
-	  delete_insn (jump);
-	}
-
-      /* If we have a conditional jump branching to the last
-	 block, we can try to replace that with a conditional
-	 return instruction.  */
-      else if (condjump_p (jump))
-	{
-	  rtx dest;
-
-	  if (simple_p)
-	    dest = simple_return_rtx;
-	  else
-	    dest = ret_rtx;
-	  if (!redirect_jump (as_a <rtx_jump_insn *> (jump), dest, 0))
-	    {
-	      if (targetm.have_simple_return () && simple_p)
-		{
-		  if (dump_file)
-		    fprintf (dump_file,
-			     "Failed to redirect bb %d branch.\n", bb->index);
-		  unconverted.safe_push (e);
-		}
-	      continue;
-	    }
-
-	  /* See comment in simplejump_p case above.  */
-	  emit_use_return_register_into_block (bb);
-
-	  /* If this block has only one successor, it both jumps
-	     and falls through to the fallthru block, so we can't
-	     delete the edge.  */
-	  if (single_succ_p (bb))
-	    continue;
-	}
-      else
-	{
-	  if (targetm.have_simple_return () && simple_p)
-	    {
-	      if (dump_file)
-		fprintf (dump_file,
-			 "Failed to redirect bb %d branch.\n", bb->index);
-	      unconverted.safe_push (e);
-	    }
-	  continue;
-	}
-
-      /* Fix up the CFG for the successful change we just made.  */
-      redirect_edge_succ (e, EXIT_BLOCK_PTR_FOR_FN (cfun));
-      e->flags &= ~EDGE_CROSSING;
-    }
-  src_bbs.release ();
-  return unconverted;
-}
-
-/* Emit a return insn for the exit fallthru block.  */
-basic_block
-emit_return_for_exit (edge exit_fallthru_edge, bool simple_p)
-{
-  basic_block last_bb = exit_fallthru_edge->src;
-
-  if (JUMP_P (BB_END (last_bb)))
-    {
-      last_bb = split_edge (exit_fallthru_edge);
-      exit_fallthru_edge = single_succ_edge (last_bb);
-    }
-  emit_barrier_after (BB_END (last_bb));
-  emit_return_into_block (simple_p, last_bb);
-  exit_fallthru_edge->flags &= ~EDGE_FALLTHRU;
-  return last_bb;
-}
-
 
 /* Generate the prologue and epilogue RTL if the machine supports it.  Thread
    this into place with notes indicating where the prologue ends and where
@@ -5993,7 +5821,6 @@  void
 thread_prologue_and_epilogue_insns (void)
 {
   bool inserted;
-  vec<edge> unconverted_simple_returns = vNULL;
   bitmap_head bb_flags;
   rtx_insn *returnjump;
   rtx_insn *epilogue_end ATTRIBUTE_UNUSED;
@@ -6088,40 +5915,8 @@  thread_prologue_and_epilogue_insns (void)
 
   exit_fallthru_edge = find_fallthru_edge (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds);
 
-  if (targetm.have_simple_return () && entry_edge != orig_entry_edge)
-    exit_fallthru_edge
-	= get_unconverted_simple_return (exit_fallthru_edge, bb_flags,
-					 &unconverted_simple_returns,
-					 &returnjump);
-  if (targetm.have_return ())
-    {
-      if (exit_fallthru_edge == NULL)
-	goto epilogue_done;
-
-      if (optimize)
-	{
-	  basic_block last_bb = exit_fallthru_edge->src;
-
-	  if (LABEL_P (BB_HEAD (last_bb))
-	      && !active_insn_between (BB_HEAD (last_bb), BB_END (last_bb)))
-	    convert_jumps_to_returns (last_bb, false, vNULL);
-
-	  if (EDGE_COUNT (last_bb->preds) != 0
-	      && single_succ_p (last_bb))
-	    {
-	      last_bb = emit_return_for_exit (exit_fallthru_edge, false);
-	      epilogue_end = returnjump = BB_END (last_bb);
-
-	      /* Emitting the return may add a basic block.
-		 Fix bb_flags for the added block.  */
-	      if (targetm.have_simple_return ()
-		  && last_bb != exit_fallthru_edge->src)
-		bitmap_set_bit (&bb_flags, last_bb->index);
-
-	      goto epilogue_done;
-	    }
-	}
-    }
+  if (targetm.have_return () && exit_fallthru_edge == NULL)
+    goto epilogue_done;
 
   /* A small fib -- epilogue is not yet completed, but we wish to re-use
      this marker for the splits of EH_RETURN patterns, and nothing else
@@ -6229,10 +6024,6 @@  epilogue_done:
 	}
     }
 
-  if (targetm.have_simple_return ())
-    convert_to_simple_return (entry_edge, orig_entry_edge, bb_flags,
-			      returnjump, unconverted_simple_returns);
-
   /* Emit sibling epilogues before any sibling call sites.  */
   for (ei = ei_start (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds); (e =
 							     ei_safe_edge (ei));
diff --git a/gcc/shrink-wrap.c b/gcc/shrink-wrap.c
index fe79519..0ba1fed 100644
--- a/gcc/shrink-wrap.c
+++ b/gcc/shrink-wrap.c
@@ -946,10 +946,9 @@  try_shrink_wrapping (edge *entry_edge, bitmap_head *bb_with,
 	redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
       }
 
-  /* Change all the exits that should get a simple_return to FAKE.
-     They will be converted later.  */
+  /* Make a simple_return for those exits that run without prologue.  */
 
-  FOR_EACH_BB_FN (bb, cfun)
+  FOR_EACH_BB_REVERSE_FN (bb, cfun)
     if (!bitmap_bit_p (bb_with, bb->index))
       FOR_EACH_EDGE (e, ei, bb->succs)
 	if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
@@ -958,7 +957,18 @@  try_shrink_wrapping (edge *entry_edge, bitmap_head *bb_with,
 
 	    e->flags &= ~EDGE_FALLTHRU;
 	    if (!(e->flags & EDGE_SIBCALL))
-	      e->flags |= EDGE_FAKE;
+	      {
+		rtx_insn *ret = targetm.gen_simple_return ();
+		rtx_insn *end = BB_END (e->src);
+		rtx_jump_insn *start = emit_jump_insn_after (ret, end);
+		JUMP_LABEL (start) = simple_return_rtx;
+		e->flags &= ~EDGE_FAKE;
+
+		if (dump_file)
+		  fprintf (dump_file,
+			   "Made simple_return with UID %d in bb %d\n",
+			   INSN_UID (start), e->src->index);
+	      }
 
 	    emit_barrier_after_bb (e->src);
 	  }
@@ -996,156 +1006,3 @@  try_shrink_wrapping (edge *entry_edge, bitmap_head *bb_with,
 
   free_dominance_info (CDI_DOMINATORS);
 }
-
-/* If we're allowed to generate a simple return instruction, then by
-   definition we don't need a full epilogue.  If the last basic
-   block before the exit block does not contain active instructions,
-   examine its predecessors and try to emit (conditional) return
-   instructions.  */
-
-edge
-get_unconverted_simple_return (edge exit_fallthru_edge, bitmap_head bb_flags,
-			       vec<edge> *unconverted_simple_returns,
-			       rtx_insn **returnjump)
-{
-  if (optimize)
-    {
-      unsigned i, last;
-
-      /* convert_jumps_to_returns may add to preds of the exit block
-         (but won't remove).  Stop at end of current preds.  */
-      last = EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds);
-      for (i = 0; i < last; i++)
-	{
-	  edge e = EDGE_I (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds, i);
-	  if (LABEL_P (BB_HEAD (e->src))
-	      && !bitmap_bit_p (&bb_flags, e->src->index)
-	      && !active_insn_between (BB_HEAD (e->src), BB_END (e->src)))
-	    *unconverted_simple_returns
-		  = convert_jumps_to_returns (e->src, true,
-					      *unconverted_simple_returns);
-	}
-    }
-
-  if (exit_fallthru_edge != NULL
-      && EDGE_COUNT (exit_fallthru_edge->src->preds) != 0
-      && !bitmap_bit_p (&bb_flags, exit_fallthru_edge->src->index))
-    {
-      basic_block last_bb;
-
-      last_bb = emit_return_for_exit (exit_fallthru_edge, true);
-      *returnjump = BB_END (last_bb);
-      exit_fallthru_edge = NULL;
-    }
-  return exit_fallthru_edge;
-}
-
-/* If there were branches to an empty LAST_BB which we tried to
-   convert to conditional simple_returns, but couldn't for some
-   reason, create a block to hold a simple_return insn and redirect
-   those remaining edges.  */
-
-void
-convert_to_simple_return (edge entry_edge, edge orig_entry_edge,
-			  bitmap_head bb_flags, rtx_insn *returnjump,
-			  vec<edge> unconverted_simple_returns)
-{
-  edge e;
-  edge_iterator ei;
-
-  if (!unconverted_simple_returns.is_empty ())
-    {
-      basic_block simple_return_block_hot = NULL;
-      basic_block simple_return_block_cold = NULL;
-      edge pending_edge_hot = NULL;
-      edge pending_edge_cold = NULL;
-      basic_block exit_pred;
-      int i;
-
-      gcc_assert (entry_edge != orig_entry_edge);
-
-      /* See if we can reuse the last insn that was emitted for the
-	 epilogue.  */
-      if (returnjump != NULL_RTX
-	  && JUMP_LABEL (returnjump) == simple_return_rtx)
-	{
-	  e = split_block (BLOCK_FOR_INSN (returnjump), PREV_INSN (returnjump));
-	  if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
-	    simple_return_block_hot = e->dest;
-	  else
-	    simple_return_block_cold = e->dest;
-	}
-
-      /* Also check returns we might need to add to tail blocks.  */
-      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
-	if (EDGE_COUNT (e->src->preds) != 0
-	    && (e->flags & EDGE_FAKE) != 0
-	    && !bitmap_bit_p (&bb_flags, e->src->index))
-	  {
-	    if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
-	      pending_edge_hot = e;
-	    else
-	      pending_edge_cold = e;
-	  }
-
-      /* Save a pointer to the exit's predecessor BB for use in
-         inserting new BBs at the end of the function.  Do this
-         after the call to split_block above which may split
-         the original exit pred.  */
-      exit_pred = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
-
-      FOR_EACH_VEC_ELT (unconverted_simple_returns, i, e)
-	{
-	  basic_block *pdest_bb;
-	  edge pending;
-
-	  if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
-	    {
-	      pdest_bb = &simple_return_block_hot;
-	      pending = pending_edge_hot;
-	    }
-	  else
-	    {
-	      pdest_bb = &simple_return_block_cold;
-	      pending = pending_edge_cold;
-	    }
-
-	  if (*pdest_bb == NULL && pending != NULL)
-	    {
-	      emit_return_into_block (true, pending->src);
-	      pending->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
-	      *pdest_bb = pending->src;
-	    }
-	  else if (*pdest_bb == NULL)
-	    {
-	      basic_block bb;
-
-	      bb = create_basic_block (NULL, NULL, exit_pred);
-	      BB_COPY_PARTITION (bb, e->src);
-	      rtx_insn *ret = targetm.gen_simple_return ();
-	      rtx_jump_insn *start = emit_jump_insn_after (ret, BB_END (bb));
-	      JUMP_LABEL (start) = simple_return_rtx;
-	      emit_barrier_after (start);
-
-	      *pdest_bb = bb;
-	      make_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
-	    }
-	  redirect_edge_and_branch_force (e, *pdest_bb);
-	}
-      unconverted_simple_returns.release ();
-    }
-
-  if (entry_edge != orig_entry_edge)
-    {
-      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
-	if (EDGE_COUNT (e->src->preds) != 0
-	    && (e->flags & EDGE_FAKE) != 0
-	    && !bitmap_bit_p (&bb_flags, e->src->index))
-	  {
-	    e = fix_fake_fallthrough_edge (e);
-
-	    emit_return_into_block (true, e->src);
-	    e->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
-	  }
-    }
-}
diff --git a/gcc/shrink-wrap.h b/gcc/shrink-wrap.h
index 6e7a4f7..4d821d7 100644
--- a/gcc/shrink-wrap.h
+++ b/gcc/shrink-wrap.h
@@ -26,12 +26,6 @@  along with GCC; see the file COPYING3.  If not see
 extern bool requires_stack_frame_p (rtx_insn *, HARD_REG_SET, HARD_REG_SET);
 extern void try_shrink_wrapping (edge *entry_edge, bitmap_head *bb_flags,
 				 rtx_insn *prologue_seq);
-extern edge get_unconverted_simple_return (edge, bitmap_head,
-					   vec<edge> *, rtx_insn **);
-extern void convert_to_simple_return (edge entry_edge, edge orig_entry_edge,
-				      bitmap_head bb_flags,
-				      rtx_insn *returnjump,
-				      vec<edge> unconverted_simple_returns);
 #define SHRINK_WRAPPING_ENABLED \
   (flag_shrink_wrap && targetm.have_simple_return ())