Patchwork combine/dce patch for PR36003, PR42575

login
register
mail settings
Submitter Bernd Schmidt
Date June 25, 2010, 10:38 a.m.
Message ID <4C24873D.6050405@codesourcery.com>
Download mbox | patch
Permalink /patch/56899/
State New
Headers show

Comments

Bernd Schmidt - June 25, 2010, 10:38 a.m.
On 06/24/2010 03:16 PM, Eric Botcazou wrote:
>> I can tack it onto something else, but I don't see how this would reduce
>> the amount of work we need to do?
> 
> The overhead of traversing the RTL or invoking DF isn't negligible so if we 
> can avoid doing it one more time...  Steven's measurements showed that the RTL 
> optimizers still consume 40-45% of the compilation time at -O2.

Surely that's for RA/reload/sched which are naturally expensive?  Things
like lower-subreg, ifcvt, dce tend to show up with 0% when I look at the
time report if they show up with any time at all.

Anyhow.  Here's a very simple implementation, grafted onto lower-subreg,
which gets rid of the unnecessary insn in PR42575.  It also gets rid of
all the byte_lr code :)  I have another version where it's done as an
extra pass inside DCE.

I've bootstrapped this on i686-linux and run regression tests for my
usual three sets of target options on arm-linux on a slightly earlier
version of the patch.


Bernd
	* lower-subreg.c: Include "dce.h".
	(mark_ref): New static function.
	(decompose_multiword_subregs): Add a small DCE pass on subregs of
	multiword regs to the initial scan.
	* Makefile.in (lower-subreg.o): Add "dce.h" to dependencies.
	(OBJS-common): Remove df-byte-scan.o.
	(df-byte-scan.o): Remove.
	* dce.c (dce_tmp_bitmap_obstack): No longer static.
	(dce_marked_insn_p): Renamed from marked_insn_p.  All callers changed.  No
	longer static.
	(prescan_insns_for_dce): No longer static.  Add arg STACK_ARGS, use it
	to decide whether to detect argument stores.  All callers changed.
	(init_dce, fini_dce): No longer static.
	(byte_dce_process_block): Delete.
	(fast_dce): Remove BYTE_LEVEL arg.  All callers changed.  Remove
	support for byte-level DCE.
	(rest_of_handle_fast_byte_dce): Delete.
	(pass_fast_rtl_byte_dce): Delete.
	* dce.h (dce_tmp_bitmap_obstack): Declare.
	(init_dce, fini_dce, prescan_insns_for_dce, dce_marked_insn_p): Declare.
	* df-core.c (df_print_byte_regset): Delete.
	* df.h (DF_BYTE_LR): Delete.
	(DF_NOTE, DF_MD): Renumber.
	(DF_BYTE_LR_BB_INFO, DF_BYTE_LR_IN, DF_BYTE_LR_OUT): Delete.
	(df_byte_lr_bb_info): Delete.
	(df_byte_lr): Delete.
	(df_print_byte_regset, df_byte_lr_add_problem,
	df_byte_lr_get_regno_start, df_byte_lr_get_regno_len,
	df_byte_lr_simulate_defs, df_byte_lr_simulate_uses,
	df_byte_lr_simulate_artificial_refs_at_top,
	df_byte_lr_simulate_artificial_refs_at_end, df_compute_accessed_bytes):
	Don't declare.
	(df_byte_lr_get_bb_info): Delete.
	* df-problems.c (df_byte_lr_problem_data): Delete.
	(df_byte_lr_add_problem, df_byte_lr_get_regno_start,
	df_byte_lr_get_regno_len, df_byte_lr_simulate_defs,
	df_byte_lr_simulate_uses, df_byte_lr_simulate_artificial_refs_at_top,
	df_byte_lr_simulate_artificial_refs_at_end, df_compute_accessed_bytes,
	df_byte_lr_free_bb_info, df_byte_lr_check_regs,
	df_byte_lr_expand_bitmap, df_byte_lr_alloc, df_byte_lr_reset,
	df_byte_lr_bb_local_compute, df_byte_lr_local_compute, df_byte_lr_init,
	df_byte_lr_confluence_0, df_byte_lr_confluence_n,
	df_byte_lr_transfer_function, df_byte_lr_free, df_byte_lr_top_dump,
	df_byte_lr_bottom_dump): Delete functions.
	(df_problem_BYTE_LR): Delete.

Patch

Index: tree-pass.h
===================================================================
--- tree-pass.h	(revision 161116)
+++ tree-pass.h	(working copy)
@@ -521,7 +521,6 @@  extern struct rtl_opt_pass pass_partitio
 extern struct rtl_opt_pass pass_match_asm_constraints;
 extern struct rtl_opt_pass pass_regmove;
 extern struct rtl_opt_pass pass_split_all_insns;
-extern struct rtl_opt_pass pass_fast_rtl_byte_dce;
 extern struct rtl_opt_pass pass_lower_subreg2;
 extern struct rtl_opt_pass pass_mode_switching;
 extern struct rtl_opt_pass pass_sms;
Index: df-core.c
===================================================================
--- df-core.c	(revision 161116)
+++ df-core.c	(working copy)
@@ -1856,69 +1856,6 @@  df_print_regset (FILE *file, bitmap r)
 }
 
 
-/* Write information about registers and basic blocks into FILE.  The
-   bitmap is in the form used by df_byte_lr.  This is part of making a
-   debugging dump.  */
-
-void
-df_print_byte_regset (FILE *file, bitmap r)
-{
-  unsigned int max_reg = max_reg_num ();
-  bitmap_iterator bi;
-
-  if (r == NULL)
-    fputs (" (nil)", file);
-  else
-    {
-      unsigned int i;
-      for (i = 0; i < max_reg; i++)
-	{
-	  unsigned int first = df_byte_lr_get_regno_start (i);
-	  unsigned int len = df_byte_lr_get_regno_len (i);
-
-	  if (len > 1)
-	    {
-	      bool found = false;
-	      unsigned int j;
-
-	      EXECUTE_IF_SET_IN_BITMAP (r, first, j, bi)
-		{
-		  found = j < first + len;
-		  break;
-		}
-	      if (found)
-		{
-		  const char * sep = "";
-		  fprintf (file, " %d", i);
-		  if (i < FIRST_PSEUDO_REGISTER)
-		    fprintf (file, " [%s]", reg_names[i]);
-		  fprintf (file, "(");
-		  EXECUTE_IF_SET_IN_BITMAP (r, first, j, bi)
-		    {
-		      if (j > first + len - 1)
-			break;
-		      fprintf (file, "%s%d", sep, j-first);
-		      sep = ", ";
-		    }
-		  fprintf (file, ")");
-		}
-	    }
-	  else
-	    {
-	      if (bitmap_bit_p (r, first))
-		{
-		  fprintf (file, " %d", i);
-		  if (i < FIRST_PSEUDO_REGISTER)
-		    fprintf (file, " [%s]", reg_names[i]);
-		}
-	    }
-
-	}
-    }
-  fprintf (file, "\n");
-}
-
-
 /* Dump dataflow info.  */
 
 void
Index: df.h
===================================================================
--- df.h	(revision 161116)
+++ df.h	(working copy)
@@ -52,9 +52,8 @@  union df_ref_d;
 #define DF_LIVE    2      /* Live Registers & Uninitialized Registers */
 #define DF_RD      3      /* Reaching Defs. */
 #define DF_CHAIN   4      /* Def-Use and/or Use-Def Chains. */
-#define DF_BYTE_LR 5      /* Subreg tracking lr.  */
-#define DF_NOTE    6      /* REG_DEF and REG_UNUSED notes. */
-#define DF_MD      7      /* Multiple Definitions. */
+#define DF_NOTE    5      /* REG_DEF and REG_UNUSED notes. */
+#define DF_MD      6      /* Multiple Definitions. */
 
 #define DF_LAST_PROBLEM_PLUS1 (DF_MD + 1)
 
@@ -622,7 +621,6 @@  struct df
 #define DF_RD_BB_INFO(BB) (df_rd_get_bb_info((BB)->index))
 #define DF_LR_BB_INFO(BB) (df_lr_get_bb_info((BB)->index))
 #define DF_LIVE_BB_INFO(BB) (df_live_get_bb_info((BB)->index))
-#define DF_BYTE_LR_BB_INFO(BB) (df_byte_lr_get_bb_info((BB)->index))
 #define DF_MD_BB_INFO(BB) (df_md_get_bb_info((BB)->index))
 
 /* Most transformations that wish to use live register analysis will
@@ -636,12 +634,6 @@  struct df
 #define DF_LR_IN(BB) (&DF_LR_BB_INFO(BB)->in)
 #define DF_LR_OUT(BB) (&DF_LR_BB_INFO(BB)->out)
 
-/* These macros are used by passes that are not tolerant of
-   uninitialized variables.  This intolerance should eventually
-   be fixed.  */
-#define DF_BYTE_LR_IN(BB) (&DF_BYTE_LR_BB_INFO(BB)->in)
-#define DF_BYTE_LR_OUT(BB) (&DF_BYTE_LR_BB_INFO(BB)->out)
-
 /* Macros to access the elements within the ref structure.  */
 
 
@@ -856,22 +848,6 @@  struct df_live_bb_info
 };
 
 
-/* Live registers, a backwards dataflow problem.  These bitmaps are
-indexed by the df_byte_lr_offset array which is indexed by pseudo.  */
-
-struct df_byte_lr_bb_info
-{
-  /* Local sets to describe the basic blocks.  */
-  bitmap_head def;   /* The set of registers set in this block
-                        - except artificial defs at the top.  */
-  bitmap_head use;   /* The set of registers used in this block.  */
-
-  /* The results of the dataflow problem.  */
-  bitmap_head in;    /* Just before the block itself. */
-  bitmap_head out;   /* At the bottom of the block.  */
-};
-
-
 /* This is used for debugging and for the dumpers to find the latest
    instance so that the df info can be added to the dumps.  This
    should not be used by regular code.  */
@@ -881,7 +857,6 @@  extern struct df *df;
 #define df_lr      (df->problems_by_index[DF_LR])
 #define df_live    (df->problems_by_index[DF_LIVE])
 #define df_chain   (df->problems_by_index[DF_CHAIN])
-#define df_byte_lr (df->problems_by_index[DF_BYTE_LR])
 #define df_note    (df->problems_by_index[DF_NOTE])
 #define df_md      (df->problems_by_index[DF_MD])
 
@@ -931,7 +906,6 @@  extern df_ref df_find_use (rtx, rtx);
 extern bool df_reg_used (rtx, rtx);
 extern void df_worklist_dataflow (struct dataflow *,bitmap, int *, int);
 extern void df_print_regset (FILE *file, bitmap r);
-extern void df_print_byte_regset (FILE *file, bitmap r);
 extern void df_dump (FILE *);
 extern void df_dump_region (FILE *);
 extern void df_dump_start (FILE *);
@@ -970,13 +944,6 @@  extern void df_live_verify_transfer_func
 extern void df_live_add_problem (void);
 extern void df_live_set_all_dirty (void);
 extern void df_chain_add_problem (unsigned int);
-extern void df_byte_lr_add_problem (void);
-extern int df_byte_lr_get_regno_start (unsigned int);
-extern int df_byte_lr_get_regno_len (unsigned int);
-extern void df_byte_lr_simulate_defs (rtx, bitmap);
-extern void df_byte_lr_simulate_uses (rtx, bitmap);
-extern void df_byte_lr_simulate_artificial_refs_at_top (basic_block, bitmap);
-extern void df_byte_lr_simulate_artificial_refs_at_end (basic_block, bitmap);
 extern void df_note_add_problem (void);
 extern void df_md_add_problem (void);
 extern void df_md_simulate_artificial_defs_at_top (basic_block, bitmap);
@@ -1027,10 +994,6 @@  extern void df_compute_regs_ever_live (b
 extern bool df_read_modify_subreg_p (rtx);
 extern void df_scan_verify (void);
 
-/* Functions defined in df-byte-scan.c.  */
-extern bool df_compute_accessed_bytes (df_ref, enum df_mm,
-				       unsigned int *, unsigned int *);
-
 
 /* Get basic block info.  */
 
@@ -1079,15 +1042,6 @@  df_live_get_bb_info (unsigned int index)
     return NULL;
 }
 
-static inline struct df_byte_lr_bb_info *
-df_byte_lr_get_bb_info (unsigned int index)
-{
-  if (index < df_byte_lr->block_info_size)
-    return &((struct df_byte_lr_bb_info *) df_byte_lr->block_info)[index];
-  else
-    return NULL;
-}
-
 /* Get the artificial defs for a basic block.  */
 
 static inline df_ref *
Index: lower-subreg.c
===================================================================
--- lower-subreg.c	(revision 161116)
+++ lower-subreg.c	(working copy)
@@ -37,6 +37,7 @@  along with GCC; see the file COPYING3.  
 #include "except.h"
 #include "regs.h"
 #include "tree-pass.h"
+#include "dce.h"
 #include "df.h"
 
 #ifdef STACK_GROWS_DOWNWARD
@@ -1052,6 +1053,58 @@  resolve_shift_zext (rtx insn)
   return insns;
 }
 
+/* Examine REF, and if it is for a reg we're interested in, mark it
+   either live or dead according to KILL.  LIVE0 and LIVE1 give the
+   live subwords for each pseudo that has a size of two words.  We
+   do not track hard regs or pseudos of any other size.
+   Return true if we changed any reg set, or if we did not handle the
+   ref.  */
+
+static bool
+mark_ref (df_ref ref, bool kill, regset live0, regset live1)
+{
+  rtx orig_reg = DF_REF_REG (ref);
+  rtx reg = orig_reg;
+  enum machine_mode reg_mode;
+  unsigned regno;
+  /* Left at -1 for whole accesses.  */
+  int which_subword = -1;
+  bool changed = false;
+
+  if (GET_CODE (reg) == SUBREG)
+    reg = SUBREG_REG (orig_reg);
+  regno = REGNO (reg);
+  reg_mode = GET_MODE (reg);
+  if (regno < FIRST_PSEUDO_REGISTER
+      || GET_MODE_SIZE (reg_mode) != 2 * UNITS_PER_WORD)
+    return true;
+
+  if (GET_CODE (orig_reg) == SUBREG
+      && df_read_modify_subreg_p (orig_reg))
+    {
+      gcc_assert (DF_REF_FLAGS_IS_SET (ref, DF_REF_PARTIAL));
+      if (subreg_lowpart_p (orig_reg))
+	which_subword = 0;
+      else
+	which_subword = 1;
+    }
+  if (kill)
+    {
+      if (which_subword != 1)
+	changed |= bitmap_clear_bit (live0, regno);
+      if (which_subword != 0)
+	changed |= bitmap_clear_bit (live1, regno);
+    }
+  else
+    {
+      if (which_subword != 1)
+	changed |= bitmap_set_bit (live0, regno);
+      if (which_subword != 0)
+	changed |= bitmap_set_bit (live1, regno);
+    }
+  return changed;
+}
+
 /* Look for registers which are always accessed via word-sized SUBREGs
    or via copies.  Decompose these registers into several word-sized
    pseudo-registers.  */
@@ -1061,6 +1114,7 @@  decompose_multiword_subregs (void)
 {
   unsigned int max;
   basic_block bb;
+  bitmap_head live_word0, live_word1;
 
   if (df)
     df_set_flags (DF_DEFER_INSN_RESCAN);
@@ -1096,16 +1150,56 @@  decompose_multiword_subregs (void)
   VEC_safe_grow (bitmap, heap, reg_copy_graph, max);
   memset (VEC_address (bitmap, reg_copy_graph), 0, sizeof (bitmap) * max);
 
+  if (df)
+    {
+      init_dce (true);
+      prescan_insns_for_dce (true, false);
+      bitmap_initialize (&live_word0, &dce_tmp_bitmap_obstack);
+      bitmap_initialize (&live_word1, &dce_tmp_bitmap_obstack);
+    }
+
   FOR_EACH_BB (bb)
     {
       rtx insn;
 
-      FOR_BB_INSNS (bb, insn)
+      if (df)
+	{
+	  bitmap_copy (&live_word0, DF_LR_OUT (bb));
+	  df_simulate_initialize_backwards (bb, &live_word0);
+	  bitmap_copy (&live_word1, &live_word0);
+	}
+
+      FOR_BB_INSNS_REVERSE (bb, insn)
 	{
 	  rtx set;
 	  enum classify_move_insn cmi;
 	  int i, n;
 
+	  df_ref *rec;
+	  bool any_needed;
+
+	  if (!NONDEBUG_INSN_P (insn))
+	    continue;
+
+	  if (df)
+	    {
+	      any_needed = dce_marked_insn_p (insn);
+
+	      /* See which defined values die here.  */
+	      for (rec = DF_INSN_DEFS (insn); *rec; rec++)
+		any_needed |= mark_ref (*rec, true, &live_word0, &live_word1);
+
+	      if (!any_needed && NONJUMP_INSN_P (insn))
+		{
+		  delete_insn (insn);
+		  continue;
+		}
+	      else
+		/* Mark each used value as live.  */
+		for (rec = DF_INSN_USES (insn); *rec; rec++)
+		  mark_ref (*rec, false, &live_word0, &live_word1);
+	    }
+
 	  if (!INSN_P (insn)
 	      || GET_CODE (PATTERN (insn)) == CLOBBER
 	      || GET_CODE (PATTERN (insn)) == USE)
@@ -1151,6 +1245,13 @@  decompose_multiword_subregs (void)
 	}
     }
 
+  if (df)
+    {
+      bitmap_clear (&live_word0);
+      bitmap_clear (&live_word1);
+      fini_dce (true);
+    }
+
   bitmap_and_compl_into (decomposable_context, non_decomposable_context);
   if (!bitmap_empty_p (decomposable_context))
     {
Index: df-problems.c
===================================================================
--- df-problems.c	(revision 161116)
+++ df-problems.c	(working copy)
@@ -2282,720 +2282,6 @@  df_chain_add_problem (unsigned int chain
 
 
 /*----------------------------------------------------------------------------
-   BYTE LEVEL LIVE REGISTERS
-
-   Find the locations in the function where any use of a pseudo can
-   reach in the backwards direction.  In and out bitvectors are built
-   for each basic block.  There are two mapping functions,
-   df_byte_lr_get_regno_start and df_byte_lr_get_regno_len that are
-   used to map regnos into bit vector positions.
-
-   This problem differs from the regular df_lr function in the way
-   that subregs, *_extracts and strict_low_parts are handled. In lr
-   these are consider partial kills, here, the exact set of bytes is
-   modeled.  Note that any reg that has none of these operations is
-   only modeled with a single bit since all operations access the
-   entire register.
-
-   This problem is more brittle that the regular lr.  It currently can
-   be used in dce incrementally, but cannot be used in an environment
-   where insns are created or modified.  The problem is that the
-   mapping of regnos to bitmap positions is relatively compact, in
-   that if a pseudo does not do any of the byte wise operations, only
-   one slot is allocated, rather than a slot for each byte.  If insn
-   are created, where a subreg is used for a reg that had no subregs,
-   the mapping would be wrong.  Likewise, there are no checks to see
-   that new pseudos have been added.  These issues could be addressed
-   by adding a problem specific flag to not use the compact mapping,
-   if there was a need to do so.
-
-   ----------------------------------------------------------------------------*/
-
-/* Private data used to verify the solution for this problem.  */
-struct df_byte_lr_problem_data
-{
-  /* Expanded versions of bitvectors used in lr.  */
-  bitmap_head invalidated_by_call;
-  bitmap_head hardware_regs_used;
-
-  /* Indexed by regno, this is true if there are subregs, extracts or
-     strict_low_parts for this regno.  */
-  bitmap_head needs_expansion;
-
-  /* The start position and len for each regno in the various bit
-     vectors.  */
-  unsigned int* regno_start;
-  unsigned int* regno_len;
-  /* An obstack for the bitmaps we need for this problem.  */
-  bitmap_obstack byte_lr_bitmaps;
-};
-
-
-/* Get the starting location for REGNO in the df_byte_lr bitmaps.  */
-
-int
-df_byte_lr_get_regno_start (unsigned int regno)
-{
-  struct df_byte_lr_problem_data *problem_data
-    = (struct df_byte_lr_problem_data *)df_byte_lr->problem_data;;
-  return problem_data->regno_start[regno];
-}
-
-
-/* Get the len for REGNO in the df_byte_lr bitmaps.  */
-
-int
-df_byte_lr_get_regno_len (unsigned int regno)
-{
-  struct df_byte_lr_problem_data *problem_data
-    = (struct df_byte_lr_problem_data *)df_byte_lr->problem_data;;
-  return problem_data->regno_len[regno];
-}
-
-
-/* Free basic block info.  */
-
-static void
-df_byte_lr_free_bb_info (basic_block bb ATTRIBUTE_UNUSED,
-			 void *vbb_info)
-{
-  struct df_byte_lr_bb_info *bb_info = (struct df_byte_lr_bb_info *) vbb_info;
-  if (bb_info)
-    {
-      bitmap_clear (&bb_info->use);
-      bitmap_clear (&bb_info->def);
-      bitmap_clear (&bb_info->in);
-      bitmap_clear (&bb_info->out);
-    }
-}
-
-
-/* Check all of the refs in REF_REC to see if any of them are
-   extracts, subregs or strict_low_parts.  */
-
-static void
-df_byte_lr_check_regs (df_ref *ref_rec)
-{
-  struct df_byte_lr_problem_data *problem_data
-    = (struct df_byte_lr_problem_data *)df_byte_lr->problem_data;
-
-  for (; *ref_rec; ref_rec++)
-    {
-      df_ref ref = *ref_rec;
-      if (DF_REF_FLAGS_IS_SET (ref, DF_REF_SIGN_EXTRACT
-			       | DF_REF_ZERO_EXTRACT
-			       | DF_REF_STRICT_LOW_PART)
-	  || GET_CODE (DF_REF_REG (ref)) == SUBREG)
-	bitmap_set_bit (&problem_data->needs_expansion, DF_REF_REGNO (ref));
-    }
-}
-
-
-/* Expand bitmap SRC which is indexed by regno to DEST which is indexed by
-   regno_start and regno_len.  */
-
-static void
-df_byte_lr_expand_bitmap (bitmap dest, bitmap src)
-{
-  struct df_byte_lr_problem_data *problem_data
-    = (struct df_byte_lr_problem_data *)df_byte_lr->problem_data;
-  bitmap_iterator bi;
-  unsigned int i;
-
-  bitmap_clear (dest);
-  EXECUTE_IF_SET_IN_BITMAP (src, 0, i, bi)
-    {
-      bitmap_set_range (dest, problem_data->regno_start[i],
-			problem_data->regno_len[i]);
-    }
-}
-
-
-/* Allocate or reset bitmaps for DF_BYTE_LR blocks. The solution bits are
-   not touched unless the block is new.  */
-
-static void
-df_byte_lr_alloc (bitmap all_blocks ATTRIBUTE_UNUSED)
-{
-  unsigned int bb_index;
-  bitmap_iterator bi;
-  basic_block bb;
-  unsigned int regno;
-  unsigned int index = 0;
-  unsigned int max_reg = max_reg_num();
-  struct df_byte_lr_problem_data *problem_data
-    = XNEW (struct df_byte_lr_problem_data);
-
-  df_byte_lr->problem_data = problem_data;
-
-  df_grow_bb_info (df_byte_lr);
-
-  /* Create the mapping from regnos to slots. This does not change
-     unless the problem is destroyed and recreated.  In particular, if
-     we end up deleting the only insn that used a subreg, we do not
-     want to redo the mapping because this would invalidate everything
-     else.  */
-
-  bitmap_obstack_initialize (&problem_data->byte_lr_bitmaps);
-  problem_data->regno_start = XNEWVEC (unsigned int, max_reg);
-  problem_data->regno_len = XNEWVEC (unsigned int, max_reg);
-  bitmap_initialize (&problem_data->hardware_regs_used,
-		     &problem_data->byte_lr_bitmaps);
-  bitmap_initialize (&problem_data->invalidated_by_call,
-		     &problem_data->byte_lr_bitmaps);
-  bitmap_initialize (&problem_data->needs_expansion,
-		     &problem_data->byte_lr_bitmaps);
-
-  /* Discover which regno's use subregs, extracts or
-     strict_low_parts.  */
-  FOR_EACH_BB (bb)
-    {
-      rtx insn;
-      FOR_BB_INSNS (bb, insn)
-	{
-	  if (INSN_P (insn))
-	    {
-	      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
-	      df_byte_lr_check_regs (DF_INSN_INFO_DEFS (insn_info));
-	      df_byte_lr_check_regs (DF_INSN_INFO_USES (insn_info));
-	    }
-	}
-      bitmap_set_bit (df_byte_lr->out_of_date_transfer_functions, bb->index);
-    }
-
-  bitmap_set_bit (df_byte_lr->out_of_date_transfer_functions, ENTRY_BLOCK);
-  bitmap_set_bit (df_byte_lr->out_of_date_transfer_functions, EXIT_BLOCK);
-
-  /* Allocate the slots for each regno.  */
-  for (regno = 0; regno < max_reg; regno++)
-    {
-      int len;
-      problem_data->regno_start[regno] = index;
-      if (bitmap_bit_p (&problem_data->needs_expansion, regno))
-	len = GET_MODE_SIZE (GET_MODE (regno_reg_rtx[regno]));
-      else
-	len = 1;
-
-      problem_data->regno_len[regno] = len;
-      index += len;
-    }
-
-  df_byte_lr_expand_bitmap (&problem_data->hardware_regs_used,
-			    &df->hardware_regs_used);
-  df_byte_lr_expand_bitmap (&problem_data->invalidated_by_call,
-			    regs_invalidated_by_call_regset);
-
-  EXECUTE_IF_SET_IN_BITMAP (df_byte_lr->out_of_date_transfer_functions, 0, bb_index, bi)
-    {
-      struct df_byte_lr_bb_info *bb_info = df_byte_lr_get_bb_info (bb_index);
-      
-      /* When bitmaps are already initialized, just clear them.  */
-      if (bb_info->use.obstack)
-	{
-	  bitmap_clear (&bb_info->def);
-	  bitmap_clear (&bb_info->use);
-	}
-      else
-	{
-	  bitmap_initialize (&bb_info->use, &problem_data->byte_lr_bitmaps);
-	  bitmap_initialize (&bb_info->def, &problem_data->byte_lr_bitmaps);
-	  bitmap_initialize (&bb_info->in, &problem_data->byte_lr_bitmaps);
-	  bitmap_initialize (&bb_info->out, &problem_data->byte_lr_bitmaps);
-	}
-    }
-
-  df_byte_lr->optional_p = true;
-}
-
-
-/* Reset the global solution for recalculation.  */
-
-static void
-df_byte_lr_reset (bitmap all_blocks)
-{
-  unsigned int bb_index;
-  bitmap_iterator bi;
-
-  EXECUTE_IF_SET_IN_BITMAP (all_blocks, 0, bb_index, bi)
-    {
-      struct df_byte_lr_bb_info *bb_info = df_byte_lr_get_bb_info (bb_index);
-      gcc_assert (bb_info);
-      bitmap_clear (&bb_info->in);
-      bitmap_clear (&bb_info->out);
-    }
-}
-
-
-/* Compute local live register info for basic block BB.  */
-
-static void
-df_byte_lr_bb_local_compute (unsigned int bb_index)
-{
-  struct df_byte_lr_problem_data *problem_data
-    = (struct df_byte_lr_problem_data *)df_byte_lr->problem_data;
-  basic_block bb = BASIC_BLOCK (bb_index);
-  struct df_byte_lr_bb_info *bb_info = df_byte_lr_get_bb_info (bb_index);
-  rtx insn;
-  df_ref *def_rec;
-  df_ref *use_rec;
-
-  /* Process the registers set in an exception handler.  */
-  for (def_rec = df_get_artificial_defs (bb_index); *def_rec; def_rec++)
-    {
-      df_ref def = *def_rec;
-      if ((DF_REF_FLAGS (def) & DF_REF_AT_TOP) == 0)
-	{
-	  unsigned int dregno = DF_REF_REGNO (def);
-	  unsigned int start = problem_data->regno_start[dregno];
-	  unsigned int len = problem_data->regno_len[dregno];
-	  bitmap_set_range (&bb_info->def, start, len);
-	  bitmap_clear_range (&bb_info->use, start, len);
-	}
-    }
-
-  /* Process the hardware registers that are always live.  */
-  for (use_rec = df_get_artificial_uses (bb_index); *use_rec; use_rec++)
-    {
-      df_ref use = *use_rec;
-      /* Add use to set of uses in this BB.  */
-      if ((DF_REF_FLAGS (use) & DF_REF_AT_TOP) == 0)
-	{
-	  unsigned int uregno = DF_REF_REGNO (use);
-	  unsigned int start = problem_data->regno_start[uregno];
-	  unsigned int len = problem_data->regno_len[uregno];
-	  bitmap_set_range (&bb_info->use, start, len);
-	}
-    }
-
-  FOR_BB_INSNS_REVERSE (bb, insn)
-    {
-      unsigned int uid = INSN_UID (insn);
-
-      if (!INSN_P (insn))
-	continue;
-
-      for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
-	{
-	  df_ref def = *def_rec;
-	  /* If the def is to only part of the reg, it does
-	     not kill the other defs that reach here.  */
-	  if (!(DF_REF_FLAGS (def) & (DF_REF_CONDITIONAL)))
-	    {
-	      unsigned int dregno = DF_REF_REGNO (def);
-	      unsigned int start = problem_data->regno_start[dregno];
-	      unsigned int len = problem_data->regno_len[dregno];
-	      unsigned int sb;
-	      unsigned int lb;
-	      if (!df_compute_accessed_bytes (def, DF_MM_MUST, &sb, &lb))
-		{
-		  start += sb;
-		  len = lb - sb;
-		}
-	      if (len)
-		{
-		  bitmap_set_range (&bb_info->def, start, len);
-		  bitmap_clear_range (&bb_info->use, start, len);
-		}
-	    }
-	}
-
-      for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++)
-	{
-	  df_ref use = *use_rec;
-	  unsigned int uregno = DF_REF_REGNO (use);
-	  unsigned int start = problem_data->regno_start[uregno];
-	  unsigned int len = problem_data->regno_len[uregno];
-	  unsigned int sb;
-	  unsigned int lb;
-	  if (!df_compute_accessed_bytes (use, DF_MM_MAY, &sb, &lb))
-	    {
-	      start += sb;
-	      len = lb - sb;
-	    }
-	  /* Add use to set of uses in this BB.  */
-	  if (len)
-	    bitmap_set_range (&bb_info->use, start, len);
-	}
-    }
-
-  /* Process the registers set in an exception handler or the hard
-     frame pointer if this block is the target of a non local
-     goto.  */
-  for (def_rec = df_get_artificial_defs (bb_index); *def_rec; def_rec++)
-    {
-      df_ref def = *def_rec;
-      if (DF_REF_FLAGS (def) & DF_REF_AT_TOP)
-	{
-	  unsigned int dregno = DF_REF_REGNO (def);
-	  unsigned int start = problem_data->regno_start[dregno];
-	  unsigned int len = problem_data->regno_len[dregno];
-	  bitmap_set_range (&bb_info->def, start, len);
-	  bitmap_clear_range (&bb_info->use, start, len);
-	}
-    }
-
-#ifdef EH_USES
-  /* Process the uses that are live into an exception handler.  */
-  for (use_rec = df_get_artificial_uses (bb_index); *use_rec; use_rec++)
-    {
-      df_ref use = *use_rec;
-      /* Add use to set of uses in this BB.  */
-      if (DF_REF_FLAGS (use) & DF_REF_AT_TOP)
-	{
-	  unsigned int uregno = DF_REF_REGNO (use);
-	  unsigned int start = problem_data->regno_start[uregno];
-	  unsigned int len = problem_data->regno_len[uregno];
-	  bitmap_set_range (&bb_info->use, start, len);
-	}
-    }
-#endif
-}
-
-
-/* Compute local live register info for each basic block within BLOCKS.  */
-
-static void
-df_byte_lr_local_compute (bitmap all_blocks ATTRIBUTE_UNUSED)
-{
-  unsigned int bb_index;
-  bitmap_iterator bi;
-
-  EXECUTE_IF_SET_IN_BITMAP (df_byte_lr->out_of_date_transfer_functions, 0, bb_index, bi)
-    {
-      if (bb_index == EXIT_BLOCK)
-	{
-	  /* The exit block is special for this problem and its bits are
-	     computed from thin air.  */
-	  struct df_byte_lr_bb_info *bb_info = df_byte_lr_get_bb_info (EXIT_BLOCK);
-	  df_byte_lr_expand_bitmap (&bb_info->use, df->exit_block_uses);
-	}
-      else
-	df_byte_lr_bb_local_compute (bb_index);
-    }
-
-  bitmap_clear (df_byte_lr->out_of_date_transfer_functions);
-}
-
-
-/* Initialize the solution vectors.  */
-
-static void
-df_byte_lr_init (bitmap all_blocks)
-{
-  unsigned int bb_index;
-  bitmap_iterator bi;
-
-  EXECUTE_IF_SET_IN_BITMAP (all_blocks, 0, bb_index, bi)
-    {
-      struct df_byte_lr_bb_info *bb_info = df_byte_lr_get_bb_info (bb_index);
-      bitmap_copy (&bb_info->in, &bb_info->use);
-      bitmap_clear (&bb_info->out);
-    }
-}
-
-
-/* Confluence function that processes infinite loops.  This might be a
-   noreturn function that throws.  And even if it isn't, getting the
-   unwind info right helps debugging.  */
-static void
-df_byte_lr_confluence_0 (basic_block bb)
-{
-  struct df_byte_lr_problem_data *problem_data
-    = (struct df_byte_lr_problem_data *)df_byte_lr->problem_data;
-  bitmap op1 = &df_byte_lr_get_bb_info (bb->index)->out;
-  if (bb != EXIT_BLOCK_PTR)
-    bitmap_copy (op1, &problem_data->hardware_regs_used);
-}
-
-
-/* Confluence function that ignores fake edges.  */
-
-static void
-df_byte_lr_confluence_n (edge e)
-{
-  struct df_byte_lr_problem_data *problem_data
-    = (struct df_byte_lr_problem_data *)df_byte_lr->problem_data;
-  bitmap op1 = &df_byte_lr_get_bb_info (e->src->index)->out;
-  bitmap op2 = &df_byte_lr_get_bb_info (e->dest->index)->in;
-
-  /* Call-clobbered registers die across exception and call edges.  */
-  /* ??? Abnormal call edges ignored for the moment, as this gets
-     confused by sibling call edges, which crashes reg-stack.  */
-  if (e->flags & EDGE_EH)
-    bitmap_ior_and_compl_into (op1, op2, &problem_data->invalidated_by_call);
-  else
-    bitmap_ior_into (op1, op2);
-
-  bitmap_ior_into (op1, &problem_data->hardware_regs_used);
-}
-
-
-/* Transfer function.  */
-
-static bool
-df_byte_lr_transfer_function (int bb_index)
-{
-  struct df_byte_lr_bb_info *bb_info = df_byte_lr_get_bb_info (bb_index);
-  bitmap in = &bb_info->in;
-  bitmap out = &bb_info->out;
-  bitmap use = &bb_info->use;
-  bitmap def = &bb_info->def;
-
-  return bitmap_ior_and_compl (in, use, out, def);
-}
-
-
-/* Free all storage associated with the problem.  */
-
-static void
-df_byte_lr_free (void)
-{
-  struct df_byte_lr_problem_data *problem_data
-    = (struct df_byte_lr_problem_data *)df_byte_lr->problem_data;
-
-
-  if (df_byte_lr->block_info)
-    {
-      df_byte_lr->block_info_size = 0;
-      free (df_byte_lr->block_info);
-      df_byte_lr->block_info = NULL;
-    }
-
-  BITMAP_FREE (df_byte_lr->out_of_date_transfer_functions);
-  bitmap_obstack_release (&problem_data->byte_lr_bitmaps);
-  free (problem_data->regno_start);
-  free (problem_data->regno_len);
-  free (problem_data);
-  free (df_byte_lr);
-}
-
-
-/* Debugging info at top of bb.  */
-
-static void
-df_byte_lr_top_dump (basic_block bb, FILE *file)
-{
-  struct df_byte_lr_bb_info *bb_info = df_byte_lr_get_bb_info (bb->index);
-  if (!bb_info)
-    return;
-
-  fprintf (file, ";; blr  in  \t");
-  df_print_byte_regset (file, &bb_info->in);
-  fprintf (file, ";; blr  use \t");
-  df_print_byte_regset (file, &bb_info->use);
-  fprintf (file, ";; blr  def \t");
-  df_print_byte_regset (file, &bb_info->def);
-}
-
-
-/* Debugging info at bottom of bb.  */
-
-static void
-df_byte_lr_bottom_dump (basic_block bb, FILE *file)
-{
-  struct df_byte_lr_bb_info *bb_info = df_byte_lr_get_bb_info (bb->index);
-  if (!bb_info)
-    return;
-
-  fprintf (file, ";; blr  out \t");
-  df_print_byte_regset (file, &bb_info->out);
-}
-
-
-/* All of the information associated with every instance of the problem.  */
-
-static struct df_problem problem_BYTE_LR =
-{
-  DF_BYTE_LR,                      /* Problem id.  */
-  DF_BACKWARD,                     /* Direction.  */
-  df_byte_lr_alloc,                /* Allocate the problem specific data.  */
-  df_byte_lr_reset,                /* Reset global information.  */
-  df_byte_lr_free_bb_info,         /* Free basic block info.  */
-  df_byte_lr_local_compute,        /* Local compute function.  */
-  df_byte_lr_init,                 /* Init the solution specific data.  */
-  df_worklist_dataflow,            /* Worklist solver.  */
-  df_byte_lr_confluence_0,         /* Confluence operator 0.  */
-  df_byte_lr_confluence_n,         /* Confluence operator n.  */
-  df_byte_lr_transfer_function,    /* Transfer function.  */
-  NULL,                            /* Finalize function.  */
-  df_byte_lr_free,                 /* Free all of the problem information.  */
-  df_byte_lr_free,                 /* Remove this problem from the stack of dataflow problems.  */
-  NULL,                            /* Debugging.  */
-  df_byte_lr_top_dump,             /* Debugging start block.  */
-  df_byte_lr_bottom_dump,          /* Debugging end block.  */
-  NULL,                            /* Incremental solution verify start.  */
-  NULL,                            /* Incremental solution verify end.  */
-  NULL,                       /* Dependent problem.  */
-  sizeof (struct df_byte_lr_bb_info),/* Size of entry of block_info array.  */
-  TV_DF_BYTE_LR,                   /* Timing variable.  */
-  false                            /* Reset blocks on dropping out of blocks_to_analyze.  */
-};
-
-
-/* Create a new DATAFLOW instance and add it to an existing instance
-   of DF.  The returned structure is what is used to get at the
-   solution.  */
-
-void
-df_byte_lr_add_problem (void)
-{
-  df_add_problem (&problem_BYTE_LR);
-  /* These will be initialized when df_scan_blocks processes each
-     block.  */
-  df_byte_lr->out_of_date_transfer_functions = BITMAP_ALLOC (NULL);
-}
-
-
-/* Simulate the effects of the defs of INSN on LIVE.  */
-
-void
-df_byte_lr_simulate_defs (rtx insn, bitmap live)
-{
-  struct df_byte_lr_problem_data *problem_data
-    = (struct df_byte_lr_problem_data *)df_byte_lr->problem_data;
-  df_ref *def_rec;
-  unsigned int uid = INSN_UID (insn);
-
-  for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
-    {
-      df_ref def = *def_rec;
-
-      /* If the def is to only part of the reg, it does
-	 not kill the other defs that reach here.  */
-      if (!(DF_REF_FLAGS (def) & DF_REF_CONDITIONAL))
-	{
-	  unsigned int dregno = DF_REF_REGNO (def);
-	  unsigned int start = problem_data->regno_start[dregno];
-	  unsigned int len = problem_data->regno_len[dregno];
-	  unsigned int sb;
-	  unsigned int lb;
-	  if (!df_compute_accessed_bytes (def, DF_MM_MUST, &sb, &lb))
-	    {
-	      start += sb;
-	      len = lb - sb;
-	    }
-
-	  if (len)
-	    bitmap_clear_range (live, start, len);
-	}
-    }
-}
-
-
-/* Simulate the effects of the uses of INSN on LIVE.  */
-
-void
-df_byte_lr_simulate_uses (rtx insn, bitmap live)
-{
-  struct df_byte_lr_problem_data *problem_data
-    = (struct df_byte_lr_problem_data *)df_byte_lr->problem_data;
-  df_ref *use_rec;
-  unsigned int uid = INSN_UID (insn);
-
-  for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++)
-    {
-      df_ref use = *use_rec;
-      unsigned int uregno = DF_REF_REGNO (use);
-      unsigned int start = problem_data->regno_start[uregno];
-      unsigned int len = problem_data->regno_len[uregno];
-      unsigned int sb;
-      unsigned int lb;
-
-      if (!df_compute_accessed_bytes (use, DF_MM_MAY, &sb, &lb))
-	{
-	  start += sb;
-	  len = lb - sb;
-	}
-
-      /* Add use to set of uses in this BB.  */
-      if (len)
-	bitmap_set_range (live, start, len);
-    }
-}
-
-
-/* Apply the artificial uses and defs at the top of BB in a forwards
-   direction.  */
-
-void
-df_byte_lr_simulate_artificial_refs_at_top (basic_block bb, bitmap live)
-{
-  struct df_byte_lr_problem_data *problem_data
-    = (struct df_byte_lr_problem_data *)df_byte_lr->problem_data;
-  df_ref *def_rec;
-#ifdef EH_USES
-  df_ref *use_rec;
-#endif
-  int bb_index = bb->index;
-
-#ifdef EH_USES
-  for (use_rec = df_get_artificial_uses (bb_index); *use_rec; use_rec++)
-    {
-      df_ref use = *use_rec;
-      if (DF_REF_FLAGS (use) & DF_REF_AT_TOP)
-	{
-	  unsigned int uregno = DF_REF_REGNO (use);
-	  unsigned int start = problem_data->regno_start[uregno];
-	  unsigned int len = problem_data->regno_len[uregno];
-	  bitmap_set_range (live, start, len);
-	}
-    }
-#endif
-
-  for (def_rec = df_get_artificial_defs (bb_index); *def_rec; def_rec++)
-    {
-      df_ref def = *def_rec;
-      if (DF_REF_FLAGS (def) & DF_REF_AT_TOP)
-	{
-	  unsigned int dregno = DF_REF_REGNO (def);
-	  unsigned int start = problem_data->regno_start[dregno];
-	  unsigned int len = problem_data->regno_len[dregno];
-	  bitmap_clear_range (live, start, len);
-	}
-    }
-}
-
-
-/* Apply the artificial uses and defs at the end of BB in a backwards
-   direction.  */
-
-void
-df_byte_lr_simulate_artificial_refs_at_end (basic_block bb, bitmap live)
-{
-  struct df_byte_lr_problem_data *problem_data
-    = (struct df_byte_lr_problem_data *)df_byte_lr->problem_data;
-  df_ref *def_rec;
-  df_ref *use_rec;
-  int bb_index = bb->index;
-
-  for (def_rec = df_get_artificial_defs (bb_index); *def_rec; def_rec++)
-    {
-      df_ref def = *def_rec;
-      if ((DF_REF_FLAGS (def) & DF_REF_AT_TOP) == 0)
-	{
-	  unsigned int dregno = DF_REF_REGNO (def);
-	  unsigned int start = problem_data->regno_start[dregno];
-	  unsigned int len = problem_data->regno_len[dregno];
-	  bitmap_clear_range (live, start, len);
-	}
-    }
-
-  for (use_rec = df_get_artificial_uses (bb_index); *use_rec; use_rec++)
-    {
-      df_ref use = *use_rec;
-      if ((DF_REF_FLAGS (use) & DF_REF_AT_TOP) == 0)
-	{
-	  unsigned int uregno = DF_REF_REGNO (use);
-	  unsigned int start = problem_data->regno_start[uregno];
-	  unsigned int len = problem_data->regno_len[uregno];
-	  bitmap_set_range (live, start, len);
-	}
-    }
-}
-
-
-
-/*----------------------------------------------------------------------------
    This problem computes REG_DEAD and REG_UNUSED notes.
    ----------------------------------------------------------------------------*/
 
Index: Makefile.in
===================================================================
--- Makefile.in	(revision 161116)
+++ Makefile.in	(working copy)
@@ -1197,7 +1197,6 @@  OBJS-common = \
 	dce.o \
 	ddg.o \
 	debug.o \
-	df-byte-scan.o \
 	df-core.o \
 	df-problems.o \
 	df-scan.o \
@@ -3147,8 +3146,6 @@  df-scan.o : df-scan.c $(CONFIG_H) $(SYST
    hard-reg-set.h $(BASIC_BLOCK_H) $(DF_H) $(BITMAP_H) sbitmap.h $(TIMEVAR_H) \
    $(TM_P_H) $(FLAGS_H) $(TARGET_H) $(TARGET_DEF_H) $(TREE_H) output.h \
    $(TREE_PASS_H) $(EMIT_RTL_H)
-df-byte-scan.o : df-byte-scan.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
-   $(TM_P_H) $(DF_H) output.h $(DBGCNT_H)
 regstat.o : regstat.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
    $(TM_P_H) $(FLAGS_H) $(REGS_H) output.h $(EXCEPT_H) hard-reg-set.h \
    $(BASIC_BLOCK_H) $(TIMEVAR_H) $(DF_H)
@@ -3466,7 +3463,7 @@  dbgcnt.o: dbgcnt.c $(CONFIG_H) $(SYSTEM_
 lower-subreg.o : lower-subreg.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(MACHMODE_H) $(TM_H) $(RTL_H) $(TM_P_H) $(TIMEVAR_H) $(FLAGS_H) \
    insn-config.h $(BASIC_BLOCK_H) $(RECOG_H) $(OBSTACK_H) $(BITMAP_H) \
-   $(EXPR_H) $(EXCEPT_H) $(REGS_H) $(TREE_PASS_H) $(DF_H)
+   $(EXPR_H) $(EXCEPT_H) $(REGS_H) $(TREE_PASS_H) $(DF_H) dce.h
 
 $(out_object_file): $(out_file) $(CONFIG_H) coretypes.h $(TM_H) $(TREE_H) \
    $(RTL_H) $(REGS_H) hard-reg-set.h insn-config.h conditions.h \
Index: dce.c
===================================================================
--- dce.c	(revision 161116)
+++ dce.c	(working copy)
@@ -55,7 +55,7 @@  static sbitmap marked;
 
 /* Bitmap obstacks used for block processing by the fast algorithm.  */
 static bitmap_obstack dce_blocks_bitmap_obstack;
-static bitmap_obstack dce_tmp_bitmap_obstack;
+bitmap_obstack dce_tmp_bitmap_obstack;
 
 static bool find_call_stack_args (rtx, bool, bool, bitmap);
 
@@ -95,10 +95,7 @@  deletable_insn_p (rtx insn, bool fast, b
   int i;
 
   if (CALL_P (insn)
-      /* We cannot delete calls inside of the recursive dce because
-	 this may cause basic blocks to be deleted and this messes up
-	 the rest of the stack of optimization passes.  */
-      && (!df_in_progress)
+      && arg_stores
       /* We cannot delete pure or const sibling calls because it is
 	 hard to see the result.  */
       && (!SIBLING_CALL_P (insn))
@@ -152,8 +149,8 @@  deletable_insn_p (rtx insn, bool fast, b
 
 /* Return true if INSN has been marked as needed.  */
 
-static inline int
-marked_insn_p (rtx insn)
+inline bool
+dce_marked_insn_p (rtx insn)
 {
   /* Artificial defs are always needed and they do not have an insn.
      We should never see them here.  */
@@ -168,7 +165,7 @@  marked_insn_p (rtx insn)
 static void
 mark_insn (rtx insn, bool fast)
 {
-  if (!marked_insn_p (insn))
+  if (!dce_marked_insn_p (insn))
     {
       if (!fast)
 	VEC_safe_push (rtx, heap, worklist, insn);
@@ -517,7 +514,7 @@  delete_unmarked_insns (void)
 	    ;
 
 	  /* Otherwise rely only on the DCE algorithm.  */
-	  else if (marked_insn_p (insn))
+	  else if (dce_marked_insn_p (insn))
 	    continue;
 
 	  /* Beware that reaching a dbg counter limit here can result
@@ -566,10 +563,12 @@  delete_unmarked_insns (void)
 
 /* Go through the instructions and mark those whose necessity is not
    dependent on inter-instruction information.  Make sure all other
-   instructions are not marked.  */
+   instructions are not marked.  If STACK_ARGS is true, defer marking of
+   call arg stores; they will be marked later once mark_insn is run on
+   the call.  */
 
-static void
-prescan_insns_for_dce (bool fast)
+void
+prescan_insns_for_dce (bool fast, bool stack_args)
 {
   basic_block bb;
   rtx insn, prev;
@@ -578,7 +577,10 @@  prescan_insns_for_dce (bool fast)
   if (dump_file)
     fprintf (dump_file, "Finding needed instructions:\n");
 
-  if (!df_in_progress && ACCUMULATE_OUTGOING_ARGS)
+  /* deletable_insn_p only treats calls as deletable when ARG_STORES is
+     set.  Leave it unset inside the recursive dce: deleting calls there
+     may delete basic blocks and break the rest of the pass stack.  */
+  if (stack_args && !df_in_progress && ACCUMULATE_OUTGOING_ARGS)
     arg_stores = BITMAP_ALLOC (NULL);
 
   FOR_EACH_BB (bb)
@@ -586,8 +588,6 @@  prescan_insns_for_dce (bool fast)
       FOR_BB_INSNS_REVERSE_SAFE (bb, insn, prev)
 	if (INSN_P (insn))
 	  {
-	    /* Don't mark argument stores now.  They will be marked
-	       if needed when the associated CALL is marked.  */
 	    if (arg_stores && bitmap_bit_p (arg_stores, INSN_UID (insn)))
 	      continue;
 	    if (deletable_insn_p (insn, fast, arg_stores))
@@ -661,7 +661,7 @@  mark_reg_dependencies (rtx insn)
 
 /* Initialize global variables for a new DCE pass.  */
 
-static void
+void
 init_dce (bool fast)
 {
   if (!df_in_progress)
@@ -687,7 +687,7 @@  init_dce (bool fast)
 
 /* Free the data allocated by init_dce.  */
 
-static void
+void
 fini_dce (bool fast)
 {
   sbitmap_free (marked);
@@ -709,7 +709,7 @@  rest_of_handle_ud_dce (void)
 
   init_dce (false);
 
-  prescan_insns_for_dce (false);
+  prescan_insns_for_dce (false, true);
   mark_artificial_uses ();
   while (VEC_length (rtx, worklist) > 0)
     {
@@ -767,110 +767,6 @@  struct rtl_opt_pass pass_ud_rtl_dce =
    artificial uses. */
 
 static bool
-byte_dce_process_block (basic_block bb, bool redo_out, bitmap au)
-{
-  bitmap local_live = BITMAP_ALLOC (&dce_tmp_bitmap_obstack);
-  rtx insn;
-  bool block_changed;
-  df_ref *def_rec;
-
-  if (redo_out)
-    {
-      /* Need to redo the live_out set of this block if when one of
-	 the succs of this block has had a change in it live in
-	 set.  */
-      edge e;
-      edge_iterator ei;
-      df_confluence_function_n con_fun_n = df_byte_lr->problem->con_fun_n;
-      bitmap_clear (DF_BYTE_LR_OUT (bb));
-      FOR_EACH_EDGE (e, ei, bb->succs)
-	(*con_fun_n) (e);
-    }
-
-  if (dump_file)
-    {
-      fprintf (dump_file, "processing block %d live out = ", bb->index);
-      df_print_byte_regset (dump_file, DF_BYTE_LR_OUT (bb));
-    }
-
-  bitmap_copy (local_live, DF_BYTE_LR_OUT (bb));
-
-  df_byte_lr_simulate_artificial_refs_at_end (bb, local_live);
-
-  FOR_BB_INSNS_REVERSE (bb, insn)
-    if (INSN_P (insn))
-      {
-	/* The insn is needed if there is someone who uses the output.  */
-	for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
-	  {
-	    df_ref def = *def_rec;
-	    unsigned int last;
-	    unsigned int dregno = DF_REF_REGNO (def);
-	    unsigned int start = df_byte_lr_get_regno_start (dregno);
-	    unsigned int len = df_byte_lr_get_regno_len (dregno);
-
-	    unsigned int sb;
-	    unsigned int lb;
-	    /* This is one of the only places where DF_MM_MAY should
-	       be used for defs.  Need to make sure that we are
-	       checking for all of the bits that may be used.  */
-
-	    if (!df_compute_accessed_bytes (def, DF_MM_MAY, &sb, &lb))
-	      {
-		start += sb;
-		len = lb - sb;
-	      }
-
-	    if (bitmap_bit_p (au, dregno))
-	      {
-		mark_insn (insn, true);
-		goto quickexit;
-	      }
-
-	    last = start + len;
-	    while (start < last)
-	      if (bitmap_bit_p (local_live, start++))
-		{
-		  mark_insn (insn, true);
-		  goto quickexit;
-		}
-	  }
-
-      quickexit:
-
-	/* No matter if the instruction is needed or not, we remove
-	   any regno in the defs from the live set.  */
-	df_byte_lr_simulate_defs (insn, local_live);
-
-	/* On the other hand, we do not allow the dead uses to set
-	   anything in local_live.  */
-	if (marked_insn_p (insn))
-	  df_byte_lr_simulate_uses (insn, local_live);
-
-	if (dump_file)
-	  {
-	    fprintf (dump_file, "finished processing insn %d live out = ",
-		     INSN_UID (insn));
-	    df_print_byte_regset (dump_file, local_live);
-	  }
-      }
-
-  df_byte_lr_simulate_artificial_refs_at_top (bb, local_live);
-
-  block_changed = !bitmap_equal_p (local_live, DF_BYTE_LR_IN (bb));
-  if (block_changed)
-    bitmap_copy (DF_BYTE_LR_IN (bb), local_live);
-  BITMAP_FREE (local_live);
-  return block_changed;
-}
-
-
-/* Process basic block BB.  Return true if the live_in set has
-   changed. REDO_OUT is true if the info at the bottom of the block
-   needs to be recalculated before starting.  AU is the proper set of
-   artificial uses. */
-
-static bool
 dce_process_block (basic_block bb, bool redo_out, bitmap au)
 {
   bitmap local_live = BITMAP_ALLOC (&dce_tmp_bitmap_obstack);
@@ -904,7 +800,7 @@  dce_process_block (basic_block bb, bool 
   FOR_BB_INSNS_REVERSE (bb, insn)
     if (INSN_P (insn))
       {
-	bool needed = marked_insn_p (insn);
+	bool needed = dce_marked_insn_p (insn);
 
 	/* The insn is needed if there is someone who uses the output.  */
 	if (!needed)
@@ -938,12 +834,10 @@  dce_process_block (basic_block bb, bool 
 }
 
 
-/* Perform fast DCE once initialization is done.  If BYTE_LEVEL is
-   true, use the byte level dce, otherwise do it at the pseudo
-   level.  */
+/* Perform fast DCE once initialization is done.  */
 
 static void
-fast_dce (bool byte_level)
+fast_dce (void)
 {
   int *postorder = df_get_postorder (DF_BACKWARD);
   int n_blocks = df_get_n_blocks (DF_BACKWARD);
@@ -964,7 +858,7 @@  fast_dce (bool byte_level)
   bitmap au_eh = &df->eh_block_artificial_uses;
   int i;
 
-  prescan_insns_for_dce (true);
+  prescan_insns_for_dce (true, true);
 
   for (i = 0; i < n_blocks; i++)
     bitmap_set_bit (all_blocks, postorder[i]);
@@ -985,14 +879,9 @@  fast_dce (bool byte_level)
 	      continue;
 	    }
 
-	  if (byte_level)
-	    local_changed
-	      = byte_dce_process_block (bb, bitmap_bit_p (redo_out, index),
-					  bb_has_eh_pred (bb) ? au_eh : au);
-	  else
-	    local_changed
-	      = dce_process_block (bb, bitmap_bit_p (redo_out, index),
-				   bb_has_eh_pred (bb) ? au_eh : au);
+	  local_changed
+	    = dce_process_block (bb, bitmap_bit_p (redo_out, index),
+				 bb_has_eh_pred (bb) ? au_eh : au);
 	  bitmap_set_bit (processed, index);
 
 	  if (local_changed)
@@ -1028,15 +917,12 @@  fast_dce (bool byte_level)
 	     to redo the dataflow equations for the blocks that had a
 	     change at the top of the block.  Then we need to redo the
 	     iteration.  */
-	  if (byte_level)
-	    df_analyze_problem (df_byte_lr, all_blocks, postorder, n_blocks);
-	  else
-	    df_analyze_problem (df_lr, all_blocks, postorder, n_blocks);
+	  df_analyze_problem (df_lr, all_blocks, postorder, n_blocks);
 
 	  if (old_flag & DF_LR_RUN_DCE)
 	    df_set_flags (DF_LR_RUN_DCE);
 
-	  prescan_insns_for_dce (true);
+	  prescan_insns_for_dce (true, true);
 	}
     }
 
@@ -1047,27 +933,13 @@  fast_dce (bool byte_level)
   BITMAP_FREE (all_blocks);
 }
 
-
 /* Fast register level DCE.  */
 
 static unsigned int
 rest_of_handle_fast_dce (void)
 {
   init_dce (true);
-  fast_dce (false);
-  fini_dce (true);
-  return 0;
-}
-
-
-/* Fast byte level DCE.  */
-
-static unsigned int
-rest_of_handle_fast_byte_dce (void)
-{
-  df_byte_lr_add_problem ();
-  init_dce (true);
-  fast_dce (true);
+  fast_dce ();
   fini_dce (true);
   return 0;
 }
@@ -1139,24 +1011,3 @@  struct rtl_opt_pass pass_fast_rtl_dce =
   TODO_ggc_collect                      /* todo_flags_finish */
  }
 };
-
-struct rtl_opt_pass pass_fast_rtl_byte_dce =
-{
- {
-  RTL_PASS,
-  "byte-dce",                           /* name */
-  gate_fast_dce,                        /* gate */
-  rest_of_handle_fast_byte_dce,         /* execute */
-  NULL,                                 /* sub */
-  NULL,                                 /* next */
-  0,                                    /* static_pass_number */
-  TV_DCE,                               /* tv_id */
-  0,                                    /* properties_required */
-  0,                                    /* properties_provided */
-  0,                                    /* properties_destroyed */
-  0,                                    /* todo_flags_start */
-  TODO_dump_func |
-  TODO_df_finish | TODO_verify_rtl_sharing |
-  TODO_ggc_collect                      /* todo_flags_finish */
- }
-};
Index: dce.h
===================================================================
--- dce.h	(revision 161116)
+++ dce.h	(working copy)
@@ -20,6 +20,13 @@  along with GCC; see the file COPYING3.  
 #ifndef GCC_DCE_H
 #define GCC_DCE_H
 
+extern bitmap_obstack dce_tmp_bitmap_obstack;
+
+extern void init_dce (bool);
+extern void fini_dce (bool);
+extern void prescan_insns_for_dce (bool, bool);
+extern bool dce_marked_insn_p (rtx);
+
 extern void run_fast_dce (void);
 extern void run_fast_df_dce (void);