Patchwork [SMS,2/2,RFC] Register pressure estimation for the partial schedule (re-submission)

login
register
mail settings
Submitter Revital Eres
Date Jan. 1, 2012, 7:44 p.m.
Message ID <CAHz1=dWgzjfV-BWdEvL8UCCm-ZwhP2J+4QKgv08xunf0r4QEBQ@mail.gmail.com>
Download mbox | patch
Permalink /patch/133764/
State New
Headers show

Comments

Revital Eres - Jan. 1, 2012, 7:44 p.m.
Hello,

Thanks for the comments! I incorporated them in the attached patch.

Currently bootstrapping and testing it together with the other patch in the
series on ppc64-redhat-linux, with SMS enabled on loops with SC 1.

Thanks again,
Revital

2012-01-01  Richard Sandiford  <richard.sandiford@linaro.org>
            Revital Eres  <revital.eres@linaro.org>

        * loop-invariant.c (get_regno_pressure_class): Move function to...
        * ira.c: Here.
        * common.opt (fmodulo-sched-reg-pressure, -fmodulo-sched-verbose):
        New flags.
        * doc/invoke.texi (fmodulo-sched-reg-pressure,
        -fmodulo-sched-verbose): Document the flags.
        * ira.h (get_regno_pressure_class,
        reset_pseudo_classes_defined_p): Declare.
        * ira-costs.c (reset_pseudo_classes_defined_p): New function.
        * Makefile.in (modulo-sched.o): Include ira.h and modulo-sched.h.
        (modulo-sched-pressure.o): New.
        * modulo-sched.c (ira.h, modulo-sched.h): New includes.
        (partial_schedule_ptr, ps_insn_ptr, struct ps_insn,
        struct ps_reg_move_info, struct partial_schedule): Move to
        modulo-sched.h.
        (ps_rtl_insn, ps_reg_move): Remove static.
        (apply_reg_moves): Remove static and call df_insn_rescan only
        if PS is final.
        (undo_reg_moves): New function.
        (sms_schedule): Call register pressure estimation.
        * modulo-sched.h: New file.
        * modulo-sched-pressure.c: New file.

Patch

Index: doc/invoke.texi

===================================================================
--- doc/invoke.texi	(revision 182766)

+++ doc/invoke.texi	(working copy)

@@ -374,6 +374,7 @@  Objective-C and Objective-C++ Dialects}.

 -floop-parallelize-all -flto -flto-compression-level @gol
 -flto-partition=@var{alg} -flto-report -fmerge-all-constants @gol
 -fmerge-constants -fmodulo-sched -fmodulo-sched-allow-regmoves @gol
+-fmodulo-sched-reg-pressure -fmodulo-sched-verbose=@var{n} @gol

 -fmove-loop-invariants fmudflap -fmudflapir -fmudflapth -fno-branch-count-reg @gol
 -fno-default-inline @gol
 -fno-defer-pop -fno-function-cse -fno-guess-branch-probability @gol
@@ -6476,6 +6477,16 @@  deleted which will trigger the generatio

 life-range analysis.  This option is effective only with
 @option{-fmodulo-sched} enabled.
 
+@item -fmodulo-sched-reg-pressure

+@opindex fmodulo-sched-reg-pressure

+Do not apply @option{-fmodulo-sched} to loops if the result would lead

+to register spilling within the loop.

+This option is effective only with @option{-fmodulo-sched} enabled.

+

+@item -fmodulo-sched-verbose=@var{n}

+@opindex fmodulo-sched-verbose

+Set the verbosity level of the SMS dump file output to @var{n}.

+

 @item -fno-branch-count-reg
 @opindex fno-branch-count-reg
 Do not use ``decrement and branch'' instructions on a count register,
Index: modulo-sched.h

===================================================================
--- modulo-sched.h	(revision 0)

+++ modulo-sched.h	(revision 0)

@@ -0,0 +1,120 @@ 

+/* Swing Modulo Scheduling implementation.

+   Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 

+   Free Software Foundation, Inc.

+   Contributed by Revital Eres <revital.eres@linaro.org> 

+

+This file is part of GCC.

+

+GCC is free software; you can redistribute it and/or modify it under

+the terms of the GNU General Public License as published by the Free

+Software Foundation; either version 3, or (at your option) any later

+version.

+

+GCC is distributed in the hope that it will be useful, but WITHOUT ANY

+WARRANTY; without even the implied warranty of MERCHANTABILITY or

+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

+for more details.

+

+You should have received a copy of the GNU General Public License

+along with GCC; see the file COPYING3.  If not see

+<http://www.gnu.org/licenses/>.  */

+

+#ifndef GCC_SMS_H

+#define GCC_SMS_H

+

+#include "ddg.h"

+

+extern HARD_REG_SET eliminable_regset;

+

+typedef struct partial_schedule *partial_schedule_ptr;

+

+typedef struct ps_insn *ps_insn_ptr;

+

+/* A single instruction in the partial schedule.  */

+struct ps_insn

+{

+  /* Identifies the instruction to be scheduled.  Values smaller than

+     the ddg's num_nodes refer directly to ddg nodes.  A value of

+     X + num_nodes refers to register move X.  */

+  int id;

+

+  /* The (absolute) cycle in which the PS instruction is scheduled.

+     Same as SCHED_TIME (node).  */

+  int cycle;

+

+  /* The next/prev PS_INSN in the same row.  */

+  ps_insn_ptr next_in_row,

+	      prev_in_row;

+

+};

+

+/* Information about a register move that has been added to a partial

+   schedule.  */

+struct ps_reg_move_info

+{

+  /* The source of the move is defined by the ps_insn with id DEF.

+     The destination is used by the ps_insns with the ids in USES.  */

+  int def;

+  sbitmap uses;

+

+  /* The original form of USES' instructions used OLD_REG, but they

+     should now use NEW_REG.  */

+  rtx old_reg;

+  rtx new_reg;

+

+  /* The number of consecutive stages that the move occupies.  */

+  int num_consecutive_stages;

+

+  /* An instruction that sets NEW_REG to the correct value.  The first

+     move associated with DEF will have an rhs of OLD_REG; later moves

+     use the result of the previous move.  */

+  rtx insn;

+};

+

+typedef struct ps_reg_move_info ps_reg_move_info;

+DEF_VEC_O (ps_reg_move_info);

+DEF_VEC_ALLOC_O (ps_reg_move_info, heap);

+

+/* Holds the partial schedule as an array of II rows.  Each entry of the

+   array points to a linked list of PS_INSNs, which represents the

+   instructions that are scheduled for that row.  */

+struct partial_schedule

+{

+  int ii;	/* Number of rows in the partial schedule.  */

+  int history;  /* Threshold for conflict checking using DFA.  */

+

+  /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).  */

+  ps_insn_ptr *rows;

+

+  /* rows_last[i] points to the last insn in the linked list pointed

+     by rows[i].  */

+  ps_insn_ptr *rows_last;

+  

+  /* All the moves added for this partial schedule.  Index X has

+     a ps_insn id of X + g->num_nodes.  */

+  VEC (ps_reg_move_info, heap) *reg_moves;

+

+  /*  rows_length[i] holds the number of instructions in the row.

+      It is used only (as an optimization) to back off quickly from

+      trying to schedule a node in a full row; that is, to avoid running

+      through futile DFA state transitions.  */

+  int *rows_length;

+  

+  /* The earliest absolute cycle of an insn in the partial schedule.  */

+  int min_cycle;

+

+  /* The latest absolute cycle of an insn in the partial schedule.  */

+  int max_cycle;

+

+  ddg_ptr g;	/* The DDG of the insns in the partial schedule.  */

+

+  int stage_count;  /* The stage count of the partial schedule.  */

+};

+

+int spillage_in_ps (struct loop *, partial_schedule_ptr);

+rtx ps_rtl_insn (partial_schedule_ptr, int);

+void apply_reg_moves (partial_schedule_ptr, bool);

+void undo_reg_moves (partial_schedule_ptr);

+struct ps_reg_move_info *ps_reg_move (partial_schedule_ptr, int);

+

+#endif

Index: modulo-sched-pressure.c

===================================================================
--- modulo-sched-pressure.c	(revision 0)

+++ modulo-sched-pressure.c	(revision 0)

@@ -0,0 +1,317 @@ 

+/* Register pressure estimation for the usage of

+   Swing Modulo Scheduling.

+   Copyright (C) 2011

+   Free Software Foundation, Inc.

+   Contributed by Revital Eres <revital.eres@linaro.org>

+

+This file is part of GCC.

+

+GCC is free software; you can redistribute it and/or modify it under

+the terms of the GNU General Public License as published by the Free

+Software Foundation; either version 3, or (at your option) any later

+version.

+

+GCC is distributed in the hope that it will be useful, but WITHOUT ANY

+WARRANTY; without even the implied warranty of MERCHANTABILITY or

+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

+for more details.

+

+You should have received a copy of the GNU General Public License

+along with GCC; see the file COPYING3.  If not see

+<http://www.gnu.org/licenses/>.  */

+

+

+#include "config.h"

+#include "system.h"

+#include "coretypes.h"

+#include "tm.h"

+#include "diagnostic-core.h"

+#include "rtl.h"

+#include "tm_p.h"

+#include "hard-reg-set.h"

+#include "regs.h"

+#include "function.h"

+#include "flags.h"

+#include "insn-config.h"

+#include "insn-attr.h"

+#include "except.h"

+#include "recog.h"

+#include "sched-int.h"

+#include "target.h"

+#include "cfglayout.h"

+#include "cfgloop.h"

+#include "cfghooks.h"

+#include "expr.h"

+#include "params.h"

+#include "gcov-io.h"

+#include "timevar.h"

+#include "tree-pass.h"

+#include "dbgcnt.h"

+#include "df.h"

+#include "regs.h"

+#include "ira.h"

+#include "modulo-sched.h"

+

+

+/* Registers currently living.  */

+static bitmap_head curr_regs_live;

+

+/* Current reg pressure for each pressure class.  */

+static int curr_reg_pressure[N_REG_CLASSES];

+

+/* Maximum reg pressure for each pressure class.  */

+static int max_reg_pressure[N_REG_CLASSES];

+

+/* Increase (if INCR_P) or decrease current register pressure for

+   register REGNO.  */

+static void

+update_pressure (int regno, bool incr_p)

+{

+  int nregs;

+  enum reg_class pressure_class;

+

+  if (regno < FIRST_PSEUDO_REGISTER

+      && (TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)

+	  || TEST_HARD_REG_BIT (eliminable_regset, regno)))

+    return;

+

+  /* Update the current set of live registers.  Exit early if nothing

+     has changed.  We want to increase curr_reg_pressure as we scan

+     upwards and encounter a last use; if regno is already live, this

+     use is not last.  Likewise, we want to decrease curr_reg_pressure

+     as we encounter a def; regno might not be live when !incr_p, if

+     the def feeds only uses in next iteration.  */

+  if (incr_p

+      ? !bitmap_set_bit (&curr_regs_live, regno)

+      : !bitmap_clear_bit (&curr_regs_live, regno))

+    return;

+

+  pressure_class = get_regno_pressure_class (regno, &nregs);

+

+  if (incr_p)

+    {

+      curr_reg_pressure[pressure_class] += nregs;

+      if (curr_reg_pressure[pressure_class] >

+	  max_reg_pressure[pressure_class])

+	max_reg_pressure[pressure_class] = curr_reg_pressure[pressure_class];

+    }

+  else

+    curr_reg_pressure[pressure_class] -= nregs;

+

+  if (dump_file && flag_modulo_sched_verbose > 0)

+    {

+      if (incr_p)

+	fprintf (dump_file, "Increase pressure of regno %d by #%d regs: "

+		 "%s(P%d,M%d)\n",

+		 regno, nregs, reg_class_names[pressure_class],

+		 curr_reg_pressure[pressure_class],

+		 max_reg_pressure[pressure_class]);

+      else

+	fprintf (dump_file, "Decrease pressure of regno %d by #%d regs: "

+		 "%s(P%d,M%d)\n",

+		 regno, nregs, reg_class_names[pressure_class],

+		 curr_reg_pressure[pressure_class],

+		 max_reg_pressure[pressure_class]);

+    }

+}

+

+static void

+initialize_reg_moves_pressure_info (partial_schedule_ptr ps)

+{

+  ps_reg_move_info *move;

+  int i;

+

+  resize_reg_info ();

+

+  FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move)

+    {

+      int regno_new = REGNO (move->new_reg);

+      int regno_old = REGNO (move->old_reg);

+      

+      /* Update register class information for the register moves.  */

+      setup_reg_classes (regno_new, reg_preferred_class (regno_old),

+			 reg_alternate_class (regno_old),

+			 reg_allocno_class (regno_old));

+    }

+}

+

+/* Calculate the live registers in PS.  */

+static void

+calc_liveness (partial_schedule_ptr ps)

+{

+  rtx insn;

+  int k;

+  ps_insn_ptr ps_i;

+  basic_block bb = ps->g->bb;

+  edge e = find_fallthru_edge (bb->succs);

+

+  /* The registers defined in bb which have uses outside of it should

+     be included in the set of live registers.  */

+  bitmap_ior_and_into (&curr_regs_live, DF_LR_IN (e->dest),

+		       &DF_LR_BB_INFO (bb)->def);

+

+  for (k = ps->ii - 1; k >= 0; k--)

+    {

+      for (ps_i = ps->rows_last[k]; ps_i; ps_i = ps_i->prev_in_row)

+	{

+	  df_ref *use_rec;

+	  df_ref *def_rec;

+

+	  insn = ps_rtl_insn (ps, ps_i->id);

+

+	  if (!NONDEBUG_INSN_P (insn))

+	    continue;

+

+	  if (dump_file && flag_modulo_sched_verbose > 0)

+	    {

+	      fprintf (dump_file, "\nAnalyzing instruction:\n");

+	      print_rtl_single (dump_file, insn);

+	    }

+

+	  /* Process all uses, all defs, and then all uses again.

+	     The last two steps are the natural way one would go about

+	     updating live registers in a bottom-up scan, except that in

+	     some cases the same physical register cannot be assigned to

+	     both use and def on same insn, so the first step is added

+	     conservatively.  The first two steps give us an estimate of

+	     the pressure when dying inputs cannot be tied to outputs

+	     (which is the worst case).  The second two steps update

+	     the set of live registers ready for the next instruction.	*/

+	  if (ps_i->id >= ps->g->num_nodes)

+	    {

+	      /* Handle register moves.  */

+	      int old_regno = REGNO (ps_reg_move (ps, ps_i->id)->old_reg);

+	      int new_regno = REGNO (ps_reg_move (ps, ps_i->id)->new_reg);

+

+	      update_pressure (old_regno, true);

+	      update_pressure (new_regno, false);

+	      update_pressure (old_regno, true);

+	    }

+	  else

+	    {

+	      for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)

+		update_pressure (DF_REF_REGNO (*use_rec), true);

+

+	      for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)

+		update_pressure (DF_REF_REGNO (*def_rec), false);

+

+	      for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)

+		update_pressure (DF_REF_REGNO (*use_rec), true);

+	    }

+

+	  if (dump_file && flag_modulo_sched_verbose > 0)

+	    {

+	      bitmap_iterator bi;

+	      unsigned j;

+

+	      fprintf (dump_file, "live regs:\n");

+	      EXECUTE_IF_SET_IN_BITMAP (&curr_regs_live, 0, j, bi)

+		fprintf (dump_file, "%d ", j);

+	      fprintf (dump_file, "\n");

+	    }

+	}

+    }

+

+  if (dump_file && flag_modulo_sched_verbose > 0)

+    {

+      bitmap_iterator bi;

+      unsigned j;

+

+      fprintf (dump_file, "\nlive regs:\n");

+      EXECUTE_IF_SET_IN_BITMAP (&curr_regs_live, 0, j, bi)

+	fprintf (dump_file, "%d ", j);

+      fprintf (dump_file, "\n");

+    }

+}

+

+/* Initialize the data-structures needed for the register pressure

+   calculation.   */

+static void

+initialize_reg_pressure_info (partial_schedule_ptr ps)

+{

+  initialize_reg_moves_pressure_info (ps);

+  

+  memset (max_reg_pressure, 0, sizeof (max_reg_pressure));

+  memset (curr_reg_pressure, 0, sizeof (curr_reg_pressure));

+  bitmap_initialize (&curr_regs_live, &reg_obstack);

+}

+

+/* Return the amount of spillage in PS.  LOOP is the original loop.  */

+int

+spillage_in_ps (struct loop *loop, partial_schedule_ptr ps)

+{

+  int i;

+  int spillage = 0;

+

+  /* The register move instructions do not appear in the basic block yet,

+     only in the PS.  However, we need to substitute their uses in the basic

+     block in order to calculate the pressure correctly.  We must undo

+     this at the end of the function because the loop should later on

+     be versioned without the substitutions.  */

+  apply_reg_moves (ps, false);

+  initialize_reg_pressure_info (ps);

+

+  /* The register pressure is calculated by calling calc_liveness twice:

+     after the first round, curr_reg_pressure includes all "upwards

+     exposed" registers in the ps; feeding them into the second round

+     will account for cross-iteration dependences.  */

+  if (dump_file && flag_modulo_sched_verbose > 0)

+    {

+      fprintf (dump_file, "\nStarting register pressure estimation\n");

+      fprintf (dump_file, "Pass 1 of liveness calculation\n");

+      fprintf (dump_file, "===============================\n");

+    }

+

+  calc_liveness (ps);

+  if (dump_file && flag_modulo_sched_verbose > 0)

+    {

+      fprintf (dump_file, "Pass 2 of liveness calculation\n");

+      fprintf (dump_file, "===============================\n");

+    }

+

+  calc_liveness (ps);

+

+  if (dump_file)

+    {

+      struct loop *parent;

+

+      parent = loop_outer (loop);

+      fprintf (dump_file, "\n  Loop %d (parent %d, header bb%d, depth %d)\n",

+	       loop->num, (parent == NULL ? -1 : parent->num),

+	       loop->header->index, loop_depth (loop));

+      fprintf (dump_file, "Pressure:\n");

+    }

+

+  for (i = 0; i < ira_pressure_classes_num; i++)

+    {

+      enum reg_class pressure_class;

+

+      pressure_class = ira_pressure_classes[i];

+

+      if (max_reg_pressure[pressure_class] == 0)

+	continue;

+

+      if (dump_file)

+	{

+	  fprintf (dump_file, "%s=%d  %d ", reg_class_names[pressure_class],

+		   max_reg_pressure[pressure_class],

+		   ira_available_class_regs[pressure_class]);

+	  if (max_reg_pressure[pressure_class]

+	      > ira_class_hard_regs_num[pressure_class])

+	    fprintf (dump_file, "(pressure) ");

+	}

+

+      if (max_reg_pressure[pressure_class] >

+	  ira_class_hard_regs_num[pressure_class])

+	spillage +=

+	  max_reg_pressure[pressure_class] -

+	  ira_class_hard_regs_num[pressure_class];

+    }

+

+  if (dump_file)

+    fprintf (dump_file, "\nspillage: %d\n", spillage);

+  

+  bitmap_clear (&curr_regs_live);

+  undo_reg_moves (ps);

+  return spillage;

+}

Index: loop-invariant.c

===================================================================
--- loop-invariant.c	(revision 182766)

+++ loop-invariant.c	(working copy)

@@ -1619,34 +1619,6 @@  static rtx regs_set[(FIRST_PSEUDO_REGIST

 /* Number of regs stored in the previous array.  */
 static int n_regs_set;
 
-/* Return pressure class and number of needed hard registers (through

-   *NREGS) of register REGNO.  */

-static enum reg_class

-get_regno_pressure_class (int regno, int *nregs)

-{

-  if (regno >= FIRST_PSEUDO_REGISTER)

-    {

-      enum reg_class pressure_class;

-

-      pressure_class = reg_allocno_class (regno);

-      pressure_class = ira_pressure_class_translate[pressure_class];

-      *nregs

-	= ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)];

-      return pressure_class;

-    }

-  else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)

-	   && ! TEST_HARD_REG_BIT (eliminable_regset, regno))

-    {

-      *nregs = 1;

-      return ira_pressure_class_translate[REGNO_REG_CLASS (regno)];

-    }

-  else

-    {

-      *nregs = 0;

-      return NO_REGS;

-    }

-}

-

 /* Increase (if INCR_P) or decrease current register pressure for
    register REGNO.  */
 static void
Index: common.opt

===================================================================
--- common.opt	(revision 182766)

+++ common.opt	(working copy)

@@ -1461,6 +1461,15 @@  fmodulo-sched-allow-regmoves

 Common Report Var(flag_modulo_sched_allow_regmoves)
 Perform SMS based modulo scheduling with register moves allowed
 
+fmodulo-sched-reg-pressure

+Common Report Var(flag_modulo_sched_reg_pressure)

+Perform SMS based modulo scheduling.  The scheduling will not be applied

+if it is estimated to cause spilling.

+

+fmodulo-sched-verbose=

+Common RejectNegative Joined UInteger Var(flag_modulo_sched_verbose) 

+-fmodulo-sched-verbose=<number>  Control SMS's level of diagnostic messages.

+

 fmove-loop-invariants
 Common Report Var(flag_move_loop_invariants) Init(1) Optimization
 Move loop invariant computations out of loops
Index: ira.c

===================================================================
--- ira.c	(revision 182766)

+++ ira.c	(working copy)

@@ -3513,6 +3513,34 @@  build_insn_chain (void)

     print_insn_chains (dump_file);
 }
 
+/* Return pressure class and number of needed hard registers (through

+   *NREGS) of register REGNO.  */

+enum reg_class

+get_regno_pressure_class (int regno, int *nregs)

+{

+  if (regno >= FIRST_PSEUDO_REGISTER)

+    {

+      enum reg_class pressure_class;

+

+      pressure_class = reg_allocno_class (regno);

+      pressure_class = ira_pressure_class_translate[pressure_class];

+      *nregs

+	= ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)];

+      return pressure_class;

+    }

+  else if (!TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)

+	   && !TEST_HARD_REG_BIT (eliminable_regset, regno))

+    {

+      *nregs = 1;

+      return ira_pressure_class_translate[REGNO_REG_CLASS (regno)];

+    }

+  else

+    {

+      *nregs = 0;

+      return NO_REGS;

+    }

+}

+

 
 
 /* All natural loops.  */
Index: ira.h

===================================================================
--- ira.h	(revision 182766)

+++ ira.h	(working copy)

@@ -132,6 +132,7 @@  extern void ira_finish_once (void);

 extern void ira_setup_eliminable_regset (void);
 extern rtx ira_eliminate_regs (rtx, enum machine_mode);
 extern void ira_set_pseudo_classes (FILE *);
+extern void reset_pseudo_classes_defined_p (void);

 extern void ira_implicitly_set_insn_hard_regs (HARD_REG_SET *);
 
 extern void ira_sort_regnos_for_alter_reg (int *, int, unsigned int *);
@@ -145,3 +146,4 @@  extern bool ira_better_spill_reload_regn

 extern bool ira_bad_reload_regno (int, rtx, rtx);
 
 extern void ira_adjust_equiv_reg_cost (unsigned, int);
+enum reg_class get_regno_pressure_class (int, int *);

Index: ira-costs.c

===================================================================
--- ira-costs.c	(revision 182766)

+++ ira-costs.c	(working copy)

@@ -2070,6 +2070,13 @@  ira_set_pseudo_classes (FILE *dump_file)

   finish_costs ();
 }
 
+/* Reset pseudo_classes_defined_p.  */

+void

+reset_pseudo_classes_defined_p (void)

+{

+  pseudo_classes_defined_p = false;

+}

+

 
 
 /* Change hard register costs for allocnos which lives through
Index: Makefile.in

===================================================================
--- Makefile.in	(revision 182766)

+++ Makefile.in	(working copy)

@@ -1298,6 +1298,7 @@  OBJS = \

 	mcf.o \
 	mode-switching.o \
 	modulo-sched.o \
+	modulo-sched-pressure.o \

 	omega.o \
 	omp-low.o \
 	optabs.o \
@@ -3317,12 +3318,18 @@  ddg.o : ddg.c $(DDG_H) $(CONFIG_H) $(SYS

    $(FLAGS_H) insn-config.h $(INSN_ATTR_H) $(EXCEPT_H) $(RECOG_H) \
    $(SCHED_INT_H) $(CFGLAYOUT_H) $(CFGLOOP_H) $(EXPR_H) $(BITMAP_H) \
    hard-reg-set.h sbitmap.h $(TM_H)
+modulo-sched-pressure.o : modulo-sched-pressure.c $(CONFIG_H) $(CONFIG_H) $(SYSTEM_H) \

+   coretypes.h $(TARGET_H) $(DIAGNOSTIC_CORE_H) $(RTL_H) $(TM_P_H) $(REGS_H) $(FUNCTION_H) \

+   $(FLAGS_H) insn-config.h $(INSN_ATTR_H) $(EXCEPT_H) $(RECOG_H) \

+   $(SCHED_INT_H) $(CFGLAYOUT_H) $(CFGLOOP_H) $(EXPR_H) $(PARAMS_H) \

+   cfghooks.h $(GCOV_IO_H) hard-reg-set.h $(TM_H) $(TIMEVAR_H) $(TREE_PASS_H) \

+   $(DF_H) $(DBGCNT_H) ira.h modulo-sched.h 

 modulo-sched.o : modulo-sched.c $(DDG_H) $(CONFIG_H) $(CONFIG_H) $(SYSTEM_H) \
    coretypes.h $(TARGET_H) $(DIAGNOSTIC_CORE_H) $(RTL_H) $(TM_P_H) $(REGS_H) $(FUNCTION_H) \
    $(FLAGS_H) insn-config.h $(INSN_ATTR_H) $(EXCEPT_H) $(RECOG_H) \
    $(SCHED_INT_H) $(CFGLAYOUT_H) $(CFGLOOP_H) $(EXPR_H) $(PARAMS_H) \
    cfghooks.h $(GCOV_IO_H) hard-reg-set.h $(TM_H) $(TIMEVAR_H) $(TREE_PASS_H) \
-   $(DF_H) $(DBGCNT_H)

+   $(DF_H) $(DBGCNT_H) ira.h modulo-sched.h 

 haifa-sched.o : haifa-sched.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
    $(SCHED_INT_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h $(FUNCTION_H) \
    $(INSN_ATTR_H) $(DIAGNOSTIC_CORE_H) $(RECOG_H) $(EXCEPT_H) $(TM_P_H) $(TARGET_H) output.h \

--- modulo-sched1.c	2012-01-01 04:56:43.498000515 -0800

+++ modulo-sched.c	2012-01-01 10:28:56.714000516 -0800

@@ -48,6 +48,8 @@  along with GCC; see the file COPYING3.

 #include "tree-pass.h"
 #include "dbgcnt.h"
 #include "df.h"
+#include "ira.h"

+#include "modulo-sched.h"

 
 #ifdef INSN_SCHEDULING
 
@@ -102,9 +104,6 @@  along with GCC; see the file COPYING3.

 /* This page defines partial-schedule structures and functions for
    modulo scheduling.  */
 
-typedef struct partial_schedule *partial_schedule_ptr;

-typedef struct ps_insn *ps_insn_ptr;

-

 /* The minimum (absolute) cycle that a node of ps was scheduled in.  */
 #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
 
@@ -121,88 +120,6 @@  typedef struct ps_insn *ps_insn_ptr;

 /* The stage count of ps.  */
 #define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
 
-/* A single instruction in the partial schedule.  */

-struct ps_insn

-{

-  /* Identifies the instruction to be scheduled.  Values smaller than

-     the ddg's num_nodes refer directly to ddg nodes.  A value of

-     X - num_nodes refers to register move X.  */

-  int id;

-

-  /* The (absolute) cycle in which the PS instruction is scheduled.

-     Same as SCHED_TIME (node).  */

-  int cycle;

-

-  /* The next/prev PS_INSN in the same row.  */

-  ps_insn_ptr next_in_row,

-	      prev_in_row;

-

-};

-

-/* Information about a register move that has been added to a partial

-   schedule.  */

-struct ps_reg_move_info

-{

-  /* The source of the move is defined by the ps_insn with id DEF.

-     The destination is used by the ps_insns with the ids in USES.  */

-  int def;

-  sbitmap uses;

-

-  /* The original form of USES' instructions used OLD_REG, but they

-     should now use NEW_REG.  */

-  rtx old_reg;

-  rtx new_reg;

-

-  /* The number of consecutive stages that the move occupies.  */

-  int num_consecutive_stages;

-

-  /* An instruction that sets NEW_REG to the correct value.  The first

-     move associated with DEF will have an rhs of OLD_REG; later moves

-     use the result of the previous move.  */

-  rtx insn;

-};

-

-typedef struct ps_reg_move_info ps_reg_move_info;

-DEF_VEC_O (ps_reg_move_info);

-DEF_VEC_ALLOC_O (ps_reg_move_info, heap);

-

-/* Holds the partial schedule as an array of II rows.  Each entry of the

-   array points to a linked list of PS_INSNs, which represents the

-   instructions that are scheduled for that row.  */

-struct partial_schedule

-{

-  int ii;	/* Number of rows in the partial schedule.  */

-  int history;  /* Threshold for conflict checking using DFA.  */

-

-  /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).  */

-  ps_insn_ptr *rows;

-

-  /* rows_last[i] points to the last insn in the linked list pointed

-     by rows[i].  */

-  ps_insn_ptr *rows_last;

-  

-  /* All the moves added for this partial schedule.  Index X has

-     a ps_insn id of X + g->num_nodes.  */

-  VEC (ps_reg_move_info, heap) *reg_moves;

-

-  /*  rows_length[i] holds the number of instructions in the row.

-      It is used only (as an optimization) to back off quickly from

-      trying to schedule a node in a full row; that is, to avoid running

-      through futile DFA state transitions.  */

-  int *rows_length;

-  

-  /* The earliest absolute cycle of an insn in the partial schedule.  */

-  int min_cycle;

-

-  /* The latest absolute cycle of an insn in the partial schedule.  */

-  int max_cycle;

-

-  ddg_ptr g;	/* The DDG of the insns in the partial schedule.  */

-

-  int stage_count;  /* The stage count of the partial schedule.  */

-};

-

-

 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
 static void free_partial_schedule (partial_schedule_ptr);
 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
@@ -308,7 +225,7 @@  static struct haifa_sched_info sms_sched

 
 /* Partial schedule instruction ID in PS is a register move.  Return
    information about it.  */
-static struct ps_reg_move_info *

+struct ps_reg_move_info *

 ps_reg_move (partial_schedule_ptr ps, int id)
 {
   gcc_checking_assert (id >= ps->g->num_nodes);
@@ -317,7 +234,7 @@  ps_reg_move (partial_schedule_ptr ps, in

 
 /* Return the rtl instruction that is being scheduled by partial schedule
    instruction ID, which belongs to schedule PS.  */
-static rtx

+rtx

 ps_rtl_insn (partial_schedule_ptr ps, int id)
 {
   if (id < ps->g->num_nodes)
@@ -824,9 +741,10 @@  schedule_reg_moves (partial_schedule_ptr

 }
 
 /* Emit the moves associatied with PS.  Apply the substitutions
-   associated with them.  */

-static void

-apply_reg_moves (partial_schedule_ptr ps)

+   associated with them.  If UPDATE_DF_P is true then update the df

+   information.  */

+void

+apply_reg_moves (partial_schedule_ptr ps, bool update_df_p)

 {
   ps_reg_move_info *move;
   int i;
@@ -839,11 +757,29 @@  apply_reg_moves (partial_schedule_ptr ps

       EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, i_use, sbi)
 	{
 	  replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg);
-	  df_insn_rescan (ps->g->nodes[i_use].insn);

+	  if (update_df_p)

+	    df_insn_rescan (ps->g->nodes[i_use].insn);

 	}
     }
 }
 
+/* Undo the substitutions associated with the moves of PS.  */

+void

+undo_reg_moves (partial_schedule_ptr ps)

+{

+  ps_reg_move_info *move;

+  int i;

+

+  FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move)

+  {

+    unsigned int i_use;

+    sbitmap_iterator sbi;

+

+    EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, i_use, sbi)

+      replace_rtx (ps->g->nodes[i_use].insn, move->new_reg, move->old_reg);

+  }

+}

+

 /* Bump the SCHED_TIMEs of all nodes by AMOUNT.  Set the values of
    SCHED_ROW and SCHED_STAGE.  Instruction scheduled on cycle AMOUNT
    will move to cycle zero.  */
@@ -1534,6 +1470,13 @@  sms_schedule (void)

     fprintf (dump_file, "=========================\n\n");
   }
 
+  if (flag_modulo_sched_reg_pressure)

+    {

+      regstat_init_n_sets_and_refs ();

+      ira_set_pseudo_classes (dump_file);

+      ira_setup_eliminable_regset ();

+    }

+

   /* We don't want to perform SMS on new loops - created by versioning.  */
   FOR_EACH_LOOP (li, loop, 0)
     {
@@ -1683,7 +1626,9 @@  sms_schedule (void)

 	  set_columns_for_ps (ps);
 
 	  min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
-	  if (!schedule_reg_moves (ps))

+	  if (!schedule_reg_moves (ps)

+	      || (flag_modulo_sched_reg_pressure

+		  && spillage_in_ps (loop, ps) > 0))

 	    {
 	      mii = ps->ii + 1;
 	      free_partial_schedule (ps);
@@ -1744,7 +1689,7 @@  sms_schedule (void)

 	  /* The life-info is not valid any more.  */
 	  df_set_bb_dirty (g->bb);
 
-	  apply_reg_moves (ps);

+	  apply_reg_moves (ps, true);

 	  if (dump_file)
 	    print_node_sched_params (dump_file, g->num_nodes, ps);
 	  /* Generate prolog and epilog.  */
@@ -1758,6 +1703,18 @@  sms_schedule (void)

       free_ddg (g);
     }
 
+  if (flag_modulo_sched_reg_pressure)

+    {

+      regstat_free_n_sets_and_refs ();

+      /* When setting pseudo_classes_defined_p within SMS the IRA

+	 pass that comes later applies one pass for finding pseudo/allocno

+	 costs instead of two; assuming the first pass was made before

+	 the final scheduling.	However the first pass could be inaccurate

+	 as it was done on the partial schedule.  */

+      reset_pseudo_classes_defined_p ();

+      free_reg_info ();

+    }

+

   free (g_arr);
 
   /* Release scheduler data, needed until now because of DFA.  */