Patchwork Patch for AMD Dispatch Scheduler

login
register
mail settings
Submitter reza yazdani
Date Aug. 13, 2010, 12:27 a.m.
Message ID <886108.82044.qm@web33007.mail.mud.yahoo.com>
Download mbox | patch
Permalink /patch/61657/
State New
Headers show

Comments

reza yazdani - Aug. 13, 2010, 12:27 a.m.
Dispatch scheduling is a new BD feature. It is composed of two parts: the scheduling part and the alignment part.

The scheduling part (this patch) arranges instructions to maximize the throughput of the hardware dispatcher. It makes sure dispatch widow boundaries are roughly observed. It is roughly, because the lengths of instructions, in number of bytes, are not known at the scheduling time. In x86 some instruction lengths may not be known until assembly time where information such as branch offsets are computed. Scheduling part is called once before register allocation and once after register allocation.

The alignment part (not in this patch) makes sure dispatch widows align at the correct boundaries.

Dispatch Scheduling is implemented as an extension to Haifa Scheduler pass. Scheduler is programed to follow x86-BD dispatching rules during the scheduling.

A new command line flag –mdispatch-scheduler is defined. This option sets flag_dispatch_scheduling. To perform dispatch scheduling “-march=bdver1” and Haifa Scheduling flags must all be selected on the command line.

Testing
-------

Self compile ran with “–mdispatch-scheduling –O3”. No new test added for this implementation. Make check of i386 tests passes. No difference in the number of failures with and without the dispatch flag.

ChangeLog
---------

2010-08-12  Reza Yazdani  <reza.yazdani@amd.com>

    * tm.texi.in (TARGET_SCHED_DISPATCH): New.
    (TARGET_SCHED_DISPATCH_INIT): New.
    (TARGET_SCHED_ADD_TO_DISPATCH_WINDOW): New.
    (TARGET_SCHED_FITS_DISPATCH_WINDOW): New.
    (TARGET_SCHED_DISPATCH_VIOLATION): New,
    (TARGET_SCHED_DEBUG_PRINT_INSN_DISPATCH_INFO): New.
    * tm.texi: Regererated.
    * hooks.c (hook_void_rtx): New.
    * hooks.h (hook_void_rtx): New.
    * target.def (dispatch_init): Defined.
    (dispatch): Defined.
    (fits_dispatch_window): Defined.
    (dispatch_violation): Defined.
    (is_cmp): Defined.
    (debug_print_insn_dispatch_info): Defined.
    (add_to_dispatch_window): Defined.
    * haifa-sched.c (ready_remove_first_dispatch): New.
    (debug_ready_dispatch): New.
    (debug_ready_dispatch): New.
    * sched-init.h (debug_ready_dispatch): Declared.
    * i386.h (debug_print_dispatch_window): Declared.
    (debug_print_insn_dispatch_info): Declared.
    * i386.opt (-mdispatch-scheduler): Declared.
    (flag_dispatch_scheduling): Declared.
    * i386.c (has_dispatch): New.
    (get_mem_group): New.
    (is_cmp): New.
    (dispatch_violation): New.
    (is_branch): New.
    (is_prefetch): New.
    (init_window): New.
    (allocate_window): New.
    (init_dispatch_sched): New.
    (is_end_basic_block): New.
    (process_end_window): New.
    (allocate_next_window): New.
    (find_con): New.
    (get_num_imm): New.
    (has_imm): New.
    (get_insn_length): New.
    (get_insn_path): New.
    (get_insn_group): New.
    (count_num_restricted): New.
    (fits_dispatch_window): New.
    (add_insn_window): New.
    (add_to_dispatch_window): New.
    (print_dispatch_window): New.
    (debug_print_dispatch_window): New.
    (print_insn_dispatch_info): New.
    (debug_print_insn_dispatch_info): New.
Uros Bizjak - Aug. 13, 2010, 8:33 a.m.
On Fri, Aug 13, 2010 at 2:27 AM, reza yazdani <yazdani_reza@yahoo.com> wrote:
> Dispatch scheduling is a new BD feature. It is composed of two parts: the scheduling part and the alignment part.
>
> The scheduling part (this patch) arranges instructions to maximize the throughput of the hardware dispatcher. It makes sure dispatch widow boundaries are roughly observed. It is roughly, because the lengths of instructions, in number of bytes, are not known at the scheduling time. In x86 some instruction lengths may not be known until assembly time where information such as branch offsets are computed. Scheduling part is called once before register allocation and once after register allocation.
>
> The alignment part (not in this patch) makes sure dispatch widows align at the correct boundaries.
>
> Dispatch Scheduling is implemented as an extension to Haifa Scheduler pass. Scheduler is programed to follow x86-BD dispatching rules during the scheduling.
>
> A new command line flag –mdispatch-scheduler is defined. This option sets flag_dispatch_scheduling. To perform dispatch scheduling “-march=bdver1” and Haifa Scheduling flags must all be selected on the command line.
>
> Testing
> -------
>
> Self compile ran with “–mdispatch-scheduling –O3”. No new test added for this implementation. Make check of i386 tests passes. No difference in the number of failures with and without the dispatch flag.

+/* Number of allowable groups in a dispatch window.  It is an array
+   indexed by dispatch_group enum.  100 is used as a big number,
+   because the number of these kind of operations does not have any
+   effect in dispatch window, but we need them for other reasons in
+   the table.  */
+static int num_allowable_groups[disp_last] =
+{
+  0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
+};

Some invalid number, like -1 doesn't fit there?

+/* Returns decode type on an AMDFAM10 machine which can be
+   "DIRECT", "DOUBLE", "VECTOR" which are decoded
+   to 1, 2 or more than 2 micro-ops respectively for INSN.  */
+static enum attr_amdfam10_decode
+dispatch_group_amdfam10_decode (rtx insn)

Space between comment and function declaration.

+/* Return true if INSN is a prefetch instruction.  */
+static bool
+is_prefetch (rtx insn)
+{
+  return ((strcmp (GET_RTX_NAME (GET_CODE (insn)), "prefetch_sse") == 0)
+         || (strcmp (GET_RTX_NAME (GET_CODE (insn)),
+                     "prefetch_sse_rex") == 0)
+         || (strcmp (GET_RTX_NAME (GET_CODE (insn)),
+                     "prefetch_3dnow") == 0)
+         || (strcmp (GET_RTX_NAME (GET_CODE (insn)),
+                     "prefetch_3dnow_rex") == 0));
+}

No! Please introduce "prefetch" type and handle it elsewhere instead
of the call to is_prefetch. The names already changed in the
mainline...

+static void
+process_end_window (void)
+{
+  gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
+  if (dispatch_window_list->next)
+    {
+      gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
+      gcc_assert (dispatch_window_list->window_size +
dispatch_window_list1->window_size <= 48);
+      init_window (1);
+    }
+  init_window (0);
+}

Watch for line lengths (there are multiple violations in the patch).

+static void
+find_con (const_rtx in_rtx, int *imm, int *imm32, int *imm64)

find_constant, please ?
+  code = GET_CODE (in_rtx);
+  if (code == CONST_INT)
+    {
+      (*imm)++;
+      (*imm32)++;
+    }
+  else if (code == CONST_DOUBLE)
+    {
+      (*imm)++;
+      (*imm64)++;
+    }

This will work in the wrong way on 64bit hosts, where CONST_INT also
covers 64bit immediates. Try to compile:

long long test (long long a)
{
  return a + 0x1122334455667788ll;
}

#(insn:TI 6 3 23 imm.c:2 (set (reg:DI 0 ax [61])
#        (const_int 1234605616436508552 [0x1122334455667788])) 89
{*movdi_1_rex64} (expr_list:REG_EQUIV (const_int 1234605616436508552
[0x1122334455667788])
#        (nil)))
	movabsq	$1234605616436508552, %rax	# 6	*movdi_1_rex64/3	[length = 10]

+static int
+get_num_imm (rtx insn, int *imm, int *imm32, int *imm64)

get_num_immediates...
Also, this function should be rewritten to use for_each_rtx to call
find_con, see many examples in gcc source.

+static bool
+has_imm (rtx insn)

has_immediate

+/* Get bytes length of INSN.
+   This function is very similar to the static function min_insn_size
+   in i386.c.  The main difference is the use of get_attr_length.  */
+
+static int
+get_insn_length (rtx insn)

Huh? min_insn_size also uses get_attr_length. This function is almost
exact copy of min_insn_size (minus some early discards and "important
cases" that are present in min_insn_size and not here). Please remove
this function.

+static enum insn_path
+get_insn_path (rtx insn)
+{
+  enum attr_amdfam10_decode path = dispatch_group_amdfam10_decode (insn);
+
+  if ((int)path == 0)
+    return path_single;
+
+  if ((int)path == 1)
+    return path_double;
+
+  return path_multi;

Just inline dispatch_group_amdfam10_decode (this is the only user).
Also, don't cast attributes, you can use
AMDFAM10_DECODE_{DIRECT,VECTOR,DOUBLE} defines.

+static int
+count_num_restricted (rtx insn, dispatch_windows *window_list)

Watch line lengths in this function!

+static void
+print_dispatch_window (FILE *file, int window_num)

Please also make this a DEBUG_FUNCTION, the convention is to name it
"debug_dispatch_window_file".

+  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d,\
+ imm_size = %d\n",
+          list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

No multiline strings... If it can't be split, it is tolerable for the
string can go over 72 character limit.

+/* Print to stderr a dispatch window.  */

Please print to stdout, as all other debug functions do.

+DEBUG_FUNCTION void
+debug_print_dispatch_window (int window_num)

debug_dispatch_window

+static void
+print_insn_dispatch_info (FILE *file, rtx insn)

Similar to above, make this a DEBUG_FUNCTION and rename it to
debug_insn_dispatch_info_file. Also, watch for multiline strings.

+DEBUG_FUNCTION void
+debug_print_insn_dispatch_info (rtx insn)
+{
+  print_insn_dispatch_info (stderr, insn);
+}

As above, print to stdout and rename this function to debug_insn_dispatch_info.

Thanks,
Uros.
Andrew Pinski - Aug. 13, 2010, 6:54 p.m.
On Fri, Aug 13, 2010 at 1:33 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>
> +/* Return true if INSN is a prefetch instruction.  */
> +static bool
> +is_prefetch (rtx insn)
> +{
> +  return ((strcmp (GET_RTX_NAME (GET_CODE (insn)), "prefetch_sse") == 0)
> +         || (strcmp (GET_RTX_NAME (GET_CODE (insn)),
> +                     "prefetch_sse_rex") == 0)
> +         || (strcmp (GET_RTX_NAME (GET_CODE (insn)),
> +                     "prefetch_3dnow") == 0)
> +         || (strcmp (GET_RTX_NAME (GET_CODE (insn)),
> +                     "prefetch_3dnow_rex") == 0));
> +}
>
> No! Please introduce "prefetch" type and handle it elsewhere instead
> of the call to is_prefetch. The names already changed in the
> mainline...


No need to introduce a "prefetch" type as they will contain a prefetch RTX.

You can do:
static bool
is_prefetch (rtx insn)
{
  return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
}

Thanks,
Andrew Pinski
Richard Henderson - Aug. 16, 2010, 11:14 p.m.
On 08/12/2010 05:27 PM, reza yazdani wrote:
> The scheduling part (this patch) arranges instructions to maximize
> the throughput of the hardware dispatcher. It makes sure dispatch
> widow boundaries are roughly observed. It is roughly, because the
> lengths of instructions, in number of bytes, are not known at the
> scheduling time. In x86 some instruction lengths may not be known
> until assembly time where information such as branch offsets are
> computed. Scheduling part is called once before register allocation
> and once after register allocation.

I'm a bit confused how this "dispatch" scheduling is different
from other scheduling, and why it needs so many new hooks.  The
whole process smells very similar to "bundling" like we'd do on
ia64 for instance.

For instance, if I compare the structure of your new 
ready_remove_first_dispatch function to choose_ready, I see
many similarities.  It sure looks like the multipass_dfa
scheduling hooks can be made to do what you want.

Can you explain what's fundamentally different about your bits?



r~
Vladimir Makarov - Aug. 20, 2010, 3:29 p.m.
On 08/12/2010 08:27 PM, reza yazdani wrote:
> Dispatch scheduling is a new BD feature. It is composed of two parts: the scheduling part and the alignment part.
>
> The scheduling part (this patch) arranges instructions to maximize the throughput of the hardware dispatcher. It makes sure dispatch widow boundaries are roughly observed. It is roughly, because the lengths of instructions, in number of bytes, are not known at the scheduling time. In x86 some instruction lengths may not be known until assembly time where information such as branch offsets are computed. Scheduling part is called once before register allocation and once after register allocation.
>
> The alignment part (not in this patch) makes sure dispatch widows align at the correct boundaries.
>
> Dispatch Scheduling is implemented as an extension to Haifa Scheduler pass. Scheduler is programed to follow x86-BD dispatching rules during the scheduling.
>
   I thought about this patch for a long time.  The insn scheduler is
very machine-dependent pass and it is hard to use the same model for
so different targets.  The insn scheduler reached a point where it has
too many hooks some of them is duplicated.  We need to do some work to
decrease number of hooks and their renaming.  For example, code for
add_to_dispatch_window call can be hidden in variable_issue or
dispatch_init in issue_rate or dfa_{pre,post}_cycle_insn.  Of course,
the work is not for you because it needs a lot of knowledge about GCC
insn scheduler.

   As for the scheduler part of the patch.  I think it is ok for now.
But if you used dfa pipeline hazard recognizer as Richard Henderson
mentioned, you would not need function ready_remove_first_dispatch and
you could use first cycle multi-pass insn scheduling (although it is
not necessary for your target because constraints do not depend on the
insn order in the window as for Itanium but it might change in future
who knows).  You could also use modulo-scheduling which might be more
important for OOO processors than insn scheduling.  You would not need
a new option too for dispatch scheduling.  Writing for dfa description
of a new processor would require nontraditional programming like
defining new attributes for md insns and usage of them in dfa
description.  I think it is too late for GCC4.6 to rewrite this all.
But I think it is worth to be considered for next GCC release.  I guess you
could avoid the current implementation if you discussed the approach 
with the GCC
community before starting the implementation.

   On the other hand, the current implementation of dfa pipeline hazard 
recognizer
has own disadvantage for x86/x86_64.  The recognizer is considered to be 
too big
because of numerous automata.

   The scheduler part of patch is ok for the trunk.

Thanks.


> A new command line flag –mdispatch-scheduler is defined. This option sets flag_dispatch_scheduling. To perform dispatch scheduling “-march=bdver1” and Haifa Scheduling flags must all be selected on the command line.
>
Sebastian Pop - Aug. 20, 2010, 9:57 p.m.
On Fri, Aug 20, 2010 at 10:29, Vladimir N. Makarov <vmakarov@redhat.com> wrote:
> I guess you could avoid the current implementation if you discussed
> the approach with the GCC community before starting the
> implementation.

FYI, http://gcc.gnu.org/ml/gcc/2010-06/msg00402.html

Sebastian
reza yazdani - Aug. 21, 2010, 1:10 a.m.
I thought we did that when we talked to SuSE. In presentation we had at Austin, we specifically asked if this implementation was okay and they said it is okay.

Reza

--- On Fri, 8/20/10, Sebastian Pop <sebpop@gmail.com> wrote:

> From: Sebastian Pop <sebpop@gmail.com>
> Subject: Re: Patch for AMD Dispatch Scheduler
> To: "Vladimir N. Makarov" <vmakarov@redhat.com>
> Cc: "reza yazdani" <yazdani_reza@yahoo.com>, gcc-patches@gcc.gnu.org, jh@suse.cz, ubizjak@gmail.com
> Date: Friday, August 20, 2010, 2:57 PM
> On Fri, Aug 20, 2010 at 10:29,
> Vladimir N. Makarov <vmakarov@redhat.com>
> wrote:
> > I guess you could avoid the current implementation if
> you discussed
> > the approach with the GCC community before starting
> the
> > implementation.
> 
> FYI, http://gcc.gnu.org/ml/gcc/2010-06/msg00402.html
> 
> Sebastian
>
Vladimir Makarov - Aug. 31, 2010, 8:22 p.m.
On 08/20/2010 05:57 PM, Sebastian Pop wrote:
> On Fri, Aug 20, 2010 at 10:29, Vladimir N. Makarov<vmakarov@redhat.com>  wrote:
>> I guess you could avoid the current implementation if you discussed
>> the approach with the GCC community before starting the
>> implementation.
> FYI,http://gcc.gnu.org/ml/gcc/2010-06/msg00402.html
>
Sorry then.  I missed this discussion because of my vacation in June.  
That is unfortunate for me again that I was not able to answer this 
email quickly because my another vacation on previous week.


As for the usage of DFA for recognizing the bulldozer constraints, it is 
not trivial (because of implementing small adders with DFA) but possible 
task.  But again I am not insisting on it although there are some 
advantages in such approach.

I'd advice to investigate modulo-scheduling too if you are interesting 
in improving code for the bulldozer.  As I wrote it is more important 
for OOO processor (especially for FP code) than the global 
insn-scheduling.  At least as I checked a couple years ago Intel 
compiler does software pipelining for x86_64.

Patch

Index: doc/tm.texi
===================================================================
--- doc/tm.texi	(revision 163182)
+++ doc/tm.texi	(working copy)
@@ -6759,6 +6759,38 @@  bound will be used in case this hook is 
 of instructions divided by the issue rate.
 @end deftypefn
 
+@deftypefn {Target Hook} bool TARGET_SCHED_DISPATCH (void)
+This hook is called by Haifa Scheduler.  It returns true if dispatch scheduling
+is supported in hardware.
+@end deftypefn
+
+@deftypefn {Target Hook} void TARGET_SCHED_DISPATCH_INIT (void)
+This hook is called by Haifa Scheduler.  It initializes the dispatch scheduler.
+@end deftypefn
+
+@deftypefn {Target Hook} void TARGET_SCHED_ADD_TO_DISPATCH_WINDOW (rtx @var{insn})
+This hook is called by Haifa Scheduler.  It adds the last instruction scheduled
+to the dispatch window.
+@end deftypefn
+
+@deftypefn {Target Hook} bool TARGET_SCHED_FITS_DISPATCH_WINDOW (rtx @var{insn})
+This hook is called by Haifa Scheduler.  It checks if an insn fits dispatch
+window.
+@end deftypefn
+
+@deftypefn {Target Hook} bool TARGET_SCHED_DISPATCH_VIOLATION (void)
+This hook is called by Haifa Scheduler.  It reports a dispatch violation.
+@end deftypefn
+
+@deftypefn {Target Hook} bool TARGET_SCHED_IS_CMP (rtx @var{insn})
+This hook is called by Haifa Scheduler.  It reports if insn is a compare.
+@end deftypefn
+
+@deftypefn {Target Hook} void TARGET_SCHED_DEBUG_PRINT_INSN_DISPATCH_INFO (rtx @var{insn})
+This hook is called by Haifa Scheduler.  It reports information related to
+dispatch scheduling.
+@end deftypefn
+
 @node Sections
 @section Dividing the Output into Sections (Texts, Data, @dots{})
 @c the above section title is WAY too long.  maybe cut the part between
Index: doc/tm.texi.in
===================================================================
--- doc/tm.texi.in	(revision 163182)
+++ doc/tm.texi.in	(working copy)
@@ -6759,6 +6759,38 @@  bound will be used in case this hook is 
 of instructions divided by the issue rate.
 @end deftypefn
 
+@hook TARGET_SCHED_DISPATCH
+This hook is called by Haifa Scheduler.  It returns true if dispatch scheduling
+is supported in hardware.
+@end deftypefn
+
+@hook TARGET_SCHED_DISPATCH_INIT
+This hook is called by Haifa Scheduler.  It initializes the dispatch scheduler.
+@end deftypefn
+
+@hook TARGET_SCHED_ADD_TO_DISPATCH_WINDOW
+This hook is called by Haifa Scheduler.  It adds the last instruction scheduled
+to the dispatch window.
+@end deftypefn
+
+@hook TARGET_SCHED_FITS_DISPATCH_WINDOW
+This hook is called by Haifa Scheduler.  It checks if an insn fits dispatch
+window.
+@end deftypefn
+
+@hook TARGET_SCHED_DISPATCH_VIOLATION
+This hook is called by Haifa Scheduler.  It reports a dispatch violation.
+@end deftypefn
+
+@hook TARGET_SCHED_IS_CMP
+This hook is called by Haifa Scheduler.  It reports if insn is a compare.
+@end deftypefn
+
+@hook TARGET_SCHED_DEBUG_PRINT_INSN_DISPATCH_INFO
+This hook is called by Haifa Scheduler.  It reports information related to
+dispatch scheduling.
+@end deftypefn
+
 @node Sections
 @section Dividing the Output into Sections (Texts, Data, @dots{})
 @c the above section title is WAY too long.  maybe cut the part between
Index: hooks.c
===================================================================
--- hooks.c	(revision 163182)
+++ hooks.c	(working copy)
@@ -335,8 +335,16 @@  hook_constcharptr_int_const_tree_const_t
 }
 
 /* Generic hook that takes a const_tree and returns NULL_TREE.  */
+
 tree
 hook_tree_const_tree_null (const_tree t ATTRIBUTE_UNUSED)
 {
   return NULL;
 }
+
+/* Generic hook that takes a rtx and return is void.  */
+
+void
+hook_void_rtx (rtx insn ATTRIBUTE_UNUSED)
+{
+}
Index: hooks.h
===================================================================
--- hooks.h	(revision 163182)
+++ hooks.h	(working copy)
@@ -54,6 +54,7 @@  extern bool hook_bool_tree_tree_true (tr
 extern bool hook_bool_tree_bool_false (tree, bool);
 
 extern void hook_void_void (void);
+extern void hook_void_rtx (rtx);
 extern void hook_void_constcharptr (const char *);
 extern void hook_void_FILEptr_constcharptr (FILE *, const char *);
 extern void hook_void_tree (tree);
Index: target.def
===================================================================
--- target.def	(revision 163182)
+++ target.def	(working copy)
@@ -761,6 +761,62 @@  DEFHOOK
  "",
  int, (struct ddg *g), NULL)
 
+/* The following member value is a function that initializes dispatch
+   schedling.  */
+DEFHOOK
+(dispatch_init,
+"",
+void, (void),
+hook_void_void)
+
+/* The following member value is a a function that returns true is
+   dispatch schedling is supported in hardware.  */
+DEFHOOK
+(dispatch,
+"",
+bool, (void),
+hook_bool_void_false)
+
+/* The following member value is a function that reports if the insn
+   fits the dispatch window.  */
+DEFHOOK
+(fits_dispatch_window,
+"",
+bool, (rtx insn),
+hook_bool_rtx_false)
+
+/* The following member value is a function that reports a dispatch violation
+   schedling.  */
+DEFHOOK
+(dispatch_violation,
+"",
+bool, (void),
+hook_bool_void_false)
+
+/* The following member value is a function that reports if the insn
+   is a compare.  */
+DEFHOOK
+(is_cmp,
+"",
+bool, (rtx insn),
+hook_bool_rtx_false)
+
+/* The following member value is a function dumps information about dispatch
+   scheduling windows.  */
+DEFHOOK
+(debug_print_insn_dispatch_info,
+"",
+void, (rtx insn),
+hook_void_rtx)
+
+/* The following member function adds an instruction to the dispatch window.  */
+
+DEFHOOK
+(add_to_dispatch_window,
+"",
+void, (rtx insn),
+hook_void_rtx)
+
 HOOK_VECTOR_END (sched)
 
 /* Functions relating to vectorization.  */
Index: haifa-sched.c
===================================================================
--- haifa-sched.c	(revision 163182)
+++ haifa-sched.c	(working copy)
@@ -142,6 +142,7 @@  along with GCC; see the file COPYING3.  
 #include "recog.h"
 #include "sched-int.h"
 #include "target.h"
+#include "target-def.h"
 #include "output.h"
 #include "params.h"
 #include "vecprim.h"
@@ -531,7 +532,6 @@  static void extend_h_i_d (void);
    inserted at the beginning of the block (in schedule_block()).  */
 
 static void ready_add (struct ready_list *, rtx, bool);
-static rtx ready_remove_first (struct ready_list *);
 
 static void queue_to_ready (struct ready_list *);
 static int early_queue_to_ready (state_t, struct ready_list *);
@@ -2003,9 +2003,13 @@  queue_to_ready (struct ready_list *ready
   q_ptr = NEXT_Q (q_ptr);
 
   if (dbg_cnt (sched_insn) == false)
-    /* If debug counter is activated do not requeue insn next after
-       last_scheduled_insn.  */
-    skip_insn = next_nonnote_nondebug_insn (last_scheduled_insn);
+    {
+      /* If debug counter is activated do not requeue insn next after
+	 last_scheduled_insn.  */
+      skip_insn = next_nonnote_insn (last_scheduled_insn);
+      while (skip_insn && DEBUG_INSN_P (skip_insn))
+	skip_insn = next_nonnote_insn (skip_insn);
+    }
   else
     skip_insn = NULL_RTX;
 
@@ -2605,6 +2609,55 @@  max_issue (struct ready_list *ready, int
   return best;
 }
 
+/* Get first insn that fits in the dispatch window.  If none exists return
+   first insn.  */
+
+static rtx
+ready_remove_first_dispatch (struct ready_list *ready)
+{
+  int i;
+  rtx insn = ready_element (ready, 0);
+
+  if (ready->n_ready == 1
+      || INSN_CODE (insn) < 0
+      || !INSN_P (insn)
+      || !active_insn_p (insn)
+      || targetm.sched.fits_dispatch_window (insn))
+    return ready_remove_first (ready);
+
+  for (i = 1; i < ready->n_ready; i++)
+    {
+      insn = ready_element (ready, i);
+      if (INSN_CODE (insn) < 0
+	  || !INSN_P (insn)
+	  || !active_insn_p (insn))
+	continue;
+      if (targetm.sched.fits_dispatch_window (insn))
+	{
+	  /* Return ith element of ready.  */
+	  insn = ready_remove (ready, i);
+	  return insn;
+	}
+    }
+
+  if (targetm.sched.dispatch_violation ())
+    return ready_remove_first (ready);
+
+  for (i = 1; i < ready->n_ready; i++)
+    {
+      insn = ready_element (ready, i);
+      if (INSN_CODE (insn) < 0
+	  || !INSN_P (insn)
+	  || !active_insn_p (insn))
+	continue;
+      if (targetm.sched.is_cmp (insn))
+	  /* Return ith element of ready.  */
+	  return ready_remove (ready, i);
+    }
+
+  return ready_remove_first (ready);
+}
+
 /* The following function chooses insn from READY and modifies
    READY.  The following function is used only for first
    cycle multipass scheduling.
@@ -2642,7 +2695,10 @@  choose_ready (struct ready_list *ready, 
   if (lookahead <= 0 || SCHED_GROUP_P (ready_element (ready, 0))
       || DEBUG_INSN_P (ready_element (ready, 0)))
     {
-      *insn_ptr = ready_remove_first (ready);
+      if (targetm.sched.dispatch ())
+	*insn_ptr = ready_remove_first_dispatch (ready);
+      else
+	*insn_ptr = ready_remove_first (ready);
       return 0;
     }
   else
@@ -3140,6 +3196,11 @@  schedule_block (basic_block *target_bb)
 						       last_scheduled_insn);
 
 	  move_insn (insn, last_scheduled_insn, current_sched_info->next_tail);
+
+	  /* Add INSN to dispatch window if dispatch scheduling is supported.  */
+	  if (targetm.sched.dispatch ())
+	    targetm.sched.add_to_dispatch_window (insn);
+
 	  reemit_notes (insn);
 	  last_scheduled_insn = insn;
 
@@ -3364,8 +3425,12 @@  sched_init (void)
   flag_schedule_speculative_load = 0;
 #endif
 
+  if (targetm.sched.dispatch ())
+    targetm.sched.dispatch_init ();
+
   sched_pressure_p = (flag_sched_pressure && ! reload_completed
 		      && common_sched_info->sched_pass_id == SCHED_RGN_PASS);
+
   if (sched_pressure_p)
     ira_setup_eliminable_regset ();
 
@@ -5562,4 +5627,17 @@  sched_emit_insn (rtx pat)
   return insn;
 }
 
+/* Print to STDERR the status of the ready list with respect to
+   dispatch windows.  */
+
+DEBUG_FUNCTION void
+debug_ready_dispatch (void)
+{
+  int i;
+
+  fprintf (stderr, "Number of ready: %d\n", ready.n_ready);
+  for (i = 0; i < ready.n_ready; i++)
+    targetm.sched.debug_print_insn_dispatch_info (ready_element (&ready, i));
+}
+
 #endif /* INSN_SCHEDULING */
Index: sched-int.h
===================================================================
--- sched-int.h	(revision 163182)
+++ sched-int.h	(working copy)
@@ -1269,6 +1269,7 @@  extern void add_block (basic_block, basi
 extern rtx bb_note (basic_block);
 extern void concat_note_lists (rtx, rtx *);
 extern rtx sched_emit_insn (rtx);
+extern void debug_ready_dispatch (void);
 
 
 /* Types and functions in sched-rgn.c.  */
Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h	(revision 163182)
+++ config/i386/i386.h	(working copy)
@@ -2411,6 +2411,9 @@  struct GTY(()) machine_function {
 #define SYMBOL_REF_DLLEXPORT_P(X) \
 	((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_DLLEXPORT) != 0)
 
+extern void debug_print_dispatch_window (int);
+extern void debug_print_insn_dispatch_info (rtx);
+
 /*
 Local variables:
 version-control: t
Index: config/i386/i386.opt
===================================================================
--- config/i386/i386.opt	(revision 163182)
+++ config/i386/i386.opt	(working copy)
@@ -250,6 +250,11 @@  Enable automatic generation of fused flo
 if the ISA supports such instructions.  The -mfused-madd option is on by
 default.
 
+mdispatch-scheduler
+Target RejectNegative Var(flag_dispatch_scheduler)
+Do dispatch scheduling if processor is bdver1 and Haifa scheduling
+is selected.
+
 ;; ISA support
 
 m32
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 163182)
+++ config/i386/i386.c	(working copy)
@@ -7942,7 +7942,6 @@  get_pc_thunk_name (char name[32], unsign
     ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
 }
 
-
 /* This function generates code for -fpic that loads %ebx with
    the return address of the caller and then returns.  */
 
@@ -20640,6 +20639,16 @@  ix86_issue_rate (void)
     }
 }
 
+/* Return TRUE if Dispatch Scheduling is supported.  */
+
+static bool
+has_dispatch (void)
+{
+  if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
+    return true;
+  return false;
+}
+
 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
    by DEP_INSN and nothing set by DEP_INSN.  */
 
@@ -31551,6 +31560,20 @@  ix86_enum_va_list (int idx, const char *
 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
   ia32_multipass_dfa_lookahead
+#undef TARGET_SCHED_DISPATCH
+#define TARGET_SCHED_DISPATCH has_dispatch
+#undef TARGET_SCHED_DISPATCH_INIT
+#define TARGET_SCHED_DISPATCH_INIT init_dispatch_sched
+#undef TARGET_SCHED_ADD_TO_DISPATCH_WINDOW
+#define TARGET_SCHED_ADD_TO_DISPATCH_WINDOW add_to_dispatch_window
+#undef TARGET_SCHED_FITS_DISPATCH_WINDOW
+#define TARGET_SCHED_FITS_DISPATCH_WINDOW fits_dispatch_window
+#undef TARGET_SCHED_DISPATCH_VIOLATION
+#define TARGET_SCHED_DISPATCH_VIOLATION dispatch_violation
+#undef TARGET_SCHED_IS_CMP
+#define TARGET_SCHED_IS_CMP is_cmp
+#undef TARGET_SCHED_DEBUG_PRINT_INSN_DISPATCH_INFO
+#define TARGET_SCHED_DEBUG_PRINT_INSN_DISPATCH_INFO debug_print_insn_dispatch_info
 
 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
@@ -31747,6 +31770,822 @@  ix86_enum_va_list (int idx, const char *
 #undef TARGET_ASM_CODE_END
 #define TARGET_ASM_CODE_END ix86_code_end
 
+/* The size of the dispatch window is the total number of bytes of
+   object code allowed in a window.  */
+#define DISPATCH_WINDOW_SIZE 16
+
+/* Number of dispatch windows considered for scheduling.  */
+#define MAX_DISPATCH_WINDOWS 3
+
+/* Maximum number of instructions in a window.  */
+#define MAX_INSN 4
+
+/* Maximum number of immediate operands in a window.  */
+#define MAX_IMM 4
+
+/* Maximum number of immediate bits allowed in a window.  */
+#define MAX_IMM_SIZE 128
+
+/* Maximum number of 32 bit immediates allowed in a window.  */
+#define MAX_IMM_32 4
+
+/* Maximum number of 64 bit immediates allowed in a window.  */
+#define MAX_IMM_64 2
+
+/* Maximum total of loads or prefetches allowed in a window.  */
+#define MAX_LOAD 2
+
+/* Maximum total of stores allowed in a window.  */
+#define MAX_STORE 1
+
+#undef BIG
+#define BIG 100
+
+
+/* Dispatch groups.  Istructions that affect the mix in a dispatch window.  */
+enum dispatch_group
+{
+  disp_no_group = 0,
+  disp_load,
+  disp_store,
+  disp_load_store,
+  disp_prefetch,
+  disp_imm,
+  disp_imm_32,
+  disp_imm_64,
+  disp_branch,
+  disp_cmp,
+  disp_jcc,
+  disp_last
+};
+
+/* Number of allowable groups in a dispatch window.  It is an array
+   indexed by dispatch_group enum.  100 is used as a big number,
+   because the number of these kind of operations does not have any
+   effect in dispatch window, but we need them for other reasons in
+   the table.  */
+static int num_allowable_groups[disp_last] =
+{
+  0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
+};
+
+char group_name[disp_last + 1][16] =
+{
+  "disp_no_group", "disp_load", "disp_store", "disp_load_store",
+  "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
+  "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
+};
+
+/* Instruction path.  */
+enum insn_path
+{
+  no_path = 0,
+  path_single, /* Single micro op.  */
+  path_double, /* Double micro op.  */
+  path_multi,  /* Instructions with more than 2 micro op..  */
+  last_path
+};
+
+/* sched_insn_info defines a window to the instructions scheduled in
+   the basic block.  It contains a pointer to the insn_info table and
+   the instruction scheduled.
+
+   Windows are allocated for each basic block and are linked
+   together.  */
+typedef struct sched_insn_info_s
+{
+  rtx insn;
+  enum dispatch_group group;
+  enum insn_path path;
+  int byte_len;
+  int imm_bytes;
+} sched_insn_info;
+
+/* Linked list of dispatch windows.  This is a two way list of
+   dispatch windows of a basic block.  It contains information about
+   the number of uops in the window and the total number of
+   instructions and of bytes in the object code for this dispatch
+   window.  */
+typedef struct dispatch_windows_s
+{
+  int num_insn;            /* Number of insn in the window.  */
+  int num_uops;            /* Number of uops in the window.  */
+  int window_size;         /* Number of bytes in the window.  */
+  int window_num;          /* Window number between 0 or 1.  */
+  int num_imm;             /* Number of immediates in an insn.  */
+  int num_imm_32;          /* Number of 32 bit immediates in an insn.  */
+  int num_imm_64;          /* Number of 64 bit immediates in an insn.  */
+  int imm_size;            /* Total immediates in the window.  */
+  int num_loads;           /* Total memory loads in the window.  */
+  int num_stores;          /* Total memory stores in the window.  */
+  int violation;          /* Violation exists in window.  */
+  sched_insn_info *window; /* Pointer to the window.  */
+  struct dispatch_windows_s *next;
+  struct dispatch_windows_s *prev;
+} dispatch_windows;
+
+static dispatch_windows *dispatch_window_list;
+static dispatch_windows *dispatch_window_list1;
+
+/* Returns decode type on an AMDFAM10 machine which can be
+   "DIRECT", "DOUBLE", "VECTOR" which are decoded
+   to 1, 2 or more than 2 micro-ops respectively for INSN.  */
+static enum attr_amdfam10_decode
+dispatch_group_amdfam10_decode (rtx insn)
+{
+  enum attr_amdfam10_decode amdfam10_decode;
+  amdfam10_decode = get_attr_amdfam10_decode (insn);
+  return amdfam10_decode;
+}
+
+/* Get dispatch group of INSN.  */
+
+static enum dispatch_group
+get_mem_group (rtx insn)
+{
+  enum attr_memory memory;
+
+  if (INSN_CODE (insn) < 0)
+    return disp_no_group;
+  memory = get_attr_memory (insn);
+  if (memory == MEMORY_STORE)
+    return disp_store;
+
+  if (memory == MEMORY_LOAD)
+    return disp_load;
+
+  if (memory == MEMORY_BOTH)
+    return disp_load_store;
+
+  return disp_no_group;
+}
+
+/* Return true if INSN is a compare instruction.  */
+
+static bool
+is_cmp (rtx insn)
+{
+  enum attr_type type;
+  type = get_attr_type (insn);
+  return (type == TYPE_TEST || type == TYPE_ICMP || type == TYPE_FCMP);
+}
+
+/* Return true if a dispatch violation encountered.  */
+
+static bool
+dispatch_violation (void)
+{
+  if (dispatch_window_list->next)
+    return dispatch_window_list->next->violation;
+  return dispatch_window_list->violation;
+}
+
+/* Return true if INSN is a branch instruction.  */
+
+static bool
+is_branch (rtx insn)
+{
+  return (CALL_P (insn) || JUMP_P (insn));
+}
+
+/* Return true if INSN is a prefetch instruction.  */
+static bool
+is_prefetch (rtx insn)
+{
+  return ((strcmp (GET_RTX_NAME (GET_CODE (insn)), "prefetch_sse") == 0)
+	  || (strcmp (GET_RTX_NAME (GET_CODE (insn)),
+		      "prefetch_sse_rex") == 0)
+	  || (strcmp (GET_RTX_NAME (GET_CODE (insn)),
+		      "prefetch_3dnow") == 0)
+	  || (strcmp (GET_RTX_NAME (GET_CODE (insn)),
+		      "prefetch_3dnow_rex") == 0));
+}
+
+/* This function initializes a dispatch window and the list container holding a
+   pointer to the window for WINDOW_NUM.  */
+
+static void
+init_window (int window_num)
+{
+  int i;
+  dispatch_windows *new_list;
+
+  if (window_num == 0)
+    new_list = dispatch_window_list;
+  else
+    new_list = dispatch_window_list1;
+
+  new_list->num_insn = 0;
+  new_list->num_uops = 0;
+  new_list->window_size = 0;
+  new_list->next = NULL;
+  new_list->prev = NULL;
+  new_list->window_num = window_num;
+  new_list->num_imm = 0;
+  new_list->num_imm_32 = 0;
+  new_list->num_imm_64 = 0;
+  new_list->imm_size = 0;
+  new_list->num_loads = 0;
+  new_list->num_stores = 0;
+  new_list->violation = false;
+
+  for (i = 0; i < MAX_INSN; i++)
+    {
+      new_list->window[i].insn = NULL;
+      new_list->window[i].group = disp_no_group;
+      new_list->window[i].path = no_path;
+      new_list->window[i].byte_len = 0;
+      new_list->window[i].imm_bytes = 0;
+    }
+  return;
+}
+
+/* This function allocates and initializes a dispatch window and the
+   list container holding a pointer to the window.  */
+
+static dispatch_windows *
+allocate_window (void)
+{
+  dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
+  new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
+
+  return new_list;
+}
+
+/* This routine initializes the dispatch scheduling information.  It
+   initiates building dispatch scheduler tables and constructs the
+   first dispatch window.  */
+
+static void
+init_dispatch_sched (void)
+{
+  /* Allocate a dispatch list and a window.  */
+  dispatch_window_list = allocate_window ();
+  dispatch_window_list1 = allocate_window ();
+  init_window (0);
+  init_window (1);
+}
+
+
+/* This function returns true if GROUP indicates a branch.  End of
+   a basic block does not have to be a branch, but here we assume only
+   branches end a window.  */
+
+static bool
+is_end_basic_block (enum dispatch_group group)
+{
+  return group == disp_branch;
+}
+
+/* This function is called when the end of a window processing is reached.  */
+
+static void
+process_end_window (void)
+{
+  gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
+  if (dispatch_window_list->next)
+    {
+      gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
+      gcc_assert (dispatch_window_list->window_size + dispatch_window_list1->window_size <= 48);
+      init_window (1);
+    }
+  init_window (0);
+}
+
+/* Allocates a new dispatch window and adds it to WINDOW_LIST.
+   WINDOW_NUM is either 0 or 1.  A maximum of two windows are generated
+   for 48 bytes of instructions.  Note that these windows are not dispatch
+   windows that their sizes are DISPATCH_WINDOW_SIZE.  */
+
+static dispatch_windows *
+allocate_next_window (int window_num)
+{
+  if (window_num == 0)
+    {
+      if (dispatch_window_list->next)
+	  init_window (1);
+      init_window (0);
+      return dispatch_window_list;
+    }
+
+  dispatch_window_list->next = dispatch_window_list1;
+  dispatch_window_list1->prev = dispatch_window_list;
+
+  return dispatch_window_list1;
+}
+
+/* Recursive function returning total sizes of immediate operands of an
+   instruction along with number of corresponding immediate-operands.
+   IN_RTX is the input instruction.  IMM is the total of immediates.
+   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
+   bit immediates.  */
+
+static void
+find_con (const_rtx in_rtx, int *imm, int *imm32, int *imm64)
+{
+  int i = 0;
+  int j;
+  const char *format_ptr;
+  enum rtx_code code;
+
+  if (in_rtx == 0)
+    return;
+
+  code = GET_CODE (in_rtx);
+  if (code == CONST_INT)
+    {
+      (*imm)++;
+      (*imm32)++;
+    }
+  else if (code == CONST_DOUBLE)
+    {
+      (*imm)++;
+      (*imm64)++;
+    }
+  else if (GET_CODE (in_rtx) == CODE_LABEL)
+    {
+      if (LABEL_KIND (in_rtx) == LABEL_NORMAL)
+	{
+	  (*imm)++;
+	  (*imm32)++;
+	}
+    }
+
+  if (GET_CODE (in_rtx) == VAR_LOCATION)
+    {
+      find_con (PAT_VAR_LOCATION_LOC (in_rtx), imm, imm32, imm64);
+      i = GET_RTX_LENGTH (VAR_LOCATION);
+    }
+
+  if (GET_CODE (in_rtx) == CONST_DOUBLE && FLOAT_MODE_P (GET_MODE (in_rtx)))
+    i = 5;
+
+  format_ptr = GET_RTX_FORMAT (GET_CODE (in_rtx)) + i;
+  for (; i < GET_RTX_LENGTH (GET_CODE (in_rtx)); i++)
+    switch (*format_ptr++)
+      {
+      case 'e':
+	find_con (XEXP (in_rtx, i), imm, imm32, imm64);
+	break;
+
+      case 'E':
+      case 'V':
+	if (XVEC (in_rtx, i) != NULL)
+	    for (j = 0; j < XVECLEN (in_rtx, i); j++)
+	      find_con (XVECEXP (in_rtx, i, j), imm, imm32, imm64);
+	break;
+      }
+}
+
+/* Return total sizes of immediate operands of an instruction along with number
+   of corresponding immediate-operands.  It initializes its parameters to zero
+   befor calling FIND_CON.
+   INSN is the input instruction.  IMM is the total of immediates.
+   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
+   bit immediates.  */
+
+static int
+get_num_imm (rtx insn, int *imm, int *imm32, int *imm64)
+{
+  *imm = 0;
+  *imm32 = 0;
+  *imm64 = 0;
+  find_con (insn, imm, imm32, imm64);
+  return *imm32 * 4 + *imm64 * 8;
+}
+
+/* This function indicates if an operand of INSN is an immediate.  */
+
+static bool
+has_imm (rtx insn)
+{
+  int num_imm_operand;
+  int num_imm32_operand;
+  int num_imm64_operand;
+
+  if (insn)
+    return get_num_imm (insn, &num_imm_operand, &num_imm32_operand,
+			&num_imm64_operand);
+  return false;
+}
+
+/* Get bytes length of INSN.
+   This function is very similar to the static function min_insn_size
+   in i386.c.  The main difference is the use of get_attr_length.  */
+
+static int
+get_insn_length (rtx insn)
+{
+  int len, l = 0;
+
+  if (!INSN_P (insn) || !active_insn_p (insn))
+    return 0;
+
+  len = get_attr_length (insn);
+  /* For normal instructions we rely on get_attr_length being exact,
+     with a few exceptions.  */
+  if (!JUMP_P (insn))
+    {
+      enum attr_type type = get_attr_type (insn);
+
+      switch (type)
+	{
+	case TYPE_MULTI:
+	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
+	      || asm_noperands (PATTERN (insn)) >= 0)
+	    return 0;
+	  break;
+	case TYPE_OTHER:
+	case TYPE_FCMP:
+	  break;
+	default:
+	  /* Otherwise trust get_attr_length.  */
+	  return len;
+	}
+
+      l = get_attr_length_address (insn);
+      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
+	l = 4;
+    }
+  if (l)
+    return 1+l;
+  else
+    return 2;
+}
+
+
+/* Return single or double path for instructions.  */
+
+static enum insn_path
+get_insn_path (rtx insn)
+{
+  enum attr_amdfam10_decode path = dispatch_group_amdfam10_decode (insn);
+
+  if ((int)path == 0)
+    return path_single;
+
+  if ((int)path == 1)
+    return path_double;
+
+  return path_multi;
+}
+
+/* Return INSN dispatch group.  */
+
+static enum dispatch_group
+get_insn_group (rtx insn)
+{
+  enum dispatch_group group = get_mem_group (insn);
+  if (group)
+    return group;
+
+  if (is_branch (insn))
+    return disp_branch;
+
+  if (is_cmp (insn))
+    return disp_cmp;
+
+  if (has_imm (insn))
+    return disp_imm;
+
+  if (is_prefetch (insn))
+    return disp_prefetch;
+
+  return disp_no_group;
+}
+
+/* Count number of GROUP restricted INSN's in a dispatch
+   window WINDOW_LIST.  */
+
+static int
+count_num_restricted (rtx insn, dispatch_windows *window_list)
+{
+  enum dispatch_group group = get_insn_group (insn);
+  int imm_size;
+  int num_imm_operand;
+  int num_imm32_operand;
+  int num_imm64_operand;
+
+  if (group == disp_no_group)
+    return 0;
+
+  if (group == disp_imm)
+    {
+      imm_size = get_num_imm (insn, &num_imm_operand, &num_imm32_operand,
+			      &num_imm64_operand);
+      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
+	  || num_imm_operand + window_list->num_imm > MAX_IMM
+	  || (num_imm32_operand > 0
+	      && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
+		  || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
+	  || (num_imm64_operand > 0
+	      && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
+		  || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
+	  || (window_list->imm_size + imm_size == MAX_IMM_SIZE
+	      && num_imm64_operand > 0
+	      && ((window_list->num_imm_64 > 0
+		   && window_list->num_insn >= 2)
+		  || window_list->num_insn >= 3)))
+	return BIG;
+
+      return 1;
+    }
+
+  if ((group == disp_load_store
+       && (window_list->num_loads >= MAX_LOAD
+	   || window_list->num_stores >= MAX_STORE))
+      || ((group == disp_load
+	   || group == disp_prefetch)
+	  && window_list->num_loads >= MAX_LOAD)
+      || (group == disp_store
+	  && window_list->num_stores >= MAX_STORE))
+    return BIG;
+
+  return 1;
+}
+
+/* This function returns true if INSN satisfies dispatch rules on the
+   last window scheduled.  */
+
+static bool
+fits_dispatch_window (rtx insn)
+{
+  dispatch_windows *window_list = dispatch_window_list;
+  dispatch_windows *window_list_next = dispatch_window_list->next;
+  int num_restrict;
+  enum dispatch_group group = get_insn_group (insn);
+  enum insn_path path = get_insn_path (insn);
+  int sum;
+
+  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
+     instructions should be given the lowest priority in the
+     scheduling process in Haifa scheduler to make sure they will be
+     scheduled in the same dispatch window as the refrence to them.  */
+  if (group == disp_jcc || group == disp_cmp)
+    return false;
+
+  /* Check nonrestricted.  */
+  if (group == disp_no_group || group == disp_branch)
+    return true;
+
+  /* Get last dispatch window.  */
+  if (window_list_next)
+    window_list = window_list_next;
+
+  if (window_list->window_num == 1)
+    {
+     sum = window_list->prev->window_size + window_list->window_size;
+      if (sum == 32
+	  || (get_insn_length (insn) + sum) >= 48)
+	/* Window 1 is full.  Go for next window.  */
+	return true;
+    }
+
+  num_restrict = count_num_restricted (insn, window_list);
+
+  if (num_restrict > num_allowable_groups[group])
+    return false;
+
+  /* See if it fits in the first window.  */
+  if (window_list->window_num == 0)
+    {
+      /* The first widow should have only single and double path
+	 uops.  */
+      if (path == path_double
+	  && (window_list->num_uops + 2) > MAX_INSN)
+	return false;
+      else if (path != path_single)
+        return false;
+    }
+  return true;
+}
+
+/* Add an instruction INSN with NUM_UOPS micro-operations to the
+   dispatch window WINDOW_LIST.  */
+
+static void
+add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
+{
+  int byte_len = get_insn_length (insn);
+  int num_insn = window_list->num_insn;
+  int imm_size;
+  sched_insn_info *window = window_list->window;
+  enum dispatch_group group = get_insn_group (insn);
+  enum insn_path path = get_insn_path (insn);
+  int num_imm_operand;
+  int num_imm32_operand;
+  int num_imm64_operand;
+
+  if (!window_list->violation && group != disp_cmp
+      && !fits_dispatch_window (insn))
+    window_list->violation = true;
+
+  imm_size = get_num_imm (insn, &num_imm_operand, &num_imm32_operand,
+			  &num_imm64_operand);
+    /* Initialize window with new instruction.  */
+  window[num_insn].insn = insn;
+  window[num_insn].byte_len = byte_len;
+  window[num_insn].group = group;
+  window[num_insn].path = path;
+  window[num_insn].imm_bytes = imm_size;
+
+  window_list->window_size += byte_len;
+  window_list->num_insn = num_insn + 1;
+  window_list->num_uops = window_list->num_uops + num_uops;
+  window_list->imm_size += imm_size;
+  window_list->num_imm += num_imm_operand;
+  window_list->num_imm_32 += num_imm32_operand;
+  window_list->num_imm_64 += num_imm64_operand;
+
+  if (group == disp_store)
+    window_list->num_stores += 1;
+  else if (group == disp_load
+	   || group == disp_prefetch)
+    window_list->num_loads += 1;
+  else if (group == disp_load_store)
+    {
+      window_list->num_stores += 1;
+      window_list->num_loads += 1;
+    }
+}
+
+/* Adds a scheduled instruction, INSN, to the current dispatch window.
+   If the total bytes of instructions or the number of instructions in
+   the window exceed allowable, it allocates a new window.  */
+
+static void
+add_to_dispatch_window (rtx insn)
+{
+  int byte_len;
+  dispatch_windows *window_list;
+  dispatch_windows *next_list;
+  dispatch_windows *window0_list;
+  enum insn_path path;
+  enum dispatch_group insn_group;
+  bool insn_fits;
+  int num_insn;
+  int num_uops;
+  int window_num;
+  int insn_num_uops;
+  int sum;
+
+  if (INSN_CODE (insn) < 0)
+    return;
+
+  byte_len = get_insn_length (insn);
+  window_list = dispatch_window_list;
+  next_list = window_list->next;
+  path = get_insn_path (insn);
+  insn_group = get_insn_group (insn);
+
+  /* Get the last dispatch window.  */
+  if (next_list)
+      window_list = dispatch_window_list->next;
+
+  if (path == path_single)
+    insn_num_uops = 1;
+  else if (path == path_double)
+    insn_num_uops = 2;
+  else
+    insn_num_uops = (int) path;
+
+  /* If current window is full, get a new window.
+     Window number zero is full, if MAX_INSN uops are scheduled in it.
+     Window number one is full, if window zero's bytes plus window
+     one's bytes is 32, or if the bytes of the new instruction added
+     to the total makes it greater than 48, or it has already MAX_INSN
+     instructions in it.  */
+  num_insn = window_list->num_insn;
+  num_uops = window_list->num_uops;
+  window_num = window_list->window_num;
+  insn_fits = fits_dispatch_window (insn);
+
+  if (num_insn >= MAX_INSN
+      || num_uops + insn_num_uops > MAX_INSN
+      || !(insn_fits))
+    {
+      window_num = ~window_num & 1;
+      window_list = allocate_next_window (window_num);
+    }
+
+  if (window_num == 0)
+    {
+      add_insn_window (insn, window_list, insn_num_uops);
+      if (window_list->num_insn >= MAX_INSN
+	  && insn_group == disp_branch)
+	{
+	  process_end_window ();
+	  return;
+	}
+    }
+  else if (window_num == 1)
+    {
+      window0_list = window_list->prev;
+      sum = window0_list->window_size + window_list->window_size;
+      if (sum == 32
+	  || (byte_len + sum) >= 48)
+	{
+	  process_end_window ();
+	  window_list = dispatch_window_list;
+	}
+
+      add_insn_window (insn, window_list, insn_num_uops);
+    }
+  else
+    gcc_unreachable ();
+
+  if (is_end_basic_block (insn_group))
+    {
+      /* End of basic block is reached do end-basic-block process.  */
+      process_end_window ();
+      return;
+    }
+}
+
+/* Print the dispatch window, WINDOW_NUM, to FILE.  */
+
+static void
+print_dispatch_window (FILE *file, int window_num)
+{
+  dispatch_windows *list;
+  int i;
+
+  if (window_num == 0)
+    list = dispatch_window_list;
+  else
+    list = dispatch_window_list1;
+
+  fprintf (file, "Window #%d:\n", list->window_num);
+  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
+	  list->num_insn, list->num_uops, list->window_size);
+  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d,\
+ imm_size = %d\n",
+	   list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
+
+  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
+	  list->num_stores);
+  fprintf (file, " insn info:\n");
+
+  for (i = 0; i < MAX_INSN; i++)
+    {
+      if (!list->window[i].insn)
+	break;
+      fprintf (file, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d\
+ byte_len[%d] = %d, imm_bytes[%d] = %d\n",
+	      i, group_name[list->window[i].group],
+	      i, (void *)list->window[i].insn,
+	      i, list->window[i].path,
+	      i, list->window[i].byte_len,
+	      i, list->window[i].imm_bytes);
+    }
+}
+
+/* Print to stderr a dispatch window.  */
+
+DEBUG_FUNCTION void
+debug_print_dispatch_window (int window_num)
+{
+  print_dispatch_window (stderr, window_num);
+}
+
+/* Print INSN dispatch information to FILE.  */
+
+static void
+print_insn_dispatch_info (FILE *file, rtx insn)
+{
+  int byte_len;
+  enum insn_path path;
+  enum dispatch_group group;
+  int imm_size;
+  int num_imm_operand;
+  int num_imm32_operand;
+  int num_imm64_operand;
+
+  if (INSN_CODE (insn) < 0)
+    return;
+
+  byte_len = get_insn_length (insn);
+  path = get_insn_path (insn);
+  group = get_insn_group (insn);
+  imm_size = get_num_imm (insn, &num_imm_operand, &num_imm32_operand,
+			  &num_imm64_operand);
+
+  fprintf (file, " insn info:\n");
+  fprintf (file, "  group = %s, path = %d, byte_len = %d\n", group_name[group],
+	   path, byte_len);
+  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d,\
+ imm_size = %d\n",
+	   num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
+}
+
+/* Print INSN dispatch information.  */
+
+DEBUG_FUNCTION void
+debug_print_insn_dispatch_info (rtx insn)
+{
+  print_insn_dispatch_info (stderr, insn);
+}
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-i386.h"