diff mbox

Fwd: [PATCH] Scheduling result adjustment to enable macro-fusion

Message ID CA+4CFy61Z69kcA=je6+-H=vWcdKurqi4VafnZ+RpiV9b-cMRug@mail.gmail.com
State New
Headers show

Commit Message

Wei Mi Nov. 27, 2013, 10:31 p.m. UTC
>> Hmm, maybe attack from the other direction? -- could we clear SCHED_GROUP_P
>> for each insn at the start of this loop in sched_analyze?
>>
>> It's not as clean in the sense that SCHED_GROUP_P "escapes" the scheduler,
>> but it might be an option.
>>
>>    for (insn = head;; insn = NEXT_INSN (insn))
>>     {
>>
>>       if (INSN_P (insn))
>>         {
>>           /* And initialize deps_lists.  */
>>           sd_init_insn (insn);
>>         }
>>
>>       deps_analyze_insn (deps, insn);
>>
>>       if (insn == tail)
>>         {
>>           if (sched_deps_info->use_cselib)
>>             cselib_finish ();
>>           return;
>>         }
>>     }
>> Jeff
>>>
>>>
>>
>
> Thanks for the suggestion. It looks workable. Then I need to move the
> SCHED_GROUP_P setting for macrofusion from sched_init to a place
> inside sched_analyze after the SCHED_GROUP_P cleanup. It will be more
> consistent with the settings for cc0 setter-user group and call group,
> which are both inside sched_analyze.
> I am trying this method...
>
> Thanks,
> Wei.

Here is the patch. The patch does the SCHED_GROUP_P cleanup in
sched_analyze before deps_analyze_insn set SCHED_GROUP_P and chain the
insn with prev insns. And it move try_group_insn for macrofusion from
sched_init to sched_analyze_insn.

bootstrap and regression pass on x86_64-linux-gnu. Is it ok?

Thanks,
Wei.

2013-11-27  Wei Mi  <wmi@google.com>

        PR rtl-optimization/59020
        * sched-deps.c (try_group_insn): Move it from haifa-sched.c to here.
        (sched_analyze_insn): Call try_group_insn.
        (sched_analyze): Cleanup SCHED_GROUP_P before start the analysis.
        * haifa-sched.c (try_group_insn): Moved to sched-deps.c.
        (group_insns_for_macro_fusion): Removed.
        (sched_init): Remove calling group_insns_for_macro_fusion.

2013-11-27  Wei Mi  <wmi@google.com>

        PR rtl-optimization/59020
        * testsuite/gcc.dg/pr59020.c: New.
        * testsuite/gcc.dg/macro-fusion-1.c: New.
        * testsuite/gcc.dg/macro-fusion-2.c: New.

Comments

Jeff Law Dec. 3, 2013, 4:53 a.m. UTC | #1
On 11/27/13 15:31, Wei Mi wrote:
>>> Hmm, maybe attack from the other direction? -- could we clear SCHED_GROUP_P
>>> for each insn at the start of this loop in sched_analyze?
>>>
>>> It's not as clean in the sense that SCHED_GROUP_P "escapes" the scheduler,
>>> but it might be an option.
>>>
>>>     for (insn = head;; insn = NEXT_INSN (insn))
>>>      {
>>>
>>>        if (INSN_P (insn))
>>>          {
>>>            /* And initialize deps_lists.  */
>>>            sd_init_insn (insn);
>>>          }
>>>
>>>        deps_analyze_insn (deps, insn);
>>>
>>>        if (insn == tail)
>>>          {
>>>            if (sched_deps_info->use_cselib)
>>>              cselib_finish ();
>>>            return;
>>>          }
>>>      }
>>> Jeff
>>>>
>>>>
>>>
>>
>> Thanks for the suggestion. It looks workable. Then I need to move the
>> SCHED_GROUP_P setting for macrofusion from sched_init to a place
>> inside sched_analyze after the SCHED_GROUP_P cleanup. It will be more
>> consistent with the settings for cc0 setter-user group and call group,
>> which are both inside sched_analyze.
>> I am trying this method...
>>
>> Thanks,
>> Wei.
>
> Here is the patch. The patch does the SCHED_GROUP_P cleanup in
> sched_analyze before deps_analyze_insn set SCHED_GROUP_P and chain the
> insn with prev insns. And it move try_group_insn for macrofusion from
> sched_init to sched_analyze_insn.
>
> bootstrap and regression pass on x86_64-linux-gnu. Is it ok?
>
> Thanks,
> Wei.
>
> 2013-11-27  Wei Mi  <wmi@google.com>
>
>          PR rtl-optimization/59020
>          * sched-deps.c (try_group_insn): Move it from haifa-sched.c to here.
>          (sched_analyze_insn): Call try_group_insn.
>          (sched_analyze): Cleanup SCHED_GROUP_P before start the analysis.
>          * haifa-sched.c (try_group_insn): Moved to sched-deps.c.
>          (group_insns_for_macro_fusion): Removed.
>          (sched_init): Remove calling group_insns_for_macro_fusion.
>
> 2013-11-27  Wei Mi  <wmi@google.com>
>
>          PR rtl-optimization/59020
>          * testsuite/gcc.dg/pr59020.c: New.
>          * testsuite/gcc.dg/macro-fusion-1.c: New.
>          * testsuite/gcc.dg/macro-fusion-2.c: New.
This is fine.  Thanks for your patience,

Jeff
diff mbox

Patch

Index: sched-deps.c
===================================================================
--- sched-deps.c        (revision 204923)
+++ sched-deps.c        (working copy)
@@ -2820,6 +2820,37 @@  sched_analyze_2 (struct deps_desc *deps,
     sched_deps_info->finish_rhs ();
 }

+/* Try to group comparison and the following conditional jump INSN if
+   they're already adjacent. This is to prevent scheduler from scheduling
+   them apart.  */
+
+static void
+try_group_insn (rtx insn)
+{
+  unsigned int condreg1, condreg2;
+  rtx cc_reg_1;
+  rtx prev;
+
+  if (!any_condjump_p (insn))
+    return;
+
+  targetm.fixed_condition_code_regs (&condreg1, &condreg2);
+  cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
+  prev = prev_nonnote_nondebug_insn (insn);
+  if (!reg_referenced_p (cc_reg_1, PATTERN (insn))
+      || !prev
+      || !modified_in_p (cc_reg_1, prev))
+    return;
+
+  /* Different microarchitectures support macro fusions for different
+     combinations of insn pairs.  */
+  if (!targetm.sched.macro_fusion_pair_p
+      || !targetm.sched.macro_fusion_pair_p (prev, insn))
+    return;
+
+  SCHED_GROUP_P (insn) = 1;
+}
+
 /* Analyze an INSN with pattern X to find all dependencies.  */
 static void
 sched_analyze_insn (struct deps_desc *deps, rtx x, rtx insn)
@@ -2843,6 +2874,11 @@  sched_analyze_insn (struct deps_desc *de
   can_start_lhs_rhs_p = (NONJUMP_INSN_P (insn)
                         && code == SET);

+  /* Group compare and branch insns for macro-fusion.  */
+  if (targetm.sched.macro_fusion_p
+      && targetm.sched.macro_fusion_p ())
+    try_group_insn (insn);
+
   if (may_trap_p (x))
     /* Avoid moving trapping instructions across function calls that might
        not always return.  */
@@ -3733,6 +3769,10 @@  sched_analyze (struct deps_desc *deps, r
        {
          /* And initialize deps_lists.  */
          sd_init_insn (insn);
+         /* Clean up SCHED_GROUP_P which may have been set by last
+            scheduler pass.  */
+         if (SCHED_GROUP_P (insn))
+           SCHED_GROUP_P (insn) = 0;
        }

       deps_analyze_insn (deps, insn);
Index: haifa-sched.c
===================================================================
--- haifa-sched.c       (revision 204923)
+++ haifa-sched.c       (working copy)
@@ -6554,50 +6554,6 @@  setup_sched_dump (void)
                ? stderr : dump_file);
 }

-/* Try to group comparison and the following conditional jump INSN if
-   they're already adjacent. This is to prevent scheduler from scheduling
-   them apart.  */
-
-static void
-try_group_insn (rtx insn)
-{
-  unsigned int condreg1, condreg2;
-  rtx cc_reg_1;
-  rtx prev;
-
-  if (!any_condjump_p (insn))
-    return;
-
-  targetm.fixed_condition_code_regs (&condreg1, &condreg2);
-  cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
-  prev = prev_nonnote_nondebug_insn (insn);
-  if (!reg_referenced_p (cc_reg_1, PATTERN (insn))
-      || !prev
-      || !modified_in_p (cc_reg_1, prev))
-    return;
-
-  /* Different microarchitectures support macro fusions for different
-     combinations of insn pairs.  */
-  if (!targetm.sched.macro_fusion_pair_p
-      || !targetm.sched.macro_fusion_pair_p (prev, insn))
-    return;
-
-  SCHED_GROUP_P (insn) = 1;
-}
-
-/* If the last cond jump and the cond register defining insn are consecutive
-   before scheduling, we want them to be in a schedule group. This is good
-   for performance on microarchitectures supporting macro-fusion.  */
-
-static void
-group_insns_for_macro_fusion ()
-{
-  basic_block bb;
-
-  FOR_EACH_BB (bb)
-    try_group_insn (BB_END (bb));
-}
-
 /* Initialize some global state for the scheduler.  This function works
    with the common data shared between all the schedulers.  It is called
    from the scheduler specific initialization routine.  */
@@ -6726,11 +6682,6 @@  sched_init (void)
     }

   curr_state = xmalloc (dfa_state_size);
-
-  /* Group compare and branch insns for macro-fusion.  */
-  if (targetm.sched.macro_fusion_p
-      && targetm.sched.macro_fusion_p ())
-    group_insns_for_macro_fusion ();
 }

 static void haifa_init_only_bb (basic_block, basic_block);
Index: testsuite/gcc.dg/macro-fusion-1.c
===================================================================
--- testsuite/gcc.dg/macro-fusion-1.c   (revision 0)
+++ testsuite/gcc.dg/macro-fusion-1.c   (revision 0)
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -mtune=corei7 -fdump-rtl-sched2" } */
+/* { dg-final { scan-rtl-dump-not
"compare.*insn.*jump_insn.*jump_insn" "sched2" } } */
+
+int a[100];
+
+double bar (double sum)
+{
+  int i;
+  for (i = 0; i < 1000000; i++)
+   sum += (0.5 + (a[i%100] - 128));
+  return sum;
+}
Index: testsuite/gcc.dg/macro-fusion-2.c
===================================================================
--- testsuite/gcc.dg/macro-fusion-2.c   (revision 0)
+++ testsuite/gcc.dg/macro-fusion-2.c   (revision 0)
@@ -0,0 +1,16 @@ 
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -mtune=corei7-avx -fdump-rtl-sched2" } */
+/* { dg-final { scan-rtl-dump-not
"compare.*insn.*jump_insn.*jump_insn" "sched2" } } */
+
+int a[100];
+
+double bar (double sum)
+{
+  int i = 100000;
+  while (i != 0)
+    {
+      sum += (0.5 + (a[i%100] - 128));
+      i--;
+    }
+  return sum;
+}
Index: testsuite/gcc.dg/pr59020.c
===================================================================
--- testsuite/gcc.dg/pr59020.c  (revision 0)
+++ testsuite/gcc.dg/pr59020.c  (revision 0)
@@ -0,0 +1,15 @@ 
+/* PR rtl-optimization/59020 */
+
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -fmodulo-sched -fno-inline -march=corei7" } */
+
+int a, b, d;
+unsigned c;
+
+void f()
+{
+  unsigned q;
+  for(; a; a++)
+    if(((c %= d && 1) ? : 1) & 1)
+      for(; b; q++);
+}