diff mbox

RFC: LRA for x86/x86-64 [0/9]

Message ID CABu31nOzZXbhPM+coz_ZsYLhLAY6+ytxWySVN620gJsZ5G73Dg@mail.gmail.com
State New
Headers show

Commit Message

Steven Bosscher Oct. 2, 2012, 10:36 p.m. UTC
On Mon, Oct 1, 2012 at 1:11 AM, Steven Bosscher <stevenb.gcc@gmail.com> wrote:
> On Mon, Oct 1, 2012 at 12:44 AM, Vladimir Makarov <vmakarov@redhat.com> wrote:
>>   Actually, I don't see there is a problem with LRA right now.  I think we
>> should first to solve a whole compiler memory footprint problem for this
>> test because cpu utilization is very small for this test.  On my machine
>> with 8GB, the maximal resident space achieves almost 8GB.
>
> Sure. But note that up to IRA, the max. resident memory size of the
> test case is "only" 3.6 GB. IRA/reload allocate more than 4GB,
> doubling the foot print. If you want to solve that first, that'd be
> great of course...

BTW, I get these numbers from a hack I've made in passes.c to trace
the resident memory size and the size of the resident bitmap obstacks
when I was working on reducing the memory foot print of the test case
a couple of months ago. It's obviously not something I'd propose for
including in the trunk, but I've found this hack to be quite helpful
to identify where all the memory goes.

The output looks like this (for the LRA branch on PR54146):

...
current pass =                          mode_sw (193)   362020
3625951232   3581693952      9289728     17849088        16256
current pass =                          asmcons (194)   362020
3625951232   3581693952      9289728     17849088        16256
current pass =                              ira (197)   362020
3625951232   3581693952      9289728     17849088        16256
current pass =                           reload (198)   362020
6812741632   6732029952      9289728     17849088       105664
...

Note the big jump in the 2nd and 3rd numbers (the total and resident
memory sizes, in bytes) from ira to reload: between the start of the
IRA pass and the start of the reload pass, the memory footprint almost
doubles.

BTW In the same output I'm now including the live range compression
results from LRA. For this test case:
Compressing live ranges: from 1742579 to 554532 - 31%
Compressing live ranges: from 1742569 to 73069 - 4%
LRA_iter_stats:220333;1335056;457327;2;3
(1st number is # of basic blocks, 2nd is max_uid, 3rd is max_reg_num,
4th and 5th are iteration counts of the main outer and inner loops of
LRA). So LRA isn't really iterating much on this test case.

Ciao!
Steven

   lra_eliminate (true);
   lra_hard_reg_substitution ();
diff mbox

Patch

Index: passes.c
===================================================================
--- passes.c    (revision 191858)
+++ passes.c    (working copy)
@@ -79,15 +79,69 @@  struct opt_pass *current_pass;

 static void register_pass_name (struct opt_pass *, const char *);

+typedef struct
+{
+  unsigned long size,resident,share,text,lib,data,dt;
+} statm_t;
+
+static void
+read_off_memory_status (statm_t &result)
+{
+  const char* statm_path = "/proc/self/statm";
+
+  FILE *f = fopen(statm_path,"r");
+  if (!f)
+    {
+      perror (statm_path);
+      gcc_unreachable ();
+    }
+  if (7 != fscanf (f, "%lu %lu %lu %lu %lu %lu %lu",
+                  &result.size, &result.resident, &result.share,
+                  &result.text, &result.lib, &result.data,
+                  &result.dt))
+    {
+      perror (statm_path);
+      gcc_unreachable ();
+    }
+  fclose(f);
+}
+
 /* Call from anywhere to find out what pass this is.  Useful for
    printing out debugging information deep inside an service
    routine.  */
+
+#include "bitmap.h"
+#include "regset.h"
+
+static size_t // NB difference from obstack_memory_used
+obstack_memory_used2 (struct obstack *h)
+{
+  struct _obstack_chunk* lp;
+  size_t nbytes = 0;
+
+  for (lp = h->chunk; lp != 0; lp = lp->prev)
+    {
+      nbytes += (size_t) (lp->limit - (char *) lp);
+    }
+  return nbytes;
+}
+
 void
 print_current_pass (FILE *file)
 {
   if (current_pass)
-    fprintf (file, "current pass = %s (%d)\n",
-            current_pass->name, current_pass->static_pass_number);
+    {
+      statm_t statm;
+      int pagesize = getpagesize ();
+      unsigned bos = obstack_memory_used2 (&bitmap_default_obstack.obstack);
+      unsigned ros = obstack_memory_used2 (&reg_obstack.obstack);
+      read_off_memory_status (statm);
+      fprintf (file, "current pass = %32s (%3d) %8d %12lu %12lu %12lu %12u %12u\n",
+              current_pass->name, current_pass->static_pass_number,
+              max_reg_num (),
+              statm.size * pagesize, statm.resident * pagesize,
+              statm.share * pagesize, bos, ros);
+    }
   else
     fprintf (file, "no current pass.\n");
 }
@@ -2113,7 +2167,7 @@  execute_one_pass (struct opt_pass *pass)
       current_pass = NULL;
       return false;
     }
-
+print_current_pass (stderr);
   /* Pass execution event trigger: useful to identify passes being
      executed.  */
   invoke_plugin_callbacks (PLUGIN_PASS_EXECUTION, pass);
Index: lra.c
===================================================================
--- lra.c       (revision 191858)
+++ lra.c       (working copy)
@@ -2249,10 +2243,13 @@  lra (FILE *f)
   bitmap_initialize (&lra_split_pseudos, &reg_obstack);
   bitmap_initialize (&lra_optional_reload_pseudos, &reg_obstack);
   live_p = false;
+  int _inner_loop = 0, _outer_loop = 0;
   for (;;)
     {
+      _outer_loop++;
       for (;;)
        {
+         _inner_loop++;
          bitmap_clear (&lra_optional_reload_pseudos);
          /* We should try to assign hard registers to scratches even
             if there were no RTL transformations in
@@ -2271,6 +2268,7 @@  lra (FILE *f)
             to use a constant pool.  */
          lra_eliminate (false);
          lra_inheritance ();
+
          /* We need live ranges for lra_assign -- so build them.  */
          lra_create_live_ranges (true);
          live_p = true;
@@ -2304,6 +2302,7 @@  lra (FILE *f)
       bitmap_clear (&lra_matched_pseudos);
       lra_constraint_iter_after_spill = 0;
     }
+  fprintf (stderr, "\nLRA_iter_stats:%u;%u;%u;%u;%u\n", n_basic_blocks, get_max_uid (), max_reg_num (), _outer_loop, _inner_loop);
   restore_scratches ();