diff mbox

Allocate all target globals using GC for SWITCHABLE_TARGETs

Message ID 20140108183415.GK892@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Jan. 8, 2014, 6:34 p.m. UTC
On Wed, Jan 08, 2014 at 01:45:40PM +0100, Jakub Jelinek wrote:
> I'd like to get rid of all the XCNEW calls in target-globals.c as a
> follow-up.

Here it is.  The rationale is both to avoid many separate heap allocations
and if TARGET_OPTION_NODE is no longer needed (all FUNCTION_DECLs
referencing it are e.g. optimized away, say static unused functions)
to avoid leaking memory.

Bootstrapped/regtested on x86_64-linux and i686-linux (together
with the i386 SWITCHABLE_TARGET patch).

Though, looking at the sizes, i686-linux allocates 0x67928
bytes which I think with ggc-page.c we allocate 0.5MB for it (acceptable),
on x86_64-linux the allocation size is 0x83aa8 and thus only ~ 15KB over
to fit into 0.5MB, thus I think we allocate 1MB.
So, if we wanted to tune for x86_64, we could not allocate say
target_flag_state (size 0x5008) in the big chunk, but instead make
it GTY((atomic)) and allocate separately.

Or perhaps do that for other very large structs?  In any case, that doesn't
look like something that probably would need to be retuned for every
release.

The current sizes of the structs are:
struct target_globals		0x80		0x40
struct target_flag_state	0x20		0x20
struct target_regs		0x5008		0x5008
struct target_hard_regs		0x35c8		0x33f8
struct target_reload		0xef70		0xef70
struct target_expmed		0x180b0		0xf4b0
struct target_optabs		0x4f0		0x4b9
struct target_cfgloop		0x1c		0x1c
struct target_ira		0x9628		0x9620
struct target_ira_int		0x3fca8		0x322e4
struct target_lra_int		0xa718		0x4e70
struct target_builtins		0x268		0x268
struct target_gcse		0x62		0x62
struct target_bb_reorder	0x4		0x4
struct target_lower_subreg	0x24c		0x18c

Perhaps use cut-off of 4KB with current sizes, anything below that
would be allocated in the single block, anything above it separately.
So 7 structs allocated together, 7 separately.

2014-01-08  Jakub Jelinek  <jakub@redhat.com>

	* target-globals.c (save_target_globals): Allocate most of the
	structs using GC in payload of target_globals struct instead
	of allocating them on the heap.



	Jakub

Comments

Richard Sandiford Jan. 8, 2014, 7:41 p.m. UTC | #1
Jakub Jelinek <jakub@redhat.com> writes:
> 2014-01-08  Jakub Jelinek  <jakub@redhat.com>
>
> 	* target-globals.c (save_target_globals): Allocate most of the
> 	structs using GC in payload of target_globals struct instead
> 	of allocating them on the heap.

Looks good to me FWIW.  I don't know either way about the one-big-blob thing.

Note that we'll still leak memory when deleting TARGET_OPTION_NODEs
because target_ira_int and target_lra_int have pointers to heap-allocated
storage.

Thanks,
Richard
Jakub Jelinek Jan. 8, 2014, 7:54 p.m. UTC | #2
On Wed, Jan 08, 2014 at 07:41:26PM +0000, Richard Sandiford wrote:
> Jakub Jelinek <jakub@redhat.com> writes:
> > 2014-01-08  Jakub Jelinek  <jakub@redhat.com>
> >
> > 	* target-globals.c (save_target_globals): Allocate most of the
> > 	structs using GC in payload of target_globals struct instead
> > 	of allocating them on the heap.
> 
> Looks good to me FWIW.  I don't know either way about the one-big-blob thing.
> 
> Note that we'll still leak memory when deleting TARGET_OPTION_NODEs
> because target_ira_int and target_lra_int have pointers to heap-allocated
> storage.

Yeah, perhaps that is something to fix incrementally.

But, at least we will not leak ~ 0.5MB per (unique) target attribute
used on some unused function.

	Jakub
Richard Henderson Jan. 9, 2014, 4:25 p.m. UTC | #3
On 01/08/2014 10:34 AM, Jakub Jelinek wrote:
>    struct target_globals *g;
> -
> -  g = ggc_alloc_target_globals ();
> -  g->flag_state = XCNEW (struct target_flag_state);
> -  g->regs = XCNEW (struct target_regs);
> +  struct target_globals_extra {
> +    struct target_globals g;
> +    struct target_flag_state flag_state;
> +    struct target_regs regs;
> +    struct target_hard_regs hard_regs;
> +    struct target_reload reload;
> +    struct target_expmed expmed;
> +    struct target_optabs optabs;
> +    struct target_cfgloop cfgloop;
> +    struct target_ira ira;
> +    struct target_ira_int ira_int;
> +    struct target_lra_int lra_int;
> +    struct target_builtins builtins;
> +    struct target_gcse gcse;
> +    struct target_bb_reorder bb_reorder;
> +    struct target_lower_subreg lower_subreg;
> +  } *p;
> +  p = (struct target_globals_extra *)
> +      ggc_internal_cleared_alloc_stat (sizeof (struct target_globals_extra)
> +				       PASS_MEM_STAT);
> +  g = (struct target_globals *) p;
> +  g->flag_state = &p->flag_state;
> +  g->regs = &p->regs;
>    g->rtl = ggc_alloc_cleared_target_rtl ();

So, we're relying on something pointing to G, thus keeping the whole P alive?
I suppose that works but it's fairly ugly that's for sure.

As for the extra ~500k wasted on x86_64, we can either fix our gc allocator to
do something sensible with these high-order allocations, or we can do nearly
this same trick only with libc.  I.e.

  struct target_globals_extra {
    struct target_flag_state flag_state;
    struct target_regs regs;
    struct target_hard_regs hard_regs;
    struct target_reload reload;
    struct target_expmed expmed;
    struct target_optabs optabs;
    struct target_cfgloop cfgloop;
    struct target_ira ira;
    struct target_ira_int ira_int;
    struct target_lra_int lra_int;
    struct target_builtins builtins;
    struct target_gcse gcse;
    struct target_bb_reorder bb_reorder;
    struct target_lower_subreg lower_subreg;
  } *p;

  g = ggc_alloc_target_globals ();
  p = XCNEW (target_globals_extra);
  ...


r~
Jakub Jelinek Jan. 9, 2014, 4:35 p.m. UTC | #4
On Thu, Jan 09, 2014 at 08:25:31AM -0800, Richard Henderson wrote:
> > +  p = (struct target_globals_extra *)
> > +      ggc_internal_cleared_alloc_stat (sizeof (struct target_globals_extra)
> > +				       PASS_MEM_STAT);
> > +  g = (struct target_globals *) p;
> > +  g->flag_state = &p->flag_state;
> > +  g->regs = &p->regs;
> >    g->rtl = ggc_alloc_cleared_target_rtl ();
> 
> So, we're relying on something pointing to G, thus keeping the whole P alive?
> I suppose that works but it's fairly ugly that's for sure.

The separate structures aren't really installed individually, they are
always installed together through restore_target_globals.  As long as the
any FUNCTION_DECL with such TARGET_OPTION_NODE exists, it will be reachable.

The reason why it needs to be GC is:
1) in two of these target_* structures there are embedded rtxes etc. the GC
needs to see
2) if all FUNCTION_DECL with such combination of target attributes are GCed,
we'd leak memory

> As for the extra ~500k wasted on x86_64, we can either fix our gc allocator to
> do something sensible with these high-order allocations, or we can do nearly
> this same trick only with libc.  I.e.
> 
>   struct target_globals_extra {
>     struct target_flag_state flag_state;
>     struct target_regs regs;
>     struct target_hard_regs hard_regs;
>     struct target_reload reload;
>     struct target_expmed expmed;
>     struct target_optabs optabs;
>     struct target_cfgloop cfgloop;
>     struct target_ira ira;
>     struct target_ira_int ira_int;
>     struct target_lra_int lra_int;
>     struct target_builtins builtins;
>     struct target_gcse gcse;
>     struct target_bb_reorder bb_reorder;
>     struct target_lower_subreg lower_subreg;
>   } *p;
> 
>   g = ggc_alloc_target_globals ();
>   p = XCNEW (target_globals_extra);
>   ...

That would be fine for 1), but would mean 2).  It is also fine to GC
allocate each structure individually, but some (like bb_reorder) are say
just 4 bytes long, so it might be overkill.

As noted by Richard S., IRA/LRA still puts pointers to heap allocated
objects into some of the structures, so there is some leak anyway, but not
as big.

	Jakub
Richard Henderson Jan. 9, 2014, 4:42 p.m. UTC | #5
On 01/09/2014 08:35 AM, Jakub Jelinek wrote:
> That would be fine for 1), but would mean 2).  It is also fine to GC
> allocate each structure individually, but some (like bb_reorder) are say
> just 4 bytes long, so it might be overkill.


Hmm..  Perhaps define the whole structure as you do, but somewhere global
enough that ggc-page.c can see it, and add to the extra_order_size_table?
I don't know how much memory wastage there would be there, but I can't imagine
it's as much as 0.5MB.


r~
diff mbox

Patch

--- gcc/target-globals.c.jj	2014-01-08 10:23:22.000000000 +0100
+++ gcc/target-globals.c	2014-01-08 14:00:13.183231122 +0100
@@ -68,24 +68,43 @@  struct target_globals *
 save_target_globals (void)
 {
   struct target_globals *g;
-
-  g = ggc_alloc_target_globals ();
-  g->flag_state = XCNEW (struct target_flag_state);
-  g->regs = XCNEW (struct target_regs);
+  struct target_globals_extra {
+    struct target_globals g;
+    struct target_flag_state flag_state;
+    struct target_regs regs;
+    struct target_hard_regs hard_regs;
+    struct target_reload reload;
+    struct target_expmed expmed;
+    struct target_optabs optabs;
+    struct target_cfgloop cfgloop;
+    struct target_ira ira;
+    struct target_ira_int ira_int;
+    struct target_lra_int lra_int;
+    struct target_builtins builtins;
+    struct target_gcse gcse;
+    struct target_bb_reorder bb_reorder;
+    struct target_lower_subreg lower_subreg;
+  } *p;
+  p = (struct target_globals_extra *)
+      ggc_internal_cleared_alloc_stat (sizeof (struct target_globals_extra)
+				       PASS_MEM_STAT);
+  g = (struct target_globals *) p;
+  g->flag_state = &p->flag_state;
+  g->regs = &p->regs;
   g->rtl = ggc_alloc_cleared_target_rtl ();
-  g->hard_regs = XCNEW (struct target_hard_regs);
-  g->reload = XCNEW (struct target_reload);
-  g->expmed = XCNEW (struct target_expmed);
-  g->optabs = XCNEW (struct target_optabs);
+  g->hard_regs = &p->hard_regs;
+  g->reload = &p->reload;
+  g->expmed = &p->expmed;
+  g->optabs = &p->optabs;
   g->libfuncs = ggc_alloc_cleared_target_libfuncs ();
-  g->cfgloop = XCNEW (struct target_cfgloop);
-  g->ira = XCNEW (struct target_ira);
-  g->ira_int = XCNEW (struct target_ira_int);
-  g->lra_int = XCNEW (struct target_lra_int);
-  g->builtins = XCNEW (struct target_builtins);
-  g->gcse = XCNEW (struct target_gcse);
-  g->bb_reorder = XCNEW (struct target_bb_reorder);
-  g->lower_subreg = XCNEW (struct target_lower_subreg);
+  g->cfgloop = &p->cfgloop;
+  g->ira = &p->ira;
+  g->ira_int = &p->ira_int;
+  g->lra_int = &p->lra_int;
+  g->builtins = &p->builtins;
+  g->gcse = &p->gcse;
+  g->bb_reorder = &p->bb_reorder;
+  g->lower_subreg = &p->lower_subreg;
   restore_target_globals (g);
   init_reg_sets ();
   target_reinit ();