diff mbox

[LTO] Reduce WPA memory usage

Message ID alpine.LSU.2.11.1404031300470.31108@zhemvz.fhfr.qr
State New
Headers show

Commit Message

Richard Biener April 3, 2014, 11:07 a.m. UTC
This reduces WPA memory usage at stream-out time by avoiding to
allocate the streamer cache node array and by freeing the global
out-decl-states hash tables (we do that already for the fn-decl-states).

LTO bootstrapped and bootstrapped on x86_64-unknown-linux-gnu, testing
in progress.

Ok?

Not sure if it will make a notable difference.  The pointer-map
overhead is at least 4 times of that of the vector, if we make
pointer-map behave like hash_table (3/4 full) or htab_t
(half full) then that would improve (we could even make that
configurable at pointer-set/map construction time).  Like with

       const void **old_keys = slots;

might be worth checking how much memory we save from the above.

Thanks,
Richard.

2014-04-03  Richard Biener  <rguenther@suse.de>

	* tree-streamer.h (struct streamer_tree_cache_d): Add next_idx
	member.
	(streamer_tree_cache_create): Adjust.
	* tree-streamer.c (streamer_tree_cache_add_to_node_array): Adjust
	to allow optional nodes array.
	(streamer_tree_cache_insert_1): Use next_idx to assign idx.
	(streamer_tree_cache_append): Likewise.
	(streamer_tree_cache_create): Create nodes array optionally
	as specified by parameter.
	* lto-streamer-out.c (create_output_block): Avoid maintaining
	the node array in the writer cache.
	(DFS_write_tree): Remove assertion.
	(produce_asm_for_decls): Free the out decl state hash table
	early.
	* lto-streamer-in.c (lto_data_in_create): Adjust for
	streamer_tree_cache_create prototype change.

Index: gcc/tree-streamer.c
===================================================================
*** gcc/tree-streamer.c	(revision 209018)
--- gcc/tree-streamer.c	(working copy)
*************** static void
*** 101,120 ****
  streamer_tree_cache_add_to_node_array (struct streamer_tree_cache_d *cache,
  				       unsigned ix, tree t, hashval_t hash)
  {
!   /* Make sure we're either replacing an old element or
!      appending consecutively.  */
!   gcc_assert (ix <= cache->nodes.length ());
! 
!   if (ix == cache->nodes.length ())
      {
!       cache->nodes.safe_push (t);
!       if (cache->hashes.exists ())
! 	cache->hashes.safe_push (hash);
      }
!   else
      {
!       cache->nodes[ix] = t;
!       if (cache->hashes.exists ())
  	cache->hashes[ix] = hash;
      }
  }
--- 101,119 ----
  streamer_tree_cache_add_to_node_array (struct streamer_tree_cache_d *cache,
  				       unsigned ix, tree t, hashval_t hash)
  {
!   /* We're either replacing an old element or appending consecutively.  */
!   if (cache->nodes.exists ())
      {
!       if (cache->nodes.length () == ix)
! 	cache->nodes.safe_push (t);
!       else
! 	cache->nodes[ix] = t;
      }
!   if (cache->hashes.exists ())
      {
!       if (cache->hashes.length () == ix)
! 	cache->hashes.safe_push (hash);
!       else
  	cache->hashes[ix] = hash;
      }
  }
*************** streamer_tree_cache_insert_1 (struct str
*** 146,152 ****
      {
        /* Determine the next slot to use in the cache.  */
        if (insert_at_next_slot_p)
! 	ix = cache->nodes.length ();
        else
  	ix = *ix_p;
         *slot = ix;
--- 145,151 ----
      {
        /* Determine the next slot to use in the cache.  */
        if (insert_at_next_slot_p)
! 	ix = cache->next_idx++;
        else
  	ix = *ix_p;
         *slot = ix;
*************** void
*** 211,217 ****
  streamer_tree_cache_append (struct streamer_tree_cache_d *cache,
  			    tree t, hashval_t hash)
  {
!   unsigned ix = cache->nodes.length ();
    if (!cache->node_map)
      streamer_tree_cache_add_to_node_array (cache, ix, t, hash);
    else
--- 210,216 ----
  streamer_tree_cache_append (struct streamer_tree_cache_d *cache,
  			    tree t, hashval_t hash)
  {
!   unsigned ix = cache->next_idx++;
    if (!cache->node_map)
      streamer_tree_cache_add_to_node_array (cache, ix, t, hash);
    else
*************** preload_common_nodes (struct streamer_tr
*** 326,332 ****
  /* Create a cache of pickled nodes.  */
  
  struct streamer_tree_cache_d *
! streamer_tree_cache_create (bool with_hashes, bool with_map)
  {
    struct streamer_tree_cache_d *cache;
  
--- 325,331 ----
  /* Create a cache of pickled nodes.  */
  
  struct streamer_tree_cache_d *
! streamer_tree_cache_create (bool with_hashes, bool with_map, bool with_vec)
  {
    struct streamer_tree_cache_d *cache;
  
*************** streamer_tree_cache_create (bool with_ha
*** 334,340 ****
  
    if (with_map)
      cache->node_map = new pointer_map<unsigned>;
!   cache->nodes.create (165);
    if (with_hashes)
      cache->hashes.create (165);
  
--- 333,341 ----
  
    if (with_map)
      cache->node_map = new pointer_map<unsigned>;
!   cache->next_idx = 0;
!   if (with_vec)
!     cache->nodes.create (165);
    if (with_hashes)
      cache->hashes.create (165);
  
Index: gcc/tree-streamer.h
===================================================================
*** gcc/tree-streamer.h	(revision 209018)
--- gcc/tree-streamer.h	(working copy)
*************** struct streamer_tree_cache_d
*** 52,57 ****
--- 52,60 ----
    vec<tree> nodes;
    /* The node hashes (if available).  */
    vec<hashval_t> hashes;
+ 
+   /* Next index to assign.  */
+   unsigned next_idx;
  };
  
  /* Return true if tree node EXPR should be streamed as a builtin.  For
*************** void streamer_tree_cache_append (struct
*** 97,103 ****
  				 hashval_t);
  bool streamer_tree_cache_lookup (struct streamer_tree_cache_d *, tree,
  				 unsigned *);
! struct streamer_tree_cache_d *streamer_tree_cache_create (bool, bool);
  void streamer_tree_cache_delete (struct streamer_tree_cache_d *);
  
  /* Return the tree node at slot IX in CACHE.  */
--- 100,106 ----
  				 hashval_t);
  bool streamer_tree_cache_lookup (struct streamer_tree_cache_d *, tree,
  				 unsigned *);
! struct streamer_tree_cache_d *streamer_tree_cache_create (bool, bool, bool);
  void streamer_tree_cache_delete (struct streamer_tree_cache_d *);
  
  /* Return the tree node at slot IX in CACHE.  */
Index: gcc/lto-streamer-out.c
===================================================================
*** gcc/lto-streamer-out.c	(revision 209018)
--- gcc/lto-streamer-out.c	(working copy)
*************** create_output_block (enum lto_section_ty
*** 79,85 ****
    ob->decl_state = lto_get_out_decl_state ();
    ob->main_stream = XCNEW (struct lto_output_stream);
    ob->string_stream = XCNEW (struct lto_output_stream);
!   ob->writer_cache = streamer_tree_cache_create (!flag_wpa, true);
  
    if (section_type == LTO_section_function_body)
      ob->cfg_stream = XCNEW (struct lto_output_stream);
--- 79,85 ----
    ob->decl_state = lto_get_out_decl_state ();
    ob->main_stream = XCNEW (struct lto_output_stream);
    ob->string_stream = XCNEW (struct lto_output_stream);
!   ob->writer_cache = streamer_tree_cache_create (!flag_wpa, true, false);
  
    if (section_type == LTO_section_function_body)
      ob->cfg_stream = XCNEW (struct lto_output_stream);
*************** DFS_write_tree (struct output_block *ob,
*** 1277,1283 ****
  	     ???  We still wrap these in LTO_tree_scc so at the
  	     input side we can properly identify the tree we want
  	     to ultimatively return.  */
- 	  size_t old_len = ob->writer_cache->nodes.length ();
  	  if (size == 1)
  	    lto_output_tree_1 (ob, expr, scc_hash, ref_p, this_ref_p);
  	  else
--- 1277,1282 ----
*************** DFS_write_tree (struct output_block *ob,
*** 1315,1321 ****
  		  streamer_write_zero (ob);
  		}
  	    }
- 	  gcc_assert (old_len + size == ob->writer_cache->nodes.length ());
  
  	  /* Finally truncate the vector.  */
  	  sccstack.truncate (first);
--- 1314,1319 ----
*************** produce_asm_for_decls (void)
*** 2423,2432 ****
  
    gcc_assert (!alias_pairs);
  
!   /* Write the global symbols.  */
    out_state = lto_get_out_decl_state ();
!   num_fns = lto_function_decl_states.length ();
    lto_output_decl_state_streams (ob, out_state);
    for (idx = 0; idx < num_fns; idx++)
      {
        fn_out_state =
--- 2453,2470 ----
  
    gcc_assert (!alias_pairs);
  
!   /* Get rid of the global decl state hash tables to save some memory.  */
    out_state = lto_get_out_decl_state ();
!   for (int i = 0; i < LTO_N_DECL_STREAMS; i++)
!     if (out_state->streams[i].tree_hash_table)
!       {
! 	delete out_state->streams[i].tree_hash_table;
! 	out_state->streams[i].tree_hash_table = NULL;
!       }
! 
!   /* Write the global symbols.  */
    lto_output_decl_state_streams (ob, out_state);
+   num_fns = lto_function_decl_states.length ();
    for (idx = 0; idx < num_fns; idx++)
      {
        fn_out_state =
Index: gcc/lto-streamer-in.c
===================================================================
*** gcc/lto-streamer-in.c	(revision 209018)
--- gcc/lto-streamer-in.c	(working copy)
*************** lto_data_in_create (struct lto_file_decl
*** 1365,1372 ****
    data_in->strings = strings;
    data_in->strings_len = len;
    data_in->globals_resolution = resolutions;
!   data_in->reader_cache = streamer_tree_cache_create (false, false);
! 
    return data_in;
  }
  
--- 1365,1371 ----
    data_in->strings = strings;
    data_in->strings_len = len;
    data_in->globals_resolution = resolutions;
!   data_in->reader_cache = streamer_tree_cache_create (false, false, true);
    return data_in;
  }
diff mbox

Patch

Index: gcc/pointer-set.c
===================================================================
--- gcc/pointer-set.c   (revision 209018)
+++ gcc/pointer-set.c   (working copy)
@@ -125,7 +125,7 @@  pointer_set_insert (struct pointer_set_t
 
   /* For simplicity, expand the set even if P is already there.  This can 
be
      superfluous but can happen at most once.  */
-  if (pset->n_elements > pset->n_slots / 4)
+  if (pset->n_elements * 4 > pset->n_slots * 3)
     {
       size_t old_n_slots = pset->n_slots;
       const void **old_slots = pset->slots;
Index: gcc/pointer-set.h
===================================================================
--- gcc/pointer-set.h   (revision 209018)
+++ gcc/pointer-set.h   (working copy)
@@ -109,7 +109,7 @@  pointer_map<T>::insert (const void *p, b
   /* For simplicity, expand the map even if P is already there.  This can 
be
      superfluous but can happen at most once.  */
   /* ???  Fugly that we have to inline that here.  */
-  if (n_elements > n_slots / 4)
+  if (n_elements * 4 > n_slots * 3)
     {
       size_t old_n_slots = n_slots;