diff mbox

[pph] Add support for shared data structures (issue4557055)

Message ID 20110527175411.1B4601DA1CA@topo.tor.corp.google.com
State New
Headers show

Commit Message

Diego Novillo May 27, 2011, 5:54 p.m. UTC
The LTO streamer supports shared tree pointers by using a cache of
pickled tree nodes.  The first time a tree is streamed, it's pickled
and added to the cache.  Subsequent stream operations use the pickle
reference.

This patch adds the same support for all the data structures pickled
in lang-specific fields in types and decls.  The new cache is a (void
*, int) map.

This was exposed by a C test case that generated 1,000s of bindings
with 100,000s of declarations in them, all of them shared.  I will add
the test case in a follow-up patch later today.

Tested on x86_64.  Committed to branch.


Diego.

	* pph-streamer.h (enum pph_record_marker): Define.
	Replace all users of PPH_RECORD_START and PPH_RECORD_END to
	use the new enum type.
	* pph-streamer-in.c (pph_stream_read_shared_data): New.
	(pph_stream_register_shared_data): New.
	(ALLOC_AND_REGISTER): Define.
	(pph_start_record): Add argument CACHE_IX.  Update all users.
	(pph_stream_read_ld_base): Do not call pph_start_record.
	(pph_stream_read_ld_min): Likewise.
	(pph_stream_read_cxx_binding_1): Handle PPH_RECORD_SHARED values
	returned from pph_start_record.
	Call pph_stream_register_shared_data.
	(pph_stream_read_cxx_binding): Do not call pph_start_record.
	Handle NULL values returned from pph_stream_read_cxx_binding_1.
	(pph_stream_read_class_binding): Handle PPH_RECORD_SHARED
	values returned from pph_start_record.
	Call pph_stream_register_shared_data.
	(pph_stream_read_label_binding): Likewise.
	(pph_stream_read_binding_level): Likewise.
	(pph_stream_read_c_language_function): Likewise.
	(pph_stream_read_language_function): Likewise.
	(pph_stream_read_ld_fn): Do not call pph_start_record.
	(pph_stream_read_ld_ns): Likewise.
	(pph_stream_read_ld_parm): Likewise.
	(pph_stream_read_lang_type_header): Likewise.
	(pph_stream_read_lang_type_class): Likewise.
	(pph_stream_read_lang_type_ptrmem): Likewise.
	(pph_stream_read_lang_specific): Handle PPH_RECORD_SHARED
	values returned from pph_start_record.
	(pph_stream_read_lang_type_class): Likewise.
	(pph_stream_read_lang_type_ptrmem): Likewise.
	(pph_stream_read_lang_type): Likewise.
	(pph_stream_read_sorted_fields_type): Likewise.
	Call pph_stream_register_shared_data.
	(pph_stream_read_lang_type): Remove TYPE argument.
	Return a pointer to the newly materialized struct lang_type.
	Update all users.
	Support shared struct lang_type nodes.
	* pph-streamer-out.c (pph_start_record): Call pph_stream_cache_add.
	(pph_stream_write_ld_base): Do not call pph_start_record.
	(pph_stream_write_ld_min): Likewise.
	(pph_stream_write_cxx_binding): Likewise.
	Handle the case when CB is NULL.
	(pph_stream_write_ld_fn): Do not call pph_start_record.
	(pph_stream_write_ld_ns): Likewise.
	(pph_stream_write_ld_parm): Likewise.
	(pph_stream_write_lang_type_header): Likewise.
	(pph_stream_write_lang_type_class): Likewise.
	(pph_stream_write_lang_type_ptrmem): Likewise.
	* pph-streamer.c (pph_stream_open): Initialize stream->cache.v and
	stream->cache.m.
	(pph_stream_close): Free stream->cache.v, stream->cache.m and stream.
	(pph_stream_cache_insert_at): New.
	(pph_stream_cache_add): New.
	(pph_stream_cache_get): New.
	* pph-streamer.h (pph_stream_pickle_cache): Define.
	(pph_stream): Add field 'cache'.
	(pph_stream_cache_insert_at): Declare.
	(pph_stream_cache_add): Declare.
	(pph_stream_cache_get): Declare.


--
This patch is available for review at http://codereview.appspot.com/4557055
diff mbox

Patch

diff --git a/gcc/cp/pph-streamer-in.c b/gcc/cp/pph-streamer-in.c
index b47f8f7..d9b9f1d 100644
--- a/gcc/cp/pph-streamer-in.c
+++ b/gcc/cp/pph-streamer-in.c
@@ -32,6 +32,16 @@  along with GCC; see the file COPYING3.  If not see
 #include "version.h"
 #include "cppbuiltin.h"
 
+/* Wrapper for memory allocation calls that should have their results
+   registered in the PPH streamer cache.  DATA receives the pointer
+   returned by the memory allocation call ALLOC_EXPR.  IX is the cache
+   slot in STREAM where the newly allocated DATA should be registered.  */
+#define ALLOC_AND_REGISTER(STREAM, IX, DATA, ALLOC_EXPR)	\
+    do {							\
+      (DATA) = (ALLOC_EXPR);					\
+      pph_stream_register_shared_data (STREAM, DATA, IX);	\
+    } while (0)
+
 /* Callback for unpacking value fields in ASTs.  BP is the bitpack 
    we are unpacking from.  EXPR is the tree to unpack.  */
 
@@ -165,16 +175,62 @@  pph_stream_init_read (pph_stream *stream)
 }
 
 
-/* Read and return a record marker from STREAM.  The marker
-   must be one of PPH_RECORD_START or PPH_RECORD_END.  If PPH_RECORD_END
-   is read, return false.  Otherwise, return true.  */
+/* Read and return a record marker from STREAM.  When a PPH_RECORD_START
+   marker is read, the next word read is an index into the streamer
+   cache where the rematerialized data structure should be stored.
+   When the writer stored this data structure for the first time, it
+   added it to its own streamer cache at slot number *CACHE_IX.
+
+   This way, if the same data structure was written a second time to
+   the stream, instead of writing the whole structure again, only the
+   index *CACHE_IX is written as a PPH_RECORD_SHARED record.
+
+   Therefore, when reading a PPH_RECORD_START marker, *CACHE_IX will
+   contain the slot number where the materialized data should be
+   cached.  When reading a PPH_RECORD_SHARED marker, *CACHE_IX will
+   contain the slot number where the reader can find the previously
+   materialized structure.  For PPH_RECORD_END, *CACHE_IX is not set.  */
+
+static inline enum pph_record_marker
+pph_start_record (pph_stream *stream, unsigned *cache_ix)
+{
+  enum pph_record_marker marker;
+
+  marker = (enum pph_record_marker) pph_input_uchar (stream);
+
+  /* For PPH_RECORD_START and PPH_RECORD_SHARED markers, read the
+     streamer cache slot where we should store or find the
+     rematerialized data structure (see description above).  */
+  if (marker == PPH_RECORD_START || marker == PPH_RECORD_SHARED)
+    *cache_ix = pph_input_uint (stream);
+  else
+    gcc_assert (marker == PPH_RECORD_END);
+
+  return marker;
+}
+
+
+/* Return a shared pointer from the streamer cache in STREAM.  This is
+   called when pph_start_record returns PPH_RECORD_SHARED.  It means
+   that the data structure we are about to read has been instantiated
+   before and is present in the streamer cache.  */
+
+static void *
+pph_stream_read_shared_data (pph_stream *stream, unsigned ix)
+{
+  return pph_stream_cache_get (stream, ix);
+}
+
+
+/* Register DATA in STREAM's cache slot IX.  This is called when a
+   potentially shared data structure is first read from STREAM.
+   Subsequent reads of this data structure will get the index from the
+   table cache where this data was saved.  */
 
-static inline bool
-pph_start_record (pph_stream *stream)
+static void
+pph_stream_register_shared_data (pph_stream *stream, void *data, unsigned ix)
 {
-  unsigned char marker = pph_input_uchar (stream);
-  gcc_assert (marker == PPH_RECORD_START || marker == PPH_RECORD_END);
-  return (marker == PPH_RECORD_START);
+  pph_stream_cache_insert_at (stream, data, ix);
 }
 
 
@@ -185,9 +241,6 @@  pph_stream_read_ld_base (pph_stream *stream, struct lang_decl_base *ldb)
 {
   struct bitpack_d bp;
 
-  if (!pph_start_record (stream))
-    return;
-
   bp = pph_input_bitpack (stream);
   ldb->selector = bp_unpack_value (&bp, 16);
   ldb->language = (enum languages) bp_unpack_value (&bp, 4);
@@ -209,11 +262,6 @@  pph_stream_read_ld_base (pph_stream *stream, struct lang_decl_base *ldb)
 static void
 pph_stream_read_ld_min (pph_stream *stream, struct lang_decl_min *ldm)
 {
-  if (!pph_start_record (stream))
-    return;
-
-  gcc_assert (ldm->base.selector == 0);
-
   ldm->template_info = pph_input_tree (stream);
   if (ldm->base.u2sel == 0)
     ldm->u2.access = pph_input_tree (stream);
@@ -279,9 +327,14 @@  pph_stream_read_cxx_binding_1 (pph_stream *stream)
   struct bitpack_d bp;
   cxx_binding *cb;
   tree value, type;
+  enum pph_record_marker marker;
+  unsigned ix;
 
-  if (!pph_start_record (stream))
+  marker = pph_start_record (stream, &ix);
+  if (marker == PPH_RECORD_END)
     return NULL;
+  else if (marker == PPH_RECORD_SHARED)
+    return (cxx_binding *) pph_stream_read_shared_data (stream, ix);
 
   value = pph_input_tree (stream);
   type = pph_input_tree (stream);
@@ -291,6 +344,8 @@  pph_stream_read_cxx_binding_1 (pph_stream *stream)
   cb->value_is_inherited = bp_unpack_value (&bp, 1);
   cb->is_local = bp_unpack_value (&bp, 1);
 
+  pph_stream_register_shared_data (stream, cb, ix);
+
   return cb;
 }
 
@@ -303,9 +358,6 @@  pph_stream_read_cxx_binding (pph_stream *stream)
   unsigned i, num_bindings;
   cxx_binding *curr, *cb;
 
-  if (!pph_start_record (stream))
-    return NULL;
-
   /* Read the list of previous bindings.  */
   num_bindings = pph_input_uint (stream);
   for (curr = NULL, i = 0; i < num_bindings; i++)
@@ -316,30 +368,39 @@  pph_stream_read_cxx_binding (pph_stream *stream)
       curr = prev;
     }
 
-  /* Read the current binding at the end.  */
+  /* Read the current binding at the end.  Note that we do not need
+     to call pph_stream_register_shared_data as it is already done
+     by pph_stream_read_cxx_binding_1.  */
   cb = pph_stream_read_cxx_binding_1 (stream);
-  cb->previous = curr;
+  if (cb)
+    cb->previous = curr;
 
   return cb;
 }
 
 
-/* Read all the fields of cp_class_binding instance CB to OB.  REF_P
-   is true if the tree fields should be written as references.  */
+/* Read and return a cp_class_binding instance from STREAM.  */
 
 static cp_class_binding *
 pph_stream_read_class_binding (pph_stream *stream)
 {
   cp_class_binding *cb;
+  enum pph_record_marker marker;
+  unsigned ix;
 
-  if (!pph_start_record (stream))
+  marker = pph_start_record (stream, &ix);
+  if (marker == PPH_RECORD_END)
     return NULL;
+  else if (marker == PPH_RECORD_SHARED)
+    return (cp_class_binding *) pph_stream_read_shared_data (stream, ix);
 
   cb = ggc_alloc_cleared_cp_class_binding ();
   memcpy (&cb->base, pph_stream_read_cxx_binding (stream),
 	  sizeof (cxx_binding));
   cb->identifier = pph_input_tree (stream);
 
+  pph_stream_register_shared_data (stream, cb, ix);
+
   return cb;
 }
 
@@ -350,14 +411,21 @@  static cp_label_binding *
 pph_stream_read_label_binding (pph_stream *stream)
 {
   cp_label_binding *lb;
+  enum pph_record_marker marker;
+  unsigned ix;
 
-  if (!pph_start_record (stream))
+  marker = pph_start_record (stream, &ix);
+  if (marker == PPH_RECORD_END)
     return NULL;
+  else if (marker == PPH_RECORD_SHARED)
+    return (cp_label_binding *) pph_stream_read_shared_data (stream, ix);
 
   lb = ggc_alloc_cleared_cp_label_binding ();
   lb->label = pph_input_tree (stream);
   lb->prev_value = pph_input_tree (stream);
 
+  pph_stream_register_shared_data (stream, lb, ix);
+
   return lb;
 }
 
@@ -367,15 +435,20 @@  pph_stream_read_label_binding (pph_stream *stream)
 static struct cp_binding_level *
 pph_stream_read_binding_level (pph_stream *stream)
 {
-  unsigned i, num;
+  unsigned i, num, ix;
   cp_label_binding *sl;
   struct cp_binding_level *bl;
   struct bitpack_d bp;
+  enum pph_record_marker marker;
 
-  if (!pph_start_record (stream))
+  marker = pph_start_record (stream, &ix);
+  if (marker == PPH_RECORD_END)
     return NULL;
+  else if (marker == PPH_RECORD_SHARED)
+    return (struct cp_binding_level *) pph_stream_read_shared_data (stream, ix);
+
+  ALLOC_AND_REGISTER (stream, ix, bl, ggc_alloc_cleared_cp_binding_level ());
 
-  bl = ggc_alloc_cleared_cp_binding_level ();
   bl->names = pph_input_chain (stream);
   bl->names_size = pph_input_uint (stream);
   bl->namespaces = pph_input_chain (stream);
@@ -426,11 +499,18 @@  static struct c_language_function *
 pph_stream_read_c_language_function (pph_stream *stream)
 {
   struct c_language_function *clf;
+  enum pph_record_marker marker;
+  unsigned ix;
 
-  if (!pph_start_record (stream))
+  marker = pph_start_record (stream, &ix);
+  if (marker == PPH_RECORD_END)
     return NULL;
+  else if (marker == PPH_RECORD_SHARED)
+    return (struct c_language_function *) pph_stream_read_shared_data (stream,
+	                                                               ix);
 
-  clf = ggc_alloc_cleared_c_language_function ();
+  ALLOC_AND_REGISTER (stream, ix, clf,
+		      ggc_alloc_cleared_c_language_function ());
   clf->x_stmt_tree.x_cur_stmt_list = pph_input_tree (stream);
   clf->x_stmt_tree.stmts_are_full_exprs_p = pph_input_uint (stream);
 
@@ -445,11 +525,17 @@  pph_stream_read_language_function (pph_stream *stream)
 {
   struct bitpack_d bp;
   struct language_function *lf;
+  enum pph_record_marker marker;
+  unsigned ix;
 
-  if (!pph_start_record (stream))
+  marker = pph_start_record (stream, &ix);
+  if (marker == PPH_RECORD_END)
     return NULL;
+  else if (marker == PPH_RECORD_SHARED)
+    return (struct language_function *) pph_stream_read_shared_data (stream,
+								     ix);
 
-  lf = ggc_alloc_cleared_language_function ();
+  ALLOC_AND_REGISTER (stream, ix, lf, ggc_alloc_cleared_language_function ());
   memcpy (&lf->base, pph_stream_read_c_language_function (stream),
 	  sizeof (struct c_language_function));
   lf->x_cdtor_label = pph_input_tree (stream);
@@ -485,9 +571,6 @@  pph_stream_read_ld_fn (pph_stream *stream, struct lang_decl_fn *ldf)
 {
   struct bitpack_d bp;
 
-  if (!pph_start_record (stream))
-    return;
-
   bp = pph_input_bitpack (stream);
   ldf->operator_code = (enum tree_code) bp_unpack_value (&bp, 16);
   ldf->global_ctor_p = bp_unpack_value (&bp, 1);
@@ -528,9 +611,6 @@  pph_stream_read_ld_fn (pph_stream *stream, struct lang_decl_fn *ldf)
 static void
 pph_stream_read_ld_ns (pph_stream *stream, struct lang_decl_ns *ldns)
 {
-  if (!pph_start_record (stream))
-    return;
-
   ldns->level = pph_stream_read_binding_level (stream);
 }
 
@@ -540,9 +620,6 @@  pph_stream_read_ld_ns (pph_stream *stream, struct lang_decl_ns *ldns)
 static void
 pph_stream_read_ld_parm (pph_stream *stream, struct lang_decl_parm *ldp)
 {
-  if (!pph_start_record (stream))
-    return;
-
   ldp->level = pph_input_uint (stream);
   ldp->index = pph_input_uint (stream);
 }
@@ -555,10 +632,17 @@  pph_stream_read_lang_specific (pph_stream *stream, tree decl)
 {
   struct lang_decl *ld;
   struct lang_decl_base *ldb;
+  enum pph_record_marker marker;
+  unsigned ix;
 
-  if (!pph_start_record (stream))
+  marker = pph_start_record (stream, &ix);
+  if (marker == PPH_RECORD_END)
     return;
 
+  /* Since lang_decl is embedded in every decl, LD cannot
+     be shared.  */
+  gcc_assert (marker != PPH_RECORD_SHARED);
+
   /* Allocate a lang_decl structure for DECL.  */
   retrofit_lang_decl (decl);
 
@@ -619,13 +703,10 @@  pph_stream_alloc_tree (enum tree_code code,
 
 static void
 pph_stream_read_lang_type_header (pph_stream *stream,
-				   struct lang_type_header *lth)
+				  struct lang_type_header *lth)
 {
   struct bitpack_d bp;
 
-  if (!pph_start_record (stream))
-    return;
-
   bp = pph_input_bitpack (stream);
   lth->is_lang_type_class = bp_unpack_value (&bp, 1);
   lth->has_type_conversion = bp_unpack_value (&bp, 1);
@@ -666,12 +747,18 @@  pph_stream_read_sorted_fields_type (pph_stream *stream)
 {
   unsigned i, num_fields;
   struct sorted_fields_type *v;
+  enum pph_record_marker marker;
+  unsigned ix;
 
-  if (!pph_start_record (stream))
+  marker = pph_start_record (stream, &ix);
+  if (marker == PPH_RECORD_END)
     return NULL;
+  else if (marker == PPH_RECORD_SHARED)
+    return (struct sorted_fields_type *) pph_stream_read_shared_data (stream,
+								      ix);
 
   num_fields = pph_input_uint (stream);
-  v = sorted_fields_type_new (num_fields);
+  ALLOC_AND_REGISTER (stream, ix, v, sorted_fields_type_new (num_fields));
   for (i = 0; i < num_fields; i++)
     v->elts[i] = pph_input_tree (stream);
 
@@ -688,9 +775,8 @@  pph_stream_read_lang_type_class (pph_stream *stream,
 				  struct lang_type_class *ltc)
 {
   struct bitpack_d bp;
-
-  if (!pph_start_record (stream))
-    return;
+  enum pph_record_marker marker;
+  unsigned ix;
 
   ltc->align = pph_input_uchar (stream);
 
@@ -744,8 +830,16 @@  pph_stream_read_lang_type_class (pph_stream *stream,
   ltc->vtables = pph_input_tree (stream);
   ltc->typeinfo_var = pph_input_tree (stream);
   ltc->vbases = pph_stream_read_tree_vec (stream);
-  if (pph_start_record (stream))
-    ltc->nested_udts = pph_stream_read_binding_table (stream);
+
+  marker = pph_start_record (stream, &ix);
+  if (marker == PPH_RECORD_START)
+    {
+      ltc->nested_udts = pph_stream_read_binding_table (stream);
+      pph_stream_register_shared_data (stream, ltc->nested_udts, ix);
+    }
+  else if (marker == PPH_RECORD_SHARED)
+    ltc->nested_udts = (binding_table) pph_stream_read_shared_data (stream, ix);
+
   ltc->as_base = pph_input_tree (stream);
   ltc->pure_virtuals = pph_stream_read_tree_vec (stream);
   ltc->friend_classes = pph_input_tree (stream);
@@ -766,31 +860,35 @@  static void
 pph_stream_read_lang_type_ptrmem (pph_stream *stream,
 				  struct lang_type_ptrmem *ltp)
 {
-  if (!pph_start_record (stream))
-    return;
-
   ltp->record = pph_input_tree (stream);
 }
 
 
-/* Read all the lang-specific fields of TYPE from STREAM.  */
+/* Read all the fields in struct lang_type from STREAM.  */
 
-static void
-pph_stream_read_lang_type (pph_stream *stream, tree type)
+static struct lang_type *
+pph_stream_read_lang_type (pph_stream *stream)
 {
   struct lang_type *lt;
+  enum pph_record_marker marker;
+  unsigned ix;
 
-  if (!pph_start_record (stream))
-    return;
+  marker = pph_start_record (stream, &ix);
+  if (marker == PPH_RECORD_END)
+    return NULL;
+  else if (marker == PPH_RECORD_SHARED)
+    return (struct lang_type *) pph_stream_read_shared_data (stream, ix);
 
-  lt = ggc_alloc_cleared_lang_type (sizeof (struct lang_type));
-  TYPE_LANG_SPECIFIC (type) = lt;
+  ALLOC_AND_REGISTER (stream, ix, lt,
+		      ggc_alloc_cleared_lang_type (sizeof (struct lang_type)));
 
   pph_stream_read_lang_type_header (stream, &lt->u.h);
   if (lt->u.h.is_lang_type_class)
     pph_stream_read_lang_type_class (stream, &lt->u.c);
   else
     pph_stream_read_lang_type_ptrmem (stream, &lt->u.ptrmem);
+
+  return lt;
 }
 
 
@@ -867,13 +965,13 @@  pph_stream_read_tree (struct lto_input_block *ib ATTRIBUTE_UNUSED,
     case REFERENCE_TYPE:
     case VECTOR_TYPE:
     case VOID_TYPE:
-      pph_stream_read_lang_type (stream, expr);
+      TYPE_LANG_SPECIFIC (expr) = pph_stream_read_lang_type (stream);
       break;
 
     case QUAL_UNION_TYPE:
     case RECORD_TYPE:
     case UNION_TYPE:
-      pph_stream_read_lang_type (stream, expr);
+      TYPE_LANG_SPECIFIC (expr) = pph_stream_read_lang_type (stream);
       TYPE_BINFO (expr) = pph_input_tree (stream);
       break;
 
@@ -883,7 +981,7 @@  pph_stream_read_tree (struct lto_input_block *ib ATTRIBUTE_UNUSED,
     case TEMPLATE_TYPE_PARM:
     case TYPENAME_TYPE:
     case TYPEOF_TYPE:
-      pph_stream_read_lang_type (stream, expr);
+      TYPE_LANG_SPECIFIC (expr) = pph_stream_read_lang_type (stream);
       TYPE_CACHED_VALUES (expr) = pph_input_tree (stream);
       /* Note that we are using TYPED_CACHED_VALUES for it access to 
          the generic .values field of types. */
diff --git a/gcc/cp/pph-streamer-out.c b/gcc/cp/pph-streamer-out.c
index 82319da..a5a8dec 100644
--- a/gcc/cp/pph-streamer-out.c
+++ b/gcc/cp/pph-streamer-out.c
@@ -193,17 +193,35 @@  pph_stream_flush_buffers (pph_stream *stream)
 }
 
 
-/* Start a new record in STREAM for data in DATA.  If DATA is NULL,
-   write an end-of-record marker and return false.  Otherwise, write a
-   start-of-record marker and return true.  */
+/* Start a new record in STREAM for data in DATA.  If DATA is NULL
+   write an end-of-record marker and return false.  If DATA is not NULL
+   and did not exist in the pickle cache, add it, write a
+   start-of-record marker and return true.  If DATA existed in the
+   cache, write a shared-record marker and return false.  */
 
 static inline bool
 pph_start_record (pph_stream *stream, void *data)
 {
   if (data)
     {
-      pph_output_uchar (stream, PPH_RECORD_START);
-      return true;
+      bool existed_p;
+      unsigned ix;
+      enum pph_record_marker marker;
+
+      /* If the memory at DATA has already been streamed out, make
+	 sure that we don't write it more than once.  Otherwise,
+	 the reader will instantiate two different pointers for
+	 the same object.
+
+	 Write the index into the cache where DATA has been stored.
+	 This way, the reader will know at which slot to
+	 re-materialize DATA the first time and where to access it on
+	 subsequent reads.  */
+      existed_p = pph_stream_cache_add (stream, data, &ix);
+      marker = (existed_p) ? PPH_RECORD_SHARED : PPH_RECORD_START;
+      pph_output_uchar (stream, marker);
+      pph_output_uint (stream, ix);
+      return marker == PPH_RECORD_START;
     }
   else
     {
@@ -220,9 +238,6 @@  pph_stream_write_ld_base (pph_stream *stream, struct lang_decl_base *ldb)
 {
   struct bitpack_d bp;
 
-  if (!pph_start_record (stream, ldb))
-    return;
-
   bp = bitpack_create (stream->ob->main_stream);
   bp_pack_value (&bp, ldb->selector, 16);
   bp_pack_value (&bp, ldb->language, 4);
@@ -247,11 +262,6 @@  static void
 pph_stream_write_ld_min (pph_stream *stream, struct lang_decl_min *ldm,
 		         bool ref_p)
 {
-  if (!pph_start_record (stream, ldm))
-    return;
-
-  gcc_assert (ldm->base.selector == 0);
-
   pph_output_tree_or_ref_1 (stream, ldm->template_info, ref_p, 1);
   if (ldm->base.u2sel == 0)
     pph_output_tree_or_ref_1 (stream, ldm->u2.access, ref_p, 1);
@@ -332,16 +342,15 @@  pph_stream_write_cxx_binding (pph_stream *stream, cxx_binding *cb, bool ref_p)
   unsigned num_bindings;
   cxx_binding *prev;
 
-  if (!pph_start_record (stream, cb))
-    return;
-
-  for (num_bindings = 0, prev = cb->previous; prev; prev = prev->previous)
+  num_bindings = 0;
+  for (prev = cb ? cb->previous : NULL; prev; prev = prev->previous)
     num_bindings++;
 
   /* Write the list of previous bindings.  */
   pph_output_uint (stream, num_bindings);
-  for (prev = cb->previous; prev; prev = prev->previous)
-    pph_stream_write_cxx_binding_1 (stream, prev, ref_p);
+  if (num_bindings > 0)
+    for (prev = cb->previous; prev; prev = prev->previous)
+      pph_stream_write_cxx_binding_1 (stream, prev, ref_p);
 
   /* Write the current binding at the end.  */
   pph_stream_write_cxx_binding_1 (stream, cb, ref_p);
@@ -541,9 +550,6 @@  pph_stream_write_ld_fn (pph_stream *stream, struct lang_decl_fn *ldf,
 {
   struct bitpack_d bp;
 
-  if (!pph_start_record (stream, ldf))
-    return;
-
   bp = bitpack_create (stream->ob->main_stream);
   bp_pack_value (&bp, ldf->operator_code, 16);
   bp_pack_value (&bp, ldf->global_ctor_p, 1);
@@ -588,13 +594,7 @@  static void
 pph_stream_write_ld_ns (pph_stream *stream, struct lang_decl_ns *ldns,
 			bool ref_p)
 {
-  struct cp_binding_level *level;
-
-  if (!pph_start_record (stream, ldns))
-    return;
-
-  level = ldns->level;
-  pph_stream_write_binding_level (stream, level, ref_p);
+  pph_stream_write_binding_level (stream, ldns->level, ref_p);
 }
 
 
@@ -604,9 +604,6 @@  pph_stream_write_ld_ns (pph_stream *stream, struct lang_decl_ns *ldns,
 static void
 pph_stream_write_ld_parm (pph_stream *stream, struct lang_decl_parm *ldp)
 {
-  if (!pph_start_record (stream, ldp))
-    return;
-
   pph_output_uint (stream, ldp->level);
   pph_output_uint (stream, ldp->index);
 }
@@ -663,9 +660,6 @@  pph_stream_write_lang_type_header (pph_stream *stream,
 {
   struct bitpack_d bp;
 
-  if (!pph_start_record (stream, lth))
-    return;
-
   bp = bitpack_create (stream->ob->main_stream);
   bp_pack_value (&bp, lth->is_lang_type_class, 1);
   bp_pack_value (&bp, lth->has_type_conversion, 1);
@@ -725,9 +719,6 @@  pph_stream_write_lang_type_class (pph_stream *stream,
 {
   struct bitpack_d bp;
 
-  if (!pph_start_record (stream, ltc))
-    return;
-
   pph_output_uchar (stream, ltc->align);
 
   bp = bitpack_create (stream->ob->main_stream);
@@ -804,9 +795,6 @@  static void
 pph_stream_write_lang_type_ptrmem (pph_stream *stream, struct
 				   lang_type_ptrmem *ltp, bool ref_p)
 {
-  if (!pph_start_record (stream, ltp))
-    return;
-
   pph_output_tree_or_ref (stream, ltp->record, ref_p);
 }
 
diff --git a/gcc/cp/pph-streamer.c b/gcc/cp/pph-streamer.c
index 18a5e25..a00a243 100644
--- a/gcc/cp/pph-streamer.c
+++ b/gcc/cp/pph-streamer.c
@@ -127,6 +127,8 @@  pph_stream_open (const char *name, const char *mode)
 	pph_stream_init_write (stream);
       else
 	pph_stream_init_read (stream);
+      stream->cache.v = NULL;
+      stream->cache.m = pointer_map_create ();
     }
 
   return stream;
@@ -143,6 +145,9 @@  pph_stream_close (pph_stream *stream)
     pph_stream_flush_buffers (stream);
   fclose (stream->file);
   stream->file = NULL;
+  VEC_free (void_p, heap, stream->cache.v);
+  pointer_map_destroy (stream->cache.m);
+  free (stream);
 }
 
 
@@ -326,3 +331,74 @@  pph_stream_trace_bitpack (pph_stream *stream, struct bitpack_d *bp)
 {
   pph_stream_trace (stream, bp, sizeof (*bp), PPH_TRACE_BITPACK);
 }
+
+
+/* Insert DATA in STREAM's pickle cache at slot IX.  If DATA already
+   existed in the cache, IX must be the same as the previous entry.  */
+
+void
+pph_stream_cache_insert_at (pph_stream *stream, void *data, unsigned ix)
+{
+  void **map_slot;
+
+  map_slot = pointer_map_insert (stream->cache.m, data);
+  if (*map_slot)
+    {
+      /* DATA already existed in the cache.  Do nothing, but check
+	 that we are trying to insert DATA in the same slot that we
+	 had it in before.  */
+      unsigned HOST_WIDE_INT prev_ix = (unsigned HOST_WIDE_INT) *map_slot;
+      gcc_assert (prev_ix == ix);
+    }
+  else
+    {
+      *map_slot = (void *) (unsigned HOST_WIDE_INT) ix;
+      if (ix + 1 > VEC_length (void_p, stream->cache.v))
+	VEC_safe_grow_cleared (void_p, heap, stream->cache.v, ix + 1);
+      VEC_replace (void_p, stream->cache.v, ix, data);
+    }
+}
+
+
+/* Add pointer DATA to the pickle cache in STREAM.  On exit, *IX_P will
+   contain the slot number where DATA is stored.  Return true if DATA
+   already existed in the cache, false otherwise.  */
+
+bool
+pph_stream_cache_add (pph_stream *stream, void *data, unsigned *ix_p)
+{
+  void **map_slot;
+  unsigned ix;
+  bool existed_p;
+
+  map_slot = pointer_map_contains (stream->cache.m, data);
+  if (map_slot == NULL)
+    {
+      existed_p = false;
+      ix = VEC_length (void_p, stream->cache.v);
+      pph_stream_cache_insert_at (stream, data, ix);
+    }
+  else
+    {
+      unsigned HOST_WIDE_INT slot_ix = (unsigned HOST_WIDE_INT) *map_slot;
+      gcc_assert (slot_ix == (unsigned) slot_ix);
+      ix = (unsigned) slot_ix;
+      existed_p = true;
+    }
+
+  *ix_p = ix;
+
+  return existed_p;
+}
+
+
+/* Return the pointer at slot IX in STREAM's pickle cache.  */
+
+void *
+pph_stream_cache_get (pph_stream *stream, unsigned ix)
+{
+  void *data = VEC_index (void_p, stream->cache.v, ix);
+  gcc_assert (data);
+
+  return data;
+}
diff --git a/gcc/cp/pph-streamer.h b/gcc/cp/pph-streamer.h
index d11c545..66de09c 100644
--- a/gcc/cp/pph-streamer.h
+++ b/gcc/cp/pph-streamer.h
@@ -25,8 +25,11 @@  along with GCC; see the file COPYING3.  If not see
 #include "tree.h"
 
 /* Record markers.  */
-static const unsigned char PPH_RECORD_START = 0xff;
-static const unsigned char PPH_RECORD_END   = 0xfe;
+enum pph_record_marker {
+  PPH_RECORD_START = 0xfd,
+  PPH_RECORD_END,
+  PPH_RECORD_SHARED
+};
 
 /* Number of sections in a PPH file.  FIXME, currently only one section
    is supported.  To add more, it will also be necessary to handle
@@ -52,6 +55,31 @@  typedef struct pph_file_header {
 } pph_file_header;
 
 
+typedef void *void_p;
+DEF_VEC_P(void_p);
+DEF_VEC_ALLOC_P(void_p,heap);
+
+/* A cache for storing pickled data structures.  This is used to implement
+   pointer sharing.
+
+   When a data structure is initially pickled for writing, a pointer
+   to it is stored in this cache.  If the same data structure is
+   streamed again, instead of pickling it, the compiler will write
+   the index into the cache.
+
+   The same mechanism is used when reading. When the data structure is
+   first materialized, its address is saved into the same cache slot
+   used when writing.  Subsequent reads will simply get the
+   materialized pointer from that slot.  */
+typedef struct pph_stream_pickle_cache {
+  /* Array of entries.  */
+  VEC(void_p,heap) *v;
+
+  /* Map from each cached pointer to its slot index in V.  */
+  struct pointer_map_t *m;
+} pph_stream_pickle_cache;
+
+
 /* A PPH stream contains all the data and attributes needed to
    write symbols, declarations and other parsing products to disk.  */
 typedef struct pph_stream {
@@ -83,6 +111,9 @@  typedef struct pph_stream {
   char *file_data;
   size_t file_size;
 
+  /* Cache of pickled data structures.  */
+  pph_stream_pickle_cache cache;
+
   /* Nonzero if the stream was opened for writing.  */
   unsigned int write_p : 1;
 } pph_stream;
@@ -100,6 +131,9 @@  void pph_stream_trace_string (pph_stream *, const char *);
 void pph_stream_trace_string_with_length (pph_stream *, const char *, unsigned);
 void pph_stream_trace_chain (pph_stream *, tree);
 void pph_stream_trace_bitpack (pph_stream *, struct bitpack_d *);
+void pph_stream_cache_insert_at (pph_stream *, void *, unsigned);
+bool pph_stream_cache_add (pph_stream *, void *, unsigned *);
+void *pph_stream_cache_get (pph_stream *, unsigned);
 
 /* In pph-streamer-out.c.  */
 void pph_stream_flush_buffers (pph_stream *);