Patchwork Shrink .debug_loc section by ~ 9%

login
register
mail settings
Submitter Jakub Jelinek
Date Oct. 8, 2010, 7:28 p.m.
Message ID <20101008192802.GO4127@tyan-ft48-01.lab.bos.redhat.com>
Download mbox | patch
Permalink /patch/67272/
State New
Headers show

Comments

Jakub Jelinek - Oct. 8, 2010, 7:28 p.m.
Hi!

This patch shrinks cc1plus (both i686-linux and x86_64) .debug_loc
section by roughly 9%, by sharing identical lists (if two or more DIEs
have the same .debug_loc list, they can just use the same one instead
of outputting the exact same bits twice.

Bootstrapped/regtested on x86_64-linux and i686-linux and verified after
reverting the dwarf2out.c change and recompiling cc1plus in stage3, the code
and .debug_* sections but .debug_info/.debug_loc is identical, .debug_info
has the same size and differs just in offsets listed for (location list).
I've cooked a little script that computes md5sum of each location list
from readelf -wo output separately (after stripping away the first column
with offset in .debug_loc section).
For i686-linux, .debug_loc size shrunk from 0x1628c77 => 0x1431e08 bytes
with the patch, there were 307800 unique md5sums of .debug_loc location
lists (and those md5sums were identical before/after the patch).  The only

	Jakub
Roland McGrath - Oct. 8, 2010, 8:32 p.m.
> Any other debug info consumer makes similar assumptions?

elfutils does not.


Thanks,
Roland
Tom Tromey - Oct. 8, 2010, 9:53 p.m.
>>>>> "Jakub" == Jakub Jelinek <jakub@redhat.com> writes:

Jakub> The drawback of the patch is that at least readelf -w[io] will need
Jakub> adjustments, because its assumptions no longer hold (it assumes that
Jakub> location list offsets in .debug_info section are monotonically
Jakub> increasing), so it reports warnings about .debug_loc holes.

Jakub> Any other debug info consumer makes similar assumptions?

GDB should be ok here.

Tom
Christopher Faylor - Oct. 9, 2010, 12:11 a.m.
On Fri, Oct 08, 2010 at 09:28:02PM +0200, Jakub Jelinek wrote:
>This patch shrinks cc1plus (both i686-linux and x86_64) .debug_loc
>section by roughly 9%, by sharing identical lists (if two or more DIEs
>have the same .debug_loc list, they can just use the same one instead
>of outputting the exact same bits twice.

Have you done any timing tests on this?  I'm curious whether this has any
noticeable effect on link times.

cgf
Jason Merrill - Oct. 11, 2010, 8:10 p.m.
On 10/08/2010 03:28 PM, Jakub Jelinek wrote:
> +/* Iteratively hash rtx X.  */
> +
> +static hashval_t
> +iterative_hash_rtx (const_rtx x, hashval_t hash)

This seems like it belongs in rtl.c, with rtx_equal_p.  OK with that change.

Jason

Patch

differences were just ordering and that some md5sums occurred more than
once.  Before patch there were 335379 location lists in .debug_loc,
after the patch 313887.
For x86_64-linux similarly, .debug_loc size shrunk from
0x2c0464c => 0x282e933 bytes with the patch, 351514 unique md5sums and
location list counts went down from 381983 to 357956.
The reasons why the patch saves slightly smaller number than it
theoretically could (if there were no md5sum collisions) is probably
that in dwarf2out.c we work with code labels, while readelf compares
addresses and in some rare cases where could be more labels at the same
PC, etc.

The drawback of the patch is that at least readelf -w[io] will need
adjustments, because its assumptions no longer hold (it assumes that
location list offsets in .debug_info section are monotonically
increasing), so it reports warnings about .debug_loc holes.

Any other debug info consumer makes similar assumptions?

2010-10-08  Jakub Jelinek  <jakub@redhat.com>

	* dwarf2out.c (dw_loc_list_node): Add hash and emitted fields.
	(output_loc_list): Return immediately if emitted is set,
	set it.
	(iterative_hash_rtx, hash_loc_operands, hash_locs, hash_loc_list,
	compare_loc_operands, compare_locs, loc_list_hash, loc_list_eq,
	optimize_location_lists_1, optimize_location_lists): New function.
	(dwarf2out_finish): Call optimize_location_lists.

--- gcc/dwarf2out.c.jj	2010-10-04 09:52:26.000000000 +0200
+++ gcc/dwarf2out.c	2010-10-08 16:00:42.000000000 +0200
@@ -4376,6 +4376,8 @@  typedef struct GTY(()) dw_loc_list_struc
 		      Only on head of list */
   const char *section; /* Section this loclist is relative to */
   dw_loc_descr_ref expr;
+  hashval_t hash;
+  bool emitted;
 } dw_loc_list_node;
 
 static dw_loc_descr_ref int_loc_descriptor (HOST_WIDE_INT);
@@ -10883,6 +10885,10 @@  output_loc_list (dw_loc_list_ref list_he
 {
   dw_loc_list_ref curr = list_head;
 
+  if (list_head->emitted)
+    return;
+  list_head->emitted = true;
+
   ASM_OUTPUT_LABEL (asm_out_file, list_head->ll_symbol);
 
   /* Walk the location list, and output each range + expression.  */
@@ -22400,7 +22406,449 @@  resolve_addr (dw_die_ref die)
 
   FOR_EACH_CHILD (die, c, resolve_addr (c));
 }
+
+/* Helper routines for optimize_location_lists.
+   This pass tries to share identical local lists in .debug_loc
+   section.  */
+
+/* Iteratively hash rtx X.  */
+
+static hashval_t
+iterative_hash_rtx (const_rtx x, hashval_t hash)
+{
+  enum rtx_code code;
+  enum machine_mode mode;
+  int i, j;
+  const char *fmt;
+
+  if (x == NULL_RTX)
+    return hash;
+  code = GET_CODE (x);
+  hash = iterative_hash_object (code, hash);
+  mode = GET_MODE (x);
+  hash = iterative_hash_object (mode, hash);
+  switch (code)
+    {
+    case REG:
+      i = REGNO (x);
+      return iterative_hash_object (i, hash);
+    case CONST_INT:
+      return iterative_hash_object (INTVAL (x), hash);
+    case SYMBOL_REF:
+      if (XSTR (x, 0))
+	return iterative_hash (XSTR (x, 0), strlen (XSTR (x, 0)) + 1,
+			       hash);
+      return hash;
+    case LABEL_REF:
+    case DEBUG_EXPR:
+    case VALUE:
+    case SCRATCH:
+    case CONST_DOUBLE:
+    case CONST_FIXED:
+    case DEBUG_IMPLICIT_PTR:
+      return hash;
+    default:
+      break;
+    }
+
+  fmt = GET_RTX_FORMAT (code);
+  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+    switch (fmt[i])
+      {
+      case 'w':
+	hash = iterative_hash_object (XWINT (x, i), hash);
+	break;
+      case 'n':
+      case 'i':
+	hash = iterative_hash_object (XINT (x, i), hash);
+	break;
+      case 'V':
+      case 'E':
+	j = XVECLEN (x, i);
+	hash = iterative_hash_object (j, hash);
+	for (j = 0; j < XVECLEN (x, i); j++)
+	  hash = iterative_hash_rtx (XVECEXP (x, i, j), hash);
+	break;
+      case 'e':
+	hash = iterative_hash_rtx (XEXP (x, i), hash);
+	break;
+      case 'S':
+      case 's':
+	if (XSTR (x, i))
+	  hash = iterative_hash (XSTR (x, 0), strlen (XSTR (x, 0)) + 1,
+				 hash);
+	break;
+      default:
+	break;
+      }
+  return hash;
+}
+
+/* Iteratively hash operands of LOC opcode.  */
+
+static inline hashval_t
+hash_loc_operands (dw_loc_descr_ref loc, hashval_t hash)
+{
+  dw_val_ref val1 = &loc->dw_loc_oprnd1;
+  dw_val_ref val2 = &loc->dw_loc_oprnd2;
+
+  switch (loc->dw_loc_opc)
+    {
+    case DW_OP_const4u:
+    case DW_OP_const8u:
+      if (loc->dtprel)
+	goto hash_addr;
+      /* FALLTHRU */
+    case DW_OP_const1u:
+    case DW_OP_const1s:
+    case DW_OP_const2u:
+    case DW_OP_const2s:
+    case DW_OP_const4s:
+    case DW_OP_const8s:
+    case DW_OP_constu:
+    case DW_OP_consts:
+    case DW_OP_pick:
+    case DW_OP_plus_uconst:
+    case DW_OP_breg0:
+    case DW_OP_breg1:
+    case DW_OP_breg2:
+    case DW_OP_breg3:
+    case DW_OP_breg4:
+    case DW_OP_breg5:
+    case DW_OP_breg6:
+    case DW_OP_breg7:
+    case DW_OP_breg8:
+    case DW_OP_breg9:
+    case DW_OP_breg10:
+    case DW_OP_breg11:
+    case DW_OP_breg12:
+    case DW_OP_breg13:
+    case DW_OP_breg14:
+    case DW_OP_breg15:
+    case DW_OP_breg16:
+    case DW_OP_breg17:
+    case DW_OP_breg18:
+    case DW_OP_breg19:
+    case DW_OP_breg20:
+    case DW_OP_breg21:
+    case DW_OP_breg22:
+    case DW_OP_breg23:
+    case DW_OP_breg24:
+    case DW_OP_breg25:
+    case DW_OP_breg26:
+    case DW_OP_breg27:
+    case DW_OP_breg28:
+    case DW_OP_breg29:
+    case DW_OP_breg30:
+    case DW_OP_breg31:
+    case DW_OP_regx:
+    case DW_OP_fbreg:
+    case DW_OP_piece:
+    case DW_OP_deref_size:
+    case DW_OP_xderef_size:
+      hash = iterative_hash_object (val1->v.val_int, hash);
+      break;
+    case DW_OP_skip:
+    case DW_OP_bra:
+      {
+	int offset;
+
+	gcc_assert (val1->val_class == dw_val_class_loc);
+	offset = val1->v.val_loc->dw_loc_addr - (loc->dw_loc_addr + 3);
+	hash = iterative_hash_object (offset, hash);
+      }
+      break;
+    case DW_OP_implicit_value:
+      hash = iterative_hash_object (val1->v.val_unsigned, hash);
+      switch (val2->val_class)
+	{
+	case dw_val_class_const:
+	  hash = iterative_hash_object (val2->v.val_int, hash);
+	  break;
+	case dw_val_class_vec:
+	  {
+	    unsigned int elt_size = val2->v.val_vec.elt_size;
+	    unsigned int len = val2->v.val_vec.length;
+
+	    hash = iterative_hash_object (elt_size, hash);
+	    hash = iterative_hash_object (len, hash);
+	    hash = iterative_hash (val2->v.val_vec.array,
+				   len * elt_size, hash);
+	  }
+	  break;
+	case dw_val_class_const_double:
+	  hash = iterative_hash_object (val2->v.val_double.low, hash);
+	  hash = iterative_hash_object (val2->v.val_double.high, hash);
+	  break;
+	case dw_val_class_addr:
+	  hash = iterative_hash_rtx (val2->v.val_addr, hash);
+	  break;
+	default:
+	  gcc_unreachable ();
+	}
+      break;
+    case DW_OP_bregx:
+    case DW_OP_bit_piece:
+      hash = iterative_hash_object (val1->v.val_int, hash);
+      hash = iterative_hash_object (val2->v.val_int, hash);
+      break;
+    case DW_OP_addr:
+    hash_addr:
+      if (loc->dtprel)
+	{
+	  unsigned char dtprel = 0xd1;
+	  hash = iterative_hash_object (dtprel, hash);
+	}
+      hash = iterative_hash_rtx (val1->v.val_addr, hash);
+      break;
+    case DW_OP_GNU_implicit_pointer:
+      hash = iterative_hash_object (val2->v.val_int, hash);
+      break;
+
+    default:
+      /* Other codes have no operands.  */
+      break;
+    }
+  return hash;
+}
 
+/* Iteratively hash the whole DWARF location expression LOC.  */
+
+static inline hashval_t
+hash_locs (dw_loc_descr_ref loc, hashval_t hash)
+{
+  dw_loc_descr_ref l;
+  bool sizes_computed = false;
+  /* Compute sizes, so that DW_OP_skip/DW_OP_bra can be checksummed.  */
+  size_of_locs (loc);
+
+  for (l = loc; l != NULL; l = l->dw_loc_next)
+    {
+      enum dwarf_location_atom opc = l->dw_loc_opc;
+      hash = iterative_hash_object (opc, hash);
+      if ((opc == DW_OP_skip || opc == DW_OP_bra) && !sizes_computed)
+	{
+	  size_of_locs (loc);
+	  sizes_computed = true;
+	}
+      hash = hash_loc_operands (l, hash);
+    }
+  return hash;
+}
+
+/* Compute hash of the whole location list LIST_HEAD.  */
+
+static inline void
+hash_loc_list (dw_loc_list_ref list_head)
+{
+  dw_loc_list_ref curr = list_head;
+  hashval_t hash = 0;
+
+  for (curr = list_head; curr != NULL; curr = curr->dw_loc_next)
+    {
+      hash = iterative_hash (curr->begin, strlen (curr->begin) + 1, hash);
+      hash = iterative_hash (curr->end, strlen (curr->end) + 1, hash);
+      if (curr->section)
+	hash = iterative_hash (curr->section, strlen (curr->section) + 1,
+			       hash);
+      hash = hash_locs (curr->expr, hash);
+    }
+  list_head->hash = hash;
+}
+
+/* Return true if X and Y opcodes have the same operands.  */
+
+static inline bool
+compare_loc_operands (dw_loc_descr_ref x, dw_loc_descr_ref y)
+{
+  dw_val_ref valx1 = &x->dw_loc_oprnd1;
+  dw_val_ref valx2 = &x->dw_loc_oprnd2;
+  dw_val_ref valy1 = &y->dw_loc_oprnd1;
+  dw_val_ref valy2 = &y->dw_loc_oprnd2;
+
+  switch (x->dw_loc_opc)
+    {
+    case DW_OP_const4u:
+    case DW_OP_const8u:
+      if (x->dtprel)
+	goto hash_addr;
+      /* FALLTHRU */
+    case DW_OP_const1u:
+    case DW_OP_const1s:
+    case DW_OP_const2u:
+    case DW_OP_const2s:
+    case DW_OP_const4s:
+    case DW_OP_const8s:
+    case DW_OP_constu:
+    case DW_OP_consts:
+    case DW_OP_pick:
+    case DW_OP_plus_uconst:
+    case DW_OP_breg0:
+    case DW_OP_breg1:
+    case DW_OP_breg2:
+    case DW_OP_breg3:
+    case DW_OP_breg4:
+    case DW_OP_breg5:
+    case DW_OP_breg6:
+    case DW_OP_breg7:
+    case DW_OP_breg8:
+    case DW_OP_breg9:
+    case DW_OP_breg10:
+    case DW_OP_breg11:
+    case DW_OP_breg12:
+    case DW_OP_breg13:
+    case DW_OP_breg14:
+    case DW_OP_breg15:
+    case DW_OP_breg16:
+    case DW_OP_breg17:
+    case DW_OP_breg18:
+    case DW_OP_breg19:
+    case DW_OP_breg20:
+    case DW_OP_breg21:
+    case DW_OP_breg22:
+    case DW_OP_breg23:
+    case DW_OP_breg24:
+    case DW_OP_breg25:
+    case DW_OP_breg26:
+    case DW_OP_breg27:
+    case DW_OP_breg28:
+    case DW_OP_breg29:
+    case DW_OP_breg30:
+    case DW_OP_breg31:
+    case DW_OP_regx:
+    case DW_OP_fbreg:
+    case DW_OP_piece:
+    case DW_OP_deref_size:
+    case DW_OP_xderef_size:
+      return valx1->v.val_int == valy1->v.val_int;
+    case DW_OP_skip:
+    case DW_OP_bra:
+      gcc_assert (valx1->val_class == dw_val_class_loc
+		  && valy1->val_class == dw_val_class_loc
+		  && x->dw_loc_addr == y->dw_loc_addr);
+      return valx1->v.val_loc->dw_loc_addr == valy1->v.val_loc->dw_loc_addr;
+    case DW_OP_implicit_value:
+      if (valx1->v.val_unsigned != valy1->v.val_unsigned
+	  || valx2->val_class != valy2->val_class)
+	return false;
+      switch (valx2->val_class)
+	{
+	case dw_val_class_const:
+	  return valx2->v.val_int == valy2->v.val_int;
+	case dw_val_class_vec:
+	  return valx2->v.val_vec.elt_size == valy2->v.val_vec.elt_size
+		 && valx2->v.val_vec.length == valy2->v.val_vec.length
+		 && memcmp (valx2->v.val_vec.array, valy2->v.val_vec.array,
+			    valx2->v.val_vec.elt_size
+			    * valx2->v.val_vec.length) == 0;
+	case dw_val_class_const_double:
+	  return valx2->v.val_double.low == valy2->v.val_double.low
+		 && valx2->v.val_double.high == valy2->v.val_double.high;
+	case dw_val_class_addr:
+	  return rtx_equal_p (valx2->v.val_addr, valy2->v.val_addr);
+	default:
+	  gcc_unreachable ();
+	}
+    case DW_OP_bregx:
+    case DW_OP_bit_piece:
+      return valx1->v.val_int == valy1->v.val_int
+	     && valx2->v.val_int == valy2->v.val_int;
+    case DW_OP_addr:
+    hash_addr:
+      return rtx_equal_p (valx1->v.val_addr, valx2->v.val_addr);
+    case DW_OP_GNU_implicit_pointer:
+      return valx1->val_class == dw_val_class_die_ref
+	     && valx1->val_class == valy1->val_class
+	     && valx1->v.val_die_ref.die == valy1->v.val_die_ref.die
+	     && valx2->v.val_int == valy2->v.val_int;
+    default:
+      /* Other codes have no operands.  */
+      return true;
+    }
+}
+
+/* Return true if DWARF location expressions X and Y are the same.  */
+
+static inline bool
+compare_locs (dw_loc_descr_ref x, dw_loc_descr_ref y)
+{
+  for (; x != NULL && y != NULL; x = x->dw_loc_next, y = y->dw_loc_next)
+    if (x->dw_loc_opc != y->dw_loc_opc
+	|| x->dtprel != y->dtprel
+	|| !compare_loc_operands (x, y))
+      break;
+  return x == NULL && y == NULL;
+}
+
+/* Return precomputed hash of location list X.  */
+
+static hashval_t
+loc_list_hash (const void *x)
+{
+  return ((const struct dw_loc_list_struct *) x)->hash;
+}
+
+/* Return 1 if location lists X and Y are the same.  */
+
+static int
+loc_list_eq (const void *x, const void *y)
+{
+  const struct dw_loc_list_struct *a = (const struct dw_loc_list_struct *) x;
+  const struct dw_loc_list_struct *b = (const struct dw_loc_list_struct *) y;
+  if (a == b)
+    return 1;
+  if (a->hash != b->hash)
+    return 0;
+  for (; a != NULL && b != NULL; a = a->dw_loc_next, b = b->dw_loc_next)
+    if (strcmp (a->begin, b->begin) != 0
+	|| strcmp (a->end, b->end) != 0
+	|| (a->section == NULL) != (b->section == NULL)
+	|| (a->section && strcmp (a->section, b->section) != 0)
+	|| !compare_locs (a->expr, b->expr))
+      break;
+  return a == NULL && b == NULL;
+}
+
+/* Recursively optimize location lists referenced from DIE
+   children and share them whenever possible.  */
+
+static void
+optimize_location_lists_1 (dw_die_ref die, htab_t htab)
+{
+  dw_die_ref c;
+  dw_attr_ref a;
+  unsigned ix;
+  void **slot;
+
+  FOR_EACH_VEC_ELT (dw_attr_node, die->die_attr, ix, a)
+    if (AT_class (a) == dw_val_class_loc_list)
+      {
+	dw_loc_list_ref list = AT_loc_list (a);
+	/* TODO: perform some optimizations here, before hashing
+	   it and storing into the hash table.  */
+	hash_loc_list (list);
+	slot = htab_find_slot_with_hash (htab, list, list->hash,
+					 INSERT);
+	if (*slot == NULL)
+	  *slot = (void *) list;
+	else
+	  a->dw_attr_val.v.val_loc_list = (dw_loc_list_ref) *slot;
+      }
+
+  FOR_EACH_CHILD (die, c, optimize_location_lists_1 (c, htab));
+}
+
+/* Optimize location lists referenced from DIE
+   children and share them whenever possible.  */
+
+static void
+optimize_location_lists (dw_die_ref die)
+{
+  htab_t htab = htab_create (500, loc_list_hash, loc_list_eq, NULL);
+  optimize_location_lists_1 (die, htab);
+  htab_delete (htab);
+}
+
 /* Output stuff that dwarf requires at the end of every file,
    and generate the DWARF-2 debugging info.  */
 
@@ -22621,6 +23069,9 @@  dwarf2out_finish (const char *filename)
   if (debug_info_level >= DINFO_LEVEL_VERBOSE)
     add_AT_macptr (comp_unit_die (), DW_AT_macro_info, macinfo_section_label);
 
+  if (have_location_lists)
+    optimize_location_lists (die);
+
   /* Output all of the compilation units.  We put the main one last so that
      the offsets are available to output_pubnames.  */
   for (node = limbo_die_list; node; node = node->next)