diff mbox series

[7/9,libbacktrace] Handle DW_FORM_GNU_ref_alt

Message ID 20181211101411.7067-8-tdevries@suse.de
State New
Headers show
Series Handle .gnu_debugaltlink | expand

Commit Message

Tom de Vries Dec. 11, 2018, 10:14 a.m. UTC
2018-12-10  Tom de Vries  <tdevries@suse.de>

	* dwarf.c (enum attr_val_encoding): Add ATTR_VAL_REF_ALT_INFO.
	(struct unit): Add low and high fields.
	(struct unit_vector): New type.
	(struct dwarf_data): Add units and units_counts fields.
	(read_attribute): Handle DW_FORM_GNU_ref_alt using
	ATTR_VAL_REF_ALT_INFO.
	(find_unit): New function.
	(find_address_ranges): Add and handle unit_tag parameter.
	(build_address_map): Add and handle units_vec parameter.
	(read_referenced_name_1): Handle DW_FORM_GNU_ref_alt.
	(build_dwarf_data): Pass units_vec to build_address_map.  Store resulting
	units vector.
---
 libbacktrace/dwarf.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 91 insertions(+), 10 deletions(-)

Comments

Tom de Vries Jan. 17, 2019, 12:17 a.m. UTC | #1
Hi,

this handles DW_FORM_GNU_ref_alt which references the .debug_info
section in the .gnu_debugaltlink file.

OK for trunk?

Thanks,
- Tom

On 11-12-18 11:14, Tom de Vries wrote:
> 2018-12-10  Tom de Vries  <tdevries@suse.de>
> 
> 	* dwarf.c (enum attr_val_encoding): Add ATTR_VAL_REF_ALT_INFO.
> 	(struct unit): Add low and high fields.
> 	(struct unit_vector): New type.
> 	(struct dwarf_data): Add units and units_counts fields.
> 	(read_attribute): Handle DW_FORM_GNU_ref_alt using
> 	ATTR_VAL_REF_ALT_INFO.
> 	(find_unit): New function.
> 	(find_address_ranges): Add and handle unit_tag parameter.
> 	(build_address_map): Add and handle units_vec parameter.
> 	(read_referenced_name_1): Handle DW_FORM_GNU_ref_alt.
> 	(build_dwarf_data): Pass units_vec to build_address_map.  Store resulting
> 	units vector.
> ---
>  libbacktrace/dwarf.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 91 insertions(+), 10 deletions(-)
> 
> diff --git a/libbacktrace/dwarf.c b/libbacktrace/dwarf.c
> index 99e5f4c3f51..9a0b93120c8 100644
> --- a/libbacktrace/dwarf.c
> +++ b/libbacktrace/dwarf.c
> @@ -143,6 +143,8 @@ enum attr_val_encoding
>    ATTR_VAL_REF_UNIT,
>    /* An offset to other data within the .dwarf_info section.  */
>    ATTR_VAL_REF_INFO,
> +  /* An offset to other data within the alt .dwarf_info section.  */
> +  ATTR_VAL_REF_ALT_INFO,
>    /* An offset to data in some other section.  */
>    ATTR_VAL_REF_SECTION,
>    /* A type signature.  */
> @@ -281,6 +283,10 @@ struct unit
>    /* The offset of UNIT_DATA from the start of the information for
>       this compilation unit.  */
>    size_t unit_data_offset;
> +  /* Start of the compilation unit.  */
> +  size_t low;
> +  /* End of the compilation unit.  */
> +  size_t high;
>    /* DWARF version.  */
>    int version;
>    /* Whether unit is DWARF64.  */
> @@ -339,6 +345,14 @@ struct unit_addrs_vector
>    size_t count;
>  };
>  
> +/* A growable vector of compilation unit pointer.  */
> +
> +struct unit_vector
> +{
> +  struct backtrace_vector vec;
> +  size_t count;
> +};
> +
>  /* The information we need to map a PC to a file and line.  */
>  
>  struct dwarf_data
> @@ -353,6 +367,10 @@ struct dwarf_data
>    struct unit_addrs *addrs;
>    /* Number of address ranges in list.  */
>    size_t addrs_count;
> +  /* A sorted list of units.  */
> +  struct unit **units;
> +  /* Number of units in the list.  */
> +  size_t units_count;
>    /* The unparsed .debug_info section.  */
>    const unsigned char *dwarf_info;
>    size_t dwarf_info_size;
> @@ -840,7 +858,7 @@ read_attribute (enum dwarf_form form, struct dwarf_buf *buf,
>  	  val->encoding = ATTR_VAL_NONE;
>  	  return 1;
>  	}
> -      val->encoding = ATTR_VAL_REF_SECTION;
> +      val->encoding = ATTR_VAL_REF_ALT_INFO;
>        return 1;
>      case DW_FORM_GNU_strp_alt:
>        {
> @@ -866,6 +884,34 @@ read_attribute (enum dwarf_form form, struct dwarf_buf *buf,
>      }
>  }
>  
> +/* Compare a unit offset against a unit for bsearch.  */
> +
> +static int
> +units_search (const void *vkey, const void *ventry)
> +{
> +  const uintptr_t *key = (const uintptr_t *) vkey;
> +  const struct unit *entry = *((const struct unit *const *) ventry);
> +  uintptr_t offset;
> +
> +  offset = *key;
> +  if (offset < entry->low)
> +    return -1;
> +  else if (offset >= entry->high)
> +    return 1;
> +  else
> +    return 0;
> +}
> +
> +/* Find a unit in PU containing OFFSET.  */
> +
> +static struct unit *
> +find_unit (struct unit **pu, size_t units_count, size_t offset)
> +{
> +  struct unit **u;
> +  u = bsearch (&offset, pu, units_count, sizeof (struct unit *), units_search);
> +  return u == NULL ? NULL : *u;
> +}
> +
>  /* Compare function_addrs for qsort.  When ranges are nested, make the
>     smallest one sort last.  */
>  
> @@ -1299,7 +1345,7 @@ find_address_ranges (struct backtrace_state *state, uintptr_t base_address,
>  		     int is_bigendian, backtrace_error_callback error_callback,
>  		     void *data, struct unit *u,
>  		     struct unit_addrs_vector *addrs,
> -		     struct dwarf_data *altlink)
> +		     struct dwarf_data *altlink, enum dwarf_tag *unit_tag)
>  {
>    while (unit_buf->left > 0)
>      {
> @@ -1322,6 +1368,9 @@ find_address_ranges (struct backtrace_state *state, uintptr_t base_address,
>        if (abbrev == NULL)
>  	return 0;
>  
> +      if (unit_tag != NULL)
> +	*unit_tag = abbrev->tag;
> +
>        lowpc = 0;
>        have_lowpc = 0;
>        highpc = 0;
> @@ -1434,7 +1483,7 @@ find_address_ranges (struct backtrace_state *state, uintptr_t base_address,
>  				    dwarf_str, dwarf_str_size,
>  				    dwarf_ranges, dwarf_ranges_size,
>  				    is_bigendian, error_callback, data,
> -				    u, addrs, altlink))
> +				    u, addrs, altlink, NULL))
>  	    return 0;
>  	}
>      }
> @@ -1454,6 +1503,7 @@ build_address_map (struct backtrace_state *state, uintptr_t base_address,
>  		   const unsigned char *dwarf_str, size_t dwarf_str_size,
>  		   int is_bigendian, backtrace_error_callback error_callback,
>  		   void *data, struct unit_addrs_vector *addrs,
> +		   struct unit_vector *unit_vec,
>  		   struct dwarf_data *altlink)
>  {
>    struct dwarf_buf info;
> @@ -1462,9 +1512,12 @@ build_address_map (struct backtrace_state *state, uintptr_t base_address,
>    size_t i;
>    struct unit **pu;
>    size_t prev_addrs_count;
> +  size_t unit_offset = 0;
>  
>    memset (&addrs->vec, 0, sizeof addrs->vec);
> +  memset (&unit_vec->vec, 0, sizeof unit_vec->vec);
>    addrs->count = 0;
> +  unit_vec->count = 0;
>    prev_addrs_count = 0;
>  
>    /* Read through the .debug_info section.  FIXME: Should we use the
> @@ -1493,6 +1546,7 @@ build_address_map (struct backtrace_state *state, uintptr_t base_address,
>        uint64_t abbrev_offset;
>        int addrsize;
>        struct unit *u;
> +      enum dwarf_tag unit_tag;
>  
>        if (info.reported_underflow)
>  	goto fail;
> @@ -1535,6 +1589,9 @@ build_address_map (struct backtrace_state *state, uintptr_t base_address,
>  
>        addrsize = read_byte (&unit_buf);
>  
> +      u->low = unit_offset;
> +      unit_offset += len + (is_dwarf64 ? 12 : 4);
> +      u->high = unit_offset;
>        u->unit_data = unit_buf.buf;
>        u->unit_data_len = unit_buf.left;
>        u->unit_data_offset = unit_buf.buf - unit_data_start;
> @@ -1556,13 +1613,13 @@ build_address_map (struct backtrace_state *state, uintptr_t base_address,
>  				dwarf_str, dwarf_str_size,
>  				dwarf_ranges, dwarf_ranges_size,
>  				is_bigendian, error_callback, data,
> -				u, addrs, altlink))
> +				u, addrs, altlink, &unit_tag))
>  	goto fail;
>  
>        if (unit_buf.reported_underflow)
>  	goto fail;
>  
> -      if (addrs->count == prev_addrs_count)
> +      if (unit_tag != DW_TAG_partial_unit && addrs->count == prev_addrs_count)
>  	{
>  	  --units_count;
>  	  units.size -= sizeof (u);
> @@ -1576,11 +1633,8 @@ build_address_map (struct backtrace_state *state, uintptr_t base_address,
>    if (info.reported_underflow)
>      goto fail;
>  
> -  // We only kept the list of units to free them on failure.  On
> -  // success the units are retained, pointed to by the entries in
> -  // addrs.
> -  backtrace_vector_free (state, &units, error_callback, data);
> -
> +  unit_vec->vec = units;
> +  unit_vec->count = units_count;
>    return 1;
>  
>   fail:
> @@ -2144,6 +2198,22 @@ read_referenced_name_1 (struct dwarf_data *ddata, struct unit *u,
>        || val->encoding == ATTR_VAL_REF_UNIT)
>      return read_referenced_name (ddata, u, val->u.uint, error_callback, data);
>  
> +  if (val->encoding == ATTR_VAL_REF_ALT_INFO)
> +    {
> +      struct unit *alt_unit
> +	= find_unit (ddata->altlink->units, ddata->altlink->units_count,
> +		     val->u.uint);
> +      if (alt_unit == NULL)
> +	{
> +	  error_callback (data,
> +			  "Could not find unit for DW_FORM_GNU_ref_alt", 0);
> +	  return NULL;
> +	}
> +      uint64_t unit_offset = val->u.uint - alt_unit->low;
> +      return read_referenced_name (ddata->altlink, alt_unit, unit_offset,
> +				   error_callback, data);
> +    }
> +
>    return NULL;
>  }
>  
> @@ -3028,21 +3098,30 @@ build_dwarf_data (struct backtrace_state *state,
>    struct unit_addrs_vector addrs_vec;
>    struct unit_addrs *addrs;
>    size_t addrs_count;
> +  struct unit_vector units_vec;
> +  struct unit **units;
> +  size_t units_count;
>    struct dwarf_data *fdata;
>  
>    if (!build_address_map (state, base_address, dwarf_info, dwarf_info_size,
>  			  dwarf_abbrev, dwarf_abbrev_size, dwarf_ranges,
>  			  dwarf_ranges_size, dwarf_str, dwarf_str_size,
>  			  is_bigendian, error_callback, data, &addrs_vec,
> +			  &units_vec,
>  			  altlink))
>      return NULL;
>  
>    if (!backtrace_vector_release (state, &addrs_vec.vec, error_callback, data))
>      return NULL;
> +  if (!backtrace_vector_release (state, &units_vec.vec, error_callback, data))
> +    return NULL;
>    addrs = (struct unit_addrs *) addrs_vec.vec.base;
> +  units = (struct unit **) units_vec.vec.base;
>    addrs_count = addrs_vec.count;
> +  units_count = units_vec.count;
>    backtrace_qsort (addrs, addrs_count, sizeof (struct unit_addrs),
>  		   unit_addrs_compare);
> +  /* No qsort for units required, already sorted.  */
>  
>    fdata = ((struct dwarf_data *)
>  	   backtrace_alloc (state, sizeof (struct dwarf_data),
> @@ -3055,6 +3134,8 @@ build_dwarf_data (struct backtrace_state *state,
>    fdata->base_address = base_address;
>    fdata->addrs = addrs;
>    fdata->addrs_count = addrs_count;
> +  fdata->units = units;
> +  fdata->units_count = units_count;
>    fdata->dwarf_info = dwarf_info;
>    fdata->dwarf_info_size = dwarf_info_size;
>    fdata->dwarf_line = dwarf_line;
>
Ian Lance Taylor Jan. 17, 2019, 12:35 a.m. UTC | #2
On Wed, Jan 16, 2019 at 4:17 PM Tom de Vries <tdevries@suse.de> wrote:
>
> this handles DW_FORM_GNU_ref_alt which references the .debug_info
> section in the .gnu_debugaltlink file.
>
> OK for trunk?
>
> Thanks,
> - Tom
>
> On 11-12-18 11:14, Tom de Vries wrote:
> > 2018-12-10  Tom de Vries  <tdevries@suse.de>
> >
> >       * dwarf.c (enum attr_val_encoding): Add ATTR_VAL_REF_ALT_INFO.
> >       (struct unit): Add low and high fields.
> >       (struct unit_vector): New type.
> >       (struct dwarf_data): Add units and units_counts fields.
> >       (read_attribute): Handle DW_FORM_GNU_ref_alt using
> >       ATTR_VAL_REF_ALT_INFO.
> >       (find_unit): New function.
> >       (find_address_ranges): Add and handle unit_tag parameter.
> >       (build_address_map): Add and handle units_vec parameter.
> >       (read_referenced_name_1): Handle DW_FORM_GNU_ref_alt.
> >       (build_dwarf_data): Pass units_vec to build_address_map.  Store resulting
> >       units vector.


> > @@ -281,6 +283,10 @@ struct unit
> >    /* The offset of UNIT_DATA from the start of the information for
> >       this compilation unit.  */
> >    size_t unit_data_offset;
> > +  /* Start of the compilation unit.  */
> > +  size_t low;
> > +  /* End of the compilation unit.  */
> > +  size_t high;

The comments should say what low and high are measured in, which I
guess is file offset.  Or is it offset from the start of UNIT_DATA?
Either way, If that is right, then the fields should be named
low_offset and high_offset.  Otherwise it seems easy to confuse with
function_addrs, where low and high refer to PC values.

Also if they are offsets from UNIT_DATA then size_t is OK, but if the
are file offsets they should be off_t.


> > @@ -2144,6 +2198,22 @@ read_referenced_name_1 (struct dwarf_data *ddata, struct unit *u,
> >        || val->encoding == ATTR_VAL_REF_UNIT)
> >      return read_referenced_name (ddata, u, val->u.uint, error_callback, data);
> >
> > +  if (val->encoding == ATTR_VAL_REF_ALT_INFO)
> > +    {
> > +      struct unit *alt_unit
> > +     = find_unit (ddata->altlink->units, ddata->altlink->units_count,
> > +                  val->u.uint);
> > +      if (alt_unit == NULL)
> > +     {
> > +       error_callback (data,
> > +                       "Could not find unit for DW_FORM_GNU_ref_alt", 0);

s/Could/could/

or maybe just skip this error_callback call as discussed earlier.


> > +       return NULL;
> > +     }
> > +      uint64_t unit_offset = val->u.uint - alt_unit->low;

Earlier a unit_offset was the offset of the unit within unit_data, but
here this is an offset within a single unit.  This should just be
called offset, which is the name used by read_referenced_name.

This is OK with those changes.

Thanks.

Ian
Tom de Vries Jan. 17, 2019, 2:14 p.m. UTC | #3
On 17-01-19 01:35, Ian Lance Taylor wrote:
> On Wed, Jan 16, 2019 at 4:17 PM Tom de Vries <tdevries@suse.de> wrote:
>>
>> this handles DW_FORM_GNU_ref_alt which references the .debug_info
>> section in the .gnu_debugaltlink file.
>>
>> OK for trunk?
>>
>> Thanks,
>> - Tom
>>
>> On 11-12-18 11:14, Tom de Vries wrote:
>>> 2018-12-10  Tom de Vries  <tdevries@suse.de>
>>>
>>>       * dwarf.c (enum attr_val_encoding): Add ATTR_VAL_REF_ALT_INFO.
>>>       (struct unit): Add low and high fields.
>>>       (struct unit_vector): New type.
>>>       (struct dwarf_data): Add units and units_counts fields.
>>>       (read_attribute): Handle DW_FORM_GNU_ref_alt using
>>>       ATTR_VAL_REF_ALT_INFO.
>>>       (find_unit): New function.
>>>       (find_address_ranges): Add and handle unit_tag parameter.
>>>       (build_address_map): Add and handle units_vec parameter.
>>>       (read_referenced_name_1): Handle DW_FORM_GNU_ref_alt.
>>>       (build_dwarf_data): Pass units_vec to build_address_map.  Store resulting
>>>       units vector.
> 
> 
>>> @@ -281,6 +283,10 @@ struct unit
>>>    /* The offset of UNIT_DATA from the start of the information for
>>>       this compilation unit.  */
>>>    size_t unit_data_offset;
>>> +  /* Start of the compilation unit.  */
>>> +  size_t low;
>>> +  /* End of the compilation unit.  */
>>> +  size_t high;
> 
> The comments should say what low and high are measured in, which I
> guess is file offset.  Or is it offset from the start of UNIT_DATA?
> Either way, If that is right, then the fields should be named
> low_offset and high_offset.  Otherwise it seems easy to confuse with
> function_addrs, where low and high refer to PC values.
> 

Done.

> Also if they are offsets from UNIT_DATA then size_t is OK, but if the
> are file offsets they should be off_t.
> 

AFAIU, in the case where off_t vs size_t would make a difference, we're
running into trouble much earlier.  I've filed PR 88890 - "libbacktrace
on 32-bit system with _FILE_OFFSET_BITS == 64" (
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88890 ) about this.

Anyway, I've made the conservative choice of using off_t for now (but I
could argue that it's a memory offset, given that the assumption is that
the entire debug section is read into memory).

>>> @@ -2144,6 +2198,22 @@ read_referenced_name_1 (struct dwarf_data *ddata, struct unit *u,
>>>        || val->encoding == ATTR_VAL_REF_UNIT)
>>>      return read_referenced_name (ddata, u, val->u.uint, error_callback, data);
>>>
>>> +  if (val->encoding == ATTR_VAL_REF_ALT_INFO)
>>> +    {
>>> +      struct unit *alt_unit
>>> +     = find_unit (ddata->altlink->units, ddata->altlink->units_count,
>>> +                  val->u.uint);
>>> +      if (alt_unit == NULL)
>>> +     {
>>> +       error_callback (data,
>>> +                       "Could not find unit for DW_FORM_GNU_ref_alt", 0);
> 
> s/Could/could/
> 
> or maybe just skip this error_callback call as discussed earlier.
> 
> 

Skipped.

>>> +       return NULL;
>>> +     }
>>> +      uint64_t unit_offset = val->u.uint - alt_unit->low;
> 
> Earlier a unit_offset was the offset of the unit within unit_data, but
> here this is an offset within a single unit.  This should just be
> called offset, which is the name used by read_referenced_name.
> 

Done.

> This is OK with those changes.

Committed in two parts.

First part ...
[libbacktrace] Add find_unit

Add a function that finds the unit given an offset into .debug_info.

2018-12-10  Tom de Vries  <tdevries@suse.de>

	* dwarf.c (struct unit): Add low_offset and high_offset fields.
	(struct unit_vector): New type.
	(struct dwarf_data): Add units and units_counts fields.
	(find_unit): New function.
	(find_address_ranges): Add and handle unit_tag parameter.
	(build_address_map): Add and handle units_vec parameter.
	(build_dwarf_data): Pass units_vec to build_address_map.  Store resulting
	units vector.

---
 libbacktrace/dwarf.c | 87 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 76 insertions(+), 11 deletions(-)

diff --git a/libbacktrace/dwarf.c b/libbacktrace/dwarf.c
index 45691b4ba69..6f56c46774b 100644
--- a/libbacktrace/dwarf.c
+++ b/libbacktrace/dwarf.c
@@ -281,6 +281,12 @@ struct unit
   /* The offset of UNIT_DATA from the start of the information for
      this compilation unit.  */
   size_t unit_data_offset;
+  /* Offset of the start of the compilation unit from the start of the
+     .debug_info section.  */
+  off_t low_offset;
+  /* Offset of the end of the compilation unit from the start of the
+     .debug_info section.  */
+  off_t high_offset;
   /* DWARF version.  */
   int version;
   /* Whether unit is DWARF64.  */
@@ -339,6 +345,14 @@ struct unit_addrs_vector
   size_t count;
 };
 
+/* A growable vector of compilation unit pointer.  */
+
+struct unit_vector
+{
+  struct backtrace_vector vec;
+  size_t count;
+};
+
 /* The information we need to map a PC to a file and line.  */
 
 struct dwarf_data
@@ -353,6 +367,10 @@ struct dwarf_data
   struct unit_addrs *addrs;
   /* Number of address ranges in list.  */
   size_t addrs_count;
+  /* A sorted list of units.  */
+  struct unit **units;
+  /* Number of units in the list.  */
+  size_t units_count;
   /* The unparsed .debug_info section.  */
   const unsigned char *dwarf_info;
   size_t dwarf_info_size;
@@ -866,6 +884,34 @@ read_attribute (enum dwarf_form form, struct dwarf_buf *buf,
     }
 }
 
+/* Compare a unit offset against a unit for bsearch.  */
+
+static int
+units_search (const void *vkey, const void *ventry)
+{
+  const off_t *key = (const off_t *) vkey;
+  const struct unit *entry = *((const struct unit *const *) ventry);
+  off_t offset;
+
+  offset = *key;
+  if (offset < entry->low_offset)
+    return -1;
+  else if (offset >= entry->high_offset)
+    return 1;
+  else
+    return 0;
+}
+
+/* Find a unit in PU containing OFFSET.  */
+
+static struct unit *
+find_unit (struct unit **pu, size_t units_count, off_t offset)
+{
+  struct unit **u;
+  u = bsearch (&offset, pu, units_count, sizeof (struct unit *), units_search);
+  return u == NULL ? NULL : *u;
+}
+
 /* Compare function_addrs for qsort.  When ranges are nested, make the
    smallest one sort last.  */
 
@@ -1298,7 +1344,8 @@ find_address_ranges (struct backtrace_state *state, uintptr_t base_address,
 		     size_t dwarf_ranges_size,
 		     int is_bigendian, struct dwarf_data *altlink,
 		     backtrace_error_callback error_callback, void *data,
-		     struct unit *u, struct unit_addrs_vector *addrs)
+		     struct unit *u, struct unit_addrs_vector *addrs,
+		     enum dwarf_tag *unit_tag)
 {
   while (unit_buf->left > 0)
     {
@@ -1321,6 +1368,9 @@ find_address_ranges (struct backtrace_state *state, uintptr_t base_address,
       if (abbrev == NULL)
 	return 0;
 
+      if (unit_tag != NULL)
+	*unit_tag = abbrev->tag;
+
       lowpc = 0;
       have_lowpc = 0;
       highpc = 0;
@@ -1433,7 +1483,7 @@ find_address_ranges (struct backtrace_state *state, uintptr_t base_address,
 				    dwarf_str, dwarf_str_size,
 				    dwarf_ranges, dwarf_ranges_size,
 				    is_bigendian, altlink, error_callback, data,
-				    u, addrs))
+				    u, addrs, NULL))
 	    return 0;
 	}
     }
@@ -1453,7 +1503,8 @@ build_address_map (struct backtrace_state *state, uintptr_t base_address,
 		   const unsigned char *dwarf_str, size_t dwarf_str_size,
 		   int is_bigendian, struct dwarf_data *altlink,
 		   backtrace_error_callback error_callback, void *data,
-		   struct unit_addrs_vector *addrs)
+		   struct unit_addrs_vector *addrs,
+		   struct unit_vector *unit_vec)
 {
   struct dwarf_buf info;
   struct backtrace_vector units;
@@ -1461,9 +1512,12 @@ build_address_map (struct backtrace_state *state, uintptr_t base_address,
   size_t i;
   struct unit **pu;
   size_t prev_addrs_count;
+  off_t unit_offset = 0;
 
   memset (&addrs->vec, 0, sizeof addrs->vec);
+  memset (&unit_vec->vec, 0, sizeof unit_vec->vec);
   addrs->count = 0;
+  unit_vec->count = 0;
   prev_addrs_count = 0;
 
   /* Read through the .debug_info section.  FIXME: Should we use the
@@ -1492,6 +1546,7 @@ build_address_map (struct backtrace_state *state, uintptr_t base_address,
       uint64_t abbrev_offset;
       int addrsize;
       struct unit *u;
+      enum dwarf_tag unit_tag;
 
       if (info.reported_underflow)
 	goto fail;
@@ -1534,6 +1589,9 @@ build_address_map (struct backtrace_state *state, uintptr_t base_address,
 
       addrsize = read_byte (&unit_buf);
 
+      u->low_offset = unit_offset;
+      unit_offset += len + (is_dwarf64 ? 12 : 4);
+      u->high_offset = unit_offset;
       u->unit_data = unit_buf.buf;
       u->unit_data_len = unit_buf.left;
       u->unit_data_offset = unit_buf.buf - unit_data_start;
@@ -1555,13 +1613,13 @@ build_address_map (struct backtrace_state *state, uintptr_t base_address,
 				dwarf_str, dwarf_str_size,
 				dwarf_ranges, dwarf_ranges_size,
 				is_bigendian, altlink, error_callback, data,
-				u, addrs))
+				u, addrs, &unit_tag))
 	goto fail;
 
       if (unit_buf.reported_underflow)
 	goto fail;
 
-      if (addrs->count > prev_addrs_count)
+      if (addrs->count > prev_addrs_count || unit_tag == DW_TAG_partial_unit)
 	prev_addrs_count = addrs->count;
       else
 	{
@@ -1576,11 +1634,8 @@ build_address_map (struct backtrace_state *state, uintptr_t base_address,
   if (info.reported_underflow)
     goto fail;
 
-  // We only kept the list of units to free them on failure.  On
-  // success the units are retained, pointed to by the entries in
-  // addrs.
-  backtrace_vector_free (state, &units, error_callback, data);
-
+  unit_vec->vec = units;
+  unit_vec->count = units_count;
   return 1;
 
  fail:
@@ -3031,21 +3086,29 @@ build_dwarf_data (struct backtrace_state *state,
   struct unit_addrs_vector addrs_vec;
   struct unit_addrs *addrs;
   size_t addrs_count;
+  struct unit_vector units_vec;
+  struct unit **units;
+  size_t units_count;
   struct dwarf_data *fdata;
 
   if (!build_address_map (state, base_address, dwarf_info, dwarf_info_size,
 			  dwarf_abbrev, dwarf_abbrev_size, dwarf_ranges,
 			  dwarf_ranges_size, dwarf_str, dwarf_str_size,
 			  is_bigendian, altlink, error_callback, data,
-			  &addrs_vec))
+			  &addrs_vec, &units_vec))
     return NULL;
 
   if (!backtrace_vector_release (state, &addrs_vec.vec, error_callback, data))
     return NULL;
+  if (!backtrace_vector_release (state, &units_vec.vec, error_callback, data))
+    return NULL;
   addrs = (struct unit_addrs *) addrs_vec.vec.base;
+  units = (struct unit **) units_vec.vec.base;
   addrs_count = addrs_vec.count;
+  units_count = units_vec.count;
   backtrace_qsort (addrs, addrs_count, sizeof (struct unit_addrs),
 		   unit_addrs_compare);
+  /* No qsort for units required, already sorted.  */
 
   fdata = ((struct dwarf_data *)
 	   backtrace_alloc (state, sizeof (struct dwarf_data),
@@ -3058,6 +3121,8 @@ build_dwarf_data (struct backtrace_state *state,
   fdata->base_address = base_address;
   fdata->addrs = addrs;
   fdata->addrs_count = addrs_count;
+  fdata->units = units;
+  fdata->units_count = units_count;
   fdata->dwarf_info = dwarf_info;
   fdata->dwarf_info_size = dwarf_info_size;
   fdata->dwarf_line = dwarf_line;
Tom de Vries Jan. 17, 2019, 2:15 p.m. UTC | #4
On 17-01-19 15:14, Tom de Vries wrote:
> On 17-01-19 01:35, Ian Lance Taylor wrote:
>> On Wed, Jan 16, 2019 at 4:17 PM Tom de Vries <tdevries@suse.de> wrote:
>>>
>>> this handles DW_FORM_GNU_ref_alt which references the .debug_info
>>> section in the .gnu_debugaltlink file.
>>>
>>> OK for trunk?
>>>
>>> Thanks,
>>> - Tom
>>>
>>> On 11-12-18 11:14, Tom de Vries wrote:
>>>> 2018-12-10  Tom de Vries  <tdevries@suse.de>
>>>>
>>>>       * dwarf.c (enum attr_val_encoding): Add ATTR_VAL_REF_ALT_INFO.
>>>>       (struct unit): Add low and high fields.
>>>>       (struct unit_vector): New type.
>>>>       (struct dwarf_data): Add units and units_counts fields.
>>>>       (read_attribute): Handle DW_FORM_GNU_ref_alt using
>>>>       ATTR_VAL_REF_ALT_INFO.
>>>>       (find_unit): New function.
>>>>       (find_address_ranges): Add and handle unit_tag parameter.
>>>>       (build_address_map): Add and handle units_vec parameter.
>>>>       (read_referenced_name_1): Handle DW_FORM_GNU_ref_alt.
>>>>       (build_dwarf_data): Pass units_vec to build_address_map.  Store resulting
>>>>       units vector.
>>
>>
>>>> @@ -281,6 +283,10 @@ struct unit
>>>>    /* The offset of UNIT_DATA from the start of the information for
>>>>       this compilation unit.  */
>>>>    size_t unit_data_offset;
>>>> +  /* Start of the compilation unit.  */
>>>> +  size_t low;
>>>> +  /* End of the compilation unit.  */
>>>> +  size_t high;
>>
>> The comments should say what low and high are measured in, which I
>> guess is file offset.  Or is it offset from the start of UNIT_DATA?
>> Either way, If that is right, then the fields should be named
>> low_offset and high_offset.  Otherwise it seems easy to confuse with
>> function_addrs, where low and high refer to PC values.
>>
> 
> Done.
> 
>> Also if they are offsets from UNIT_DATA then size_t is OK, but if the
>> are file offsets they should be off_t.
>>
> 
> AFAIU, in the case where off_t vs size_t would make a difference, we're
> running into trouble much earlier.  I've filed PR 88890 - "libbacktrace
> on 32-bit system with _FILE_OFFSET_BITS == 64" (
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88890 ) about this.
> 
> Anyway, I've made the conservative choice of using off_t for now (but I
> could argue that it's a memory offset, given that the assumption is that
> the entire debug section is read into memory).
> 
>>>> @@ -2144,6 +2198,22 @@ read_referenced_name_1 (struct dwarf_data *ddata, struct unit *u,
>>>>        || val->encoding == ATTR_VAL_REF_UNIT)
>>>>      return read_referenced_name (ddata, u, val->u.uint, error_callback, data);
>>>>
>>>> +  if (val->encoding == ATTR_VAL_REF_ALT_INFO)
>>>> +    {
>>>> +      struct unit *alt_unit
>>>> +     = find_unit (ddata->altlink->units, ddata->altlink->units_count,
>>>> +                  val->u.uint);
>>>> +      if (alt_unit == NULL)
>>>> +     {
>>>> +       error_callback (data,
>>>> +                       "Could not find unit for DW_FORM_GNU_ref_alt", 0);
>>
>> s/Could/could/
>>
>> or maybe just skip this error_callback call as discussed earlier.
>>
>>
> 
> Skipped.
> 
>>>> +       return NULL;
>>>> +     }
>>>> +      uint64_t unit_offset = val->u.uint - alt_unit->low;
>>
>> Earlier a unit_offset was the offset of the unit within unit_data, but
>> here this is an offset within a single unit.  This should just be
>> called offset, which is the name used by read_referenced_name.
>>
> 
> Done.
> 
>> This is OK with those changes.
> 
> Committed in two parts.
> 
> First part ...
> 

And second part.

Thanks,
- Tom
[libbacktrace] Handle DW_FORM_GNU_ref_alt

Handle DW_FORM_GNU_ref_alt which references the .debug_info section in the
.gnu_debugaltlink file.

2018-12-10  Tom de Vries  <tdevries@suse.de>

	PR libbacktrace/82857
	* dwarf.c (enum attr_val_encoding): Add ATTR_VAL_REF_ALT_INFO.
	(read_attribute): Handle DW_FORM_GNU_ref_alt using
	ATTR_VAL_REF_ALT_INFO.
	(read_referenced_name_from_attr): Handle DW_FORM_GNU_ref_alt.

---
 libbacktrace/dwarf.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/libbacktrace/dwarf.c b/libbacktrace/dwarf.c
index 6f56c46774b..aacbd3a453d 100644
--- a/libbacktrace/dwarf.c
+++ b/libbacktrace/dwarf.c
@@ -143,6 +143,8 @@ enum attr_val_encoding
   ATTR_VAL_REF_UNIT,
   /* An offset to other data within the .dwarf_info section.  */
   ATTR_VAL_REF_INFO,
+  /* An offset to other data within the alt .dwarf_info section.  */
+  ATTR_VAL_REF_ALT_INFO,
   /* An offset to data in some other section.  */
   ATTR_VAL_REF_SECTION,
   /* A type signature.  */
@@ -858,7 +860,7 @@ read_attribute (enum dwarf_form form, struct dwarf_buf *buf,
 	  val->encoding = ATTR_VAL_NONE;
 	  return 1;
 	}
-      val->encoding = ATTR_VAL_REF_SECTION;
+      val->encoding = ATTR_VAL_REF_ALT_INFO;
       return 1;
     case DW_FORM_GNU_strp_alt:
       {
@@ -2200,6 +2202,19 @@ read_referenced_name_from_attr (struct dwarf_data *ddata, struct unit *u,
       || val->encoding == ATTR_VAL_REF_UNIT)
     return read_referenced_name (ddata, u, val->u.uint, error_callback, data);
 
+  if (val->encoding == ATTR_VAL_REF_ALT_INFO)
+    {
+      struct unit *alt_unit
+	= find_unit (ddata->altlink->units, ddata->altlink->units_count,
+		     val->u.uint);
+      if (alt_unit == NULL)
+	return NULL;
+
+      uint64_t offset = val->u.uint - alt_unit->low_offset;
+      return read_referenced_name (ddata->altlink, alt_unit, offset,
+				   error_callback, data);
+    }
+
   return NULL;
 }
Ian Lance Taylor Jan. 18, 2019, 2:25 p.m. UTC | #5
On Thu, Jan 17, 2019 at 6:14 AM Tom de Vries <tdevries@suse.de> wrote:
>
> On 17-01-19 01:35, Ian Lance Taylor wrote:
> > On Wed, Jan 16, 2019 at 4:17 PM Tom de Vries <tdevries@suse.de> wrote:
> >>
> >> this handles DW_FORM_GNU_ref_alt which references the .debug_info
> >> section in the .gnu_debugaltlink file.
> >>
> >> OK for trunk?
> >>
> >> Thanks,
> >> - Tom
> >>
> >> On 11-12-18 11:14, Tom de Vries wrote:
> >>> 2018-12-10  Tom de Vries  <tdevries@suse.de>
> >>>
> >>>       * dwarf.c (enum attr_val_encoding): Add ATTR_VAL_REF_ALT_INFO.
> >>>       (struct unit): Add low and high fields.
> >>>       (struct unit_vector): New type.
> >>>       (struct dwarf_data): Add units and units_counts fields.
> >>>       (read_attribute): Handle DW_FORM_GNU_ref_alt using
> >>>       ATTR_VAL_REF_ALT_INFO.
> >>>       (find_unit): New function.
> >>>       (find_address_ranges): Add and handle unit_tag parameter.
> >>>       (build_address_map): Add and handle units_vec parameter.
> >>>       (read_referenced_name_1): Handle DW_FORM_GNU_ref_alt.
> >>>       (build_dwarf_data): Pass units_vec to build_address_map.  Store resulting
> >>>       units vector.
> >
> >
> >>> @@ -281,6 +283,10 @@ struct unit
> >>>    /* The offset of UNIT_DATA from the start of the information for
> >>>       this compilation unit.  */
> >>>    size_t unit_data_offset;
> >>> +  /* Start of the compilation unit.  */
> >>> +  size_t low;
> >>> +  /* End of the compilation unit.  */
> >>> +  size_t high;
> >
> > The comments should say what low and high are measured in, which I
> > guess is file offset.  Or is it offset from the start of UNIT_DATA?
> > Either way, If that is right, then the fields should be named
> > low_offset and high_offset.  Otherwise it seems easy to confuse with
> > function_addrs, where low and high refer to PC values.
> >
>
> Done.
>
> > Also if they are offsets from UNIT_DATA then size_t is OK, but if the
> > are file offsets they should be off_t.
> >
>
> AFAIU, in the case where off_t vs size_t would make a difference, we're
> running into trouble much earlier.  I've filed PR 88890 - "libbacktrace
> on 32-bit system with _FILE_OFFSET_BITS == 64" (
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88890 ) about this.
>
> Anyway, I've made the conservative choice of using off_t for now (but I
> could argue that it's a memory offset, given that the assumption is that
> the entire debug section is read into memory).

Since the entire debug section is read into memory, if they are
offsets from UNIT_DATA, they should be size_t, not off_t.  I wasn't
trying to say that we should make a conservative choice here, I was
trying to say that we should make a correct choice.  An offset from
UNIT_DATA should be size_t, a file offset should be off_t.

Ian
diff mbox series

Patch

diff --git a/libbacktrace/dwarf.c b/libbacktrace/dwarf.c
index 99e5f4c3f51..9a0b93120c8 100644
--- a/libbacktrace/dwarf.c
+++ b/libbacktrace/dwarf.c
@@ -143,6 +143,8 @@  enum attr_val_encoding
   ATTR_VAL_REF_UNIT,
   /* An offset to other data within the .dwarf_info section.  */
   ATTR_VAL_REF_INFO,
+  /* An offset to other data within the alt .dwarf_info section.  */
+  ATTR_VAL_REF_ALT_INFO,
   /* An offset to data in some other section.  */
   ATTR_VAL_REF_SECTION,
   /* A type signature.  */
@@ -281,6 +283,10 @@  struct unit
   /* The offset of UNIT_DATA from the start of the information for
      this compilation unit.  */
   size_t unit_data_offset;
+  /* Start of the compilation unit.  */
+  size_t low;
+  /* End of the compilation unit.  */
+  size_t high;
   /* DWARF version.  */
   int version;
   /* Whether unit is DWARF64.  */
@@ -339,6 +345,14 @@  struct unit_addrs_vector
   size_t count;
 };
 
+/* A growable vector of compilation unit pointer.  */
+
+struct unit_vector
+{
+  struct backtrace_vector vec;
+  size_t count;
+};
+
 /* The information we need to map a PC to a file and line.  */
 
 struct dwarf_data
@@ -353,6 +367,10 @@  struct dwarf_data
   struct unit_addrs *addrs;
   /* Number of address ranges in list.  */
   size_t addrs_count;
+  /* A sorted list of units.  */
+  struct unit **units;
+  /* Number of units in the list.  */
+  size_t units_count;
   /* The unparsed .debug_info section.  */
   const unsigned char *dwarf_info;
   size_t dwarf_info_size;
@@ -840,7 +858,7 @@  read_attribute (enum dwarf_form form, struct dwarf_buf *buf,
 	  val->encoding = ATTR_VAL_NONE;
 	  return 1;
 	}
-      val->encoding = ATTR_VAL_REF_SECTION;
+      val->encoding = ATTR_VAL_REF_ALT_INFO;
       return 1;
     case DW_FORM_GNU_strp_alt:
       {
@@ -866,6 +884,34 @@  read_attribute (enum dwarf_form form, struct dwarf_buf *buf,
     }
 }
 
+/* Compare a unit offset against a unit for bsearch.  */
+
+static int
+units_search (const void *vkey, const void *ventry)
+{
+  const uintptr_t *key = (const uintptr_t *) vkey;
+  const struct unit *entry = *((const struct unit *const *) ventry);
+  uintptr_t offset;
+
+  offset = *key;
+  if (offset < entry->low)
+    return -1;
+  else if (offset >= entry->high)
+    return 1;
+  else
+    return 0;
+}
+
+/* Find a unit in PU containing OFFSET.  */
+
+static struct unit *
+find_unit (struct unit **pu, size_t units_count, size_t offset)
+{
+  struct unit **u;
+  u = bsearch (&offset, pu, units_count, sizeof (struct unit *), units_search);
+  return u == NULL ? NULL : *u;
+}
+
 /* Compare function_addrs for qsort.  When ranges are nested, make the
    smallest one sort last.  */
 
@@ -1299,7 +1345,7 @@  find_address_ranges (struct backtrace_state *state, uintptr_t base_address,
 		     int is_bigendian, backtrace_error_callback error_callback,
 		     void *data, struct unit *u,
 		     struct unit_addrs_vector *addrs,
-		     struct dwarf_data *altlink)
+		     struct dwarf_data *altlink, enum dwarf_tag *unit_tag)
 {
   while (unit_buf->left > 0)
     {
@@ -1322,6 +1368,9 @@  find_address_ranges (struct backtrace_state *state, uintptr_t base_address,
       if (abbrev == NULL)
 	return 0;
 
+      if (unit_tag != NULL)
+	*unit_tag = abbrev->tag;
+
       lowpc = 0;
       have_lowpc = 0;
       highpc = 0;
@@ -1434,7 +1483,7 @@  find_address_ranges (struct backtrace_state *state, uintptr_t base_address,
 				    dwarf_str, dwarf_str_size,
 				    dwarf_ranges, dwarf_ranges_size,
 				    is_bigendian, error_callback, data,
-				    u, addrs, altlink))
+				    u, addrs, altlink, NULL))
 	    return 0;
 	}
     }
@@ -1454,6 +1503,7 @@  build_address_map (struct backtrace_state *state, uintptr_t base_address,
 		   const unsigned char *dwarf_str, size_t dwarf_str_size,
 		   int is_bigendian, backtrace_error_callback error_callback,
 		   void *data, struct unit_addrs_vector *addrs,
+		   struct unit_vector *unit_vec,
 		   struct dwarf_data *altlink)
 {
   struct dwarf_buf info;
@@ -1462,9 +1512,12 @@  build_address_map (struct backtrace_state *state, uintptr_t base_address,
   size_t i;
   struct unit **pu;
   size_t prev_addrs_count;
+  size_t unit_offset = 0;
 
   memset (&addrs->vec, 0, sizeof addrs->vec);
+  memset (&unit_vec->vec, 0, sizeof unit_vec->vec);
   addrs->count = 0;
+  unit_vec->count = 0;
   prev_addrs_count = 0;
 
   /* Read through the .debug_info section.  FIXME: Should we use the
@@ -1493,6 +1546,7 @@  build_address_map (struct backtrace_state *state, uintptr_t base_address,
       uint64_t abbrev_offset;
       int addrsize;
       struct unit *u;
+      enum dwarf_tag unit_tag;
 
       if (info.reported_underflow)
 	goto fail;
@@ -1535,6 +1589,9 @@  build_address_map (struct backtrace_state *state, uintptr_t base_address,
 
       addrsize = read_byte (&unit_buf);
 
+      u->low = unit_offset;
+      unit_offset += len + (is_dwarf64 ? 12 : 4);
+      u->high = unit_offset;
       u->unit_data = unit_buf.buf;
       u->unit_data_len = unit_buf.left;
       u->unit_data_offset = unit_buf.buf - unit_data_start;
@@ -1556,13 +1613,13 @@  build_address_map (struct backtrace_state *state, uintptr_t base_address,
 				dwarf_str, dwarf_str_size,
 				dwarf_ranges, dwarf_ranges_size,
 				is_bigendian, error_callback, data,
-				u, addrs, altlink))
+				u, addrs, altlink, &unit_tag))
 	goto fail;
 
       if (unit_buf.reported_underflow)
 	goto fail;
 
-      if (addrs->count == prev_addrs_count)
+      if (unit_tag != DW_TAG_partial_unit && addrs->count == prev_addrs_count)
 	{
 	  --units_count;
 	  units.size -= sizeof (u);
@@ -1576,11 +1633,8 @@  build_address_map (struct backtrace_state *state, uintptr_t base_address,
   if (info.reported_underflow)
     goto fail;
 
-  // We only kept the list of units to free them on failure.  On
-  // success the units are retained, pointed to by the entries in
-  // addrs.
-  backtrace_vector_free (state, &units, error_callback, data);
-
+  unit_vec->vec = units;
+  unit_vec->count = units_count;
   return 1;
 
  fail:
@@ -2144,6 +2198,22 @@  read_referenced_name_1 (struct dwarf_data *ddata, struct unit *u,
       || val->encoding == ATTR_VAL_REF_UNIT)
     return read_referenced_name (ddata, u, val->u.uint, error_callback, data);
 
+  if (val->encoding == ATTR_VAL_REF_ALT_INFO)
+    {
+      struct unit *alt_unit
+	= find_unit (ddata->altlink->units, ddata->altlink->units_count,
+		     val->u.uint);
+      if (alt_unit == NULL)
+	{
+	  error_callback (data,
+			  "Could not find unit for DW_FORM_GNU_ref_alt", 0);
+	  return NULL;
+	}
+      uint64_t unit_offset = val->u.uint - alt_unit->low;
+      return read_referenced_name (ddata->altlink, alt_unit, unit_offset,
+				   error_callback, data);
+    }
+
   return NULL;
 }
 
@@ -3028,21 +3098,30 @@  build_dwarf_data (struct backtrace_state *state,
   struct unit_addrs_vector addrs_vec;
   struct unit_addrs *addrs;
   size_t addrs_count;
+  struct unit_vector units_vec;
+  struct unit **units;
+  size_t units_count;
   struct dwarf_data *fdata;
 
   if (!build_address_map (state, base_address, dwarf_info, dwarf_info_size,
 			  dwarf_abbrev, dwarf_abbrev_size, dwarf_ranges,
 			  dwarf_ranges_size, dwarf_str, dwarf_str_size,
 			  is_bigendian, error_callback, data, &addrs_vec,
+			  &units_vec,
 			  altlink))
     return NULL;
 
   if (!backtrace_vector_release (state, &addrs_vec.vec, error_callback, data))
     return NULL;
+  if (!backtrace_vector_release (state, &units_vec.vec, error_callback, data))
+    return NULL;
   addrs = (struct unit_addrs *) addrs_vec.vec.base;
+  units = (struct unit **) units_vec.vec.base;
   addrs_count = addrs_vec.count;
+  units_count = units_vec.count;
   backtrace_qsort (addrs, addrs_count, sizeof (struct unit_addrs),
 		   unit_addrs_compare);
+  /* No qsort for units required, already sorted.  */
 
   fdata = ((struct dwarf_data *)
 	   backtrace_alloc (state, sizeof (struct dwarf_data),
@@ -3055,6 +3134,8 @@  build_dwarf_data (struct backtrace_state *state,
   fdata->base_address = base_address;
   fdata->addrs = addrs;
   fdata->addrs_count = addrs_count;
+  fdata->units = units;
+  fdata->units_count = units_count;
   fdata->dwarf_info = dwarf_info;
   fdata->dwarf_info_size = dwarf_info_size;
   fdata->dwarf_line = dwarf_line;