diff mbox series

libbacktrace patch committed: Avoid ambiguous binary search

Message ID CAOyqgcXcsy+KOnLvjwRw3HCfFm4eedE9ZdVThS5WNCC12rGWmQ@mail.gmail.com
State New
Headers show
Series libbacktrace patch committed: Avoid ambiguous binary search | expand

Commit Message

Ian Lance Taylor Sept. 9, 2020, 1:22 a.m. UTC
This patch to libbacktrace avoids ambiguous binary searches.
Searching for a range match can cause the search order to not match
the sort order, which can cause libbacktrace to miss matching entries.
This patch allocates an extra entry at the end of function_addrs and
unit_addrs vectors, so that we can safely compare to the next entry
when searching.  It adjusts the matching code accordingly.  This fixes
https://github.com/ianlancetaylor/libbacktrace/issues/44.
Bootstrapped and ran libbacktrace and libgo tests on
x86_64-pc-linux-gnu.  Committed to mainline.

Ian


* dwarf.c (function_addrs_search): Compare against the next entry
low address, not the high address.
(unit_addrs_search): Likewise.
(build_address_map): Add a trailing unit_addrs.
(read_function_entry): Add a trailing function_addrs.
(read_function_info): Likewise.
(report_inlined_functions): Search backward for function_addrs
match.
(dwarf_lookup_pc): Search backward for unit_addrs and
function_addrs matches.

Comments

Ian Lance Taylor Sept. 23, 2020, 12:28 a.m. UTC | #1
On Tue, Sep 8, 2020 at 6:22 PM Ian Lance Taylor <iant@golang.org> wrote:
>
> This patch to libbacktrace avoids ambiguous binary searches.
> Searching for a range match can cause the search order to not match
> the sort order, which can cause libbacktrace to miss matching entries.
> This patch allocates an extra entry at the end of function_addrs and
> unit_addrs vectors, so that we can safely compare to the next entry
> when searching.  It adjusts the matching code accordingly.  This fixes
> https://github.com/ianlancetaylor/libbacktrace/issues/44.
> Bootstrapped and ran libbacktrace and libgo tests on
> x86_64-pc-linux-gnu.  Committed to mainline.

I realized that this isn't quite right for the case where the PC value
we are looking up is equal to the low value in the array we are
searching.  In that case to ensure consistent results we have to step
forward to the end of the sequence of identical low values, and only
then step backward.  This patch implements that.  It also ensures that
the right thing happens if someone decides to look up the PC value -1.
Bootstrapped and ran libbacktrace and Go tests on x86_64-pc-linux-gnu.
Committed to mainline.

Ian

* dwarf.c (report_inlined_functions): Handle PC == -1 and PC ==
p->low.
(dwarf_lookup_pc): Likewise.
diff --git a/libbacktrace/dwarf.c b/libbacktrace/dwarf.c
index 386701bffea..582f34bc816 100644
--- a/libbacktrace/dwarf.c
+++ b/libbacktrace/dwarf.c
@@ -3558,6 +3558,11 @@ report_inlined_functions (uintptr_t pc, struct function *function,
   if (function->function_addrs_count == 0)
     return 0;
 
+  /* Our search isn't safe if pc == -1, as that is the sentinel
+     value.  */
+  if (pc + 1 == 0)
+    return 0;
+
   p = ((struct function_addrs *)
        bsearch (&pc, function->function_addrs,
 		function->function_addrs_count,
@@ -3567,9 +3572,12 @@ report_inlined_functions (uintptr_t pc, struct function *function,
     return 0;
 
   /* Here pc >= p->low && pc < (p + 1)->low.  The function_addrs are
-     sorted by low, so we are at the end of a range of function_addrs
-     with the same low alue.  Walk backward and use the first range
-     that includes pc.  */
+     sorted by low, so if pc > p->low we are at the end of a range of
+     function_addrs with the same low value.  If pc == p->low walk
+     forward to the end of the range with that low value.  Then walk
+     backward and use the first range that includes pc.  */
+  while (pc == (p + 1)->low)
+    ++p;
   match = NULL;
   while (1)
     {
@@ -3636,8 +3644,10 @@ dwarf_lookup_pc (struct backtrace_state *state, struct dwarf_data *ddata,
 
   *found = 1;
 
-  /* Find an address range that includes PC.  */
-  entry = (ddata->addrs_count == 0
+  /* Find an address range that includes PC.  Our search isn't safe if
+     PC == -1, as we use that as a sentinel value, so skip the search
+     in that case.  */
+  entry = (ddata->addrs_count == 0 || pc + 1 == 0
 	   ? NULL
 	   : bsearch (&pc, ddata->addrs, ddata->addrs_count,
 		      sizeof (struct unit_addrs), unit_addrs_search));
@@ -3649,9 +3659,12 @@ dwarf_lookup_pc (struct backtrace_state *state, struct dwarf_data *ddata,
     }
 
   /* Here pc >= entry->low && pc < (entry + 1)->low.  The unit_addrs
-     are sorted by low, so we are at the end of a range of unit_addrs
-     with the same low value.  Walk backward and use the first range
-     that includes pc.  */
+     are sorted by low, so if pc > p->low we are at the end of a range
+     of unit_addrs with the same low value.  If pc == p->low walk
+     forward to the end of the range with that low value.  Then walk
+     backward and use the first range that includes pc.  */
+  while (pc == (entry + 1)->low)
+    ++entry;
   found_entry = 0;
   while (1)
     {
@@ -3832,9 +3845,12 @@ dwarf_lookup_pc (struct backtrace_state *state, struct dwarf_data *ddata,
     return callback (data, pc, ln->filename, ln->lineno, NULL);
 
   /* Here pc >= p->low && pc < (p + 1)->low.  The function_addrs are
-     sorted by low, so we are at the end of a range of function_addrs
-     with the same low alue.  Walk backward and use the first range
-     that includes pc.  */
+     sorted by low, so if pc > p->low we are at the end of a range of
+     function_addrs with the same low value.  If pc == p->low walk
+     forward to the end of the range with that low value.  Then walk
+     backward and use the first range that includes pc.  */
+  while (pc == (p + 1)->low)
+    ++p;
   fmatch = NULL;
   while (1)
     {
diff mbox series

Patch

diff --git a/libbacktrace/dwarf.c b/libbacktrace/dwarf.c
index 006c8181622..386701bffea 100644
--- a/libbacktrace/dwarf.c
+++ b/libbacktrace/dwarf.c
@@ -1164,9 +1164,11 @@  function_addrs_compare (const void *v1, const void *v2)
   return strcmp (a1->function->name, a2->function->name);
 }
 
-/* Compare a PC against a function_addrs for bsearch.  Note that if
-   there are multiple ranges containing PC, which one will be returned
-   is unpredictable.  We compensate for that in dwarf_fileline.  */
+/* Compare a PC against a function_addrs for bsearch.  We always
+   allocate an entra entry at the end of the vector, so that this
+   routine can safely look at the next entry.  Note that if there are
+   multiple ranges containing PC, which one will be returned is
+   unpredictable.  We compensate for that in dwarf_fileline.  */
 
 static int
 function_addrs_search (const void *vkey, const void *ventry)
@@ -1178,7 +1180,7 @@  function_addrs_search (const void *vkey, const void *ventry)
   pc = *key;
   if (pc < entry->low)
     return -1;
-  else if (pc >= entry->high)
+  else if (pc > (entry + 1)->low)
     return 1;
   else
     return 0;
@@ -1249,9 +1251,11 @@  unit_addrs_compare (const void *v1, const void *v2)
   return 0;
 }
 
-/* Compare a PC against a unit_addrs for bsearch.  Note that if there
-   are multiple ranges containing PC, which one will be returned is
-   unpredictable.  We compensate for that in dwarf_fileline.  */
+/* Compare a PC against a unit_addrs for bsearch.  We always allocate
+   an entry entry at the end of the vector, so that this routine can
+   safely look at the next entry.  Note that if there are multiple
+   ranges containing PC, which one will be returned is unpredictable.
+   We compensate for that in dwarf_fileline.  */
 
 static int
 unit_addrs_search (const void *vkey, const void *ventry)
@@ -1263,7 +1267,7 @@  unit_addrs_search (const void *vkey, const void *ventry)
   pc = *key;
   if (pc < entry->low)
     return -1;
-  else if (pc >= entry->high)
+  else if (pc > (entry + 1)->low)
     return 1;
   else
     return 0;
@@ -2091,6 +2095,7 @@  build_address_map (struct backtrace_state *state, uintptr_t base_address,
   size_t i;
   struct unit **pu;
   size_t unit_offset = 0;
+  struct unit_addrs *pa;
 
   memset (&addrs->vec, 0, sizeof addrs->vec);
   memset (&unit_vec->vec, 0, sizeof unit_vec->vec);
@@ -2231,6 +2236,17 @@  build_address_map (struct backtrace_state *state, uintptr_t base_address,
   if (info.reported_underflow)
     goto fail;
 
+  /* Add a trailing addrs entry, but don't include it in addrs->count.  */
+  pa = ((struct unit_addrs *)
+	backtrace_vector_grow (state, sizeof (struct unit_addrs),
+			       error_callback, data, &addrs->vec));
+  if (pa == NULL)
+    goto fail;
+  pa->low = 0;
+  --pa->low;
+  pa->high = pa->low;
+  pa->u = NULL;
+
   unit_vec->vec = units;
   unit_vec->count = units_count;
   return 1;
@@ -3404,8 +3420,23 @@  read_function_entry (struct backtrace_state *state, struct dwarf_data *ddata,
 
 	      if (fvec.count > 0)
 		{
+		  struct function_addrs *p;
 		  struct function_addrs *faddrs;
 
+		  /* Allocate a trailing entry, but don't include it
+		     in fvec.count.  */
+		  p = ((struct function_addrs *)
+		       backtrace_vector_grow (state,
+					      sizeof (struct function_addrs),
+					      error_callback, data,
+					      &fvec.vec));
+		  if (p == NULL)
+		    return 0;
+		  p->low = 0;
+		  --p->low;
+		  p->high = p->low;
+		  p->function = NULL;
+
 		  if (!backtrace_vector_release (state, &fvec.vec,
 						 error_callback, data))
 		    return 0;
@@ -3439,6 +3470,7 @@  read_function_info (struct backtrace_state *state, struct dwarf_data *ddata,
   struct function_vector lvec;
   struct function_vector *pfvec;
   struct dwarf_buf unit_buf;
+  struct function_addrs *p;
   struct function_addrs *addrs;
   size_t addrs_count;
 
@@ -3470,6 +3502,18 @@  read_function_info (struct backtrace_state *state, struct dwarf_data *ddata,
   if (pfvec->count == 0)
     return;
 
+  /* Allocate a trailing entry, but don't include it in
+     pfvec->count.  */
+  p = ((struct function_addrs *)
+       backtrace_vector_grow (state, sizeof (struct function_addrs),
+			      error_callback, data, &pfvec->vec));
+  if (p == NULL)
+    return;
+  p->low = 0;
+  --p->low;
+  p->high = p->low;
+  p->function = NULL;
+
   addrs_count = pfvec->count;
 
   if (fvec == NULL)
@@ -3506,30 +3550,46 @@  report_inlined_functions (uintptr_t pc, struct function *function,
 			  backtrace_full_callback callback, void *data,
 			  const char **filename, int *lineno)
 {
-  struct function_addrs *function_addrs;
+  struct function_addrs *p;
+  struct function_addrs *match;
   struct function *inlined;
   int ret;
 
   if (function->function_addrs_count == 0)
     return 0;
 
-  function_addrs = ((struct function_addrs *)
-		    bsearch (&pc, function->function_addrs,
-			     function->function_addrs_count,
-			     sizeof (struct function_addrs),
-			     function_addrs_search));
-  if (function_addrs == NULL)
+  p = ((struct function_addrs *)
+       bsearch (&pc, function->function_addrs,
+		function->function_addrs_count,
+		sizeof (struct function_addrs),
+		function_addrs_search));
+  if (p == NULL)
     return 0;
 
-  while (((size_t) (function_addrs - function->function_addrs) + 1
-	  < function->function_addrs_count)
-	 && pc >= (function_addrs + 1)->low
-	 && pc < (function_addrs + 1)->high)
-    ++function_addrs;
+  /* Here pc >= p->low && pc < (p + 1)->low.  The function_addrs are
+     sorted by low, so we are at the end of a range of function_addrs
+     with the same low alue.  Walk backward and use the first range
+     that includes pc.  */
+  match = NULL;
+  while (1)
+    {
+      if (pc < p->high)
+	{
+	  match = p;
+	  break;
+	}
+      if (p == function->function_addrs)
+	break;
+      if ((p - 1)->low < p->low)
+	break;
+      --p;
+    }
+  if (match == NULL)
+    return 0;
 
   /* We found an inlined call.  */
 
-  inlined = function_addrs->function;
+  inlined = match->function;
 
   /* Report any calls inlined into this one.  */
   ret = report_inlined_functions (pc, inlined, callback, data,
@@ -3562,11 +3622,13 @@  dwarf_lookup_pc (struct backtrace_state *state, struct dwarf_data *ddata,
 		 int *found)
 {
   struct unit_addrs *entry;
+  int found_entry;
   struct unit *u;
   int new_data;
   struct line *lines;
   struct line *ln;
-  struct function_addrs *function_addrs;
+  struct function_addrs *p;
+  struct function_addrs *fmatch;
   struct function *function;
   const char *filename;
   int lineno;
@@ -3586,14 +3648,29 @@  dwarf_lookup_pc (struct backtrace_state *state, struct dwarf_data *ddata,
       return 0;
     }
 
-  /* If there are multiple ranges that contain PC, use the last one,
-     in order to produce predictable results.  If we assume that all
-     ranges are properly nested, then the last range will be the
-     smallest one.  */
-  while ((size_t) (entry - ddata->addrs) + 1 < ddata->addrs_count
-	 && pc >= (entry + 1)->low
-	 && pc < (entry + 1)->high)
-    ++entry;
+  /* Here pc >= entry->low && pc < (entry + 1)->low.  The unit_addrs
+     are sorted by low, so we are at the end of a range of unit_addrs
+     with the same low value.  Walk backward and use the first range
+     that includes pc.  */
+  found_entry = 0;
+  while (1)
+    {
+      if (pc < entry->high)
+	{
+	  found_entry = 1;
+	  break;
+	}
+      if (entry == ddata->addrs)
+	break;
+      if ((entry - 1)->low < entry->low)
+	break;
+      --entry;
+    }
+  if (!found_entry)
+    {
+      *found = 0;
+      return 0;
+    }
 
   /* We need the lines, lines_count, function_addrs,
      function_addrs_count fields of u.  If they are not set, we need
@@ -3629,6 +3706,7 @@  dwarf_lookup_pc (struct backtrace_state *state, struct dwarf_data *ddata,
   new_data = 0;
   if (lines == NULL)
     {
+      struct function_addrs *function_addrs;
       size_t function_addrs_count;
       struct line_header lhdr;
       size_t count;
@@ -3745,24 +3823,36 @@  dwarf_lookup_pc (struct backtrace_state *state, struct dwarf_data *ddata,
   if (entry->u->function_addrs_count == 0)
     return callback (data, pc, ln->filename, ln->lineno, NULL);
 
-  function_addrs = ((struct function_addrs *)
-		    bsearch (&pc, entry->u->function_addrs,
-			     entry->u->function_addrs_count,
-			     sizeof (struct function_addrs),
-			     function_addrs_search));
-  if (function_addrs == NULL)
+  p = ((struct function_addrs *)
+       bsearch (&pc, entry->u->function_addrs,
+		entry->u->function_addrs_count,
+		sizeof (struct function_addrs),
+		function_addrs_search));
+  if (p == NULL)
     return callback (data, pc, ln->filename, ln->lineno, NULL);
 
-  /* If there are multiple function ranges that contain PC, use the
-     last one, in order to produce predictable results.  */
-
-  while (((size_t) (function_addrs - entry->u->function_addrs + 1)
-	  < entry->u->function_addrs_count)
-	 && pc >= (function_addrs + 1)->low
-	 && pc < (function_addrs + 1)->high)
-    ++function_addrs;
+  /* Here pc >= p->low && pc < (p + 1)->low.  The function_addrs are
+     sorted by low, so we are at the end of a range of function_addrs
+     with the same low alue.  Walk backward and use the first range
+     that includes pc.  */
+  fmatch = NULL;
+  while (1)
+    {
+      if (pc < p->high)
+	{
+	  fmatch = p;
+	  break;
+	}
+      if (p == entry->u->function_addrs)
+	break;
+      if ((p - 1)->low < p->low)
+	break;
+      --p;
+    }
+  if (fmatch == NULL)
+    return callback (data, pc, ln->filename, ln->lineno, NULL);
 
-  function = function_addrs->function;
+  function = fmatch->function;
 
   filename = ln->filename;
   lineno = ln->lineno;