diff mbox series

[3/3] stdlib: The qsort implementation needs to use heapsort in more cases

Message ID dc4a606da98f281312bad7a1018a50a71a0d7def.1700246487.git.fweimer@redhat.com
State New
Headers show
Series Various qsort fixes | expand

Commit Message

Florian Weimer Nov. 17, 2023, 6:44 p.m. UTC
The existing logic avoided internal stack overflow.  To avoid
a denial-of-service condition with adversarial input, it is necessary
to fall over to heapsort if tail-recursing deeply, too, which does
not result in a deep stack of pending partitions.

The new test stdlib/tst-qsort5 is based on Douglas McIlroy's paper
on this subject.
---
 stdlib/Makefile     |   3 +
 stdlib/qsort.c      |  17 +++--
 stdlib/tst-qsort5.c | 171 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 187 insertions(+), 4 deletions(-)
 create mode 100644 stdlib/tst-qsort5.c

Comments

Adhemerval Zanella Netto Nov. 20, 2023, 8:32 p.m. UTC | #1
On 17/11/23 15:44, Florian Weimer wrote:
> The existing logic avoided internal stack overflow.  To avoid
> a denial-of-service condition with adversarial input, it is necessary
> to fall over to heapsort if tail-recursing deeply, too, which does
> not result in a deep stack of pending partitions.
> 
> The new test stdlib/tst-qsort5 is based on Douglas McIlroy's paper
> on this subject.

LGTM, thanks.

Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>

> ---
>  stdlib/Makefile     |   3 +
>  stdlib/qsort.c      |  17 +++--
>  stdlib/tst-qsort5.c | 171 ++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 187 insertions(+), 4 deletions(-)
>  create mode 100644 stdlib/tst-qsort5.c
> 
> diff --git a/stdlib/Makefile b/stdlib/Makefile
> index 48688f6a27..6194d1cb22 100644
> --- a/stdlib/Makefile
> +++ b/stdlib/Makefile
> @@ -215,6 +215,7 @@ tests := \
>    tst-qsort \
>    tst-qsort2 \
>    tst-qsort3 \
> +  tst-qsort5 \
>    tst-quick_exit \
>    tst-rand48 \
>    tst-rand48-2 \
> @@ -512,3 +513,5 @@ $(objpfx)tst-setcontext3.out: tst-setcontext3.sh $(objpfx)tst-setcontext3
>  		 '$(run-program-env)' '$(test-program-prefix-after-env)' \
>  		 $(common-objpfx)stdlib/; \
>  	$(evaluate-test)
> +
> +$(objpfx)tst-qsort5: $(libm)
> diff --git a/stdlib/qsort.c b/stdlib/qsort.c
> index a2f9e916ef..be01fb5598 100644
> --- a/stdlib/qsort.c
> +++ b/stdlib/qsort.c
> @@ -389,14 +389,23 @@ __qsort_r (void *const pbase, size_t total_elems, size_t size,
>              {
>                if ((size_t) (hi - left_ptr) <= max_thresh)
>  		/* Ignore both small partitions. */
> -                top = pop (top, &lo, &hi, &depth);
> +		{
> +		  top = pop (top, &lo, &hi, &depth);
> +		  --depth;
> +		}
>                else
> -		/* Ignore small left partition. */
> -                lo = left_ptr;
> +		{
> +		  /* Ignore small left partition. */
> +		  lo = left_ptr;
> +		  --depth;
> +		}
>              }
>            else if ((size_t) (hi - left_ptr) <= max_thresh)
>  	    /* Ignore small right partition. */
> -            hi = right_ptr;
> +		{
> +		  hi = right_ptr;
> +		  --depth;
> +		}
>            else if ((right_ptr - lo) > (hi - left_ptr))
>              {
>  	      /* Push larger left partition indices. */

Ok (kinda embarrassing I missed this obvious depth update...).

> diff --git a/stdlib/tst-qsort5.c b/stdlib/tst-qsort5.c
> new file mode 100644
> index 0000000000..d3a88c30f8
> --- /dev/null
> +++ b/stdlib/tst-qsort5.c
> @@ -0,0 +1,171 @@
> +/* Adversarial test for qsort_r.
> +   Copyright (C) 2023 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +/* The approach follows Douglas McIlroy, A Killer Adversary for
> +   Quicksort.  Software—Practice and Experience 29 (1999) 341-344.
> +   Downloaded <http://www.cs.dartmouth.edu/~doug/mdmspe.pdf>
> +   (2023-11-17).  */
> +
> +#include <math.h>
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include <support/check.h>
> +#include <support/support.h>
> +
> +struct context
> +{
> +  /* Called the gas value in the paper.  This value is larger than all
> +     other values (length minus one will do), so comparison with any
> +     decided value has a known result.  */
> +  int undecided_value;
> +
> +  /* If comparing undecided values, one of them as to be assigned a
> +     value to ensure consistency with future comparisons.  This is the
> +     value that will be used.  Starts out at zero.  */
> +  int next_decided;
> +
> +  /* Used to trick pivot selection.  Deciding the value for the last
> +     seen undcided value in a decided/undecided comparison happens
> +     to trick the many qsort implementations.  */
> +  int last_undecided_index;
> +
> +  /* This array contains the actually asigned values.  The call to
> +     qsort_r sorts a different array that contains indices into this
> +     array.  */
> +  int *decided_values;
> +};
> +
> +static int
> +compare_opponent (const void *l1, const void *r1, void *ctx1)
> +{
> +  const int *l = l1;
> +  const int *r = r1;
> +  struct context *ctx = ctx1;
> +  int rvalue = ctx->decided_values[*r];
> +  int lvalue = ctx->decided_values[*l];
> +
> +  if (lvalue == ctx->undecided_value)
> +    {
> +      if (rvalue == ctx->undecided_value)
> +        {
> +          /* Both values are undecided.  In this case, make a decision
> +             for the last-used undecided value.  This is tweak is very
> +             specific to quicksort.  */
> +          if (*l == ctx->last_undecided_index)
> +            {
> +              ctx->decided_values[*l] = ctx->next_decided;
> +              ++ctx->next_decided;
> +              /* The undecided value or *r is greater.  */
> +              return -1;
> +            }
> +          else
> +            {
> +              ctx->decided_values[*r] = ctx->next_decided;
> +              ++ctx->next_decided;
> +              /* The undecided value for *l is greater.  */
> +              return 1;
> +            }
> +        }
> +      else
> +        {
> +          ctx->last_undecided_index = *l;
> +          return 1;
> +        }
> +    }
> +  else
> +    {
> +      /* *l is a decided value.  */
> +      if (rvalue == ctx->undecided_value)
> +        {
> +          ctx->last_undecided_index = *r;
> +          /* The undecided value for *r is greater.  */
> +          return -1;
> +        }
> +      else
> +        return lvalue - rvalue;
> +    }
> +}
> +
> +/* Return a pointer to the adversarial permutation of length N.  */
> +static int *
> +create_permutation (size_t n)
> +{
> +  struct context ctx =
> +    {
> +      .undecided_value = n - 1, /* Larger than all other values.  */
> +      .decided_values = xcalloc (n, sizeof (int)),
> +    };
> +  for (size_t i = 0; i < n; ++i)
> +    ctx.decided_values[i] = ctx.undecided_value;
> +  int *scratch = xcalloc (n, sizeof (int));
> +  for (size_t i = 0; i < n; ++i)
> +    scratch[i] = i;
> +  qsort_r (scratch, n, sizeof (*scratch), compare_opponent, &ctx);
> +  free (scratch);
> +  return ctx.decided_values;
> +}
> +
> +/* Callback function for qsort which counts the number of invocations
> +   in *CLOSURE.  */
> +static int
> +compare_counter (const void *l1, const void *r1, void *closure)
> +{
> +  const int *l = l1;
> +  const int *r = r1;
> +  unsigned long long int *counter = closure;
> +  ++*counter;
> +  return *l - *r;
> +}
> +
> +/* Count the comparisons required for an adversarial permutation of
> +   length N.  */
> +static unsigned long long int
> +count_comparisons (size_t n)
> +{
> +  int *array = create_permutation (n);
> +  unsigned long long int counter = 0;
> +  qsort_r (array, n, sizeof (*array), compare_counter, &counter);
> +  free (array);
> +  return counter;
> +}
> +
> +/* Check the scaling factor for one adversarial permutation of length
> +   N, and report some statistics.  */
> +static void
> +check_one_n (size_t n)
> +{
> +  unsigned long long int count = count_comparisons (n);
> +  double factor = count / (n * log (count));
> +  printf ("info: length %zu: %llu comparisons ~ %f * n * log (n)\n",
> +          n, count, factor);
> +  /* This is an arbitrary factor which is true for the current
> +     implementation across a wide range of sizes.  */
> +  TEST_VERIFY (factor <= 4.5);
> +}
> +
> +static int
> +do_test (void)
> +{
> +  check_one_n (100);
> +  check_one_n (1000);
> +  for (int i = 1; i <= 15; ++i)
> +    check_one_n (i * 10 * 1000);
> +  return 0;
> +}
> +
> +#include <support/test-driver.c>

Ok.
diff mbox series

Patch

diff --git a/stdlib/Makefile b/stdlib/Makefile
index 48688f6a27..6194d1cb22 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -215,6 +215,7 @@  tests := \
   tst-qsort \
   tst-qsort2 \
   tst-qsort3 \
+  tst-qsort5 \
   tst-quick_exit \
   tst-rand48 \
   tst-rand48-2 \
@@ -512,3 +513,5 @@  $(objpfx)tst-setcontext3.out: tst-setcontext3.sh $(objpfx)tst-setcontext3
 		 '$(run-program-env)' '$(test-program-prefix-after-env)' \
 		 $(common-objpfx)stdlib/; \
 	$(evaluate-test)
+
+$(objpfx)tst-qsort5: $(libm)
diff --git a/stdlib/qsort.c b/stdlib/qsort.c
index a2f9e916ef..be01fb5598 100644
--- a/stdlib/qsort.c
+++ b/stdlib/qsort.c
@@ -389,14 +389,23 @@  __qsort_r (void *const pbase, size_t total_elems, size_t size,
             {
               if ((size_t) (hi - left_ptr) <= max_thresh)
 		/* Ignore both small partitions. */
-                top = pop (top, &lo, &hi, &depth);
+		{
+		  top = pop (top, &lo, &hi, &depth);
+		  --depth;
+		}
               else
-		/* Ignore small left partition. */
-                lo = left_ptr;
+		{
+		  /* Ignore small left partition. */
+		  lo = left_ptr;
+		  --depth;
+		}
             }
           else if ((size_t) (hi - left_ptr) <= max_thresh)
 	    /* Ignore small right partition. */
-            hi = right_ptr;
+		{
+		  hi = right_ptr;
+		  --depth;
+		}
           else if ((right_ptr - lo) > (hi - left_ptr))
             {
 	      /* Push larger left partition indices. */
diff --git a/stdlib/tst-qsort5.c b/stdlib/tst-qsort5.c
new file mode 100644
index 0000000000..d3a88c30f8
--- /dev/null
+++ b/stdlib/tst-qsort5.c
@@ -0,0 +1,171 @@ 
+/* Adversarial test for qsort_r.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* The approach follows Douglas McIlroy, A Killer Adversary for
+   Quicksort.  Software—Practice and Experience 29 (1999) 341-344.
+   Downloaded <http://www.cs.dartmouth.edu/~doug/mdmspe.pdf>
+   (2023-11-17).  */
+
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <support/check.h>
+#include <support/support.h>
+
+struct context
+{
+  /* Called the gas value in the paper.  This value is larger than all
+     other values (length minus one will do), so comparison with any
+     decided value has a known result.  */
+  int undecided_value;
+
+  /* If comparing undecided values, one of them as to be assigned a
+     value to ensure consistency with future comparisons.  This is the
+     value that will be used.  Starts out at zero.  */
+  int next_decided;
+
+  /* Used to trick pivot selection.  Deciding the value for the last
+     seen undcided value in a decided/undecided comparison happens
+     to trick the many qsort implementations.  */
+  int last_undecided_index;
+
+  /* This array contains the actually asigned values.  The call to
+     qsort_r sorts a different array that contains indices into this
+     array.  */
+  int *decided_values;
+};
+
+static int
+compare_opponent (const void *l1, const void *r1, void *ctx1)
+{
+  const int *l = l1;
+  const int *r = r1;
+  struct context *ctx = ctx1;
+  int rvalue = ctx->decided_values[*r];
+  int lvalue = ctx->decided_values[*l];
+
+  if (lvalue == ctx->undecided_value)
+    {
+      if (rvalue == ctx->undecided_value)
+        {
+          /* Both values are undecided.  In this case, make a decision
+             for the last-used undecided value.  This is tweak is very
+             specific to quicksort.  */
+          if (*l == ctx->last_undecided_index)
+            {
+              ctx->decided_values[*l] = ctx->next_decided;
+              ++ctx->next_decided;
+              /* The undecided value or *r is greater.  */
+              return -1;
+            }
+          else
+            {
+              ctx->decided_values[*r] = ctx->next_decided;
+              ++ctx->next_decided;
+              /* The undecided value for *l is greater.  */
+              return 1;
+            }
+        }
+      else
+        {
+          ctx->last_undecided_index = *l;
+          return 1;
+        }
+    }
+  else
+    {
+      /* *l is a decided value.  */
+      if (rvalue == ctx->undecided_value)
+        {
+          ctx->last_undecided_index = *r;
+          /* The undecided value for *r is greater.  */
+          return -1;
+        }
+      else
+        return lvalue - rvalue;
+    }
+}
+
+/* Return a pointer to the adversarial permutation of length N.  */
+static int *
+create_permutation (size_t n)
+{
+  struct context ctx =
+    {
+      .undecided_value = n - 1, /* Larger than all other values.  */
+      .decided_values = xcalloc (n, sizeof (int)),
+    };
+  for (size_t i = 0; i < n; ++i)
+    ctx.decided_values[i] = ctx.undecided_value;
+  int *scratch = xcalloc (n, sizeof (int));
+  for (size_t i = 0; i < n; ++i)
+    scratch[i] = i;
+  qsort_r (scratch, n, sizeof (*scratch), compare_opponent, &ctx);
+  free (scratch);
+  return ctx.decided_values;
+}
+
+/* Callback function for qsort which counts the number of invocations
+   in *CLOSURE.  */
+static int
+compare_counter (const void *l1, const void *r1, void *closure)
+{
+  const int *l = l1;
+  const int *r = r1;
+  unsigned long long int *counter = closure;
+  ++*counter;
+  return *l - *r;
+}
+
+/* Count the comparisons required for an adversarial permutation of
+   length N.  */
+static unsigned long long int
+count_comparisons (size_t n)
+{
+  int *array = create_permutation (n);
+  unsigned long long int counter = 0;
+  qsort_r (array, n, sizeof (*array), compare_counter, &counter);
+  free (array);
+  return counter;
+}
+
+/* Check the scaling factor for one adversarial permutation of length
+   N, and report some statistics.  */
+static void
+check_one_n (size_t n)
+{
+  unsigned long long int count = count_comparisons (n);
+  double factor = count / (n * log (count));
+  printf ("info: length %zu: %llu comparisons ~ %f * n * log (n)\n",
+          n, count, factor);
+  /* This is an arbitrary factor which is true for the current
+     implementation across a wide range of sizes.  */
+  TEST_VERIFY (factor <= 4.5);
+}
+
+static int
+do_test (void)
+{
+  check_one_n (100);
+  check_one_n (1000);
+  for (int i = 1; i <= 15; ++i)
+    check_one_n (i * 10 * 1000);
+  return 0;
+}
+
+#include <support/test-driver.c>