Message ID | 20180510155641.2950-2-amonakov@ispras.ru |
---|---|
State | New |
Headers | show |
Series | Introduce gcc_qsort | expand |
On Thu, 2018-05-10 at 18:56 +0300, Alexander Monakov wrote: > * sort.cc: New file. > * system.h [!CHECKING_P] (qsort): Redirect to gcc_qsort. > * vec.c (qsort_chk): Use gcc_qsort. [...snip...] I'm not a reviewer for this, but there's a lot of fiddly implementation logic here, so maybe this code could use the selftest framework? Maybe, in pseudo-code, something like this: template <typename T> static void test_gcc_sort () { for (creation_strategy in {in-order, backwards}: // and anything else? for (int n = 0; n < some_limit; n++) { make_a_list_of_t (n, creation_strategy) gcc_sort (the_list); assert that the list is sorted; assert that the number of calls to the callback was sane } } void test_gcc_sort_cc () { test_gcc_sort<int, int_comparator> (); test_gcc_sort<long, long_comparator> (); // etc; maybe some custom structs to exercise the deterministic property??? } ...or some such, to quickly get coverage of the various list sizes (which the implementation seems to rely on heavily), in a non-release build. Hope this is constructive Dave
On May 10, 2018 5:56:40 PM GMT+02:00, Alexander Monakov <amonakov@ispras.ru> wrote: > * sort.cc: New file. > * system.h [!CHECKING_P] (qsort): Redirect to gcc_qsort. > * vec.c (qsort_chk): Use gcc_qsort. Just a quick first remark - how about putting this into libiberty? And then name it xqsort? Richard. >--- >gcc/sort.cc | 232 >+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > gcc/system.h | 7 +- > gcc/vec.c | 2 +- > 3 files changed, 238 insertions(+), 3 deletions(-) > create mode 100644 gcc/sort.cc > >diff --git a/gcc/sort.cc b/gcc/sort.cc >new file mode 100644 >index 00000000000..4faf6d45dc6 >--- /dev/null >+++ b/gcc/sort.cc >@@ -0,0 +1,232 @@ >+/* Platform-independent deterministic sort function. >+ Copyright (C) 2018 Free Software Foundation, Inc. >+ Contributed by Alexander Monakov. >+ >+This file is part of GCC. >+ >+GCC is free software; you can redistribute it and/or modify it >+under the terms of the GNU General Public License as published by the >+Free Software Foundation; either version 3, or (at your option) any >+later version. >+ >+GCC is distributed in the hope that it will be useful, but WITHOUT >+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or >+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License >+for more details. >+ >+You should have received a copy of the GNU General Public License >+along with GCC; see the file COPYING3. If not see >+<http://www.gnu.org/licenses/>. */ >+ >+/* This implements a sort function suitable for GCC use cases: >+ - signature-compatible to C qsort, but relaxed contract: >+ - may apply the comparator to elements in a temporary buffer >+ - may abort on allocation failure >+ - deterministic (but not necessarily stable) >+ - fast, especially for common cases (0-5 elements of size 8 or 4) >+ >+ The implementation uses a network sort for up to 5 elements and >+ a merge sort on top of that. Neither stage has branches depending >on >+ comparator result, trading extra arithmetic for branch >mispredictions. */ >+ >+#ifdef GENERATOR_FILE >+#include "bconfig.h" >+#else >+#include "config.h" >+#endif >+ >+#include "system.h" >+ >+#define likely(cond) __builtin_expect ((cond), 1) >+ >+#ifdef __GNUC__ >+#define noinline __attribute__ ((__noinline__)) >+#else >+#define noinline >+#endif >+ >+/* C-style qsort comparator function type. */ >+typedef int cmp_fn (const void *, const void *); >+ >+/* Structure holding read-mostly (read-only in netsort) context. */ >+struct sort_ctx >+{ >+ cmp_fn *cmp; // pointer to comparator >+ char *out; // output buffer >+ size_t n; // number of elements >+ size_t size; // element size >+}; >+ >+/* Helper for netsort. Permute, possibly in-place, 2 or 3 elements, >+ placing E0 to C->OUT, E1 to C->OUT + C->SIZE, and so on. */ >+static void >+reorder23 (sort_ctx *c, char *e0, char *e1, char *e2) >+{ >+#define REORDER_23(SIZE, STRIDE, OFFSET) \ >+do { \ >+ size_t t0, t1; \ >+ memcpy (&t0, e0 + OFFSET, SIZE); \ >+ memcpy (&t1, e1 + OFFSET, SIZE); \ >+ char *out = c->out + OFFSET; \ >+ if (likely (c->n == 3)) \ >+ memcpy (out + 2*STRIDE, e2 + OFFSET, SIZE); \ >+ memcpy (out, &t0, SIZE); out += STRIDE; \ >+ memcpy (out, &t1, SIZE); \ >+} while (0) >+ >+ if (sizeof (size_t) == 8 && likely (c->size == 8)) >+ REORDER_23 (8, 8, 0); >+ else if (likely (c->size == 4)) >+ REORDER_23 (4, 4, 0); >+ else >+ { >+ size_t offset = 0, step = sizeof (size_t); >+ for (; offset + step <= c->size; offset += step) >+ REORDER_23 (step, c->size, offset); >+ for (; offset < c->size; offset++) >+ REORDER_23 (1, c->size, offset); >+ } >+} >+ >+/* Like reorder23, but permute 4 or 5 elements. */ >+static void >+reorder45 (sort_ctx *c, char *e0, char *e1, char *e2, char *e3, char >*e4) >+{ >+#define REORDER_45(SIZE, STRIDE, OFFSET) \ >+do { \ >+ size_t t0, t1, t2, t3; \ >+ memcpy (&t0, e0 + OFFSET, SIZE); \ >+ memcpy (&t1, e1 + OFFSET, SIZE); \ >+ memcpy (&t2, e2 + OFFSET, SIZE); \ >+ memcpy (&t3, e3 + OFFSET, SIZE); \ >+ char *out = c->out + OFFSET; \ >+ if (likely (c->n == 5)) \ >+ memcpy (out + 4*STRIDE, e4 + OFFSET, SIZE); \ >+ memcpy (out, &t0, SIZE); out += STRIDE; \ >+ memcpy (out, &t1, SIZE); out += STRIDE; \ >+ memcpy (out, &t2, SIZE); out += STRIDE; \ >+ memcpy (out, &t3, SIZE); \ >+} while (0) >+ >+ if (sizeof (size_t) == 8 && likely (c->size == 8)) >+ REORDER_45 (8, 8, 0); >+ else if (likely(c->size == 4)) >+ REORDER_45 (4, 4, 0); >+ else >+ { >+ size_t offset = 0, step = sizeof (size_t); >+ for (; offset + step <= c->size; offset += step) >+ REORDER_45 (step, c->size, offset); >+ for (; offset < c->size; offset++) >+ REORDER_45 (1, c->size, offset); >+ } >+} >+ >+/* Helper for netsort. Invoke comparator CMP on E0 and E1. >+ Return E0^E1 if E0 compares less than E1, zero otherwise. >+ This is noinline to avoid code growth and confine invocation >+ to a single call site, assisting indirect branch prediction. */ >+noinline static intptr_t >+cmp1 (char *e0, char *e1, cmp_fn *cmp) >+{ >+ intptr_t x = (intptr_t)e0 ^ (intptr_t)e1; >+ return x & (cmp (e0, e1) >> 31); >+} >+ >+/* Execute network sort on 2 to 5 elements from IN, placing them into >C->OUT. >+ IN may be equal to C->OUT, in which case elements are sorted in >place. */ >+static void >+netsort (char *in, sort_ctx *c) >+{ >+#define CMP(e0, e1) \ >+do { \ >+ intptr_t x = cmp1 (e1, e0, c->cmp); \ >+ e0 = (char *)((intptr_t)e0 ^ x); \ >+ e1 = (char *)((intptr_t)e1 ^ x); \ >+} while (0) >+ >+ char *e0 = in, *e1 = e0 + c->size, *e2 = e1 + c->size; >+ CMP (e0, e1); >+ if (likely (c->n == 3)) >+ { >+ CMP (e1, e2); >+ CMP (e0, e1); >+ } >+ if (c->n <= 3) >+ return reorder23 (c, e0, e1, e2); >+ char *e3 = e2 + c->size, *e4 = e3 + c->size; >+ if (likely (c->n == 5)) >+ { >+ CMP (e3, e4); >+ CMP (e2, e4); >+ } >+ CMP (e2, e3); >+ if (likely (c->n == 5)) >+ { >+ CMP (e0, e3); >+ CMP (e1, e4); >+ } >+ CMP (e0, e2); >+ CMP (e1, e3); >+ CMP (e1, e2); >+ reorder45 (c, e0, e1, e2, e3, e4); >+} >+ >+/* Execute merge sort on N elements from IN, placing them into OUT, >+ using TMP as temporary storage if IN is equal to OUT. >+ This is a stable sort if netsort is used only for 2 or 3 elements. >*/ >+static void >+mergesort (char *in, sort_ctx *c, size_t n, char *out, char *tmp) >+{ >+ if (likely (n <= 5)) >+ { >+ c->out = out; >+ c->n = n; >+ return netsort (in, c); >+ } >+ size_t nl = n / 2, nr = n - nl, sz = nl * c->size; >+ char *mid = in + sz, *r = out + sz, *l = in == out ? tmp : in; >+ /* Sort the right half, outputting to right half of OUT. */ >+ mergesort (mid, c, nr, r, tmp); >+ /* Sort the left half, leaving left half of OUT free. */ >+ mergesort (in, c, nl, l, mid); >+ /* Merge sorted halves given by L, R to [OUT, END). */ >+#define MERGE_ELTSIZE(SIZE) \ >+do { \ >+ intptr_t mr = c->cmp (r, l) >> 31; \ >+ intptr_t lr = (intptr_t)l ^ (intptr_t)r; \ >+ lr = (intptr_t)l ^ (lr & mr); \ >+ out = (char *)memcpy (out, (char *)lr, SIZE); \ >+ out += SIZE; \ >+ r += mr & SIZE; \ >+ if (r == out) return; \ >+ l += ~mr & SIZE; \ >+} while (r != end) >+ >+ if (likely (c->cmp(r, l + (r - out) - c->size) < 0)) >+ { >+ char *end = out + n * c->size; >+ if (sizeof (size_t) == 8 && likely (c->size == 8)) >+ MERGE_ELTSIZE (8); >+ else if (likely (c->size == 4)) >+ MERGE_ELTSIZE (4); >+ else >+ MERGE_ELTSIZE (c->size); >+ } >+ memcpy (out, l, r - out); >+} >+ >+void >+gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp) >+{ >+ if (n < 2) >+ return; >+ char *base = (char *)vbase; >+ sort_ctx c = {cmp, base, n, size}; >+ long long scratch[32]; >+ size_t bufsz = (n / 2) * size; >+ void *buf = bufsz <= sizeof scratch ? scratch : xmalloc (bufsz); >+ mergesort (base, &c, n, base, (char *)buf); >+ if (buf != scratch) >+ free (buf); >+} >diff --git a/gcc/system.h b/gcc/system.h >index 4abc321c71d..88dffccb8ab 100644 >--- a/gcc/system.h >+++ b/gcc/system.h >@@ -1202,11 +1202,14 @@ helper_const_non_const_cast (const char *p) >/* qsort comparator consistency checking: except in release-checking >compilers, >redirect 4-argument qsort calls to qsort_chk; keep 1-argument >invocations >corresponding to vec::qsort (cmp): they use C qsort internally anyway. >*/ >-#if CHECKING_P >+void qsort_chk (void *, size_t, size_t, int (*)(const void *, const >void *)); >+void gcc_qsort (void *, size_t, size_t, int (*)(const void *, const >void *)); > #define PP_5th(a1, a2, a3, a4, a5, ...) a5 > #undef qsort >+#if CHECKING_P >#define qsort(...) PP_5th (__VA_ARGS__, qsort_chk, 3, 2, qsort, 0) >(__VA_ARGS__) >-void qsort_chk (void *, size_t, size_t, int (*)(const void *, const >void *)); >+#else >+#define qsort(...) PP_5th (__VA_ARGS__, gcc_qsort, 3, 2, qsort, 0) >(__VA_ARGS__) > #endif > > #endif /* ! GCC_SYSTEM_H */ >diff --git a/gcc/vec.c b/gcc/vec.c >index 11924a80a2d..2941715a34a 100644 >--- a/gcc/vec.c >+++ b/gcc/vec.c >@@ -215,7 +215,7 @@ void > qsort_chk (void *base, size_t n, size_t size, > int (*cmp)(const void *, const void *)) > { >- (qsort) (base, n, size, cmp); >+ gcc_qsort (base, n, size, cmp); > #if 0 > #define LIM(n) (n) > #else
On Thu, 10 May 2018, Richard Biener wrote: > > Just a quick first remark - how about putting this into libiberty? And then name it xqsort? I'm not sure. It has a weaker contract compared to qsort, and I believe functions in libiberty are understood to provide stronger/better replacements. Alexander
On Thu, May 10, 2018 at 5:56 PM, Alexander Monakov <amonakov@ispras.ru> wrote: > * sort.cc: New file. > * system.h [!CHECKING_P] (qsort): Redirect to gcc_qsort. > * vec.c (qsort_chk): Use gcc_qsort. Looks good to me. As additional enhancement we might want to provide (even unconditionally?) the glibc qsort_r() interface. I remember adding various globals to pass down state to the comparator... I agree self-tests might be good to have. Also it looks like the qsort-checking may now be somehow embedded within our qsort implementation? But all these things can be done as followup I think. Thanks, Richard. > --- > gcc/sort.cc | 232 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > gcc/system.h | 7 +- > gcc/vec.c | 2 +- > 3 files changed, 238 insertions(+), 3 deletions(-) > create mode 100644 gcc/sort.cc > > diff --git a/gcc/sort.cc b/gcc/sort.cc > new file mode 100644 > index 00000000000..4faf6d45dc6 > --- /dev/null > +++ b/gcc/sort.cc > @@ -0,0 +1,232 @@ > +/* Platform-independent deterministic sort function. > + Copyright (C) 2018 Free Software Foundation, Inc. > + Contributed by Alexander Monakov. > + > +This file is part of GCC. > + > +GCC is free software; you can redistribute it and/or modify it > +under the terms of the GNU General Public License as published by the > +Free Software Foundation; either version 3, or (at your option) any > +later version. > + > +GCC is distributed in the hope that it will be useful, but WITHOUT > +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License > +for more details. > + > +You should have received a copy of the GNU General Public License > +along with GCC; see the file COPYING3. If not see > +<http://www.gnu.org/licenses/>. */ > + > +/* This implements a sort function suitable for GCC use cases: > + - signature-compatible to C qsort, but relaxed contract: > + - may apply the comparator to elements in a temporary buffer > + - may abort on allocation failure > + - deterministic (but not necessarily stable) > + - fast, especially for common cases (0-5 elements of size 8 or 4) > + > + The implementation uses a network sort for up to 5 elements and > + a merge sort on top of that. Neither stage has branches depending on > + comparator result, trading extra arithmetic for branch mispredictions. */ > + > +#ifdef GENERATOR_FILE > +#include "bconfig.h" > +#else > +#include "config.h" > +#endif > + > +#include "system.h" > + > +#define likely(cond) __builtin_expect ((cond), 1) > + > +#ifdef __GNUC__ > +#define noinline __attribute__ ((__noinline__)) > +#else > +#define noinline > +#endif > + > +/* C-style qsort comparator function type. */ > +typedef int cmp_fn (const void *, const void *); > + > +/* Structure holding read-mostly (read-only in netsort) context. */ > +struct sort_ctx > +{ > + cmp_fn *cmp; // pointer to comparator > + char *out; // output buffer > + size_t n; // number of elements > + size_t size; // element size > +}; > + > +/* Helper for netsort. Permute, possibly in-place, 2 or 3 elements, > + placing E0 to C->OUT, E1 to C->OUT + C->SIZE, and so on. */ > +static void > +reorder23 (sort_ctx *c, char *e0, char *e1, char *e2) > +{ > +#define REORDER_23(SIZE, STRIDE, OFFSET) \ > +do { \ > + size_t t0, t1; \ > + memcpy (&t0, e0 + OFFSET, SIZE); \ > + memcpy (&t1, e1 + OFFSET, SIZE); \ > + char *out = c->out + OFFSET; \ > + if (likely (c->n == 3)) \ > + memcpy (out + 2*STRIDE, e2 + OFFSET, SIZE); \ > + memcpy (out, &t0, SIZE); out += STRIDE; \ > + memcpy (out, &t1, SIZE); \ > +} while (0) > + > + if (sizeof (size_t) == 8 && likely (c->size == 8)) > + REORDER_23 (8, 8, 0); > + else if (likely (c->size == 4)) > + REORDER_23 (4, 4, 0); > + else > + { > + size_t offset = 0, step = sizeof (size_t); > + for (; offset + step <= c->size; offset += step) > + REORDER_23 (step, c->size, offset); > + for (; offset < c->size; offset++) > + REORDER_23 (1, c->size, offset); > + } > +} > + > +/* Like reorder23, but permute 4 or 5 elements. */ > +static void > +reorder45 (sort_ctx *c, char *e0, char *e1, char *e2, char *e3, char *e4) > +{ > +#define REORDER_45(SIZE, STRIDE, OFFSET) \ > +do { \ > + size_t t0, t1, t2, t3; \ > + memcpy (&t0, e0 + OFFSET, SIZE); \ > + memcpy (&t1, e1 + OFFSET, SIZE); \ > + memcpy (&t2, e2 + OFFSET, SIZE); \ > + memcpy (&t3, e3 + OFFSET, SIZE); \ > + char *out = c->out + OFFSET; \ > + if (likely (c->n == 5)) \ > + memcpy (out + 4*STRIDE, e4 + OFFSET, SIZE); \ > + memcpy (out, &t0, SIZE); out += STRIDE; \ > + memcpy (out, &t1, SIZE); out += STRIDE; \ > + memcpy (out, &t2, SIZE); out += STRIDE; \ > + memcpy (out, &t3, SIZE); \ > +} while (0) > + > + if (sizeof (size_t) == 8 && likely (c->size == 8)) > + REORDER_45 (8, 8, 0); > + else if (likely(c->size == 4)) > + REORDER_45 (4, 4, 0); > + else > + { > + size_t offset = 0, step = sizeof (size_t); > + for (; offset + step <= c->size; offset += step) > + REORDER_45 (step, c->size, offset); > + for (; offset < c->size; offset++) > + REORDER_45 (1, c->size, offset); > + } > +} > + > +/* Helper for netsort. Invoke comparator CMP on E0 and E1. > + Return E0^E1 if E0 compares less than E1, zero otherwise. > + This is noinline to avoid code growth and confine invocation > + to a single call site, assisting indirect branch prediction. */ > +noinline static intptr_t > +cmp1 (char *e0, char *e1, cmp_fn *cmp) > +{ > + intptr_t x = (intptr_t)e0 ^ (intptr_t)e1; > + return x & (cmp (e0, e1) >> 31); > +} > + > +/* Execute network sort on 2 to 5 elements from IN, placing them into C->OUT. > + IN may be equal to C->OUT, in which case elements are sorted in place. */ > +static void > +netsort (char *in, sort_ctx *c) > +{ > +#define CMP(e0, e1) \ > +do { \ > + intptr_t x = cmp1 (e1, e0, c->cmp); \ > + e0 = (char *)((intptr_t)e0 ^ x); \ > + e1 = (char *)((intptr_t)e1 ^ x); \ > +} while (0) > + > + char *e0 = in, *e1 = e0 + c->size, *e2 = e1 + c->size; > + CMP (e0, e1); > + if (likely (c->n == 3)) > + { > + CMP (e1, e2); > + CMP (e0, e1); > + } > + if (c->n <= 3) > + return reorder23 (c, e0, e1, e2); > + char *e3 = e2 + c->size, *e4 = e3 + c->size; > + if (likely (c->n == 5)) > + { > + CMP (e3, e4); > + CMP (e2, e4); > + } > + CMP (e2, e3); > + if (likely (c->n == 5)) > + { > + CMP (e0, e3); > + CMP (e1, e4); > + } > + CMP (e0, e2); > + CMP (e1, e3); > + CMP (e1, e2); > + reorder45 (c, e0, e1, e2, e3, e4); > +} > + > +/* Execute merge sort on N elements from IN, placing them into OUT, > + using TMP as temporary storage if IN is equal to OUT. > + This is a stable sort if netsort is used only for 2 or 3 elements. */ > +static void > +mergesort (char *in, sort_ctx *c, size_t n, char *out, char *tmp) > +{ > + if (likely (n <= 5)) > + { > + c->out = out; > + c->n = n; > + return netsort (in, c); > + } > + size_t nl = n / 2, nr = n - nl, sz = nl * c->size; > + char *mid = in + sz, *r = out + sz, *l = in == out ? tmp : in; > + /* Sort the right half, outputting to right half of OUT. */ > + mergesort (mid, c, nr, r, tmp); > + /* Sort the left half, leaving left half of OUT free. */ > + mergesort (in, c, nl, l, mid); > + /* Merge sorted halves given by L, R to [OUT, END). */ > +#define MERGE_ELTSIZE(SIZE) \ > +do { \ > + intptr_t mr = c->cmp (r, l) >> 31; \ > + intptr_t lr = (intptr_t)l ^ (intptr_t)r; \ > + lr = (intptr_t)l ^ (lr & mr); \ > + out = (char *)memcpy (out, (char *)lr, SIZE); \ > + out += SIZE; \ > + r += mr & SIZE; \ > + if (r == out) return; \ > + l += ~mr & SIZE; \ > +} while (r != end) > + > + if (likely (c->cmp(r, l + (r - out) - c->size) < 0)) > + { > + char *end = out + n * c->size; > + if (sizeof (size_t) == 8 && likely (c->size == 8)) > + MERGE_ELTSIZE (8); > + else if (likely (c->size == 4)) > + MERGE_ELTSIZE (4); > + else > + MERGE_ELTSIZE (c->size); > + } > + memcpy (out, l, r - out); > +} > + > +void > +gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp) > +{ > + if (n < 2) > + return; > + char *base = (char *)vbase; > + sort_ctx c = {cmp, base, n, size}; > + long long scratch[32]; > + size_t bufsz = (n / 2) * size; > + void *buf = bufsz <= sizeof scratch ? scratch : xmalloc (bufsz); > + mergesort (base, &c, n, base, (char *)buf); > + if (buf != scratch) > + free (buf); > +} > diff --git a/gcc/system.h b/gcc/system.h > index 4abc321c71d..88dffccb8ab 100644 > --- a/gcc/system.h > +++ b/gcc/system.h > @@ -1202,11 +1202,14 @@ helper_const_non_const_cast (const char *p) > /* qsort comparator consistency checking: except in release-checking compilers, > redirect 4-argument qsort calls to qsort_chk; keep 1-argument invocations > corresponding to vec::qsort (cmp): they use C qsort internally anyway. */ > -#if CHECKING_P > +void qsort_chk (void *, size_t, size_t, int (*)(const void *, const void *)); > +void gcc_qsort (void *, size_t, size_t, int (*)(const void *, const void *)); > #define PP_5th(a1, a2, a3, a4, a5, ...) a5 > #undef qsort > +#if CHECKING_P > #define qsort(...) PP_5th (__VA_ARGS__, qsort_chk, 3, 2, qsort, 0) (__VA_ARGS__) > -void qsort_chk (void *, size_t, size_t, int (*)(const void *, const void *)); > +#else > +#define qsort(...) PP_5th (__VA_ARGS__, gcc_qsort, 3, 2, qsort, 0) (__VA_ARGS__) > #endif > > #endif /* ! GCC_SYSTEM_H */ > diff --git a/gcc/vec.c b/gcc/vec.c > index 11924a80a2d..2941715a34a 100644 > --- a/gcc/vec.c > +++ b/gcc/vec.c > @@ -215,7 +215,7 @@ void > qsort_chk (void *base, size_t n, size_t size, > int (*cmp)(const void *, const void *)) > { > - (qsort) (base, n, size, cmp); > + gcc_qsort (base, n, size, cmp); > #if 0 > #define LIM(n) (n) > #else > -- > 2.13.3 >
On Fri, 11 May 2018, Richard Biener wrote: > Looks good to me. As additional enhancement we might want to provide > (even unconditionally?) > the glibc qsort_r() interface. I remember adding various globals to > pass down state to the comparator... Thanks. I have no plans w.r.t qsort_r, but OTOH a stable sort interface can be added with tiny size/speed cost, and the sole in-tree use can be converted :) > I agree self-tests might be good to have. Also it looks like the > qsort-checking may now be somehow > embedded within our qsort implementation? I gave self-tests some thought after David's mail, and honestly I don't see much value in that, given that we run qsort_chk on everything. As for embedding, I don't think that's necessary. I prefer to keep them separate. Alexander
On Thu, May 10, 2018 at 8:56 AM, Alexander Monakov <amonakov@ispras.ru> wrote: > * sort.cc: New file. > * system.h [!CHECKING_P] (qsort): Redirect to gcc_qsort. > * vec.c (qsort_chk): Use gcc_qsort. > This breaks bootstrap on Fedora 28/i686: https://gcc.gnu.org/ml/gcc-regression/2018-05/msg00088.html ../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’ REORDER_45 (8, 8, 0); ^~~~~~~~~~ ../../src-trunk/gcc/sort.cc:100:10: error: ‘void* memcpy(void*, const void*, size_t)’ forming offset [5, 8] is out of the bounds [0, 4] of object ‘t2’ with type ‘size_t’ {aka ‘unsigned int’} [-Werror=array-bounds] memcpy (&t2, e2 + OFFSET, SIZE); \ ~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~ ../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’ REORDER_45 (8, 8, 0); ^~~~~~~~~~ ../../src-trunk/gcc/sort.cc:97:18: note: ‘t2’ declared here size_t t0, t1, t2, t3; \ ^~ ../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’ REORDER_45 (8, 8, 0); ^~~~~~~~~~ ../../src-trunk/gcc/sort.cc:101:10: error: ‘void* memcpy(void*, const void*, size_t)’ forming offset [5, 8] is out of the bounds [0, 4] of object ‘t3’ with type ‘size_t’ {aka ‘unsigned int’} [-Werror=array-bounds] memcpy (&t3, e3 + OFFSET, SIZE); \ ~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~ ../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’ REORDER_45 (8, 8, 0); ^~~~~~~~~~ ../../src-trunk/gcc/sort.cc:97:22: note: ‘t3’ declared here size_t t0, t1, t2, t3; \ ^~ ../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’ REORDER_45 (8, 8, 0); ^~~~~~~~~~ ../../src-trunk/gcc/sort.cc:105:10: error: ‘void* memcpy(void*, const void*, size_t)’ forming offset [5, 8] is out of the bounds [0, 4] of object ‘t0’ with type ‘size_t’ {aka ‘unsigned int’} [-Werror=array-bounds] memcpy (out, &t0, SIZE); out += STRIDE; \ ~~~~~~~^~~~~~~~~~~~~~~~ ../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’ REORDER_45 (8, 8, 0); ^~~~~~~~~~ ../../src-trunk/gcc/sort.cc:97:10: note: ‘t0’ declared here size_t t0, t1, t2, t3; \ ^~ ../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’ REORDER_45 (8, 8, 0); ^~~~~~~~~~ ../../src-trunk/gcc/sort.cc:106:10: error: ‘void* memcpy(void*, const void*, size_t)’ forming offset [5, 8] is out of the bounds [0, 4] of object ‘t1’ with type ‘size_t’ {aka ‘unsigned int’} [-Werror=array-bounds] memcpy (out, &t1, SIZE); out += STRIDE; \ ~~~~~~~^~~~~~~~~~~~~~~~ ../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’ REORDER_45 (8, 8, 0); ^~~~~~~~~~ ../../src-trunk/gcc/sort.cc:97:14: note: ‘t1’ declared here size_t t0, t1, t2, t3; \ ^~ ../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’ REORDER_45 (8, 8, 0); ^~~~~~~~~~ ../../src-trunk/gcc/sort.cc:107:10: error: ‘void* memcpy(void*, const void*, size_t)’ forming offset [5, 8] is out of the bounds [0, 4] of object ‘t2’ with type ‘size_t’ {aka ‘unsigned int’} [-Werror=array-bounds] memcpy (out, &t2, SIZE); out += STRIDE; \ ~~~~~~~^~~~~~~~~~~~~~~~ ../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’ REORDER_45 (8, 8, 0); ^~~~~~~~~~ ../../src-trunk/gcc/sort.cc:97:18: note: ‘t2’ declared here size_t t0, t1, t2, t3; \ ^~ ../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’ REORDER_45 (8, 8, 0); ^~~~~~~~~~ ../../src-trunk/gcc/sort.cc:108:10: error: ‘void* memcpy(void*, const void*, size_t)’ forming offset [5, 8] is out of the bounds [0, 4] of object ‘t3’ with type ‘size_t’ {aka ‘unsigned int’} [-Werror=array-bounds] memcpy (out, &t3, SIZE); \ ~~~~~~~^~~~~~~~~~~~~~~~ ../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’ REORDER_45 (8, 8, 0); ^~~~~~~~~~ ../../src-trunk/gcc/sort.cc:97:22: note: ‘t3’ declared here size_t t0, t1, t2, t3; \ ^~ ../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’ REORDER_45 (8, 8, 0); ^~~~~~~~~~
On Sun, 13 May 2018, H.J. Lu wrote: > This breaks bootstrap on Fedora 28/i686: > > https://gcc.gnu.org/ml/gcc-regression/2018-05/msg00088.html > > ../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’ > REORDER_45 (8, 8, 0); > ^~~~~~~~~~ > ../../src-trunk/gcc/sort.cc:100:10: error: ‘void* memcpy(void*, const > void*, size_t)’ forming offset [5, 8] is out of the bounds [0, 4] of > object ‘t2’ with type ‘size_t’ {aka ‘unsigned int’} > [-Werror=array-bounds] > memcpy (&t2, e2 + OFFSET, SIZE); \ > ~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~ Hm, on 32-bit this is trivially dead code, I wonder why we issue the warning? In any case, due to PR 85757 it's desirable to use types with sizes matching the memcpy size; is the following OK to apply? Bootstrapped on 32-bit x86. * sort.cc (REORDER_23): Pass the type for the temporaries instead of intended memcpy size. (REORDER_45): Likewise. --- gcc/sort.cc | 72 ++++++++++++++++++++++++++++++------------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/gcc/sort.cc b/gcc/sort.cc index 4faf6d45dc6..c41683c91dd 100644 --- a/gcc/sort.cc +++ b/gcc/sort.cc @@ -62,29 +62,29 @@ struct sort_ctx static void reorder23 (sort_ctx *c, char *e0, char *e1, char *e2) { -#define REORDER_23(SIZE, STRIDE, OFFSET) \ -do { \ - size_t t0, t1; \ - memcpy (&t0, e0 + OFFSET, SIZE); \ - memcpy (&t1, e1 + OFFSET, SIZE); \ - char *out = c->out + OFFSET; \ - if (likely (c->n == 3)) \ - memcpy (out + 2*STRIDE, e2 + OFFSET, SIZE); \ - memcpy (out, &t0, SIZE); out += STRIDE; \ - memcpy (out, &t1, SIZE); \ +#define REORDER_23(TYPE, STRIDE, OFFSET) \ +do { \ + TYPE t0, t1; \ + memcpy (&t0, e0 + OFFSET, sizeof (TYPE)); \ + memcpy (&t1, e1 + OFFSET, sizeof (TYPE)); \ + char *out = c->out + OFFSET; \ + if (likely (c->n == 3)) \ + memcpy (out + 2*STRIDE, e2 + OFFSET, sizeof (TYPE)); \ + memcpy (out, &t0, sizeof (TYPE)); out += STRIDE; \ + memcpy (out, &t1, sizeof (TYPE)); \ } while (0) - if (sizeof (size_t) == 8 && likely (c->size == 8)) - REORDER_23 (8, 8, 0); - else if (likely (c->size == 4)) - REORDER_23 (4, 4, 0); + if (likely (c->size == sizeof (size_t))) + REORDER_23 (size_t, sizeof (size_t), 0); + else if (likely (c->size == sizeof (int))) + REORDER_23 (int, sizeof (int), 0); else { size_t offset = 0, step = sizeof (size_t); for (; offset + step <= c->size; offset += step) - REORDER_23 (step, c->size, offset); + REORDER_23 (size_t, c->size, offset); for (; offset < c->size; offset++) - REORDER_23 (1, c->size, offset); + REORDER_23 (char, c->size, offset); } } @@ -92,33 +92,33 @@ do { \ static void reorder45 (sort_ctx *c, char *e0, char *e1, char *e2, char *e3, char *e4) { -#define REORDER_45(SIZE, STRIDE, OFFSET) \ -do { \ - size_t t0, t1, t2, t3; \ - memcpy (&t0, e0 + OFFSET, SIZE); \ - memcpy (&t1, e1 + OFFSET, SIZE); \ - memcpy (&t2, e2 + OFFSET, SIZE); \ - memcpy (&t3, e3 + OFFSET, SIZE); \ - char *out = c->out + OFFSET; \ - if (likely (c->n == 5)) \ - memcpy (out + 4*STRIDE, e4 + OFFSET, SIZE); \ - memcpy (out, &t0, SIZE); out += STRIDE; \ - memcpy (out, &t1, SIZE); out += STRIDE; \ - memcpy (out, &t2, SIZE); out += STRIDE; \ - memcpy (out, &t3, SIZE); \ +#define REORDER_45(TYPE, STRIDE, OFFSET) \ +do { \ + TYPE t0, t1, t2, t3; \ + memcpy (&t0, e0 + OFFSET, sizeof (TYPE)); \ + memcpy (&t1, e1 + OFFSET, sizeof (TYPE)); \ + memcpy (&t2, e2 + OFFSET, sizeof (TYPE)); \ + memcpy (&t3, e3 + OFFSET, sizeof (TYPE)); \ + char *out = c->out + OFFSET; \ + if (likely (c->n == 5)) \ + memcpy (out + 4*STRIDE, e4 + OFFSET, sizeof (TYPE)); \ + memcpy (out, &t0, sizeof (TYPE)); out += STRIDE; \ + memcpy (out, &t1, sizeof (TYPE)); out += STRIDE; \ + memcpy (out, &t2, sizeof (TYPE)); out += STRIDE; \ + memcpy (out, &t3, sizeof (TYPE)); \ } while (0) - if (sizeof (size_t) == 8 && likely (c->size == 8)) - REORDER_45 (8, 8, 0); - else if (likely(c->size == 4)) - REORDER_45 (4, 4, 0); + if (likely (c->size == sizeof (size_t))) + REORDER_45 (size_t, sizeof (size_t), 0); + else if (likely(c->size == sizeof (int))) + REORDER_45 (int, sizeof (int), 0); else { size_t offset = 0, step = sizeof (size_t); for (; offset + step <= c->size; offset += step) - REORDER_45 (step, c->size, offset); + REORDER_45 (size_t, c->size, offset); for (; offset < c->size; offset++) - REORDER_45 (1, c->size, offset); + REORDER_45 (char, c->size, offset); } }
On Mon, May 14, 2018 at 8:37 AM, Alexander Monakov <amonakov@ispras.ru> wrote: > On Sun, 13 May 2018, H.J. Lu wrote: >> This breaks bootstrap on Fedora 28/i686: >> >> https://gcc.gnu.org/ml/gcc-regression/2018-05/msg00088.html >> >> ../../src-trunk/gcc/sort.cc:112:5: note: in expansion of macro ‘REORDER_45’ >> REORDER_45 (8, 8, 0); >> ^~~~~~~~~~ >> ../../src-trunk/gcc/sort.cc:100:10: error: ‘void* memcpy(void*, const >> void*, size_t)’ forming offset [5, 8] is out of the bounds [0, 4] of >> object ‘t2’ with type ‘size_t’ {aka ‘unsigned int’} >> [-Werror=array-bounds] >> memcpy (&t2, e2 + OFFSET, SIZE); \ >> ~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~ > > Hm, on 32-bit this is trivially dead code, I wonder why we issue the warning? > > In any case, due to PR 85757 it's desirable to use types with sizes matching > the memcpy size; is the following OK to apply? Bootstrapped on 32-bit x86. OK. Richard. > * sort.cc (REORDER_23): Pass the type for the temporaries instead of > intended memcpy size. > (REORDER_45): Likewise. > --- > gcc/sort.cc | 72 ++++++++++++++++++++++++++++++------------------------------- > 1 file changed, 36 insertions(+), 36 deletions(-) > > diff --git a/gcc/sort.cc b/gcc/sort.cc > index 4faf6d45dc6..c41683c91dd 100644 > --- a/gcc/sort.cc > +++ b/gcc/sort.cc > @@ -62,29 +62,29 @@ struct sort_ctx > static void > reorder23 (sort_ctx *c, char *e0, char *e1, char *e2) > { > -#define REORDER_23(SIZE, STRIDE, OFFSET) \ > -do { \ > - size_t t0, t1; \ > - memcpy (&t0, e0 + OFFSET, SIZE); \ > - memcpy (&t1, e1 + OFFSET, SIZE); \ > - char *out = c->out + OFFSET; \ > - if (likely (c->n == 3)) \ > - memcpy (out + 2*STRIDE, e2 + OFFSET, SIZE); \ > - memcpy (out, &t0, SIZE); out += STRIDE; \ > - memcpy (out, &t1, SIZE); \ > +#define REORDER_23(TYPE, STRIDE, OFFSET) \ > +do { \ > + TYPE t0, t1; \ > + memcpy (&t0, e0 + OFFSET, sizeof (TYPE)); \ > + memcpy (&t1, e1 + OFFSET, sizeof (TYPE)); \ > + char *out = c->out + OFFSET; \ > + if (likely (c->n == 3)) \ > + memcpy (out + 2*STRIDE, e2 + OFFSET, sizeof (TYPE)); \ > + memcpy (out, &t0, sizeof (TYPE)); out += STRIDE; \ > + memcpy (out, &t1, sizeof (TYPE)); \ > } while (0) > > - if (sizeof (size_t) == 8 && likely (c->size == 8)) > - REORDER_23 (8, 8, 0); > - else if (likely (c->size == 4)) > - REORDER_23 (4, 4, 0); > + if (likely (c->size == sizeof (size_t))) > + REORDER_23 (size_t, sizeof (size_t), 0); > + else if (likely (c->size == sizeof (int))) > + REORDER_23 (int, sizeof (int), 0); > else > { > size_t offset = 0, step = sizeof (size_t); > for (; offset + step <= c->size; offset += step) > - REORDER_23 (step, c->size, offset); > + REORDER_23 (size_t, c->size, offset); > for (; offset < c->size; offset++) > - REORDER_23 (1, c->size, offset); > + REORDER_23 (char, c->size, offset); > } > } > > @@ -92,33 +92,33 @@ do { \ > static void > reorder45 (sort_ctx *c, char *e0, char *e1, char *e2, char *e3, char *e4) > { > -#define REORDER_45(SIZE, STRIDE, OFFSET) \ > -do { \ > - size_t t0, t1, t2, t3; \ > - memcpy (&t0, e0 + OFFSET, SIZE); \ > - memcpy (&t1, e1 + OFFSET, SIZE); \ > - memcpy (&t2, e2 + OFFSET, SIZE); \ > - memcpy (&t3, e3 + OFFSET, SIZE); \ > - char *out = c->out + OFFSET; \ > - if (likely (c->n == 5)) \ > - memcpy (out + 4*STRIDE, e4 + OFFSET, SIZE); \ > - memcpy (out, &t0, SIZE); out += STRIDE; \ > - memcpy (out, &t1, SIZE); out += STRIDE; \ > - memcpy (out, &t2, SIZE); out += STRIDE; \ > - memcpy (out, &t3, SIZE); \ > +#define REORDER_45(TYPE, STRIDE, OFFSET) \ > +do { \ > + TYPE t0, t1, t2, t3; \ > + memcpy (&t0, e0 + OFFSET, sizeof (TYPE)); \ > + memcpy (&t1, e1 + OFFSET, sizeof (TYPE)); \ > + memcpy (&t2, e2 + OFFSET, sizeof (TYPE)); \ > + memcpy (&t3, e3 + OFFSET, sizeof (TYPE)); \ > + char *out = c->out + OFFSET; \ > + if (likely (c->n == 5)) \ > + memcpy (out + 4*STRIDE, e4 + OFFSET, sizeof (TYPE)); \ > + memcpy (out, &t0, sizeof (TYPE)); out += STRIDE; \ > + memcpy (out, &t1, sizeof (TYPE)); out += STRIDE; \ > + memcpy (out, &t2, sizeof (TYPE)); out += STRIDE; \ > + memcpy (out, &t3, sizeof (TYPE)); \ > } while (0) > > - if (sizeof (size_t) == 8 && likely (c->size == 8)) > - REORDER_45 (8, 8, 0); > - else if (likely(c->size == 4)) > - REORDER_45 (4, 4, 0); > + if (likely (c->size == sizeof (size_t))) > + REORDER_45 (size_t, sizeof (size_t), 0); > + else if (likely(c->size == sizeof (int))) > + REORDER_45 (int, sizeof (int), 0); > else > { > size_t offset = 0, step = sizeof (size_t); > for (; offset + step <= c->size; offset += step) > - REORDER_45 (step, c->size, offset); > + REORDER_45 (size_t, c->size, offset); > for (; offset < c->size; offset++) > - REORDER_45 (1, c->size, offset); > + REORDER_45 (char, c->size, offset); > } > } > > -- > 2.13.3
diff --git a/gcc/sort.cc b/gcc/sort.cc new file mode 100644 index 00000000000..4faf6d45dc6 --- /dev/null +++ b/gcc/sort.cc @@ -0,0 +1,232 @@ +/* Platform-independent deterministic sort function. + Copyright (C) 2018 Free Software Foundation, Inc. + Contributed by Alexander Monakov. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +GCC is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* This implements a sort function suitable for GCC use cases: + - signature-compatible to C qsort, but relaxed contract: + - may apply the comparator to elements in a temporary buffer + - may abort on allocation failure + - deterministic (but not necessarily stable) + - fast, especially for common cases (0-5 elements of size 8 or 4) + + The implementation uses a network sort for up to 5 elements and + a merge sort on top of that. Neither stage has branches depending on + comparator result, trading extra arithmetic for branch mispredictions. */ + +#ifdef GENERATOR_FILE +#include "bconfig.h" +#else +#include "config.h" +#endif + +#include "system.h" + +#define likely(cond) __builtin_expect ((cond), 1) + +#ifdef __GNUC__ +#define noinline __attribute__ ((__noinline__)) +#else +#define noinline +#endif + +/* C-style qsort comparator function type. */ +typedef int cmp_fn (const void *, const void *); + +/* Structure holding read-mostly (read-only in netsort) context. */ +struct sort_ctx +{ + cmp_fn *cmp; // pointer to comparator + char *out; // output buffer + size_t n; // number of elements + size_t size; // element size +}; + +/* Helper for netsort. Permute, possibly in-place, 2 or 3 elements, + placing E0 to C->OUT, E1 to C->OUT + C->SIZE, and so on. */ +static void +reorder23 (sort_ctx *c, char *e0, char *e1, char *e2) +{ +#define REORDER_23(SIZE, STRIDE, OFFSET) \ +do { \ + size_t t0, t1; \ + memcpy (&t0, e0 + OFFSET, SIZE); \ + memcpy (&t1, e1 + OFFSET, SIZE); \ + char *out = c->out + OFFSET; \ + if (likely (c->n == 3)) \ + memcpy (out + 2*STRIDE, e2 + OFFSET, SIZE); \ + memcpy (out, &t0, SIZE); out += STRIDE; \ + memcpy (out, &t1, SIZE); \ +} while (0) + + if (sizeof (size_t) == 8 && likely (c->size == 8)) + REORDER_23 (8, 8, 0); + else if (likely (c->size == 4)) + REORDER_23 (4, 4, 0); + else + { + size_t offset = 0, step = sizeof (size_t); + for (; offset + step <= c->size; offset += step) + REORDER_23 (step, c->size, offset); + for (; offset < c->size; offset++) + REORDER_23 (1, c->size, offset); + } +} + +/* Like reorder23, but permute 4 or 5 elements. */ +static void +reorder45 (sort_ctx *c, char *e0, char *e1, char *e2, char *e3, char *e4) +{ +#define REORDER_45(SIZE, STRIDE, OFFSET) \ +do { \ + size_t t0, t1, t2, t3; \ + memcpy (&t0, e0 + OFFSET, SIZE); \ + memcpy (&t1, e1 + OFFSET, SIZE); \ + memcpy (&t2, e2 + OFFSET, SIZE); \ + memcpy (&t3, e3 + OFFSET, SIZE); \ + char *out = c->out + OFFSET; \ + if (likely (c->n == 5)) \ + memcpy (out + 4*STRIDE, e4 + OFFSET, SIZE); \ + memcpy (out, &t0, SIZE); out += STRIDE; \ + memcpy (out, &t1, SIZE); out += STRIDE; \ + memcpy (out, &t2, SIZE); out += STRIDE; \ + memcpy (out, &t3, SIZE); \ +} while (0) + + if (sizeof (size_t) == 8 && likely (c->size == 8)) + REORDER_45 (8, 8, 0); + else if (likely(c->size == 4)) + REORDER_45 (4, 4, 0); + else + { + size_t offset = 0, step = sizeof (size_t); + for (; offset + step <= c->size; offset += step) + REORDER_45 (step, c->size, offset); + for (; offset < c->size; offset++) + REORDER_45 (1, c->size, offset); + } +} + +/* Helper for netsort. Invoke comparator CMP on E0 and E1. + Return E0^E1 if E0 compares less than E1, zero otherwise. + This is noinline to avoid code growth and confine invocation + to a single call site, assisting indirect branch prediction. */ +noinline static intptr_t +cmp1 (char *e0, char *e1, cmp_fn *cmp) +{ + intptr_t x = (intptr_t)e0 ^ (intptr_t)e1; + return x & (cmp (e0, e1) >> 31); +} + +/* Execute network sort on 2 to 5 elements from IN, placing them into C->OUT. + IN may be equal to C->OUT, in which case elements are sorted in place. */ +static void +netsort (char *in, sort_ctx *c) +{ +#define CMP(e0, e1) \ +do { \ + intptr_t x = cmp1 (e1, e0, c->cmp); \ + e0 = (char *)((intptr_t)e0 ^ x); \ + e1 = (char *)((intptr_t)e1 ^ x); \ +} while (0) + + char *e0 = in, *e1 = e0 + c->size, *e2 = e1 + c->size; + CMP (e0, e1); + if (likely (c->n == 3)) + { + CMP (e1, e2); + CMP (e0, e1); + } + if (c->n <= 3) + return reorder23 (c, e0, e1, e2); + char *e3 = e2 + c->size, *e4 = e3 + c->size; + if (likely (c->n == 5)) + { + CMP (e3, e4); + CMP (e2, e4); + } + CMP (e2, e3); + if (likely (c->n == 5)) + { + CMP (e0, e3); + CMP (e1, e4); + } + CMP (e0, e2); + CMP (e1, e3); + CMP (e1, e2); + reorder45 (c, e0, e1, e2, e3, e4); +} + +/* Execute merge sort on N elements from IN, placing them into OUT, + using TMP as temporary storage if IN is equal to OUT. + This is a stable sort if netsort is used only for 2 or 3 elements. */ +static void +mergesort (char *in, sort_ctx *c, size_t n, char *out, char *tmp) +{ + if (likely (n <= 5)) + { + c->out = out; + c->n = n; + return netsort (in, c); + } + size_t nl = n / 2, nr = n - nl, sz = nl * c->size; + char *mid = in + sz, *r = out + sz, *l = in == out ? tmp : in; + /* Sort the right half, outputting to right half of OUT. */ + mergesort (mid, c, nr, r, tmp); + /* Sort the left half, leaving left half of OUT free. */ + mergesort (in, c, nl, l, mid); + /* Merge sorted halves given by L, R to [OUT, END). */ +#define MERGE_ELTSIZE(SIZE) \ +do { \ + intptr_t mr = c->cmp (r, l) >> 31; \ + intptr_t lr = (intptr_t)l ^ (intptr_t)r; \ + lr = (intptr_t)l ^ (lr & mr); \ + out = (char *)memcpy (out, (char *)lr, SIZE); \ + out += SIZE; \ + r += mr & SIZE; \ + if (r == out) return; \ + l += ~mr & SIZE; \ +} while (r != end) + + if (likely (c->cmp(r, l + (r - out) - c->size) < 0)) + { + char *end = out + n * c->size; + if (sizeof (size_t) == 8 && likely (c->size == 8)) + MERGE_ELTSIZE (8); + else if (likely (c->size == 4)) + MERGE_ELTSIZE (4); + else + MERGE_ELTSIZE (c->size); + } + memcpy (out, l, r - out); +} + +void +gcc_qsort (void *vbase, size_t n, size_t size, cmp_fn *cmp) +{ + if (n < 2) + return; + char *base = (char *)vbase; + sort_ctx c = {cmp, base, n, size}; + long long scratch[32]; + size_t bufsz = (n / 2) * size; + void *buf = bufsz <= sizeof scratch ? scratch : xmalloc (bufsz); + mergesort (base, &c, n, base, (char *)buf); + if (buf != scratch) + free (buf); +} diff --git a/gcc/system.h b/gcc/system.h index 4abc321c71d..88dffccb8ab 100644 --- a/gcc/system.h +++ b/gcc/system.h @@ -1202,11 +1202,14 @@ helper_const_non_const_cast (const char *p) /* qsort comparator consistency checking: except in release-checking compilers, redirect 4-argument qsort calls to qsort_chk; keep 1-argument invocations corresponding to vec::qsort (cmp): they use C qsort internally anyway. */ -#if CHECKING_P +void qsort_chk (void *, size_t, size_t, int (*)(const void *, const void *)); +void gcc_qsort (void *, size_t, size_t, int (*)(const void *, const void *)); #define PP_5th(a1, a2, a3, a4, a5, ...) a5 #undef qsort +#if CHECKING_P #define qsort(...) PP_5th (__VA_ARGS__, qsort_chk, 3, 2, qsort, 0) (__VA_ARGS__) -void qsort_chk (void *, size_t, size_t, int (*)(const void *, const void *)); +#else +#define qsort(...) PP_5th (__VA_ARGS__, gcc_qsort, 3, 2, qsort, 0) (__VA_ARGS__) #endif #endif /* ! GCC_SYSTEM_H */ diff --git a/gcc/vec.c b/gcc/vec.c index 11924a80a2d..2941715a34a 100644 --- a/gcc/vec.c +++ b/gcc/vec.c @@ -215,7 +215,7 @@ void qsort_chk (void *base, size_t n, size_t size, int (*cmp)(const void *, const void *)) { - (qsort) (base, n, size, cmp); + gcc_qsort (base, n, size, cmp); #if 0 #define LIM(n) (n) #else