diff mbox series

Benchtests: Improve large memcpy/memset benchmarks

Message ID PAWPR08MB8982673949A0B2FECD47F92983879@PAWPR08MB8982.eurprd08.prod.outlook.com
State New
Headers show
Series Benchtests: Improve large memcpy/memset benchmarks | expand

Commit Message

Wilco Dijkstra March 23, 2023, 11:41 a.m. UTC
Adjust sizes between 64KB and 16MB and iterations based on length.  
Remove incorrect uses of alloc_bufs (since we're not interested in measuring
Linux clear_page time).

---

Comments

Noah Goldstein March 23, 2023, 6:17 p.m. UTC | #1
On Thu, Mar 23, 2023 at 6:41 AM Wilco Dijkstra via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
>
> Adjust sizes between 64KB and 16MB and iterations based on length.
> Remove incorrect uses of alloc_bufs (since we're not interested in measuring
> Linux clear_page time).
>
> ---
>
> diff --git a/benchtests/bench-bzero-large.c b/benchtests/bench-bzero-large.c
> index 7076c0a17b00d76c26aa6adb4c3cc0aedcf69955..dea414ec8d1bb160409725996244178ee4fa93fd 100644
> --- a/benchtests/bench-bzero-large.c
> +++ b/benchtests/bench-bzero-large.c
> @@ -22,9 +22,8 @@
>  #else
>  # define TEST_NAME "bzero"
>  #endif
> -#define START_SIZE (128 * 1024)
> -#define MIN_PAGE_SIZE (getpagesize () + 64 * 1024 * 1024)
> -#define TIMEOUT (20 * 60)
> +#define START_SIZE (64 * 1024)
> +#define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
>  #include "bench-string.h"
>
>  #include "json-lib.h"
> @@ -52,7 +51,7 @@ IMPL (memset_zero, 0)
>  static void
>  do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, size_t n)
>  {
> -  size_t i, iters = 16;
> +  size_t i, iters = (MIN_PAGE_SIZE * 64) / n;
>    timing_t start, stop, cur;
>
>    TIMING_NOW (start);
> @@ -74,20 +73,13 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, size_t n)
>  static void
>  do_test (json_ctx_t *json_ctx, size_t align, size_t len)
>  {
> -  align &= 63;
Can you replace with `align &= getpagesize () - 1`?
> -  if ((align + len) * sizeof (CHAR) > page_size)
> -    return;

Is there a reason to remove the out of bounds check?
> -
>    json_element_object_begin (json_ctx);
>    json_attr_uint (json_ctx, "length", len);
>    json_attr_uint (json_ctx, "alignment", align);
>    json_array_begin (json_ctx, "timings");
>
>    FOR_EACH_IMPL (impl, 0)
> -    {
> -      do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, len);
> -      alloc_bufs ();
> -    }
> +    do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, len);
>
>    json_array_end (json_ctx);
>    json_element_object_end (json_ctx);
> diff --git a/benchtests/bench-memcpy-large.c b/benchtests/bench-memcpy-large.c
> index 7b2c5530af3883acc4b3895cb11667e6ff1a55ff..9e544a4729197f886f6cb092ec225004b586d197 100644
> --- a/benchtests/bench-memcpy-large.c
> +++ b/benchtests/bench-memcpy-large.c
> @@ -19,10 +19,9 @@
>  #ifndef MEMCPY_RESULT
>  # define MEMCPY_RESULT(dst, len) dst
>  # define START_SIZE (64 * 1024)
> -# define MIN_PAGE_SIZE (getpagesize () + 32 * 1024 * 1024)
> +# define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
>  # define TEST_MAIN
>  # define TEST_NAME "memcpy"
> -# define TIMEOUT (20 * 60)
>  # include "bench-string.h"
>
>  IMPL (memcpy, 1)
> @@ -36,7 +35,7 @@ static void
>  do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, const char *src,
>              size_t len)
>  {
> -  size_t i, iters = 16;
> +  size_t i, iters = (MIN_PAGE_SIZE * 8) / len;
>    timing_t start, stop, cur;
>
>    TIMING_NOW (start);
> @@ -59,12 +58,7 @@ do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len,
>    char *s1, *s2;
>    size_t repeats;
>    align1 &= 4095;
> -  if (align1 + len >= page_size)
> -    return;
> -
>    align2 &= 4095;
Might as well fix these up, can you replace 4095 with `getpagesize () - 1`?
> -  if (align2 + len >= page_size)
> -    return;
>
>    s1 = (char *) (buf1 + align1);
>    s2 = (char *) (buf2 + align2);
> diff --git a/benchtests/bench-memmove-large.c b/benchtests/bench-memmove-large.c
> index a09dd3678848a3bf8612732439700eb8ef5d82ea..fd504653f681b172800cc34e054cc745180aa4fa 100644
> --- a/benchtests/bench-memmove-large.c
> +++ b/benchtests/bench-memmove-large.c
> @@ -16,12 +16,10 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#define BASE_PAGE_SIZE (1024 * 1024)
> -#define START_SIZE (4 * 1024)
> +#define START_SIZE (64 * 1024)
>  #define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
>  #define TEST_MAIN
>  #define TEST_NAME "memmove"
> -#define TIMEOUT (20 * 60)
>  #include "bench-string.h"
>  #include "json-lib.h"
>
> @@ -33,7 +31,7 @@ static void
>  do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src,
>              size_t len)
>  {
> -  size_t i, iters = 16;
> +  size_t i, iters = (MIN_PAGE_SIZE * 8) / len;
>    timing_t start, stop, cur;
>
>    TIMING_NOW (start);
> @@ -54,13 +52,8 @@ do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len)
>    size_t i, j;
>    char *s1, *s2;
>
> -  align1 &= 127;
> -  if (align1 + len >= page_size)
> -    return;
> -
> -  align2 &= 127;
> -  if (align2 + len >= page_size)
> -    return;
> +  align1 &= 4095;
> +  align2 &= 4095;
`getpagesize () - 1`
>
>    s1 = (char *) (buf2 + align1);
>    s2 = (char *) (buf2 + align2);
> diff --git a/benchtests/bench-memset-large.c b/benchtests/bench-memset-large.c
> index a1f33245d4960c1e82fc441b5faf322204035202..c3f9ee0cd75d3cb30f8b3df15672f895c111fc28 100644
> --- a/benchtests/bench-memset-large.c
> +++ b/benchtests/bench-memset-large.c
> @@ -18,9 +18,8 @@
>
>  #define TEST_MAIN
>  #define TEST_NAME "memset"
> -#define START_SIZE (128 * 1024)
> -#define MIN_PAGE_SIZE (getpagesize () + 64 * 1024 * 1024)
> -#define TIMEOUT (20 * 60)
> +#define START_SIZE (64 * 1024)
> +#define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
>  #include "bench-string.h"
>
>  #include "json-lib.h"
> @@ -35,7 +34,7 @@ static void
>  do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s,
>              int c __attribute ((unused)), size_t n)
>  {
> -  size_t i, iters = 16;
> +  size_t i, iters = (MIN_PAGE_SIZE * 64) / n;
this takes [.5, 10] seconds?
>    timing_t start, stop, cur;
>
>    TIMING_NOW (start);
> @@ -53,10 +52,6 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s,
>  static void
>  do_test (json_ctx_t *json_ctx, size_t align, int c, size_t len)
>  {
> -  align &= 63;
> -  if ((align + len) * sizeof (CHAR) > page_size)
> -    return;
> -
>    json_element_object_begin (json_ctx);
>    json_attr_uint (json_ctx, "length", len);
>    json_attr_uint (json_ctx, "alignment", align);
> @@ -64,10 +59,7 @@ do_test (json_ctx_t *json_ctx, size_t align, int c, size_t len)
>    json_array_begin (json_ctx, "timings");
>
>    FOR_EACH_IMPL (impl, 0)
> -    {
> -      do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, c, len);
> -      alloc_bufs ();
> -    }
> +    do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, c, len);
>
>    json_array_end (json_ctx);
>    json_element_object_end (json_ctx);
>
>
Wilco Dijkstra March 23, 2023, 11:01 p.m. UTC | #2
Hi Noah,


>> -  align &= 63;
>
> Can you replace with `align &= getpagesize () - 1`?

Yes that would make more sense indeed.

>> -  if ((align + len) * sizeof (CHAR) > page_size)
>> -    return;
>
> Is there a reason to remove the out of bounds check?

Well it can't ever go out of bounds. The allocated buffer size is
2 * page_size (ie. 4 times real page size or 2 times MIN_PAGE_SIZE
due to the incorrect logic in init_sizes).

And if it could go out of bounds it should be an assert so we don't
silently benchmark a bounds check!

>>    align2 &= 4095;
>
> Might as well fix these up, can you replace 4095 with `getpagesize () - 1`?

Sure.

>> +  size_t i, iters = (MIN_PAGE_SIZE * 64) / n;
>
> this takes [.5, 10] seconds?

Yes, it's only 1 GB written per test. It takes ~8.5 seconds on an old slow
Cortex-A72.

Cheers,
Wilco
Noah Goldstein March 24, 2023, 4:29 p.m. UTC | #3
On Thu, Mar 23, 2023 at 6:01 PM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
>
> Hi Noah,
>
>
> >> -  align &= 63;
> >
> > Can you replace with `align &= getpagesize () - 1`?
>
> Yes that would make more sense indeed.
>
> >> -  if ((align + len) * sizeof (CHAR) > page_size)
> >> -    return;
> >
> > Is there a reason to remove the out of bounds check?
>
> Well it can't ever go out of bounds. The allocated buffer size is
> 2 * page_size (ie. 4 times real page size or 2 times MIN_PAGE_SIZE
> due to the incorrect logic in init_sizes).
>

I disagree, we have a bounds check in nearly all other benchmarks
and I think its 1) a convenience so you don't need to add the equivalent
code in many other places and 2) a portability thing as page size varies
by system but people often write benchmark bounds with constant size
bounds.

> And if it could go out of bounds it should be an assert so we don't
> silently benchmark a bounds check!

Then I think the approach would be to output the json parameters and
report invalid in some way for the times.

In other places benchmarks rely on this bounds check and it seems more
error prone than anything else to just leave it unchecked. I would much
rather the benchmark not run or fail in some way than report a potentially
incorrect time as true.

>
> >>    align2 &= 4095;
> >
> > Might as well fix these up, can you replace 4095 with `getpagesize () - 1`?
>
> Sure.
>
> >> +  size_t i, iters = (MIN_PAGE_SIZE * 64) / n;
> >
> > this takes [.5, 10] seconds?
>
> Yes, it's only 1 GB written per test. It takes ~8.5 seconds on an old slow
> Cortex-A72.
>
> Cheers,
> Wilco
>
>
diff mbox series

Patch

diff --git a/benchtests/bench-bzero-large.c b/benchtests/bench-bzero-large.c
index 7076c0a17b00d76c26aa6adb4c3cc0aedcf69955..dea414ec8d1bb160409725996244178ee4fa93fd 100644
--- a/benchtests/bench-bzero-large.c
+++ b/benchtests/bench-bzero-large.c
@@ -22,9 +22,8 @@ 
 #else
 # define TEST_NAME "bzero"
 #endif
-#define START_SIZE (128 * 1024)
-#define MIN_PAGE_SIZE (getpagesize () + 64 * 1024 * 1024)
-#define TIMEOUT (20 * 60)
+#define START_SIZE (64 * 1024)
+#define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
 #include "bench-string.h"
 
 #include "json-lib.h"
@@ -52,7 +51,7 @@  IMPL (memset_zero, 0)
 static void
 do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, size_t n)
 {
-  size_t i, iters = 16;
+  size_t i, iters = (MIN_PAGE_SIZE * 64) / n;
   timing_t start, stop, cur;
 
   TIMING_NOW (start);
@@ -74,20 +73,13 @@  do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, size_t n)
 static void
 do_test (json_ctx_t *json_ctx, size_t align, size_t len)
 {
-  align &= 63;
-  if ((align + len) * sizeof (CHAR) > page_size)
-    return;
-
   json_element_object_begin (json_ctx);
   json_attr_uint (json_ctx, "length", len);
   json_attr_uint (json_ctx, "alignment", align);
   json_array_begin (json_ctx, "timings");
 
   FOR_EACH_IMPL (impl, 0)
-    {
-      do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, len);
-      alloc_bufs ();
-    }
+    do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, len);
 
   json_array_end (json_ctx);
   json_element_object_end (json_ctx);
diff --git a/benchtests/bench-memcpy-large.c b/benchtests/bench-memcpy-large.c
index 7b2c5530af3883acc4b3895cb11667e6ff1a55ff..9e544a4729197f886f6cb092ec225004b586d197 100644
--- a/benchtests/bench-memcpy-large.c
+++ b/benchtests/bench-memcpy-large.c
@@ -19,10 +19,9 @@ 
 #ifndef MEMCPY_RESULT
 # define MEMCPY_RESULT(dst, len) dst
 # define START_SIZE (64 * 1024)
-# define MIN_PAGE_SIZE (getpagesize () + 32 * 1024 * 1024)
+# define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
 # define TEST_MAIN
 # define TEST_NAME "memcpy"
-# define TIMEOUT (20 * 60)
 # include "bench-string.h"
 
 IMPL (memcpy, 1)
@@ -36,7 +35,7 @@  static void
 do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, const char *src,
 	     size_t len)
 {
-  size_t i, iters = 16;
+  size_t i, iters = (MIN_PAGE_SIZE * 8) / len;
   timing_t start, stop, cur;
 
   TIMING_NOW (start);
@@ -59,12 +58,7 @@  do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len,
   char *s1, *s2;
   size_t repeats;
   align1 &= 4095;
-  if (align1 + len >= page_size)
-    return;
-
   align2 &= 4095;
-  if (align2 + len >= page_size)
-    return;
 
   s1 = (char *) (buf1 + align1);
   s2 = (char *) (buf2 + align2);
diff --git a/benchtests/bench-memmove-large.c b/benchtests/bench-memmove-large.c
index a09dd3678848a3bf8612732439700eb8ef5d82ea..fd504653f681b172800cc34e054cc745180aa4fa 100644
--- a/benchtests/bench-memmove-large.c
+++ b/benchtests/bench-memmove-large.c
@@ -16,12 +16,10 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define BASE_PAGE_SIZE (1024 * 1024)
-#define START_SIZE (4 * 1024)
+#define START_SIZE (64 * 1024)
 #define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
 #define TEST_MAIN
 #define TEST_NAME "memmove"
-#define TIMEOUT (20 * 60)
 #include "bench-string.h"
 #include "json-lib.h"
 
@@ -33,7 +31,7 @@  static void
 do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src,
 	     size_t len)
 {
-  size_t i, iters = 16;
+  size_t i, iters = (MIN_PAGE_SIZE * 8) / len;
   timing_t start, stop, cur;
 
   TIMING_NOW (start);
@@ -54,13 +52,8 @@  do_test (json_ctx_t *json_ctx, size_t align1, size_t align2, size_t len)
   size_t i, j;
   char *s1, *s2;
 
-  align1 &= 127;
-  if (align1 + len >= page_size)
-    return;
-
-  align2 &= 127;
-  if (align2 + len >= page_size)
-    return;
+  align1 &= 4095;
+  align2 &= 4095;
 
   s1 = (char *) (buf2 + align1);
   s2 = (char *) (buf2 + align2);
diff --git a/benchtests/bench-memset-large.c b/benchtests/bench-memset-large.c
index a1f33245d4960c1e82fc441b5faf322204035202..c3f9ee0cd75d3cb30f8b3df15672f895c111fc28 100644
--- a/benchtests/bench-memset-large.c
+++ b/benchtests/bench-memset-large.c
@@ -18,9 +18,8 @@ 
 
 #define TEST_MAIN
 #define TEST_NAME "memset"
-#define START_SIZE (128 * 1024)
-#define MIN_PAGE_SIZE (getpagesize () + 64 * 1024 * 1024)
-#define TIMEOUT (20 * 60)
+#define START_SIZE (64 * 1024)
+#define MIN_PAGE_SIZE (getpagesize () + 16 * 1024 * 1024)
 #include "bench-string.h"
 
 #include "json-lib.h"
@@ -35,7 +34,7 @@  static void
 do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s,
 	     int c __attribute ((unused)), size_t n)
 {
-  size_t i, iters = 16;
+  size_t i, iters = (MIN_PAGE_SIZE * 64) / n;
   timing_t start, stop, cur;
 
   TIMING_NOW (start);
@@ -53,10 +52,6 @@  do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s,
 static void
 do_test (json_ctx_t *json_ctx, size_t align, int c, size_t len)
 {
-  align &= 63;
-  if ((align + len) * sizeof (CHAR) > page_size)
-    return;
-
   json_element_object_begin (json_ctx);
   json_attr_uint (json_ctx, "length", len);
   json_attr_uint (json_ctx, "alignment", align);
@@ -64,10 +59,7 @@  do_test (json_ctx_t *json_ctx, size_t align, int c, size_t len)
   json_array_begin (json_ctx, "timings");
 
   FOR_EACH_IMPL (impl, 0)
-    {
-      do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, c, len);
-      alloc_bufs ();
-    }
+    do_one_test (json_ctx, impl, (CHAR *) (buf1) + align, c, len);
 
   json_array_end (json_ctx);
   json_element_object_end (json_ctx);