[1/3] benchtests: Fix walking sizes and directions for *-walk benchmarks

Message ID 1510204408-1739-2-git-send-email-siddhesh@sourceware.org
State New
Headers show
Series
  • memset zva optimization
Related show

Commit Message

Siddhesh Poyarekar Nov. 9, 2017, 5:13 a.m.
Make the walking benchmarks walk only backwards, since copying in both
directions is biased in favour of implementations that use non-temporal
stores for larger sizes; falkor is one of them.  This also fixes bugs
in the computation of the result, which unnecessarily multiplied the
timing result by the length.

	* benchtests/bench-memcpy-walk.c (do_one_test): Copy only
	backwards.  Fix timing computation.
	* benchtests/bench-memmove-walk.c (do_one_test): Likewise.
	* benchtests/bench-memset-walk.c (do_one_test): Walk backwards
	on memset by N at a time.  Fix timing computation.
---
 benchtests/bench-memcpy-walk.c  | 14 +++++---------
 benchtests/bench-memmove-walk.c | 15 +++++----------
 benchtests/bench-memset-walk.c  |  4 ++--
 3 files changed, 12 insertions(+), 21 deletions(-)

Comments

Siddhesh Poyarekar Nov. 14, 2017, 9:18 a.m. | #1
Any thoughts on this benchmark fix?  I'll push it by the end of the week
if there are no objections.

Siddhesh

On Thursday 09 November 2017 10:43 AM, Siddhesh Poyarekar wrote:
> Make the walking benchmarks walk only backwards since copying both
> ways is biased in favour of implementations that use non-temporal
> stores for larger sizes; falkor is one of them.  This also fixes up
> bugs in computation of the result which ended up multiplying the
> length with the timing result unnecessarily.
> 
> 	* benchtests/bench-memcpy-walk.c (do_one_test): Copy only
> 	backwards.  Fix timing computation.
> 	* benchtests/bench-memmove-walk.c (do_one_test): Likewise.
> 	* benchtests/bench-memset-walk.c (do_one_test): Walk backwards
> 	on memset by N at a time.  Fix timing computation.
> ---
>  benchtests/bench-memcpy-walk.c  | 14 +++++---------
>  benchtests/bench-memmove-walk.c | 15 +++++----------
>  benchtests/bench-memset-walk.c  |  4 ++--
>  3 files changed, 12 insertions(+), 21 deletions(-)
> 
> diff --git a/benchtests/bench-memcpy-walk.c b/benchtests/bench-memcpy-walk.c
> index 69d467d..5b56341 100644
> --- a/benchtests/bench-memcpy-walk.c
> +++ b/benchtests/bench-memcpy-walk.c
> @@ -47,26 +47,22 @@ static void
>  do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src,
>  	     size_t len)
>  {
> -  size_t i, iters = MIN_PAGE_SIZE / len;
> +  size_t i = 0;
>    timing_t start, stop, cur;
>  
>    char *dst_end = dst + MIN_PAGE_SIZE - len;
>    char *src_end = src + MIN_PAGE_SIZE - len;
>  
>    TIMING_NOW (start);
> -  /* Copy the entire buffer back and forth, LEN at a time.  */
> -  for (i = 0; i < iters && dst_end >= dst && src <= src_end; src++, dst_end--)
> -    {
> -      CALL (impl, dst_end, src, len);
> -      CALL (impl, src, dst_end, len);
> -      i += 2;
> -    }
> +  /* Copy the entire buffer backwards, LEN at a time.  */
> +  for (; src_end >= src && dst_end >= dst; src_end -= len, dst_end -= len, i++)
> +    CALL (impl, src_end, dst_end, len);
>    TIMING_NOW (stop);
>  
>    TIMING_DIFF (cur, start, stop);
>  
>    /* Get time taken per function call.  */
> -  json_element_double (json_ctx, (double) cur * len / i);
> +  json_element_double (json_ctx, (double) cur / i);
>  }
>  
>  static void
> diff --git a/benchtests/bench-memmove-walk.c b/benchtests/bench-memmove-walk.c
> index 54dcd64..969ddd9 100644
> --- a/benchtests/bench-memmove-walk.c
> +++ b/benchtests/bench-memmove-walk.c
> @@ -47,26 +47,22 @@ static void
>  do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src,
>  	     size_t len)
>  {
> -  size_t i, iters = MIN_PAGE_SIZE / len;
> +  size_t i = 0;
>    timing_t start, stop, cur;
>  
>    char *dst_end = dst + MIN_PAGE_SIZE - len;
>    char *src_end = src + MIN_PAGE_SIZE - len;
>  
>    TIMING_NOW (start);
> -  /* Copy the entire buffer back and forth, LEN at a time.  */
> -  for (i = 0; i < iters && dst_end >= dst && src <= src_end; src++, dst_end--)
> -    {
> -      CALL (impl, dst_end, src, len);
> -      CALL (impl, src, dst_end, len);
> -      i += 2;
> -    }
> +  /* Copy the entire buffer backwards, LEN at a time.  */
> +  for (; src_end >= src && dst <= dst_end; dst += len, src_end -= len, i++)
> +    CALL (impl, dst, src_end, len);
>    TIMING_NOW (stop);
>  
>    TIMING_DIFF (cur, start, stop);
>  
>    /* Get time taken per function call.  */
> -  json_element_double (json_ctx, (double) cur * len / i);
> +  json_element_double (json_ctx, (double) cur / i);
>  }
>  
>  static void
> @@ -79,7 +75,6 @@ do_test (json_ctx_t *json_ctx, size_t len, bool overlap)
>    if (overlap)
>      buf2 = buf1;
>  
> -  /* First the non-overlapping moves.  */
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (json_ctx, impl, (char *) buf2, (char *) buf1, len);
>  
> diff --git a/benchtests/bench-memset-walk.c b/benchtests/bench-memset-walk.c
> index 59d2626..80fbe09 100644
> --- a/benchtests/bench-memset-walk.c
> +++ b/benchtests/bench-memset-walk.c
> @@ -66,14 +66,14 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, CHAR *s_end,
>    timing_t start, stop, cur;
>  
>    TIMING_NOW (start);
> -  for (i = 0; i < iters && s <= s_end; s++, i++)
> +  for (i = 0; i < iters && s <= s_end; s_end -= n, i++)
>      CALL (impl, s, c, n);
>    TIMING_NOW (stop);
>  
>    TIMING_DIFF (cur, start, stop);
>  
>    /* Get time taken per function call.  */
> -  json_element_double (json_ctx, (double) cur * n / i);
> +  json_element_double (json_ctx, (double) cur / i);
>  }
>  
>  static void
>
Siddhesh Poyarekar Nov. 20, 2017, 12:34 p.m. | #2
... and now pushed.

Siddhesh

On Thursday 09 November 2017 10:43 AM, Siddhesh Poyarekar wrote:
> Make the walking benchmarks walk only backwards since copying both
> ways is biased in favour of implementations that use non-temporal
> stores for larger sizes; falkor is one of them.  This also fixes up
> bugs in computation of the result which ended up multiplying the
> length with the timing result unnecessarily.
> 
> 	* benchtests/bench-memcpy-walk.c (do_one_test): Copy only
> 	backwards.  Fix timing computation.
> 	* benchtests/bench-memmove-walk.c (do_one_test): Likewise.
> 	* benchtests/bench-memset-walk.c (do_one_test): Walk backwards
> 	on memset by N at a time.  Fix timing computation.
> ---
>  benchtests/bench-memcpy-walk.c  | 14 +++++---------
>  benchtests/bench-memmove-walk.c | 15 +++++----------
>  benchtests/bench-memset-walk.c  |  4 ++--
>  3 files changed, 12 insertions(+), 21 deletions(-)
> 
> diff --git a/benchtests/bench-memcpy-walk.c b/benchtests/bench-memcpy-walk.c
> index 69d467d..5b56341 100644
> --- a/benchtests/bench-memcpy-walk.c
> +++ b/benchtests/bench-memcpy-walk.c
> @@ -47,26 +47,22 @@ static void
>  do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src,
>  	     size_t len)
>  {
> -  size_t i, iters = MIN_PAGE_SIZE / len;
> +  size_t i = 0;
>    timing_t start, stop, cur;
>  
>    char *dst_end = dst + MIN_PAGE_SIZE - len;
>    char *src_end = src + MIN_PAGE_SIZE - len;
>  
>    TIMING_NOW (start);
> -  /* Copy the entire buffer back and forth, LEN at a time.  */
> -  for (i = 0; i < iters && dst_end >= dst && src <= src_end; src++, dst_end--)
> -    {
> -      CALL (impl, dst_end, src, len);
> -      CALL (impl, src, dst_end, len);
> -      i += 2;
> -    }
> +  /* Copy the entire buffer backwards, LEN at a time.  */
> +  for (; src_end >= src && dst_end >= dst; src_end -= len, dst_end -= len, i++)
> +    CALL (impl, src_end, dst_end, len);
>    TIMING_NOW (stop);
>  
>    TIMING_DIFF (cur, start, stop);
>  
>    /* Get time taken per function call.  */
> -  json_element_double (json_ctx, (double) cur * len / i);
> +  json_element_double (json_ctx, (double) cur / i);
>  }
>  
>  static void
> diff --git a/benchtests/bench-memmove-walk.c b/benchtests/bench-memmove-walk.c
> index 54dcd64..969ddd9 100644
> --- a/benchtests/bench-memmove-walk.c
> +++ b/benchtests/bench-memmove-walk.c
> @@ -47,26 +47,22 @@ static void
>  do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src,
>  	     size_t len)
>  {
> -  size_t i, iters = MIN_PAGE_SIZE / len;
> +  size_t i = 0;
>    timing_t start, stop, cur;
>  
>    char *dst_end = dst + MIN_PAGE_SIZE - len;
>    char *src_end = src + MIN_PAGE_SIZE - len;
>  
>    TIMING_NOW (start);
> -  /* Copy the entire buffer back and forth, LEN at a time.  */
> -  for (i = 0; i < iters && dst_end >= dst && src <= src_end; src++, dst_end--)
> -    {
> -      CALL (impl, dst_end, src, len);
> -      CALL (impl, src, dst_end, len);
> -      i += 2;
> -    }
> +  /* Copy the entire buffer backwards, LEN at a time.  */
> +  for (; src_end >= src && dst <= dst_end; dst += len, src_end -= len, i++)
> +    CALL (impl, dst, src_end, len);
>    TIMING_NOW (stop);
>  
>    TIMING_DIFF (cur, start, stop);
>  
>    /* Get time taken per function call.  */
> -  json_element_double (json_ctx, (double) cur * len / i);
> +  json_element_double (json_ctx, (double) cur / i);
>  }
>  
>  static void
> @@ -79,7 +75,6 @@ do_test (json_ctx_t *json_ctx, size_t len, bool overlap)
>    if (overlap)
>      buf2 = buf1;
>  
> -  /* First the non-overlapping moves.  */
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (json_ctx, impl, (char *) buf2, (char *) buf1, len);
>  
> diff --git a/benchtests/bench-memset-walk.c b/benchtests/bench-memset-walk.c
> index 59d2626..80fbe09 100644
> --- a/benchtests/bench-memset-walk.c
> +++ b/benchtests/bench-memset-walk.c
> @@ -66,14 +66,14 @@ do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, CHAR *s_end,
>    timing_t start, stop, cur;
>  
>    TIMING_NOW (start);
> -  for (i = 0; i < iters && s <= s_end; s++, i++)
> +  for (i = 0; i < iters && s <= s_end; s_end -= n, i++)
>      CALL (impl, s, c, n);
>    TIMING_NOW (stop);
>  
>    TIMING_DIFF (cur, start, stop);
>  
>    /* Get time taken per function call.  */
> -  json_element_double (json_ctx, (double) cur * n / i);
> +  json_element_double (json_ctx, (double) cur / i);
>  }
>  
>  static void
>

Patch

diff --git a/benchtests/bench-memcpy-walk.c b/benchtests/bench-memcpy-walk.c
index 69d467d..5b56341 100644
--- a/benchtests/bench-memcpy-walk.c
+++ b/benchtests/bench-memcpy-walk.c
@@ -47,26 +47,22 @@  static void
 do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src,
 	     size_t len)
 {
-  size_t i, iters = MIN_PAGE_SIZE / len;
+  size_t i = 0;
   timing_t start, stop, cur;
 
   char *dst_end = dst + MIN_PAGE_SIZE - len;
   char *src_end = src + MIN_PAGE_SIZE - len;
 
   TIMING_NOW (start);
-  /* Copy the entire buffer back and forth, LEN at a time.  */
-  for (i = 0; i < iters && dst_end >= dst && src <= src_end; src++, dst_end--)
-    {
-      CALL (impl, dst_end, src, len);
-      CALL (impl, src, dst_end, len);
-      i += 2;
-    }
+  /* Copy the entire buffer backwards, LEN at a time.  */
+  for (; src_end >= src && dst_end >= dst; src_end -= len, dst_end -= len, i++)
+    CALL (impl, src_end, dst_end, len);
   TIMING_NOW (stop);
 
   TIMING_DIFF (cur, start, stop);
 
   /* Get time taken per function call.  */
-  json_element_double (json_ctx, (double) cur * len / i);
+  json_element_double (json_ctx, (double) cur / i);
 }
 
 static void
diff --git a/benchtests/bench-memmove-walk.c b/benchtests/bench-memmove-walk.c
index 54dcd64..969ddd9 100644
--- a/benchtests/bench-memmove-walk.c
+++ b/benchtests/bench-memmove-walk.c
@@ -47,26 +47,22 @@  static void
 do_one_test (json_ctx_t *json_ctx, impl_t *impl, char *dst, char *src,
 	     size_t len)
 {
-  size_t i, iters = MIN_PAGE_SIZE / len;
+  size_t i = 0;
   timing_t start, stop, cur;
 
   char *dst_end = dst + MIN_PAGE_SIZE - len;
   char *src_end = src + MIN_PAGE_SIZE - len;
 
   TIMING_NOW (start);
-  /* Copy the entire buffer back and forth, LEN at a time.  */
-  for (i = 0; i < iters && dst_end >= dst && src <= src_end; src++, dst_end--)
-    {
-      CALL (impl, dst_end, src, len);
-      CALL (impl, src, dst_end, len);
-      i += 2;
-    }
+  /* Copy the entire buffer backwards, LEN at a time.  */
+  for (; src_end >= src && dst <= dst_end; dst += len, src_end -= len, i++)
+    CALL (impl, dst, src_end, len);
   TIMING_NOW (stop);
 
   TIMING_DIFF (cur, start, stop);
 
   /* Get time taken per function call.  */
-  json_element_double (json_ctx, (double) cur * len / i);
+  json_element_double (json_ctx, (double) cur / i);
 }
 
 static void
@@ -79,7 +75,6 @@  do_test (json_ctx_t *json_ctx, size_t len, bool overlap)
   if (overlap)
     buf2 = buf1;
 
-  /* First the non-overlapping moves.  */
   FOR_EACH_IMPL (impl, 0)
     do_one_test (json_ctx, impl, (char *) buf2, (char *) buf1, len);
 
diff --git a/benchtests/bench-memset-walk.c b/benchtests/bench-memset-walk.c
index 59d2626..80fbe09 100644
--- a/benchtests/bench-memset-walk.c
+++ b/benchtests/bench-memset-walk.c
@@ -66,14 +66,14 @@  do_one_test (json_ctx_t *json_ctx, impl_t *impl, CHAR *s, CHAR *s_end,
   timing_t start, stop, cur;
 
   TIMING_NOW (start);
-  for (i = 0; i < iters && s <= s_end; s++, i++)
+  for (i = 0; i < iters && s <= s_end; s_end -= n, i++)
     CALL (impl, s, c, n);
   TIMING_NOW (stop);
 
   TIMING_DIFF (cur, start, stop);
 
   /* Get time taken per function call.  */
-  json_element_double (json_ctx, (double) cur * n / i);
+  json_element_double (json_ctx, (double) cur / i);
 }
 
 static void