diff mbox series

[v4] string: Adds tests for test-strncasecmp and test-strncpy

Message ID 20200610185518.15281-1-rzinsly@linux.ibm.com
State New
Headers show
Series [v4] string: Adds tests for test-strncasecmp and test-strncpy | expand

Commit Message

Raphael M Zinsly June 10, 2020, 6:55 p.m. UTC
A couple more changes based on Adhemerval's review, thanks.

Changes since v3:
	- Using a cache line size of 64 if the system doesn't have meaningful
	information on _SC_LEVEL1_DCACHE_LINESIZE.
	- Using buf2 instead of a buf1 copy for s2 at
	string/test-strncasecmp.c.

--- >8 ---

Adds tests to check if strings placed at page bondaries are
handled correctly by strncasecmp and strncpy similar to tests
for strncmp and strnlen.
---
 string/test-strncasecmp.c | 43 +++++++++++++++++++++++++++++++++++++++
 string/test-strncpy.c     | 41 +++++++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+)

Comments

Paul A. Clarke June 11, 2020, 4:33 p.m. UTC | #1
On Wed, Jun 10, 2020 at 03:55:18PM -0300, Raphael Moreira Zinsly via Libc-alpha wrote:
> A couple more changes based on Adhemerval's review, thanks.
> 
> Changes since v3:
> 	- Using a cache line size of 64 if the system doesn't have meaningful
> 	information on _SC_LEVEL1_DCACHE_LINESIZE.
> 	- Using buf2 instead of a buf1 copy for s2 at
> 	string/test-strncasecmp.c.
> 
> --- >8 ---
> 
> Adds tests to check if strings placed at page bondaries are
> handled correctly by strncasecmp and strncpy similar to tests
> for strncmp and strnlen.
> ---
>  string/test-strncasecmp.c | 43 +++++++++++++++++++++++++++++++++++++++
>  string/test-strncpy.c     | 41 +++++++++++++++++++++++++++++++++++++
>  2 files changed, 84 insertions(+)
> 
> diff --git a/string/test-strncasecmp.c b/string/test-strncasecmp.c
> index 6a9c27beae..63f6d59ef2 100644
> --- a/string/test-strncasecmp.c
> +++ b/string/test-strncasecmp.c
> @@ -137,6 +137,48 @@ do_test (size_t align1, size_t align2, size_t n, size_t len, int max_char,
>      do_one_test (impl, s1, s2, n, exp_result);
>  }
> 
> +static void
> +do_page_tests (void)
> +{
> +  size_t i, offset, cacheline_size;
> +  char *s1, *s2;
> +  int exp_result;
> +
> +  offset = page_size - 1;
> +  s1 = (char *) buf1;
> +  s2 = (char *) buf2;
> +  memset (s1, '\1', offset);
> +  memset (s2, '\1', offset);
> +  s1[offset] = '\0';
> +  s2[offset] = '\0';
> +
> +  /* s2 has a fixed offset, half page long.
> +     page_size is actually 2 * getpagesize.  */
> +  offset = page_size / 4;
> +  s2 += offset;
> +  /* Start offset for s1 at half of the second page.  */
> +  offset = 3 * page_size / 4;
> +  s1 += offset;
> +
> +  /* Try to cross the page boundary at every offset of a cache line.  */
> +  cacheline_size = sysconf (_SC_LEVEL1_DCACHE_LINESIZE);
> +  /* Use 64 as default.  */
> +  if(cacheline_size == 0)
> +    cacheline_size = 64;

A little bikeshedding here, but why not pick the largest common cache line
size, which would obvioulsy be a superset of all the rest?  Say, 128?

Also, the comment is probably not needed.  Indeed, you could coalesce:
  if ((cacheline_size = sysconf (_SC_LEVEL1_DCACHE_LINESIZE)) == 0)
    cacheline_size = 128;

> +  for (i = 0; i < cacheline_size; ++i)
> +    {
> +      exp_result = *s1;
> +
> +      FOR_EACH_IMPL (impl, 0)
> +	{
> +	  check_result (impl, s1, s2, page_size, -exp_result);
> +	  check_result (impl, s2, s1, page_size, exp_result);
> +	}
> +
> +      s1++;
> +    }
> +}
> +
>  static void
>  do_random_tests (void)
>  {
> @@ -334,6 +376,7 @@ test_locale (const char *locale)
>      }
> 
>    do_random_tests ();
> +  do_page_tests ();
>  }
> 
>  int
> diff --git a/string/test-strncpy.c b/string/test-strncpy.c
> index c978753ad8..051257f200 100644
> --- a/string/test-strncpy.c
> +++ b/string/test-strncpy.c
> @@ -155,6 +155,46 @@ do_test (size_t align1, size_t align2, size_t len, size_t n, int max_char)
>      do_one_test (impl, s2, s1, len, n);
>  }
> 
> +static void
> +do_page_tests (void)
> +{
> +  size_t i, small_len, big_len, short_offset, long_offset, cacheline_size;
> +  CHAR *s1, *s2;
> +  /* Calculate the null character offset.  */
> +  size_t last_offset = (page_size / sizeof (CHAR)) - 1;
> +
> +  s2 = (CHAR *) buf1;
> +  s1 = (CHAR *) buf2;
> +  MEMSET (s1, '\1', last_offset);
> +  s1[last_offset] = '\0';
> +
> +  long_offset = (last_offset + 1) / 2;
> +  short_offset = last_offset;
> +
> +  /* Try to cross the page boundary at every offset of a cache line.  */
> +  cacheline_size = sysconf (_SC_LEVEL1_DCACHE_LINESIZE);
> +  /* Use 64 as default.  */
> +  if(cacheline_size == 0)
> +    cacheline_size = 64;

same.

> +  for (i = 0; i < cacheline_size; i++)
> +    {
> +      /* Place long strings ending at page boundary.  */
> +      long_offset++;
> +      big_len = last_offset - long_offset;
> +      /* Place short strings ending at page boundary.  */
> +      short_offset--;
> +      small_len = last_offset - short_offset;
> +
> +      FOR_EACH_IMPL (impl, 0)
> +        {
> +	  do_one_test (impl, s2, (CHAR *) (s1 + short_offset), small_len,
> +	               small_len);
> +	  do_one_test (impl, s2, (CHAR *) (s1 + long_offset), page_size,
> +	               big_len);
> +	}
> +    }
> +}
> +
>  static void
>  do_random_tests (void)
>  {
> @@ -317,6 +357,7 @@ test_main (void)
>      }
> 
>    do_random_tests ();
> +  do_page_tests ();
>    return ret;
>  }
> 

LGTM with above nits addressed.

PC
Adhemerval Zanella Netto June 11, 2020, 5:36 p.m. UTC | #2
On 11/06/2020 13:33, Paul A. Clarke via Libc-alpha wrote:
> On Wed, Jun 10, 2020 at 03:55:18PM -0300, Raphael Moreira Zinsly via Libc-alpha wrote:
>> A couple more changes based on Adhemerval's review, thanks.
>>
>> Changes since v3:
>> 	- Using a cache line size of 64 if the system doesn't have meaningful
>> 	information on _SC_LEVEL1_DCACHE_LINESIZE.
>> 	- Using buf2 instead of a buf1 copy for s2 at
>> 	string/test-strncasecmp.c.
>>
>> --- >8 ---
>>
>> Adds tests to check if strings placed at page bondaries are
>> handled correctly by strncasecmp and strncpy similar to tests
>> for strncmp and strnlen.
>> ---
>>  string/test-strncasecmp.c | 43 +++++++++++++++++++++++++++++++++++++++
>>  string/test-strncpy.c     | 41 +++++++++++++++++++++++++++++++++++++
>>  2 files changed, 84 insertions(+)
>>
>> diff --git a/string/test-strncasecmp.c b/string/test-strncasecmp.c
>> index 6a9c27beae..63f6d59ef2 100644
>> --- a/string/test-strncasecmp.c
>> +++ b/string/test-strncasecmp.c
>> @@ -137,6 +137,48 @@ do_test (size_t align1, size_t align2, size_t n, size_t len, int max_char,
>>      do_one_test (impl, s1, s2, n, exp_result);
>>  }
>>
>> +static void
>> +do_page_tests (void)
>> +{
>> +  size_t i, offset, cacheline_size;
>> +  char *s1, *s2;
>> +  int exp_result;
>> +
>> +  offset = page_size - 1;
>> +  s1 = (char *) buf1;
>> +  s2 = (char *) buf2;
>> +  memset (s1, '\1', offset);
>> +  memset (s2, '\1', offset);
>> +  s1[offset] = '\0';
>> +  s2[offset] = '\0';
>> +
>> +  /* s2 has a fixed offset, half page long.
>> +     page_size is actually 2 * getpagesize.  */
>> +  offset = page_size / 4;
>> +  s2 += offset;
>> +  /* Start offset for s1 at half of the second page.  */
>> +  offset = 3 * page_size / 4;
>> +  s1 += offset;
>> +
>> +  /* Try to cross the page boundary at every offset of a cache line.  */
>> +  cacheline_size = sysconf (_SC_LEVEL1_DCACHE_LINESIZE);
>> +  /* Use 64 as default.  */
>> +  if(cacheline_size == 0)
>> +    cacheline_size = 64;
> 
> A little bikeshedding here, but why not pick the largest common cache line
> size, which would obvioulsy be a superset of all the rest?  Say, 128?
> 
> Also, the comment is probably not needed.  Indeed, you could coalesce:
>   if ((cacheline_size = sysconf (_SC_LEVEL1_DCACHE_LINESIZE)) == 0)
>     cacheline_size = 128;

In fact I am not sure how relevant is dcache line size on most string
implementations, even for powerpc only some memset variants does use
this information to push for some optimizations.

I think what we need to test here is multiple string offsets based on
usual register sizes used for implementations (say SSE, AVX, AVX2,
Altivec, VSX, etc.) with multiple permutations on both input sizes.

Something like:

  /* Assumes up to 512-bit wide read/stores.  */
  const size_t maxoffset = 64;

  s1 = buf1 + (BUF1PAGES) * page_size - maxoffset;
  memset (s1, 'a', maxoffset - 1);
  s1[maxoffset - 1] = '\0';

  s2 = buf2 + page_size - maxoffset;
  memset (s2, 'a', maxoffset - 1);
  s2[maxoffset - 1] = '\0';

  /* At this point s1 and s2 points to distinct memory regions containing
     "aa..." with size of 127 plus '\0'.  Also, both string are bounded
     to a page with write/read access and the next page is protected with 
     PROT_NONE (meaning that any access outside of the page regions will 
     trigger an invalid memory access).

     The loop checks for all possible offset up to maxoffset for both
     inputs with a size larger than the string (so memory access outsize
     the expected memory regions might trigger invalid access).  */
  
  for (size_t off1 = 0; off1 < maxoffset; off1++)
    {
      for (size_t off2 = 0; off2 < maxoffset; off2++)
        {
           exp_result = off1 == off2
			? 0
			: off1 < off2
			  ? 'a'
			  : -'a';

	   FOR_EACH_IMPL (impl, 0)
	     check_result (impl, s1, s2, maxoffset + 1, exp_result)
        }
    }
  

> 
>> +  for (i = 0; i < cacheline_size; ++i)
>> +    {
>> +      exp_result = *s1;
>> +
>> +      FOR_EACH_IMPL (impl, 0)
>> +	{
>> +	  check_result (impl, s1, s2, page_size, -exp_result);
>> +	  check_result (impl, s2, s1, page_size, exp_result);
>> +	}
>> +
>> +      s1++;
>> +    }
>> +}
>> +
>>  static void
>>  do_random_tests (void)
>>  {
>> @@ -334,6 +376,7 @@ test_locale (const char *locale)
>>      }
>>
>>    do_random_tests ();
>> +  do_page_tests ();
>>  }
>>
>>  int
>> diff --git a/string/test-strncpy.c b/string/test-strncpy.c
>> index c978753ad8..051257f200 100644
>> --- a/string/test-strncpy.c
>> +++ b/string/test-strncpy.c
>> @@ -155,6 +155,46 @@ do_test (size_t align1, size_t align2, size_t len, size_t n, int max_char)
>>      do_one_test (impl, s2, s1, len, n);
>>  }
>>
>> +static void
>> +do_page_tests (void)
>> +{
>> +  size_t i, small_len, big_len, short_offset, long_offset, cacheline_size;
>> +  CHAR *s1, *s2;
>> +  /* Calculate the null character offset.  */
>> +  size_t last_offset = (page_size / sizeof (CHAR)) - 1;
>> +
>> +  s2 = (CHAR *) buf1;
>> +  s1 = (CHAR *) buf2;
>> +  MEMSET (s1, '\1', last_offset);
>> +  s1[last_offset] = '\0';
>> +
>> +  long_offset = (last_offset + 1) / 2;
>> +  short_offset = last_offset;
>> +
>> +  /* Try to cross the page boundary at every offset of a cache line.  */
>> +  cacheline_size = sysconf (_SC_LEVEL1_DCACHE_LINESIZE);
>> +  /* Use 64 as default.  */
>> +  if(cacheline_size == 0)
>> +    cacheline_size = 64;
> 
> same.
> 
>> +  for (i = 0; i < cacheline_size; i++)
>> +    {
>> +      /* Place long strings ending at page boundary.  */
>> +      long_offset++;
>> +      big_len = last_offset - long_offset;
>> +      /* Place short strings ending at page boundary.  */
>> +      short_offset--;
>> +      small_len = last_offset - short_offset;
>> +
>> +      FOR_EACH_IMPL (impl, 0)
>> +        {
>> +	  do_one_test (impl, s2, (CHAR *) (s1 + short_offset), small_len,
>> +	               small_len);
>> +	  do_one_test (impl, s2, (CHAR *) (s1 + long_offset), page_size,
>> +	               big_len);
>> +	}
>> +    }
>> +}
>> +
>>  static void
>>  do_random_tests (void)
>>  {
>> @@ -317,6 +357,7 @@ test_main (void)
>>      }
>>
>>    do_random_tests ();
>> +  do_page_tests ();
>>    return ret;
>>  }
>>
> 
> LGTM with above nits addressed.
> 
> PC
>
diff mbox series

Patch

diff --git a/string/test-strncasecmp.c b/string/test-strncasecmp.c
index 6a9c27beae..63f6d59ef2 100644
--- a/string/test-strncasecmp.c
+++ b/string/test-strncasecmp.c
@@ -137,6 +137,48 @@  do_test (size_t align1, size_t align2, size_t n, size_t len, int max_char,
     do_one_test (impl, s1, s2, n, exp_result);
 }
 
+static void
+do_page_tests (void)
+{
+  size_t i, offset, cacheline_size;
+  char *s1, *s2;
+  int exp_result;
+
+  offset = page_size - 1;
+  s1 = (char *) buf1;
+  s2 = (char *) buf2;
+  memset (s1, '\1', offset);
+  memset (s2, '\1', offset);
+  s1[offset] = '\0';
+  s2[offset] = '\0';
+
+  /* s2 has a fixed offset, half page long.
+     page_size is actually 2 * getpagesize.  */
+  offset = page_size / 4;
+  s2 += offset;
+  /* Start offset for s1 at half of the second page.  */
+  offset = 3 * page_size / 4;
+  s1 += offset;
+
+  /* Try to cross the page boundary at every offset of a cache line.  */
+  cacheline_size = sysconf (_SC_LEVEL1_DCACHE_LINESIZE);
+  /* Use 64 as default.  */
+  if(cacheline_size == 0)
+    cacheline_size = 64;
+  for (i = 0; i < cacheline_size; ++i)
+    {
+      exp_result = *s1;
+
+      FOR_EACH_IMPL (impl, 0)
+	{
+	  check_result (impl, s1, s2, page_size, -exp_result);
+	  check_result (impl, s2, s1, page_size, exp_result);
+	}
+
+      s1++;
+    }
+}
+
 static void
 do_random_tests (void)
 {
@@ -334,6 +376,7 @@  test_locale (const char *locale)
     }
 
   do_random_tests ();
+  do_page_tests ();
 }
 
 int
diff --git a/string/test-strncpy.c b/string/test-strncpy.c
index c978753ad8..051257f200 100644
--- a/string/test-strncpy.c
+++ b/string/test-strncpy.c
@@ -155,6 +155,46 @@  do_test (size_t align1, size_t align2, size_t len, size_t n, int max_char)
     do_one_test (impl, s2, s1, len, n);
 }
 
+static void
+do_page_tests (void)
+{
+  size_t i, small_len, big_len, short_offset, long_offset, cacheline_size;
+  CHAR *s1, *s2;
+  /* Calculate the null character offset.  */
+  size_t last_offset = (page_size / sizeof (CHAR)) - 1;
+
+  s2 = (CHAR *) buf1;
+  s1 = (CHAR *) buf2;
+  MEMSET (s1, '\1', last_offset);
+  s1[last_offset] = '\0';
+
+  long_offset = (last_offset + 1) / 2;
+  short_offset = last_offset;
+
+  /* Try to cross the page boundary at every offset of a cache line.  */
+  cacheline_size = sysconf (_SC_LEVEL1_DCACHE_LINESIZE);
+  /* Use 64 as default.  */
+  if(cacheline_size == 0)
+    cacheline_size = 64;
+  for (i = 0; i < cacheline_size; i++)
+    {
+      /* Place long strings ending at page boundary.  */
+      long_offset++;
+      big_len = last_offset - long_offset;
+      /* Place short strings ending at page boundary.  */
+      short_offset--;
+      small_len = last_offset - short_offset;
+
+      FOR_EACH_IMPL (impl, 0)
+        {
+	  do_one_test (impl, s2, (CHAR *) (s1 + short_offset), small_len,
+	               small_len);
+	  do_one_test (impl, s2, (CHAR *) (s1 + long_offset), page_size,
+	               big_len);
+	}
+    }
+}
+
 static void
 do_random_tests (void)
 {
@@ -317,6 +357,7 @@  test_main (void)
     }
 
   do_random_tests ();
+  do_page_tests ();
   return ret;
 }