diff mbox series

[v2,3/3] powerpc:selftest update memcmp_64 selftest for VMX implementation

Message ID 1505950480-14830-4-git-send-email-wei.guo.simon@gmail.com (mailing list archive)
State Superseded
Headers show
Series powerpc/64: memcmp() optimization | expand

Commit Message

Simon Guo Sept. 20, 2017, 11:34 p.m. UTC
From: Simon Guo <wei.guo.simon@gmail.com>

This patch adjusts the memcmp_64 selftest so that the memcmp selftest
can be compiled successfully.

It also adds testcases for memcmp with sizes over 4K bytes.

Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
---
 .../selftests/powerpc/copyloops/asm/ppc_asm.h      |  2 +-
 .../selftests/powerpc/stringloops/asm/ppc_asm.h    | 31 +++++++++++
 .../testing/selftests/powerpc/stringloops/memcmp.c | 63 +++++++++++++++-------
 3 files changed, 75 insertions(+), 21 deletions(-)

Comments

Simon Guo Sept. 24, 2017, 6:19 a.m. UTC | #1
Hi David,
On Mon, Sep 25, 2017 at 09:30:28AM +0000, David Laight wrote:
> From: wei.guo.simon@gmail.com
> > Sent: 21 September 2017 00:35
> > This patch adjust selftest memcmp_64 so that memcmp selftest can be
> > compiled successfully.
> ...
> >  #define ITERATIONS 10000
> > 
> > +#define LARGE_SIZE (5 * 1024)
> > +#define LARGE_ITERATIONS 1000
> ...
> 
> Measuring performance by doing a lot of iterations isn't ideal
> and is pretty pointless.
> Cold cache performance can be more useful.
> Also you don't really want any dynamic branch prediction logic
> tuned to the exact test you keep doing.

I think the (original) selftest aims at full coverage of functional
correctness, since each iteration generates a new random data set.

Thanks,
- Simon
David Laight Sept. 25, 2017, 9:30 a.m. UTC | #2
From: wei.guo.simon@gmail.com
> Sent: 21 September 2017 00:35
> This patch adjust selftest memcmp_64 so that memcmp selftest can be
> compiled successfully.
...
>  #define ITERATIONS 10000
> 
> +#define LARGE_SIZE (5 * 1024)
> +#define LARGE_ITERATIONS 1000
...

Measuring performance by doing a lot of iterations isn't ideal
and is pretty pointless.
Cold cache performance can be more useful.
Also you don't really want any dynamic branch prediction logic
tuned to the exact test you keep doing.

	David
diff mbox series

Patch

diff --git a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
index 80d34a9..a9da02d 100644
--- a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
+++ b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
@@ -35,7 +35,7 @@ 
 	li	r3,0
 	blr
 
-FUNC_START(enter_vmx_copy)
+FUNC_START(enter_vmx_ops)
 	li	r3,1
 	blr
 
diff --git a/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h
index 11bece8..c8bb360 100644
--- a/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h
+++ b/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h
@@ -1,3 +1,5 @@ 
+#ifndef _PPC_ASM_H
+#define _PPC_ASM_H
 #include <ppc-asm.h>
 
 #ifndef r1
@@ -5,3 +7,32 @@ 
 #endif
 
 #define _GLOBAL(A) FUNC_START(test_ ## A)
+
+#define CONFIG_ALTIVEC
+
+#define R14 r14
+#define R15 r15
+#define R16 r16
+#define R17 r17
+#define R18 r18
+#define R19 r19
+#define R20 r20
+#define R21 r21
+#define R22 r22
+#define R29 r29
+#define R30 r30
+#define R31 r31
+
+#define STACKFRAMESIZE	256
+#define STK_REG(i)	(112 + ((i)-14)*8)
+
+#define _GLOBAL(A) FUNC_START(test_ ## A)
+#define _GLOBAL_TOC(A) _GLOBAL(A)
+
+#define PPC_MTOCRF(A, B)	mtocrf A, B
+
+FUNC_START(enter_vmx_ops)
+	li      r3, 1
+	blr
+
+#endif
diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c
index 30b1222..4826669 100644
--- a/tools/testing/selftests/powerpc/stringloops/memcmp.c
+++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c
@@ -1,20 +1,27 @@ 
 #include <malloc.h>
 #include <stdlib.h>
 #include <string.h>
+#include <time.h>
 #include "utils.h"
 
 #define SIZE 256
 #define ITERATIONS 10000
 
+#define LARGE_SIZE (5 * 1024)
+#define LARGE_ITERATIONS 1000
+#define LARGE_MAX_OFFSET 16
+#define LARGE_SIZE_START 4096
+
 int test_memcmp(const void *s1, const void *s2, size_t n);
 
 /* test all offsets and lengths */
-static void test_one(char *s1, char *s2)
+static void test_one(char *s1, char *s2, unsigned long max_offset,
+		unsigned long size_start)
 {
 	unsigned long offset, size;
 
-	for (offset = 0; offset < SIZE; offset++) {
-		for (size = 0; size < (SIZE-offset); size++) {
+	for (offset = 0; offset < max_offset; offset++) {
+		for (size = size_start; size < (SIZE-offset); size++) {
 			int x, y;
 			unsigned long i;
 
@@ -38,66 +45,82 @@  static void test_one(char *s1, char *s2)
 	}
 }
 
-static int testcase(void)
+static int testcase(bool islarge)
 {
 	char *s1;
 	char *s2;
 	unsigned long i;
 
-	s1 = memalign(128, SIZE);
+	unsigned long alloc_size = islarge ? LARGE_SIZE : SIZE;
+	int iterations = islarge ? LARGE_ITERATIONS : ITERATIONS;
+
+	s1 = memalign(128, alloc_size);
 	if (!s1) {
 		perror("memalign");
 		exit(1);
 	}
 
-	s2 = memalign(128, SIZE);
+	s2 = memalign(128, alloc_size);
 	if (!s2) {
 		perror("memalign");
 		exit(1);
 	}
 
-	srandom(1);
+	srandom(time(0));
 
-	for (i = 0; i < ITERATIONS; i++) {
+	for (i = 0; i < iterations; i++) {
 		unsigned long j;
 		unsigned long change;
 
-		for (j = 0; j < SIZE; j++)
+		for (j = 0; j < alloc_size; j++)
 			s1[j] = random();
 
-		memcpy(s2, s1, SIZE);
+		memcpy(s2, s1, alloc_size);
 
 		/* change one byte */
-		change = random() % SIZE;
+		change = random() % alloc_size;
 		s2[change] = random() & 0xff;
 
-		test_one(s1, s2);
+		if (islarge)
+			test_one(s1, s2, LARGE_MAX_OFFSET, LARGE_SIZE_START);
+		else
+			test_one(s1, s2, SIZE, 0);
 	}
 
-	srandom(1);
+	srandom(time(0));
 
-	for (i = 0; i < ITERATIONS; i++) {
+	for (i = 0; i < iterations; i++) {
 		unsigned long j;
 		unsigned long change;
 
-		for (j = 0; j < SIZE; j++)
+		for (j = 0; j < alloc_size; j++)
 			s1[j] = random();
 
-		memcpy(s2, s1, SIZE);
+		memcpy(s2, s1, alloc_size);
 
 		/* change multiple bytes, 1/8 of total */
-		for (j = 0; j < SIZE / 8; j++) {
-			change = random() % SIZE;
+		for (j = 0; j < alloc_size / 8; j++) {
+			change = random() % alloc_size;
 			s2[change] = random() & 0xff;
 		}
 
-		test_one(s1, s2);
+		if (islarge)
+			test_one(s1, s2, LARGE_MAX_OFFSET, LARGE_SIZE_START);
+		else
+			test_one(s1, s2, SIZE, 0);
 	}
 
 	return 0;
 }
 
+static int testcases(void)
+{
+	testcase(0);
+	testcase(1);
+	return 0;
+}
+
 int main(void)
 {
-	return test_harness(testcase, "memcmp");
+	return test_harness(testcases, "memcmp");
 }