diff mbox series

rs6000: Add execution tests for mma builtins [v4]

Message ID 20200710204902.58429-1-acsawdey@linux.ibm.com
State New
Headers show
Series rs6000: Add execution tests for mma builtins [v4] | expand

Commit Message

Aaron Sawdey July 10, 2020, 8:49 p.m. UTC
This patch adds execution tests that use the MMA builtins and
check for the right answer, and new tests that checks whether
__builtin_cpu_supports and __builtin_cpu_is return sane
answers for power10.

I've now cleaned up and separated things out so there are 4 test cases:
* MMA single precision execution test
* MMA double precision execution test
* test that if effective-target is power10_hw, __builtin_cpu_is("power10")
  is also true.
* test that if effective-target is power10_hw,
  __builtin_cpu_supports("arch_3_1") is also true.

This establishes that the test environment correctly identifies itself,
and that it can execute MMA code and get the right answer.

A future patch will add an effective-target test for powerpc_mma_hw,
which these mma tests will also need to check for.

OK for trunk and backport to 10?

2020-06-30  Rajalakshmi Srinivasaraghavan  <rajis@linux.vnet.ibm.com>
	    Aaron Sawdey  <acsawdey@linux.ibm.com>

gcc/testsuite/
	* gcc.target/powerpc/p10-identify.c: New file.
	* gcc.target/powerpc/p10-arch31.c: New file.
	* gcc.target/powerpc/mma-single-test.c: New file.
	* gcc.target/powerpc/mma-double-test.c: New file.
---
 .../gcc.target/powerpc/mma-double-test.c      | 185 +++++++++++++++++
 .../gcc.target/powerpc/mma-single-test.c      | 193 ++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/p10-arch31.c |  25 +++
 .../gcc.target/powerpc/p10-identify.c         |  26 +++
 4 files changed, 429 insertions(+)
 create mode 100755 gcc/testsuite/gcc.target/powerpc/mma-double-test.c
 create mode 100755 gcc/testsuite/gcc.target/powerpc/mma-single-test.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-arch31.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-identify.c

Comments

Segher Boessenkool July 10, 2020, 9:06 p.m. UTC | #1
Hi!

On Fri, Jul 10, 2020 at 03:49:02PM -0500, Aaron Sawdey via Gcc-patches wrote:
> This patch adds execution tests that use the MMA builtins and
> check for the right answer, and new tests that checks whether
> __builtin_cpu_supports and __builtin_cpu_is return sane
> answers for power10.

> 2020-06-30  Rajalakshmi Srinivasaraghavan  <rajis@linux.vnet.ibm.com>
> 	    Aaron Sawdey  <acsawdey@linux.ibm.com>
> 
> gcc/testsuite/
> 	* gcc.target/powerpc/p10-identify.c: New file.
> 	* gcc.target/powerpc/p10-arch31.c: New file.
> 	* gcc.target/powerpc/mma-single-test.c: New file.
> 	* gcc.target/powerpc/mma-double-test.c: New file.

Okay for trunk, and for GCC 10 backport as well.  Thanks!


Segher
diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.target/powerpc/mma-double-test.c b/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
new file mode 100755
index 00000000000..9ba0010978f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
@@ -0,0 +1,185 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target power10_hw } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <altivec.h>
+
+typedef unsigned char vec_t __attribute__ ((vector_size (16)));
+typedef double v4sf_t __attribute__ ((vector_size (16)));
+#define SAVE_ACC(ACC, ldc, J)  \
+	  __builtin_mma_disassemble_acc (result, ACC); \
+	  rowC = (v4sf_t *) &CO[0*ldc+J]; \
+          rowC[0] += result[3] ; \
+          rowC = (v4sf_t *) &CO[1*ldc+J]; \
+          rowC[0] += result[2] ; \
+          rowC = (v4sf_t *) &CO[2*ldc+J]; \
+          rowC[0] += result[1] ; \
+          rowC = (v4sf_t *) &CO[3*ldc+J]; \
+	  rowC[0] += result[0] ;
+
+void
+MMA (int m, int n, int k, double *A, double *B, double *C)
+{
+  __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
+  v4sf_t result[4];
+  v4sf_t *rowC;
+  for (int l = 0; l < n; l += 4)
+    {
+      double *CO;
+      double *AO;
+      AO = A;
+      CO = C;
+      C += m * 4;
+      for (int j = 0; j < m; j += 16)
+	{
+	  double *BO = B;
+	  __builtin_mma_xxsetaccz (&acc0);
+	  __builtin_mma_xxsetaccz (&acc1);
+	  __builtin_mma_xxsetaccz (&acc2);
+	  __builtin_mma_xxsetaccz (&acc3);
+	  __builtin_mma_xxsetaccz (&acc4);
+	  __builtin_mma_xxsetaccz (&acc5);
+	  __builtin_mma_xxsetaccz (&acc6);
+	  __builtin_mma_xxsetaccz (&acc7);
+	  unsigned long i;
+
+	  for (i = 0; i < k; i++)
+	    {
+	      vec_t *rowA = (vec_t *) & AO[i * 16];
+	      __vector_pair rowB;
+	      vec_t *rb = (vec_t *) & BO[i * 4];
+	      __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
+	      __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
+	      __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
+	      __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
+	      __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
+	      __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
+	      __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
+	      __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
+	      __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
+	    }
+	  SAVE_ACC (&acc0, m, 0);
+	  SAVE_ACC (&acc2, m, 4);
+	  SAVE_ACC (&acc1, m, 2);
+	  SAVE_ACC (&acc3, m, 6);
+	  SAVE_ACC (&acc4, m, 8);
+	  SAVE_ACC (&acc6, m, 12);
+	  SAVE_ACC (&acc5, m, 10);
+	  SAVE_ACC (&acc7, m, 14);
+	  AO += k * 16;
+	  BO += k * 4;
+	  CO += 16;
+	}
+      B += k * 4;
+    }
+}
+
+void
+init (double *matrix, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+    {
+      for (int i = 0; i < row; i++)
+	{
+	  matrix[j * row + i] = (i * 16 + 2 + j) / 0.123;
+	}
+    }
+}
+
+void
+init0 (double *matrix, double *matrix1, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+    for (int i = 0; i < row; i++)
+      matrix[j * row + i] = matrix1[j * row + i] = 0;
+}
+
+
+void
+print (const char *name, const double *matrix, int row, int column)
+{
+  printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
+  for (int i = 0; i < row; i++)
+    {
+      for (int j = 0; j < column; j++)
+	{
+	  printf ("%f ", matrix[j * row + i]);
+	}
+      printf ("\n");
+    }
+  printf ("\n");
+}
+
+int
+main (int argc, char *argv[])
+{
+  int rowsA, colsB, common;
+  int i, j, k;
+  int ret = 0;
+
+  for (int t = 16; t <= 128; t += 16)
+    {
+      for (int t1 = 4; t1 <= 16; t1 += 4)
+	{
+	  rowsA = t;
+	  colsB = t1;
+	  common = 1;
+	  /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
+	  double A[rowsA * common];
+	  double B[common * colsB];
+	  double C[rowsA * colsB];
+	  double D[rowsA * colsB];
+
+
+	  init (A, rowsA, common);
+	  init (B, common, colsB);
+	  init0 (C, D, rowsA, colsB);
+	  MMA (rowsA, colsB, common, A, B, C);
+
+	  for (i = 0; i < colsB; i++)
+	    {
+	      for (j = 0; j < rowsA; j++)
+		{
+		  D[i * rowsA + j] = 0;
+		  for (k = 0; k < common; k++)
+		    {
+		      D[i * rowsA + j] +=
+			A[k * rowsA + j] * B[k + common * i];
+		    }
+		}
+	    }
+	  for (i = 0; i < colsB; i++)
+	    {
+	      for (j = 0; j < rowsA; j++)
+		{
+		  for (k = 0; k < common; k++)
+		    {
+		      if (D[i * rowsA + j] != C[i * rowsA + j])
+			{
+			  printf ("Error %d,%d,%d\n",i,j,k);
+			  ret++;
+			}
+		    }
+		}
+	    }
+	  if (ret)
+	    {
+	      print ("A", A, rowsA, common);
+	      print ("B", B, common, colsB);
+	      print ("C", C, rowsA, colsB);
+	      print ("D", D, rowsA, colsB);
+	    }
+	}
+    }
+  
+#ifdef VERBOSE
+  if (ret)
+    printf ("MMA double test fail: %d errors\n",ret);
+  else
+    printf ("MMA single test success: 0 MMA errors\n");
+#endif
+      
+  return ret;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-single-test.c b/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
new file mode 100755
index 00000000000..aa71fa7f0af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
@@ -0,0 +1,193 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target power10_hw } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <altivec.h>
+
+typedef unsigned char vec_t __attribute__ ((vector_size (16)));
+typedef float v4sf_t __attribute__ ((vector_size (16)));
+#define SAVE_ACC(ACC, ldc,J)  \
+	  __builtin_mma_disassemble_acc (result, ACC); \
+	  rowC = (v4sf_t *) &CO[0*ldc+J]; \
+          rowC[0] += result[3] ; \
+          rowC = (v4sf_t *) &CO[1*ldc+J]; \
+          rowC[0] += result[2] ; \
+          rowC = (v4sf_t *) &CO[2*ldc+J]; \
+          rowC[0] += result[1] ; \
+          rowC = (v4sf_t *) &CO[3*ldc+J]; \
+	  rowC[0] += result[0] ;
+
+#define SAVE_ACC1(ACC,ldc, J)  \
+	  __builtin_mma_disassemble_acc (result, ACC); \
+	  rowC = (v4sf_t *) &CO[4* ldc+J]; \
+          rowC[0] += result[3] ; \
+          rowC = (v4sf_t *) &CO[5*ldc+J]; \
+          rowC[0] += result[2] ; \
+          rowC = (v4sf_t *) &CO[6*ldc+J]; \
+          rowC[0] += result[1] ; \
+          rowC = (v4sf_t *) &CO[7*ldc+J]; \
+	  rowC[0] += result[0] ;
+void
+MMA (int m, int n, int k, float *A, float *B, float *C)
+{
+  __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
+  v4sf_t result[4];
+  v4sf_t *rowC;
+  for (int l = 0; l < n; l += 8)
+    {
+      float *CO;
+      float *AO;
+      AO = A;
+      CO = C;
+      C += m * 8;
+      for (int j = 0; j < m; j += 16)
+	{
+	  float *BO = B;
+	  __builtin_mma_xxsetaccz (&acc0);
+	  __builtin_mma_xxsetaccz (&acc1);
+	  __builtin_mma_xxsetaccz (&acc2);
+	  __builtin_mma_xxsetaccz (&acc3);
+	  __builtin_mma_xxsetaccz (&acc4);
+	  __builtin_mma_xxsetaccz (&acc5);
+	  __builtin_mma_xxsetaccz (&acc6);
+	  __builtin_mma_xxsetaccz (&acc7);
+	  unsigned long i;
+
+	  for (i = 0; i < k; i++)
+	    {
+	      vec_t *rowA = (vec_t *) & AO[i * 16];
+	      vec_t *rowB = (vec_t *) & BO[i * 8];
+	      __builtin_mma_xvf32gerpp (&acc0, rowB[0], rowA[0]);
+	      __builtin_mma_xvf32gerpp (&acc1, rowB[1], rowA[0]);
+	      __builtin_mma_xvf32gerpp (&acc2, rowB[0], rowA[1]);
+	      __builtin_mma_xvf32gerpp (&acc3, rowB[1], rowA[1]);
+	      __builtin_mma_xvf32gerpp (&acc4, rowB[0], rowA[2]);
+	      __builtin_mma_xvf32gerpp (&acc5, rowB[1], rowA[2]);
+	      __builtin_mma_xvf32gerpp (&acc6, rowB[0], rowA[3]);
+	      __builtin_mma_xvf32gerpp (&acc7, rowB[1], rowA[3]);
+	    }
+	  SAVE_ACC (&acc0, m, 0);
+	  SAVE_ACC (&acc2, m, 4);
+	  SAVE_ACC1 (&acc1, m, 0);
+	  SAVE_ACC1 (&acc3, m, 4);
+	  SAVE_ACC (&acc4, m, 8);
+	  SAVE_ACC (&acc6, m, 12);
+	  SAVE_ACC1 (&acc5, m, 8);
+	  SAVE_ACC1 (&acc7, m, 12);
+	  AO += k * 16;
+	  BO += k * 8;
+	  CO += 16;
+	}
+      B += k * 8;
+    }
+}
+
+void
+init (float *matrix, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+    {
+      for (int i = 0; i < row; i++)
+	{
+	  matrix[j * row + i] = (i * 16 + 2 + j) / 0.123;
+	}
+    }
+}
+
+void
+init0 (float *matrix, float *matrix1, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+    for (int i = 0; i < row; i++)
+      matrix[j * row + i] = matrix1[j * row + i] = 0;
+}
+
+
+void
+print (const char *name, const float *matrix, int row, int column)
+{
+  printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
+  for (int i = 0; i < row; i++)
+    {
+      for (int j = 0; j < column; j++)
+	{
+	  printf ("%f ", matrix[j * row + i]);
+	}
+      printf ("\n");
+    }
+  printf ("\n");
+}
+
+int
+main (int argc, char *argv[])
+{
+  int rowsA, colsB, common;
+  int i, j, k;
+  int ret = 0;
+
+  for (int t = 16; t <= 128; t += 16)
+    {
+      for (int t1 = 8; t1 <= 16; t1 += 8)
+	{
+	  rowsA = t;
+	  colsB = t1;
+	  common = 1;
+	  /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
+	  float A[rowsA * common];
+	  float B[common * colsB];
+	  float C[rowsA * colsB];
+	  float D[rowsA * colsB];
+
+
+	  init (A, rowsA, common);
+	  init (B, common, colsB);
+	  init0 (C, D, rowsA, colsB);
+	  MMA (rowsA, colsB, common, A, B, C);
+
+	  for (i = 0; i < colsB; i++)
+	    {
+	      for (j = 0; j < rowsA; j++)
+		{
+		  D[i * rowsA + j] = 0;
+		  for (k = 0; k < common; k++)
+		    {
+		      D[i * rowsA + j] +=
+			A[k * rowsA + j] * B[k + common * i];
+		    }
+		}
+	    }
+	  for (i = 0; i < colsB; i++)
+	    {
+	      for (j = 0; j < rowsA; j++)
+		{
+		  for (k = 0; k < common; k++)
+		    {
+		      if (D[i * rowsA + j] != C[i * rowsA + j])
+			{
+			  printf ("Error %d,%d,%d\n",i,j,k);
+			  ret++;
+			}
+		    }
+		}
+	    }
+	  if (ret)
+	    {
+	      print ("A", A, rowsA, common);
+	      print ("B", B, common, colsB);
+	      print ("C", C, rowsA, colsB);
+	      print ("D", D, rowsA, colsB);
+	    }
+	}
+    }
+
+#ifdef VERBOSE
+  if (ret)
+    printf ("MMA single test fail: %d errors\n",ret);
+  else
+    printf ("MMA single test success: 0 MMA errors\n");
+#endif
+      
+  return ret;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-arch31.c b/gcc/testsuite/gcc.target/powerpc/p10-arch31.c
new file mode 100644
index 00000000000..a9a75ec83f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10-arch31.c
@@ -0,0 +1,25 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target power10_hw } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* This test will only run when the power10_hw_available test passes.
+   If that test passes, then we expect to see that ISA 3.1 is
+   supported.  If this is not the case, then the test environment has
+   problems.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+main (int argc, char *argv[])
+{
+  int ret = 0;
+#ifdef __BUILTIN_CPU_SUPPORTS__
+  if ( !__builtin_cpu_supports ("arch_3_1"))
+    {
+      printf ("Error: __builtin_cpu_supports says arch_3_1 not supported, but power10_hw test passed.\n");
+      ret++;
+    }
+#endif
+  return ret;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-identify.c b/gcc/testsuite/gcc.target/powerpc/p10-identify.c
new file mode 100644
index 00000000000..85326976a5d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10-identify.c
@@ -0,0 +1,26 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target power10_hw } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* This test will only run when the power10_hw_available test passes.
+   If that test passes, then we expect to see that the cpu is Power10.
+   If this is not the case, then the test environment has problems.
+   If in the future there are cpus that pass the power10_hw test but
+   are not power10, they will need to be added to this check. */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+main (int argc, char *argv[])
+{
+  int ret = 0;
+#ifdef __BUILTIN_CPU_SUPPORTS__
+  if ( !__builtin_cpu_is ("power10"))
+    {
+      printf ("Error: __builtin_cpu_is says this is not power10, but power10_hw test passed.\n");
+      ret++;
+    }
+#endif
+  return ret;
+}