diff mbox

[AArch64] PR target/71663 Improve Vector Initialization

Message ID CO2PR07MB2694C0205E7F6A6AD3AFDFDB83870@CO2PR07MB2694.namprd07.prod.outlook.com
State New
Headers show

Commit Message

Hurugalawadi, Naveen Dec. 9, 2016, 7:02 a.m. UTC
Hi,

Sorry. Missed out the testcase in patch submission.
Added the missing testcase along with the ChangeLog.
Please review the same and let us know if that's okay.

2016-12-09  Andrew Pinski  <apinski@cavium.com>

gcc
        * config/aarch64/aarch64.c (aarch64_expand_vector_init):
        Improve vector initialization code gen.    
gcc/testsuite
        * gcc.target/aarch64/pr71663.c: New Testcase.

Comments

Hurugalawadi, Naveen Feb. 6, 2017, 6:46 a.m. UTC | #1
Hi,

Please consider this as a personal reminder to review the patch
at the following link and let me know your comments on the same.

https://gcc.gnu.org/ml/gcc-patches/2016-12/msg00718.html

Thanks,
Naveen
Hurugalawadi, Naveen April 25, 2017, 6:29 a.m. UTC | #2
Hi,

Please consider this as a personal reminder to review the patch
at the following link and let me know your comments on the same.

https://gcc.gnu.org/ml/gcc-patches/2016-12/msg00718.html

Thanks,
Naveen
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index e87831f..da5b6fa 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -11609,11 +11609,54 @@  aarch64_expand_vector_init (rtx target, rtx vals)
       aarch64_expand_vector_init (target, copy);
     }
 
-  /* Insert the variable lanes directly.  */
-
   enum insn_code icode = optab_handler (vec_set_optab, mode);
   gcc_assert (icode != CODE_FOR_nothing);
 
+  /* If there are only variables, try to optimize
+     the insertion using dup for the most common element
+     followed by insertions. */
+  if (n_var == n_elts && n_elts <= 16)
+    {
+      int matches[16][2];
+      int nummatches = 0;
+      memset (matches, 0, sizeof(matches));
+      for(int i = 0; i < n_elts; i++)
+	{
+	  for (int j = 0; j <= i; j++)
+	    {
+	      if (rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, j)))
+		{
+		  matches[i][0] = j;
+		  matches[j][1]++;
+		  if (i != j)
+		    nummatches++;
+		  break;
+		}
+	    }
+	}
+      int maxelement = 0;
+      int maxv = 0;
+      for (int i = 0; i < n_elts; i++)
+	if (matches[i][1] > maxv)
+	  maxelement = i, maxv = matches[i][1];
+
+      /* Create a duplicate of the most common element. */
+      rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
+      aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
+      /* Insert the rest. */
+      for (int i = 0; i < n_elts; i++)
+	{
+	  rtx x = XVECEXP (vals, 0, i);
+	  if (matches[i][0] == maxelement)
+	    continue;
+	  x = copy_to_mode_reg (inner_mode, x);
+	  emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
+	}
+      return;
+    }
+
+  /* Insert the variable lanes directly.  */
+
   for (int i = 0; i < n_elts; i++)
     {
       rtx x = XVECEXP (vals, 0, i);
diff --git a/gcc/testsuite/gcc.target/aarch64/pr71663.c b/gcc/testsuite/gcc.target/aarch64/pr71663.c
new file mode 100644
index 0000000..c8df847
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr71663.c
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#define vector __attribute__((vector_size(16)))
+
+vector float combine (float a, float b, float c, float d)
+{
+  return (vector float) { a, b, c, d };
+}
+
+/* { dg-final { scan-assembler-not "movi\t" } } */
+/* { dg-final { scan-assembler-not "orr\t" } } */
+/* { dg-final { scan-assembler-times "ins\t" 3 } } */
+/* { dg-final { scan-assembler-times "dup\t" 1 } } */