@@ -11707,6 +11707,57 @@ aarch64_expand_vector_init (rtx target, rtx vals)
return;
}
+ enum insn_code icode = optab_handler (vec_set_optab, mode);
+ gcc_assert (icode != CODE_FOR_nothing);
+
+ /* If there are only variable elements, try to optimize
+ the insertion using dup for the most common element
+ followed by insertions. */
+
+ /* The algorithm will fill matches[*][0] with the earliest matching element,
+ and matches[X][1] with the count of duplicate elements (if X is the
+ earliest element which has duplicates). */
+
+ if (n_var == n_elts && n_elts <= 16)
+ {
+ int matches[16][2] = {0};
+ for (int i = 0; i < n_elts; i++)
+ {
+ for (int j = 0; j <= i; j++)
+ {
+ if (rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, j)))
+ {
+ matches[i][0] = j;
+ matches[j][1]++;
+ break;
+ }
+ }
+ }
+ int maxelement = 0;
+ int maxv = 0;
+ for (int i = 0; i < n_elts; i++)
+ if (matches[i][1] > maxv)
+ {
+ maxelement = i;
+ maxv = matches[i][1];
+ }
+
+ /* Create a duplicate of the most common element. */
+ rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
+ aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
+
+ /* Insert the rest. */
+ for (int i = 0; i < n_elts; i++)
+ {
+ rtx x = XVECEXP (vals, 0, i);
+ if (matches[i][0] == maxelement)
+ continue;
+ x = copy_to_mode_reg (inner_mode, x);
+ emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
+ }
+ return;
+ }
+
/* Initialise a vector which is part-variable. We want to first try
to build those lanes which are constant in the most efficient way we
can. */
@@ -11740,10 +11791,6 @@ aarch64_expand_vector_init (rtx target, rtx vals)
}
/* Insert the variable lanes directly. */
-
- enum insn_code icode = optab_handler (vec_set_optab, mode);
- gcc_assert (icode != CODE_FOR_nothing);
-
for (int i = 0; i < n_elts; i++)
{
rtx x = XVECEXP (vals, 0, i);
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#define vector __attribute__((vector_size(16)))
+
+vector float combine (float a, float b, float c, float d)
+{
+ return (vector float) { a, b, c, d };
+}
+
+/* { dg-final { scan-assembler-not "movi\t" } } */
+/* { dg-final { scan-assembler-not "orr\t" } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#define vector __attribute__((vector_size(16)))
+
+vector float combine (float a, float b, float d)
+{
+ return (vector float) { a, b, a, d };
+}
+
+/* { dg-final { scan-assembler-not "movi\t" } } */
+/* { dg-final { scan-assembler-not "orr\t" } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#define vector __attribute__((vector_size(16)))
+
+vector float combine (float a, float b)
+{
+ return (vector float) { a, b, a, b };
+}
+
+/* { dg-final { scan-assembler-not "movi\t" } } */
+/* { dg-final { scan-assembler-not "orr\t" } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#define vector __attribute__((vector_size(16)))
+
+vector float combine (float a, float b)
+{
+ return (vector float) { a, b, b, a };
+}
+
+/* { dg-final { scan-assembler-not "movi\t" } } */
+/* { dg-final { scan-assembler-not "orr\t" } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#define vector __attribute__((vector_size(16)))
+
+vector float combine (float a, float b)
+{
+ return (vector float) { a, b, a, a };
+}
+
+/* { dg-final { scan-assembler-not "movi\t" } } */
+/* { dg-final { scan-assembler-not "orr\t" } } */
Hi James, >> Could you make the testcase a bit more comprehensive? Modified the testcase considering all the possible cases. Split up the test based on different scenarios. Please review the patch and let us know if its okay? Thanks, Naveen