From b2bc0bf3a8ee17d53bf39f0aeabe7025b33e9c96 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Tue, 5 Feb 2019 15:39:27 -0800
Subject: [PATCH] Optimize vector constructor
We can optimize a vector constructor into a vector copy or permute
followed by a single scalar insert, i.e. replace:
__v4sf y;
__v4sf D.1930;
float _1;
float _2;
float _3;
<bb 2> :
_1 = BIT_FIELD_REF <x_9(D), 32, 96>;
_2 = BIT_FIELD_REF <x_9(D), 32, 64>;
_3 = BIT_FIELD_REF <x_9(D), 32, 32>;
y_6 = {f_5(D), _3, _2, _1};
return y_6;
with
__v4sf y;
__v4sf D.1930;
float _1;
float _2;
float _3;
vector(4) float _8;
<bb 2> :
_1 = BIT_FIELD_REF <x_9(D), 32, 96>;
_2 = BIT_FIELD_REF <x_9(D), 32, 64>;
_3 = BIT_FIELD_REF <x_9(D), 32, 32>;
_8 = x_9(D);
y_6 = BIT_INSERT_EXPR <x_9(D), f_5(D), 0 (32 bits)>;
return y_6;
gcc/
PR tree-optimization/88828
* tree-ssa-forwprop.c (simplify_vector_constructor): Optimize
vector init constructor with vector copy or permute followed
by a single scalar insert.
gcc/testsuite/
PR tree-optimization/88828
* gcc.target/i386/pr88828-1.c: New test.
* gcc.target/i386/pr88828-1a.c: Likewise.
* gcc.target/i386/pr88828-1b.c: Likewise.
* gcc.target/i386/pr88828-1c.c: Likewise.
* gcc.target/i386/pr88828-2.c: Likewise.
* gcc.target/i386/pr88828-2a.c: Likewise.
* gcc.target/i386/pr88828-2b.c: Likewise.
* gcc.target/i386/pr88828-2c.c: Likewise.
* gcc.target/i386/pr88828-2d.c: Likewise.
* gcc.target/i386/pr88828-3.c: Likewise.
* gcc.target/i386/pr88828-3a.c: Likewise.
* gcc.target/i386/pr88828-3b.c: Likewise.
* gcc.target/i386/pr88828-3c.c: Likewise.
* gcc.target/i386/pr88828-3d.c: Likewise.
* gcc.target/i386/pr88828-4a.c: Likewise.
* gcc.target/i386/pr88828-4b.c: Likewise.
* gcc.target/i386/pr88828-5a.c: Likewise.
* gcc.target/i386/pr88828-5b.c: Likewise.
---
gcc/testsuite/gcc.target/i386/pr88828-1.c | 49 +++++++++++++
gcc/testsuite/gcc.target/i386/pr88828-1a.c | 17 +++++
gcc/testsuite/gcc.target/i386/pr88828-1b.c | 23 ++++++
gcc/testsuite/gcc.target/i386/pr88828-1c.c | 18 +++++
gcc/testsuite/gcc.target/i386/pr88828-2.c | 51 +++++++++++++
gcc/testsuite/gcc.target/i386/pr88828-2a.c | 17 +++++
gcc/testsuite/gcc.target/i386/pr88828-2b.c | 19 +++++
gcc/testsuite/gcc.target/i386/pr88828-2c.c | 23 ++++++
gcc/testsuite/gcc.target/i386/pr88828-2d.c | 25 +++++++
gcc/testsuite/gcc.target/i386/pr88828-3.c | 54 ++++++++++++++
gcc/testsuite/gcc.target/i386/pr88828-3a.c | 17 +++++
gcc/testsuite/gcc.target/i386/pr88828-3b.c | 19 +++++
gcc/testsuite/gcc.target/i386/pr88828-3c.c | 25 +++++++
gcc/testsuite/gcc.target/i386/pr88828-4a.c | 18 +++++
gcc/testsuite/gcc.target/i386/pr88828-4b.c | 21 ++++++
gcc/testsuite/gcc.target/i386/pr88828-5a.c | 18 +++++
gcc/testsuite/gcc.target/i386/pr88828-5b.c | 20 +++++
gcc/tree-ssa-forwprop.c | 85 +++++++++++++++++++---
18 files changed, 509 insertions(+), 10 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-1a.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-1b.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-1c.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-2a.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-2b.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-2c.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-2d.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-3.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-3a.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-3b.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-3c.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-4a.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-4b.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-5a.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-5b.c
new file mode 100644
@@ -0,0 +1,49 @@
+/* { dg-do run { target sse2_runtime } } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "pr88828-1a.c"
+#include "pr88828-1b.c"
+#include "pr88828-1c.c"
+
+extern void abort ();
+
+void /* Abort unless y == { z, f[1], f[2], f[3] }.  */
+do_check (__v4sf y, float f[4], float z)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ if (i == 0)
+ {
+ if (y[i] != z) /* Lane 0 must hold the scalar z.  */
+ abort ();
+ }
+ else
+ {
+ if (y[i] != f[i]) /* Lanes 1-3 must match f unchanged.  */
+ abort ();
+ }
+}
+
+int /* Run foo1, foo2 and foo3 on the same input and check each result.  */
+main (void)
+{
+ float f[4] = { -11, 2, 55553, -4 };
+ float z = 134567;
+ __v4sf x = { f[0], f[1], f[2], f[3] };
+ __v4sf y;
+ int i;
+
+ for (i = 0; i < 4; i++) /* Confirm x matches f before calling the tests.  */
+ if (x[i] != f[i])
+ abort ();
+
+ y = foo1 (x, z);
+ do_check (y, f, z);
+ y = foo2 (x, z);
+ do_check (y, f, z);
+ y = foo3 (x, z);
+ do_check (y, f, z);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__attribute__((noinline, noclone)) /* Return x with lane 0 replaced by f.  */
+__v4sf
+foo1 (__v4sf x, float f)
+{
+ __v4sf y = { f, x[1], x[2], x[3] }; /* Lanes 1-3 keep their position.  */
+ return y;
+}
new file mode 100644
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+static __v4sf /* Build a vector whose lanes are f0, f1, f2, f3 in order.  */
+vector_init (float f0,float f1, float f2,float f3)
+{
+ __v4sf y = { f0, f1, f2, f3 };
+ return y;
+}
+
+__attribute__((noinline, noclone)) /* { f, x[1], x[2], x[3] } via vector_init.  */
+__v4sf
+foo2 (__v4sf x, float f)
+{
+ return vector_init (f, x[1], x[2], x[3]) ;
+}
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__attribute__((noinline, noclone)) /* Return a copy of x with lane 0 set to f.  */
+__v4sf
+foo3 (__v4sf x, float f)
+{
+ __v4sf y = x;
+ y[0] = f; /* Element store rather than a constructor.  */
+ return y;
+}
new file mode 100644
@@ -0,0 +1,51 @@
+/* { dg-do run { target sse2_runtime } } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "pr88828-2a.c"
+#include "pr88828-2c.c"
+
+extern void abort ();
+
+void /* Abort unless y == { z, f[0], f[2], f[3] }.  */
+do_check (__v4sf y, float f[4], float z)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ if (i == 0)
+ {
+ if (y[i] != z) /* Lane 0 must hold the scalar z.  */
+ abort ();
+ }
+ else if (i == 1)
+ {
+ if (y[i] != f[0]) /* Lane 1 must hold element 0 of f.  */
+ abort ();
+ }
+ else
+ {
+ if (y[i] != f[i]) /* Lanes 2 and 3 must match f unchanged.  */
+ abort ();
+ }
+}
+
+int /* Run foo1 and foo2 on the same input and check each result.  */
+main (void)
+{
+ float f[4] = { -11, 2, 55553, -4 };
+ float z = 134567;
+ __v4sf x = { f[0], f[1], f[2], f[3] };
+ __v4sf y;
+ int i;
+
+ for (i = 0; i < 4; i++) /* Confirm x matches f before calling the tests.  */
+ if (x[i] != f[i])
+ abort ();
+
+ y = foo1 (x, z);
+ do_check (y, f, z);
+ y = foo2 (x, z);
+ do_check (y, f, z);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler-times "shufps" 1 } } */
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__attribute__((noinline, noclone)) /* Return { f, x[0], x[2], x[3] }.  */
+__v4sf
+foo1 (__v4sf x, float f)
+{
+ __v4sf y = { f, x[0], x[2], x[3] }; /* x[0] moves to lane 1; x[1] is dropped.  */
+ return y;
+}
new file mode 100644
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-final { scan-assembler-times "vpermilps" 1 } } */
+/* { dg-final { scan-assembler-times "vmovss" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpinsrd" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-not "vmovss" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "vmovaps" } } */
+/* { dg-final { scan-assembler-not "vmovlhps" } } */
+/* { dg-final { scan-assembler-not "vunpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__attribute__((noinline, noclone)) /* Return { f, x[0], x[2], x[3] }.  */
+__v4sf
+foo1 (__v4sf x, float f)
+{
+ __v4sf y = { f, x[0], x[2], x[3] }; /* x[0] moves to lane 1; x[1] is dropped.  */
+ return y;
+}
new file mode 100644
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler-times "shufps" 1 } } */
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+static __v4sf /* Build a vector whose lanes are f0, f1, f2, f3 in order.  */
+vector_init (float f0,float f1, float f2,float f3)
+{
+ __v4sf y = { f0, f1, f2, f3 };
+ return y;
+}
+
+__attribute__((noinline, noclone)) /* { f, x[0], x[2], x[3] } via vector_init.  */
+__v4sf
+foo2 (__v4sf x, float f)
+{
+ return vector_init (f, x[0], x[2], x[3]) ;
+}
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-final { scan-assembler-times "vpermilps" 1 } } */
+/* { dg-final { scan-assembler-times "vmovss" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpinsrd" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-not "vmovss" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "vmovaps" } } */
+/* { dg-final { scan-assembler-not "vmovlhps" } } */
+/* { dg-final { scan-assembler-not "vunpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+static __v4sf /* Build a vector whose lanes are f0, f1, f2, f3 in order.  */
+vector_init (float f0,float f1, float f2,float f3)
+{
+ __v4sf y = { f0, f1, f2, f3 };
+ return y;
+}
+
+__attribute__((noinline, noclone)) /* { f, x[0], x[2], x[3] } via vector_init.  */
+__v4sf
+foo (__v4sf x, float f)
+{
+ return vector_init (f, x[0], x[2], x[3]) ;
+}
new file mode 100644
@@ -0,0 +1,54 @@
+/* { dg-do run { target sse2_runtime } } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "pr88828-3a.c"
+#include "pr88828-3b.c"
+#include "pr88828-3c.c"
+
+extern void abort ();
+
+void /* Abort unless y == { f[0], f[2], f[3], z }.  */
+do_check (__v4sf y, float f[4], float z)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ if (i == 3)
+ {
+ if (y[i] != z) /* Lane 3 must hold the scalar z.  */
+ abort ();
+ }
+ else if (i == 0)
+ {
+ if (y[i] != f[i]) /* Lane 0 must match f[0].  */
+ abort ();
+ }
+ else
+ {
+ if (y[i] != f[i + 1]) /* Lanes 1 and 2 must hold f[2] and f[3].  */
+ abort ();
+ }
+}
+
+int /* Run foo1, foo2 and foo3 on the same input and check each result.  */
+main (void)
+{
+ float f[4] = { -11, 2, 55553, -4 };
+ float z = 134567;
+ __v4sf x = { f[0], f[1], f[2], f[3] };
+ __v4sf y;
+ int i;
+
+ for (i = 0; i < 4; i++) /* Confirm x matches f before calling the tests.  */
+ if (x[i] != f[i])
+ abort ();
+
+ y = foo1 (x, z);
+ do_check (y, f, z);
+ y = foo2 (x, z);
+ do_check (y, f, z);
+ y = foo3 (x, z);
+ do_check (y, f, z);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler-times "shufps" 2 } } */
+/* { dg-final { scan-assembler-times "movaps" 1 } } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__attribute__((noinline, noclone)) /* Return { x[0], x[2], x[3], f }.  */
+__v4sf
+foo1 (__v4sf x, float f)
+{
+ __v4sf y = { x[0], x[2], x[3], f }; /* x[1] is dropped; f lands in lane 3.  */
+ return y;
+}
new file mode 100644
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-final { scan-assembler-times "vpermilps" 1 } } */
+/* { dg-final { scan-assembler-times "vinsertps" 1 } } */
+/* { dg-final { scan-assembler-not "vmovss" } } */
+/* { dg-final { scan-assembler-not "vshufps" } } */
+/* { dg-final { scan-assembler-not "vmovaps" } } */
+/* { dg-final { scan-assembler-not "vmovlhps" } } */
+/* { dg-final { scan-assembler-not "vunpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__attribute__((noinline, noclone)) /* Return { x[0], x[2], x[3], f }.  */
+__v4sf
+foo2 (__v4sf x, float f)
+{
+ __v4sf y = { x[0], x[2], x[3], f }; /* x[1] is dropped; f lands in lane 3.  */
+ return y;
+}
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-final { scan-assembler-times "vpermilps" 1 } } */
+/* { dg-final { scan-assembler-times "vinsertps" 1 } } */
+/* { dg-final { scan-assembler-not "vmovss" } } */
+/* { dg-final { scan-assembler-not "vshufps" } } */
+/* { dg-final { scan-assembler-not "vmovaps" } } */
+/* { dg-final { scan-assembler-not "vmovlhps" } } */
+/* { dg-final { scan-assembler-not "vunpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+static __v4sf /* Build a vector whose lanes are f0, f1, f2, f3 in order.  */
+vector_init (float f0,float f1, float f2,float f3)
+{
+ __v4sf y = { f0, f1, f2, f3 };
+ return y;
+}
+
+__attribute__((noinline, noclone)) /* { x[0], x[2], x[3], f } via vector_init.  */
+__v4sf
+foo3 (__v4sf x, float f)
+{
+ return vector_init (x[0], x[2], x[3], f);
+}
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler-times "shufps" 1 } } */
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__attribute__((noinline, noclone)) /* Return { f, x[2], x[3], x[1] }.  */
+__v4sf
+foo (__v4sf x, float f)
+{
+ __v4sf y = { x[0], x[2], x[3], x[1] }; /* Reorder the lanes of x first.  */
+ y[0] = f; /* Then overwrite lane 0, discarding the x[0] placed there.  */
+ return y;
+}
new file mode 100644
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-final { scan-assembler-times "vpermilps" 1 } } */
+/* { dg-final { scan-assembler-times "vmovss" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpinsrd" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-not "vmovss" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "vshufps" } } */
+/* { dg-final { scan-assembler-not "vmovaps" } } */
+/* { dg-final { scan-assembler-not "vmovlhps" } } */
+/* { dg-final { scan-assembler-not "vunpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__attribute__((noinline, noclone)) /* Return { f, x[2], x[3], x[1] }.  */
+__v4sf
+foo (__v4sf x, float f)
+{
+ __v4sf y = { x[0], x[2], x[3], x[1] }; /* Reorder the lanes of x first.  */
+ y[0] = f; /* Then overwrite lane 0, discarding the x[0] placed there.  */
+ return y;
+}
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler-times "shufps" 2 } } */
+/* { dg-final { scan-assembler-times "movaps" 1 } } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__attribute__((noinline, noclone)) /* Return { x[0], x[2], x[3], f }.  */
+__v4sf
+foo (__v4sf x, float f)
+{
+ __v4sf y = { x[0], x[2], x[3], x[0] }; /* Lane 3 is a placeholder copy of x[0].  */
+ y[3] = f; /* Overwrite the placeholder in lane 3 with f.  */
+ return y;
+}
new file mode 100644
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-final { scan-assembler-times "vpermilps" 1 } } */
+/* { dg-final { scan-assembler-times "vinsertps" 1 } } */
+/* { dg-final { scan-assembler-not "vshufps" } } */
+/* { dg-final { scan-assembler-not "vmovss" } } */
+/* { dg-final { scan-assembler-not "vmovaps" } } */
+/* { dg-final { scan-assembler-not "vmovlhps" } } */
+/* { dg-final { scan-assembler-not "vunpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__attribute__((noinline, noclone)) /* Return { x[0], x[2], x[3], f }.  */
+__v4sf
+foo (__v4sf x, float f)
+{
+ __v4sf y = { x[0], x[2], x[3], x[0] }; /* Lane 3 is a placeholder copy of x[0].  */
+ y[3] = f; /* Overwrite the placeholder in lane 3 with f.  */
+ return y;
+}
@@ -2008,7 +2008,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
unsigned elem_size, i;
unsigned HOST_WIDE_INT nelts;
enum tree_code code, conv_code;
- constructor_elt *elt;
+ constructor_elt *ce;
bool maybe_ident;
gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR);
@@ -2027,18 +2027,42 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
orig[1] = NULL;
conv_code = ERROR_MARK;
maybe_ident = true;
- FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt)
+
+ tree rhs_vector = NULL;
+ /* The single scalar element. */
+ tree scalar_element = NULL;
+ unsigned int scalar_idx = 0;
+ bool insert = false;
+ unsigned int nscalars = 0;
+ unsigned int nvectors = 0;
+ FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, ce)
{
tree ref, op1;
if (i >= nelts)
return false;
- if (TREE_CODE (elt->value) != SSA_NAME)
+ if (TREE_CODE (ce->value) != SSA_NAME)
return false;
- def_stmt = get_prop_source_stmt (elt->value, false, NULL);
+ def_stmt = get_prop_source_stmt (ce->value, false, NULL);
if (!def_stmt)
- return false;
+ {
+ if (gimple_nop_p (SSA_NAME_DEF_STMT (ce->value)))
+ {
+ /* Only allow one scalar insert. */
+ if (nscalars != 0)
+ return false;
+
+ nscalars = 1;
+ insert = true;
+ scalar_idx = i;
+ sel.quick_push (i);
+ scalar_element = ce->value;
+ continue;
+ }
+ else
+ return false;
+ }
code = gimple_assign_rhs_code (def_stmt);
if (code == FLOAT_EXPR
|| code == FIX_TRUNC_EXPR)
@@ -2046,7 +2070,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
op1 = gimple_assign_rhs1 (def_stmt);
if (conv_code == ERROR_MARK)
{
- if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (elt->value))),
+ if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (ce->value))),
GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op1)))))
return false;
conv_code = code;
@@ -2095,11 +2119,29 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
elt += nelts;
if (elt != i)
maybe_ident = false;
+
+ if (type == TREE_TYPE (ref))
+ {
+ /* The RHS vector has the same type as LHS. */
+ if (rhs_vector == NULL)
+ rhs_vector = ref;
+ /* Check if all RHS vector elements come from the same
+ vector. */
+ if (rhs_vector == ref)
+ nvectors++;
+ }
+
sel.quick_push (elt);
}
if (i < nelts)
return false;
+ if (insert
+ && (nvectors == 0
+ || (TYPE_VECTOR_SUBPARTS (type).to_constant ()
+ != (nscalars + nvectors))))
+ return false;
+
if (! VECTOR_TYPE_P (TREE_TYPE (orig[0]))
|| maybe_ne (TYPE_VECTOR_SUBPARTS (type),
TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig[0]))))
@@ -2127,18 +2169,26 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
vec_perm_indices indices (sel, orig[1] ? 2 : 1, nelts);
if (!can_vec_perm_const_p (TYPE_MODE (type), indices))
- return false;
+ {
+ if (insert)
+ gcc_unreachable ();
+ return false;
+ }
mask_type
= build_vector_type (build_nonstandard_integer_type (elem_size, 1),
nelts);
if (GET_MODE_CLASS (TYPE_MODE (mask_type)) != MODE_VECTOR_INT
|| maybe_ne (GET_MODE_SIZE (TYPE_MODE (mask_type)),
GET_MODE_SIZE (TYPE_MODE (type))))
- return false;
+ {
+ if (insert)
+ gcc_unreachable ();
+ return false;
+ }
op2 = vec_perm_indices_to_tree (mask_type, indices);
if (!orig[1])
orig[1] = orig[0];
- if (conv_code == ERROR_MARK)
+ if (conv_code == ERROR_MARK && !insert)
gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig[0],
orig[1], op2);
else
@@ -2148,10 +2198,25 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
VEC_PERM_EXPR, orig[0], orig[1], op2);
orig[0] = gimple_assign_lhs (perm);
gsi_insert_before (gsi, perm, GSI_SAME_STMT);
- gimple_assign_set_rhs_with_ops (gsi, conv_code, orig[0],
+ gimple_assign_set_rhs_with_ops (gsi,
+ (conv_code != ERROR_MARK
+ ? conv_code
+ : NOP_EXPR),
+ orig[0],
NULL_TREE, NULL_TREE);
}
}
+ if (insert)
+ {
+ /* Generate a single scalar insert. */
+ tree var = make_ssa_name (type);
+ tree val = gimple_assign_rhs1 (stmt);
+ gimple *copy = gimple_build_assign (var, val);
+ gsi_insert_before (gsi, copy, GSI_SAME_STMT);
+ tree bitpos = bitsize_int (scalar_idx * elem_size);
+ gimple_assign_set_rhs_with_ops (gsi, BIT_INSERT_EXPR, var,
+ scalar_element, bitpos);
+ }
update_stmt (gsi_stmt (*gsi));
return true;
}
--
2.20.1