diff mbox series

[committed] middle-end slp: Don't traverse tree on (nil) nodes.

Message ID patch-14232-tamar@arm.com
State New
Headers show
Series [committed] middle-end slp: Don't traverse tree on (nil) nodes. | expand

Commit Message

Tamar Christina Feb. 25, 2021, 4:23 p.m. UTC
Hi All,

The given testcase shows that one of the children of the complex MUL contains a
PHI node.  This results in the vectorizer having a child that's (nil).

The pattern matcher handles this correctly, but optimize_load_redistribution_1
needs to not traverse/inspect the NULL nodes.

This however does high-light a missed opportunity.  This testcase seems to
result in a different canonicalization than normally.

Normally the expressions are right leaning.  But sometimes, especially when type
casts are introduced the trees suddenly become left leaning. For instance this
testcase (even without type casts) won't detect the FMA form because the addition
gets the MUL node in the left and not right node as it expects.

Checking all forms would be quite expensive so for GCC 12 it probably makes sense to make
forms with type casts in them have the same form as those without?

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Committed under the GCC obvious rule.

Thanks,
Tamar

gcc/ChangeLog:

	* tree-vect-slp.c (optimize_load_redistribution_1): Abort on NULL nodes.

gcc/testsuite/ChangeLog:

	* g++.dg/vect/complex-num-null-node.cc: New test.

--- inline copy of patch -- 
diff --git a/gcc/testsuite/g++.dg/vect/simd-complex-num-null-node.cc b/gcc/testsuite/g++.dg/vect/simd-complex-num-null-node.cc
new file mode 100644
index 0000000000000000000000000000000000000000..eddf0ad8710138c3b5ac31706df02109d8164329


--

Comments

Richard Biener Feb. 26, 2021, 8 a.m. UTC | #1
On Thu, 25 Feb 2021, Tamar Christina wrote:

> Hi All,
> 
> The given testcase shows that one of the children of the complex MUL contains a
> PHI node.  This results in the vectorizer having a child that's (nil).
> 
> The pattern matcher handles this correctly, but optimize_load_redistribution_1
> needs to not traverse/inspect the NULL nodes.
> 
> This however does high-light a missed opportunity.  This testcase seems to
> result in a different canonicalization than normally.
> 
> Normally the expressions are right leaning.  But sometimes, especially when type
> casts are introduced the trees suddenly become left leaning. For instance this
> testcase (even without type casts) won't detect the FMA form because the addition
> gets the MUL node in the left and not right node as it expects.

There's not really any canonicalization rule that would enforce either 
order.  The reassoc pass might in practice produce such ordering but
you can't rely on it.

> Checking all forms would be quite expensive so for GCC 12 it probably makes sense to make
> forms with type casts in them have the same form as those without?

Is it really expensive?

> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Committed under the GCC obvious rule.

Thanks.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> 	* tree-vect-slp.c (optimize_load_redistribution_1): Abort on NULL nodes.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* g++.dg/vect/complex-num-null-node.cc: New test.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/testsuite/g++.dg/vect/simd-complex-num-null-node.cc b/gcc/testsuite/g++.dg/vect/simd-complex-num-null-node.cc
> new file mode 100644
> index 0000000000000000000000000000000000000000..eddf0ad8710138c3b5ac31706df02109d8164329
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/vect/simd-complex-num-null-node.cc
> @@ -0,0 +1,78 @@
> +/* { dg-do compile { target { aarch64-*-* } } } */
> +/* { dg-skip-if "incorrect syntax for c++98" { *-*-* } { "-std=c++98" } { "" } } */
> +/* { dg-additional-options "-w -O3 -march=armv8.3-a -fdump-tree-vect-all" } */
> +/* { dg-require-effective-target le } */
> +
> +typedef struct {
> +  float b;
> +  float c;
> +} d;
> +namespace {
> +typedef int e;
> +template <typename, typename> struct f;
> +template <template <typename> class g, typename h, typename k, typename... l>
> +struct f<g<k, l...>, h> {
> +  using m = g<h>;
> +};
> +} // namespace
> +namespace aa {
> +template <typename k> class o {
> +public:
> +  typedef k p;
> +};
> +} // namespace aa
> +namespace ab {
> +template <typename k> using r = aa::o<k>;
> +template <typename k> class ac : public r<k> {};
> +struct s {
> +  template <typename k, typename h> struct ad : f<k, h> {};
> +};
> +template <typename t, typename h> using ae = typename s::ad<t, h>::m;
> +template <typename t> struct af {
> +  typedef typename t::p p;
> +  template <typename k> using u = ae<t, k>;
> +};
> +} // namespace ab
> +namespace aa {
> +template <typename t> struct ag {
> +  typedef ab::af<t> v;
> +  typedef typename v::p &ah;
> +  template <typename k> struct ai { typedef typename v::u<k> aj; };
> +};
> +} // namespace aa
> +namespace ab {
> +template <typename k, typename t> struct w {
> +  typedef typename aa::ag<t>::ai<k>::aj x;
> +  struct y {};
> +  typedef t ak;
> +  w(e, ak);
> +  y a;
> +};
> +template <typename k, typename t = ac<k>> class al : w<k, t> {
> +  typedef w<k, t> am;
> +  typedef typename am::x x;
> +  typedef aa::ag<x> an;
> +
> +public:
> +  typedef typename an::ah ah;
> +  typedef e ao;
> +  typedef t ak;
> +  al(ao ap, ak aq = ak()) : am(ar(ap, aq), aq) {}
> +  ah operator[](ao);
> +  ao ar(ao ap, ak) { return ap; }
> +};
> +} // namespace ab
> +void as(int n, d *a, d *q) {
> +  ab::al<d> z(n);
> +  d acc;
> +  for (int j = 0; j < n; ++j) {
> +    auto at = a[j];
> +    auto au = q[j];
> +    acc.b += at.b * au.b - at.c * au.c;
> +    acc.c += at.b * au.c + at.c * au.b;
> +  }
> +  z[0] = acc;
> +}
> +
> +
> +/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_MUL" 1 "vect" } } */
> diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
> index 09a0f42410e3542c7823d70757550c22b8ef676f..aed0c7798b6203de6bce7a4764aeedd9a554670d 100644
> --- a/gcc/tree-vect-slp.c
> +++ b/gcc/tree-vect-slp.c
> @@ -2300,7 +2300,7 @@ optimize_load_redistribution_1 (scalar_stmts_to_slp_tree_map_t *bst_map,
>    unsigned i;
>  
>    /* For now, we don't know anything about externals so do not do anything.  */
> -  if (SLP_TREE_DEF_TYPE (root) != vect_internal_def)
> +  if (!root || SLP_TREE_DEF_TYPE (root) != vect_internal_def)
>      return NULL;
>    else if (SLP_TREE_CODE (root) == VEC_PERM_EXPR)
>      {
> 
> 
>
diff mbox series

Patch

diff --git a/gcc/testsuite/g++.dg/vect/simd-complex-num-null-node.cc b/gcc/testsuite/g++.dg/vect/simd-complex-num-null-node.cc
new file mode 100644
index 0000000000000000000000000000000000000000..eddf0ad8710138c3b5ac31706df02109d8164329
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/simd-complex-num-null-node.cc
@@ -0,0 +1,78 @@ 
+/* { dg-do compile { target { aarch64-*-* } } } */
+/* { dg-skip-if "incorrect syntax for c++98" { *-*-* } { "-std=c++98" } { "" } } */
+/* { dg-additional-options "-w -O3 -march=armv8.3-a -fdump-tree-vect-all" } */
+/* { dg-require-effective-target le } */
+
+typedef struct {
+  float b;
+  float c;
+} d;
+namespace {
+typedef int e;
+template <typename, typename> struct f;
+template <template <typename> class g, typename h, typename k, typename... l>
+struct f<g<k, l...>, h> {
+  using m = g<h>;
+};
+} // namespace
+namespace aa {
+template <typename k> class o {
+public:
+  typedef k p;
+};
+} // namespace aa
+namespace ab {
+template <typename k> using r = aa::o<k>;
+template <typename k> class ac : public r<k> {};
+struct s {
+  template <typename k, typename h> struct ad : f<k, h> {};
+};
+template <typename t, typename h> using ae = typename s::ad<t, h>::m;
+template <typename t> struct af {
+  typedef typename t::p p;
+  template <typename k> using u = ae<t, k>;
+};
+} // namespace ab
+namespace aa {
+template <typename t> struct ag {
+  typedef ab::af<t> v;
+  typedef typename v::p &ah;
+  template <typename k> struct ai { typedef typename v::u<k> aj; };
+};
+} // namespace aa
+namespace ab {
+template <typename k, typename t> struct w {
+  typedef typename aa::ag<t>::ai<k>::aj x;
+  struct y {};
+  typedef t ak;
+  w(e, ak);
+  y a;
+};
+template <typename k, typename t = ac<k>> class al : w<k, t> {
+  typedef w<k, t> am;
+  typedef typename am::x x;
+  typedef aa::ag<x> an;
+
+public:
+  typedef typename an::ah ah;
+  typedef e ao;
+  typedef t ak;
+  al(ao ap, ak aq = ak()) : am(ar(ap, aq), aq) {}
+  ah operator[](ao);
+  ao ar(ao ap, ak) { return ap; }
+};
+} // namespace ab
+void as(int n, d *a, d *q) {
+  ab::al<d> z(n);
+  d acc;
+  for (int j = 0; j < n; ++j) {
+    auto at = a[j];
+    auto au = q[j];
+    acc.b += at.b * au.b - at.c * au.c;
+    acc.c += at.b * au.c + at.c * au.b;
+  }
+  z[0] = acc;
+}
+
+
+/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_MUL" 1 "vect" } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 09a0f42410e3542c7823d70757550c22b8ef676f..aed0c7798b6203de6bce7a4764aeedd9a554670d 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2300,7 +2300,7 @@  optimize_load_redistribution_1 (scalar_stmts_to_slp_tree_map_t *bst_map,
   unsigned i;
 
   /* For now, we don't know anything about externals so do not do anything.  */
-  if (SLP_TREE_DEF_TYPE (root) != vect_internal_def)
+  if (!root || SLP_TREE_DEF_TYPE (root) != vect_internal_def)
     return NULL;
   else if (SLP_TREE_CODE (root) == VEC_PERM_EXPR)
     {