[target/92295] Fix inefficient vector constructor
diff mbox series

Message ID CAMZc-by2k43VtjGzg3-AES+T8vvZOe0LYC8WUJqjVz54mM=80A@mail.gmail.com
State New
Headers show
Series
  • [target/92295] Fix inefficient vector constructor
Related show

Commit Message

Hongtao Liu Nov. 1, 2019, 1:12 a.m. UTC
Hi uros:
  This patch fixes an inefficient vector constructor.
  Currently, in ix86_expand_vector_init_concat, vectors are initialized
two elements at a time, which can miss optimization opportunities such
as PR 92295.

  Bootstrap and i386 regression tests are OK.
  Ok for trunk?

Changelog
gcc/
        PR target/92295
        * config/i386/i386-expand.c (ix86_expand_vector_init_concat):
        Enhance ix86_expand_vector_init_concat.

gcc/testsuite
        * gcc.target/i386/pr92295.c: New test.

Comments

Hongtao Liu Nov. 2, 2019, 1:38 p.m. UTC | #1
Hi Jakub:
  Could you help review this patch?

PS: This patch is related to vectors (avx512f), and Uros
mentioned before that he has no intention of maintaining avx512f.

On Fri, Nov 1, 2019 at 9:12 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> Hi uros:
>   This patch is about to fix inefficient vector constructor.
>   Currently in ix86_expand_vector_init_concat, vector are initialized
> per 2 elements which can miss some optimization opportunity like
> pr92295.
>
>   Bootstrap and i386 regression test is ok.
>   Ok for trunk?
>
> Changelog
> gcc/
>         PR target/92295
>         * config/i386/i386-expand.c (ix86_expand_vector_init_concat)
>         Enhance ix86_expand_vector_init_concat.
>
> gcc/testsuite
>         * gcc.target/i386/pr92295.c: New test.
>
> --
> BR,
> Hongtao
Hongtao Liu Nov. 7, 2019, 7:01 a.m. UTC | #2
Ping!

On Sat, Nov 2, 2019 at 9:38 PM Hongtao Liu <crazylht@gmail.com> wrote:
>
> Hi Jakub:
>   Could you help reviewing this patch.
>
> PS: This patch is related to vectors (avx512f), and Uros
> mentioned before that he has no intention of maintaining avx512f.
>
> On Fri, Nov 1, 2019 at 9:12 AM Hongtao Liu <crazylht@gmail.com> wrote:
> >
> > Hi uros:
> >   This patch is about to fix inefficient vector constructor.
> >   Currently in ix86_expand_vector_init_concat, vector are initialized
> > per 2 elements which can miss some optimization opportunity like
> > pr92295.
> >
> >   Bootstrap and i386 regression test is ok.
> >   Ok for trunk?
> >
> > Changelog
> > gcc/
> >         PR target/92295
> >         * config/i386/i386-expand.c (ix86_expand_vector_init_concat)
> >         Enhance ix86_expand_vector_init_concat.
> >
> > gcc/testsuite
> >         * gcc.target/i386/pr92295.c: New test.
> >
> > --
> > BR,
> > Hongtao
>
>
>
> --
> BR,
> Hongtao
Richard Biener Nov. 7, 2019, 9:44 a.m. UTC | #3
On Thu, Nov 7, 2019 at 7:58 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> Ping!

OK.

Thanks,
Richard.

> On Sat, Nov 2, 2019 at 9:38 PM Hongtao Liu <crazylht@gmail.com> wrote:
> >
> > Hi Jakub:
> >   Could you help reviewing this patch.
> >
> > PS: This patch is related to vectors (avx512f), and Uros
> > mentioned before that he has no intention of maintaining avx512f.
> >
> > On Fri, Nov 1, 2019 at 9:12 AM Hongtao Liu <crazylht@gmail.com> wrote:
> > >
> > > Hi uros:
> > >   This patch is about to fix inefficient vector constructor.
> > >   Currently in ix86_expand_vector_init_concat, vector are initialized
> > > per 2 elements which can miss some optimization opportunity like
> > > pr92295.
> > >
> > >   Bootstrap and i386 regression test is ok.
> > >   Ok for trunk?
> > >
> > > Changelog
> > > gcc/
> > >         PR target/92295
> > >         * config/i386/i386-expand.c (ix86_expand_vector_init_concat)
> > >         Enhance ix86_expand_vector_init_concat.
> > >
> > > gcc/testsuite
> > >         * gcc.target/i386/pr92295.c: New test.
> > >
> > > --
> > > BR,
> > > Hongtao
> >
> >
> >
> > --
> > BR,
> > Hongtao
>
>
>
> --
> BR,
> Hongtao

Patch
diff mbox series

From 408fb093993f9df4da42d8daf2e6996f087c4618 Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Thu, 31 Oct 2019 15:14:00 +0000
Subject: [PATCH] Enhance ix86_expand_vector_init_concat.

Changelog
gcc/
	PR target/92295
	* config/i386/i386-expand.c (ix86_expand_vector_init_concat):
	Enhance ix86_expand_vector_init_concat.

gcc/testsuite
	* gcc.target/i386/pr92295.c: New test.
---
 gcc/config/i386/i386-expand.c           | 130 ++++++++++--------------
 gcc/testsuite/gcc.target/i386/pr92295.c |  13 +++
 2 files changed, 65 insertions(+), 78 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr92295.c

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 6d3d14c37dd..be040a1bc3e 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -13654,8 +13654,8 @@  static void
 ix86_expand_vector_init_concat (machine_mode mode,
 				rtx target, rtx *ops, int n)
 {
-  machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
-  rtx first[16], second[8], third[4];
+  machine_mode half_mode = VOIDmode;
+  rtx half[2];
   rtvec v;
   int i, j;
 
@@ -13665,55 +13665,55 @@  ix86_expand_vector_init_concat (machine_mode mode,
       switch (mode)
 	{
 	case E_V16SImode:
-	  cmode = V8SImode;
+	  half_mode = V8SImode;
 	  break;
 	case E_V16SFmode:
-	  cmode = V8SFmode;
+	  half_mode = V8SFmode;
 	  break;
 	case E_V8DImode:
-	  cmode = V4DImode;
+	  half_mode = V4DImode;
 	  break;
 	case E_V8DFmode:
-	  cmode = V4DFmode;
+	  half_mode = V4DFmode;
 	  break;
 	case E_V8SImode:
-	  cmode = V4SImode;
+	  half_mode = V4SImode;
 	  break;
 	case E_V8SFmode:
-	  cmode = V4SFmode;
+	  half_mode = V4SFmode;
 	  break;
 	case E_V4DImode:
-	  cmode = V2DImode;
+	  half_mode = V2DImode;
 	  break;
 	case E_V4DFmode:
-	  cmode = V2DFmode;
+	  half_mode = V2DFmode;
 	  break;
 	case E_V4SImode:
-	  cmode = V2SImode;
+	  half_mode = V2SImode;
 	  break;
 	case E_V4SFmode:
-	  cmode = V2SFmode;
+	  half_mode = V2SFmode;
 	  break;
 	case E_V2DImode:
-	  cmode = DImode;
+	  half_mode = DImode;
 	  break;
 	case E_V2SImode:
-	  cmode = SImode;
+	  half_mode = SImode;
 	  break;
 	case E_V2DFmode:
-	  cmode = DFmode;
+	  half_mode = DFmode;
 	  break;
 	case E_V2SFmode:
-	  cmode = SFmode;
+	  half_mode = SFmode;
 	  break;
 	default:
 	  gcc_unreachable ();
 	}
 
-      if (!register_operand (ops[1], cmode))
-	ops[1] = force_reg (cmode, ops[1]);
-      if (!register_operand (ops[0], cmode))
-	ops[0] = force_reg (cmode, ops[0]);
+      if (!register_operand (ops[1], half_mode))
+	ops[1] = force_reg (half_mode, ops[1]);
+      if (!register_operand (ops[0], half_mode))
+	ops[0] = force_reg (half_mode, ops[0]);
       emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
 							  ops[1])));
       break;
@@ -13722,16 +13722,16 @@  ix86_expand_vector_init_concat (machine_mode mode,
       switch (mode)
 	{
 	case E_V4DImode:
-	  cmode = V2DImode;
+	  half_mode = V2DImode;
 	  break;
 	case E_V4DFmode:
-	  cmode = V2DFmode;
+	  half_mode = V2DFmode;
 	  break;
 	case E_V4SImode:
-	  cmode = V2SImode;
+	  half_mode = V2SImode;
 	  break;
 	case E_V4SFmode:
-	  cmode = V2SFmode;
+	  half_mode = V2SFmode;
 	  break;
 	default:
 	  gcc_unreachable ();
@@ -13742,20 +13742,16 @@  ix86_expand_vector_init_concat (machine_mode mode,
       switch (mode)
 	{
 	case E_V8DImode:
-	  cmode = V2DImode;
-	  hmode = V4DImode;
+	  half_mode = V4DImode;
 	  break;
 	case E_V8DFmode:
-	  cmode = V2DFmode;
-	  hmode = V4DFmode;
+	  half_mode = V4DFmode;
 	  break;
 	case E_V8SImode:
-	  cmode = V2SImode;
-	  hmode = V4SImode;
+	  half_mode = V4SImode;
 	  break;
 	case E_V8SFmode:
-	  cmode = V2SFmode;
-	  hmode = V4SFmode;
+	  half_mode = V4SFmode;
 	  break;
 	default:
 	  gcc_unreachable ();
@@ -13766,14 +13762,10 @@  ix86_expand_vector_init_concat (machine_mode mode,
       switch (mode)
 	{
 	case E_V16SImode:
-	  cmode = V2SImode;
-	  hmode = V4SImode;
-	  gmode = V8SImode;
+	  half_mode = V8SImode;
 	  break;
 	case E_V16SFmode:
-	  cmode = V2SFmode;
-	  hmode = V4SFmode;
-	  gmode = V8SFmode;
+	  half_mode = V8SFmode;
 	  break;
 	default:
 	  gcc_unreachable ();
@@ -13783,50 +13775,32 @@  ix86_expand_vector_init_concat (machine_mode mode,
 half:
       /* FIXME: We process inputs backward to help RA.  PR 36222.  */
       i = n - 1;
-      j = (n >> 1) - 1;
-      for (; i > 0; i -= 2, j--)
-	{
-	  first[j] = gen_reg_rtx (cmode);
-	  v = gen_rtvec (2, ops[i - 1], ops[i]);
-	  ix86_expand_vector_init (false, first[j],
-				   gen_rtx_PARALLEL (cmode, v));
-	}
-
-      n >>= 1;
-      if (n > 4)
-	{
-	  gcc_assert (hmode != VOIDmode);
-	  gcc_assert (gmode != VOIDmode);
-	  for (i = j = 0; i < n; i += 2, j++)
-	    {
-	      second[j] = gen_reg_rtx (hmode);
-	      ix86_expand_vector_init_concat (hmode, second [j],
-					      &first [i], 2);
-	    }
-	  n >>= 1;
-	  for (i = j = 0; i < n; i += 2, j++)
-	    {
-	      third[j] = gen_reg_rtx (gmode);
-	      ix86_expand_vector_init_concat (gmode, third[j],
-					      &second[i], 2);
-	    }
-	  n >>= 1;
-	  ix86_expand_vector_init_concat (mode, target, third, n);
-	}
-      else if (n > 2)
+      for (j = 1; j != -1; j--)
 	{
-	  gcc_assert (hmode != VOIDmode);
-	  for (i = j = 0; i < n; i += 2, j++)
+	  half[j] = gen_reg_rtx (half_mode);
+	  switch (n >> 1)
 	    {
-	      second[j] = gen_reg_rtx (hmode);
-	      ix86_expand_vector_init_concat (hmode, second [j],
-					      &first [i], 2);
+	    case 2:
+	      v = gen_rtvec (2, ops[i-1], ops[i]);
+	      i -= 2;
+	      break;
+	    case 4:
+	      v = gen_rtvec (4, ops[i-3], ops[i-2], ops[i-1], ops[i]);
+	      i -= 4;
+	      break;
+	    case 8:
+	      v = gen_rtvec (8, ops[i-7], ops[i-6], ops[i-5], ops[i-4],
+			     ops[i-3], ops[i-2], ops[i-1], ops[i]);
+	      i -= 8;
+	      break;
+	    default:
+	      gcc_unreachable ();
 	    }
-	  n >>= 1;
-	  ix86_expand_vector_init_concat (mode, target, second, n);
+	  ix86_expand_vector_init (false, half[j],
+				   gen_rtx_PARALLEL (half_mode, v));
 	}
-      else
-	ix86_expand_vector_init_concat (mode, target, first, n);
+
+      ix86_expand_vector_init_concat (mode, target, half, 2);
       break;
 
     default:
diff --git a/gcc/testsuite/gcc.target/i386/pr92295.c b/gcc/testsuite/gcc.target/i386/pr92295.c
new file mode 100644
index 00000000000..179dc487b98
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92295.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+typedef int X __attribute__((vector_size (32)));
+
+X
+foo (int x, int z)
+{
+  X y = { x, x, x, x, z, z, z, z };
+  return y;
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcast" "2" } } */
-- 
2.19.1