Patchwork If !-mprefer-avx128, prefer using even 32-byte integer vector modes

login
register
mail settings
Submitter Jakub Jelinek
Date Nov. 8, 2011, 6:44 a.m.
Message ID <20111108064455.GA17997@tyan-ft48-01.lab.bos.redhat.com>
Download mbox | patch
Permalink /patch/124275/
State New
Headers show

Comments

Jakub Jelinek - Nov. 8, 2011, 6:44 a.m.
Hi!

Working virtually out of Pago Pago.

Is there a reason why we don't prefer 32-byte integer vector modes
even for AVX?  If a vectorized loop needs some operation that is only
supported by AVX2 we would retry whenever we see such a stmt, so
I think this mainly costs us some small amount of time
during vectorization analysis.  Below are examples of functions that can
be vectorized with this patch using 32-byte vectors (the runtime
of the testcase decreased from ~ 0m2.876s to ~ 0m2.258s).

/* Number of elements in each of the test arrays.  */
#define N 1024
/* Integer arrays: a is written by f1, f2 and f4 and read by f3;
   b is filled by main and read by f1 and f2.  */
int a[N], b[N];
/* Float array that f3 fills from a.  */
float c[N];

/* Copy all N elements of b into a, one element per loop iteration.
   The function is marked noinline and noclone; presumably so that it is
   compiled as a standalone function for the measurement -- TODO confirm.  */
__attribute__((noinline, noclone)) void
f1 ()
{
  int i;
  for (i = 0; i < N; i++)
    a[i] = b[i];
}

/* Copy the first eight elements of b into a using eight separate
   straight-line assignments (no loop).  Eight ints are 32 bytes when
   int is 4 bytes wide, which is assumed here -- TODO confirm target.  */
__attribute__((noinline, noclone)) void
f2 ()
{
  a[0] = b[0];
  a[1] = b[1];
  a[2] = b[2];
  a[3] = b[3];
  a[4] = b[4];
  a[5] = b[5];
  a[6] = b[6];
  a[7] = b[7];
}

/* Store each of the N int elements of a into the float array c; the
   assignment performs an implicit int-to-float conversion.  */
__attribute__((noinline, noclone)) void
f3 ()
{
  int i;
  for (i = 0; i < N; i++)
    c[i] = a[i];
}

/* Set every one of the N elements of a to the constant 19.  */
__attribute__((noinline, noclone)) void
f4 ()
{
  int i;
  for (i = 0; i < N; i++)
    a[i] = 19;
}

/* Driver for the testcase: fill b with its own indices, then call
   f1, f2, f3 and f4 in that order 10000000 times.  Always returns 0;
   the computed array contents are not checked.  */
int
main ()
{
  int i;
  /* Initialize the source array so that b[i] == i.  */
  for (i = 0; i < N; i++)
    b[i] = i;
  /* Repeat the four functions many times; the surrounding message
     reports wall-clock runtimes for this testcase.  */
  for (i = 0; i < 10000000; i++)
    {
      f1 ();
      f2 ();
      f3 ();
      f4 ();
    }
  return 0;
}


2011-11-08  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/i386.c (ix86_preferred_simd_mode): Even for TARGET_AVX
	if not TARGET_PREFER_AVX128 return 32-byte SI/DI vectors.


	Jakub
Uros Bizjak - Nov. 8, 2011, 10:33 a.m.
On Tue, Nov 8, 2011 at 7:44 AM, Jakub Jelinek <jakub@redhat.com> wrote:

> Working virtually out of Pago Pago.

Hm...?

> Is there a reason why we don't prefer 32-byte integer vector modes
> even for AVX?  If a vectorized loop needs some operation that is only
> supported by AVX2 we would retry whenever seeing such stmt, so
> what I think this costs us mainly some small amount of time
> during vectorization analysis.  Examples of functions that can
> be vectorized with this patch using 32-byte vectors (and the runtime
> of the testcase decreased from ~ 0m2.876s to ~ 0m2.258s).

Nobody thought of that yet ;)

> 2011-11-08  Jakub Jelinek  <jakub@redhat.com>
>
>        * config/i386/i386.c (ix86_preferred_simd_mode): Even for TARGET_AVX
>        if not TARGET_PREFER_AVX128 return 32-byte SI/DI vectors.

OK for mainline.

Thanks,
Uros.
Richard Guenther - Nov. 8, 2011, 11:14 a.m.
On Tue, Nov 8, 2011 at 7:44 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> Working virtually out of Pago Pago.
>
> Is there a reason why we don't prefer 32-byte integer vector modes
> even for AVX?  If a vectorized loop needs some operation that is only
> supported by AVX2 we would retry whenever seeing such stmt, so
> what I think this costs us mainly some small amount of time
> during vectorization analysis.  Examples of functions that can
> be vectorized with this patch using 32-byte vectors (and the runtime
> of the testcase decreased from ~ 0m2.876s to ~ 0m2.258s).
>
> #define N 1024
> int a[N], b[N];
> float c[N];
>
> __attribute__((noinline, noclone)) void
> f1 ()
> {
>  int i;
>  for (i = 0; i < N; i++)
>    a[i] = b[i];
> }
>
> __attribute__((noinline, noclone)) void
> f2 ()
> {
>  a[0] = b[0];
>  a[1] = b[1];
>  a[2] = b[2];
>  a[3] = b[3];
>  a[4] = b[4];
>  a[5] = b[5];
>  a[6] = b[6];
>  a[7] = b[7];
> }

Other examples would be bitwise operations.

Richard.

> __attribute__((noinline, noclone)) void
> f3 ()
> {
>  int i;
>  for (i = 0; i < N; i++)
>    c[i] = a[i];
> }
>
> __attribute__((noinline, noclone)) void
> f4 ()
> {
>  int i;
>  for (i = 0; i < N; i++)
>    a[i] = 19;
> }
>
> int
> main ()
> {
>  int i;
>  for (i = 0; i < N; i++)
>    b[i] = i;
>  for (i = 0; i < 10000000; i++)
>    {
>      f1 ();
>      f2 ();
>      f3 ();
>      f4 ();
>    }
>  return 0;
> }
>
>
> 2011-11-08  Jakub Jelinek  <jakub@redhat.com>
>
>        * config/i386/i386.c (ix86_preferred_simd_mode): Even for TARGET_AVX
>        if not TARGET_PREFER_AVX128 return 32-byte SI/DI vectors.
>
> --- gcc/config/i386/i386.c.jj   2011-11-07 08:32:09.000000000 -1100
> +++ gcc/config/i386/i386.c      2011-11-07 19:19:57.000000000 -1100
> @@ -37881,9 +37881,9 @@ ix86_preferred_simd_mode (enum machine_m
>     case HImode:
>       return TARGET_AVX2 ? V16HImode : V8HImode;
>     case SImode:
> -      return TARGET_AVX2 ? V8SImode : V4SImode;
> +      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
>     case DImode:
> -      return TARGET_AVX2 ? V4DImode : V2DImode;
> +      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
>
>     case SFmode:
>       if (TARGET_AVX && !TARGET_PREFER_AVX128)
>
>        Jakub
>

Patch

--- gcc/config/i386/i386.c.jj	2011-11-07 08:32:09.000000000 -1100
+++ gcc/config/i386/i386.c	2011-11-07 19:19:57.000000000 -1100
@@ -37881,9 +37881,9 @@  ix86_preferred_simd_mode (enum machine_m
     case HImode:
       return TARGET_AVX2 ? V16HImode : V8HImode;
     case SImode:
-      return TARGET_AVX2 ? V8SImode : V4SImode;
+      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
     case DImode:
-      return TARGET_AVX2 ? V4DImode : V2DImode;
+      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
 
     case SFmode:
       if (TARGET_AVX && !TARGET_PREFER_AVX128)