Patchwork [PR,44423] Disallow SRAing of scalar accesses with scalar sub-accesses

login
register
mail settings
Submitter Martin Jambor
Date June 9, 2010, 9:27 a.m.
Message ID <20100609092715.GC15581@virgil.suse.cz>
Download mbox | patch
Permalink /patch/55060/
State New
Headers show

Comments

Martin Jambor - June 9, 2010, 9:27 a.m.
Hi,

the patch below fixes the run-time regression reported as PR 44423.
More details are in bugzilla, but the basic idea is that scalarizing
accesses that have a scalar parent in the access tree can wreck havoc
to the final emitted code when the parent is an SSE vector which is
initialized through a union.  The gains of such transformations are
questionable so the best bet is do disable it.

Bootstrapped and tested on x86_64-linux, OK for trunk and later for
the 4.5 branch?

Thanks,

Martin


2010-06-08  Martin Jambor  <mjambor@suse.cz>

	PR tree-optimization/44423
	* tree-sra.c (dump_access): Dump also grp_assignment_read.
	(analyze_access_subtree): Pass negative allow_replacements to children
	if the current type is scalar.
Richard Guenther - June 9, 2010, 10:15 a.m.
On Wed, 9 Jun 2010, Martin Jambor wrote:

> Hi,
> 
> the patch below fixes the run-time regression reported as PR 44423.
> More details are in bugzilla, but the basic idea is that scalarizing
> accesses that have a scalar parent in the access tree can wreck havoc
> to the final emitted code when the parent is an SSE vector which is
> initialized through a union.  The gains of such transformations are
> questionable so the best bet is do disable it.
> 
> Bootstrapped and tested on x86_64-linux, OK for trunk and later for
> the 4.5 branch?

Ok.

Thanks,
Richard.

> 
> Thanks,
> 
> Martin
> 
> 
> 2010-06-08  Martin Jambor  <mjambor@suse.cz>
> 
> 	PR tree-optimization/44423
> 	* tree-sra.c (dump_access): Dump also grp_assignment_read.
> 	(analyze_access_subtree): Pass negative allow_replacements to children
> 	if the current type is scalar.
> 
> Index: mine/gcc/testsuite/gcc.dg/tree-ssa/pr44423.c
> ===================================================================
> --- /dev/null
> +++ mine/gcc/testsuite/gcc.dg/tree-ssa/pr44423.c
> @@ -0,0 +1,47 @@
> +/* { dg-do compile { target x86_64-*-* } } */
> +/* { dg-options "-O2 -fdump-tree-esra-details" } */
> +
> +#include "xmmintrin.h"
> +
> +typedef __m128 v4sf; // vector of 4 floats (SSE1)
> +
> +#define ARRSZ 1024
> +
> +typedef union {
> +  float f[4];
> +  v4sf  v;
> +} V4SF;
> +
> +struct COLOUR {
> +  float r,g,b;
> +};
> +
> +void func (float *pre1, float pre2, struct COLOUR *a, V4SF *lpic)
> +  {
> +  V4SF va;
> +  int y;
> +  va.f[0]=a->r;va.f[1]=a->g;va.f[2]=a->b;va.f[3]=0.f;
> +  for (y=0; y<20; ++y)
> +    {
> +    float att = pre1[y]*pre2;
> +    v4sf tmpatt=_mm_load1_ps(&att);
> +    tmpatt=_mm_mul_ps(tmpatt,va.v);
> +    lpic[y].v=_mm_add_ps(tmpatt,lpic[y].v);
> +    }
> +  }
> +
> +int main()
> +  {
> +  V4SF lpic[ARRSZ];
> +  float pre1[ARRSZ];
> +  int i;
> +  struct COLOUR col={0.,2.,4.};
> +  for (i=0; i<20; ++i)
> +    pre1[i]=0.4;
> +  for (i=0;i<10000000;++i)
> +    func(&pre1[0],0.3,&col,&lpic[0]);
> +  return 0;
> +  }
> +
> +/* { dg-final { scan-tree-dump-times "Created a replacement" 0 "esra"} } */
> +/* { dg-final { cleanup-tree-dump "esra" } } */
> Index: mine/gcc/tree-sra.c
> ===================================================================
> --- mine.orig/gcc/tree-sra.c
> +++ mine/gcc/tree-sra.c
> @@ -356,13 +356,13 @@ dump_access (FILE *f, struct access *acc
>    print_generic_expr (f, access->type, 0);
>    if (grp)
>      fprintf (f, ", grp_write = %d, total_scalarization = %d, "
> -	     "grp_read = %d, grp_hint = %d, "
> +	     "grp_read = %d, grp_hint = %d, grp_assignment_read = %d,"
>  	     "grp_covered = %d, grp_unscalarizable_region = %d, "
>  	     "grp_unscalarized_data = %d, grp_partial_lhs = %d, "
>  	     "grp_to_be_replaced = %d, grp_maybe_modified = %d, "
>  	     "grp_not_necessarilly_dereferenced = %d\n",
>  	     access->grp_write, access->total_scalarization,
> -	     access->grp_read, access->grp_hint,
> +	     access->grp_read, access->grp_hint, access->grp_assignment_read,
>  	     access->grp_covered, access->grp_unscalarizable_region,
>  	     access->grp_unscalarized_data, access->grp_partial_lhs,
>  	     access->grp_to_be_replaced, access->grp_maybe_modified,
> @@ -1791,7 +1790,8 @@ analyze_access_subtree (struct access *r
>        else
>  	covered_to += child->size;
>  
> -      sth_created |= analyze_access_subtree (child, allow_replacements,
> +      sth_created |= analyze_access_subtree (child,
> +					     allow_replacements && !scalar,
>  					     mark_read, mark_write);
>  
>        root->grp_unscalarized_data |= child->grp_unscalarized_data;
> 
> 
> 
>

Patch

Index: mine/gcc/testsuite/gcc.dg/tree-ssa/pr44423.c
===================================================================
--- /dev/null
+++ mine/gcc/testsuite/gcc.dg/tree-ssa/pr44423.c
@@ -0,0 +1,47 @@ 
+/* { dg-do compile { target x86_64-*-* } } */
+/* { dg-options "-O2 -fdump-tree-esra-details" } */
+
+#include "xmmintrin.h"
+
+typedef __m128 v4sf; // vector of 4 floats (SSE1)
+
+#define ARRSZ 1024
+
+typedef union {
+  float f[4];
+  v4sf  v;
+} V4SF;
+
+struct COLOUR {
+  float r,g,b;
+};
+
+void func (float *pre1, float pre2, struct COLOUR *a, V4SF *lpic)
+  {
+  V4SF va;
+  int y;
+  va.f[0]=a->r;va.f[1]=a->g;va.f[2]=a->b;va.f[3]=0.f;
+  for (y=0; y<20; ++y)
+    {
+    float att = pre1[y]*pre2;
+    v4sf tmpatt=_mm_load1_ps(&att);
+    tmpatt=_mm_mul_ps(tmpatt,va.v);
+    lpic[y].v=_mm_add_ps(tmpatt,lpic[y].v);
+    }
+  }
+
+int main()
+  {
+  V4SF lpic[ARRSZ];
+  float pre1[ARRSZ];
+  int i;
+  struct COLOUR col={0.,2.,4.};
+  for (i=0; i<20; ++i)
+    pre1[i]=0.4;
+  for (i=0;i<10000000;++i)
+    func(&pre1[0],0.3,&col,&lpic[0]);
+  return 0;
+  }
+
+/* { dg-final { scan-tree-dump-times "Created a replacement" 0 "esra"} } */
+/* { dg-final { cleanup-tree-dump "esra" } } */
Index: mine/gcc/tree-sra.c
===================================================================
--- mine.orig/gcc/tree-sra.c
+++ mine/gcc/tree-sra.c
@@ -356,13 +356,13 @@  dump_access (FILE *f, struct access *acc
   print_generic_expr (f, access->type, 0);
   if (grp)
     fprintf (f, ", grp_write = %d, total_scalarization = %d, "
-	     "grp_read = %d, grp_hint = %d, "
+	     "grp_read = %d, grp_hint = %d, grp_assignment_read = %d,"
 	     "grp_covered = %d, grp_unscalarizable_region = %d, "
 	     "grp_unscalarized_data = %d, grp_partial_lhs = %d, "
 	     "grp_to_be_replaced = %d, grp_maybe_modified = %d, "
 	     "grp_not_necessarilly_dereferenced = %d\n",
 	     access->grp_write, access->total_scalarization,
-	     access->grp_read, access->grp_hint,
+	     access->grp_read, access->grp_hint, access->grp_assignment_read,
 	     access->grp_covered, access->grp_unscalarizable_region,
 	     access->grp_unscalarized_data, access->grp_partial_lhs,
 	     access->grp_to_be_replaced, access->grp_maybe_modified,
@@ -1791,7 +1790,8 @@  analyze_access_subtree (struct access *r
       else
 	covered_to += child->size;
 
-      sth_created |= analyze_access_subtree (child, allow_replacements,
+      sth_created |= analyze_access_subtree (child,
+					     allow_replacements && !scalar,
 					     mark_read, mark_write);
 
       root->grp_unscalarized_data |= child->grp_unscalarized_data;