diff mbox

Fix PR44900 (SRA miscompilation)

Message ID 20100720133034.GA24667@virgil.arch.suse.de
State New
Headers show

Commit Message

Martin Jambor July 20, 2010, 1:30 p.m. UTC
Hi,

the simple patch below fixes PR 44900.  Apparently we rarely happen to
land in the SRA_UDH_LEFT refreshing mode in
load_assign_lhs_subreplacements because cleaning up after it has not
worked for a long time.  But it does require a special combination of
assignments between special unions to trigger, such as the one in the
testcase.

This patch is aginst the 4.5 branch but the one for trunk is exactly
the same.  I have bootstrapped and regression tested both without any
problems.  OK for trunk and the branch?

Thanks,

Martin



2010-07-20  Martin Jambor  <mjambor@suse.cz>

	PR tree-optimization/44900
	* tree-sra.c (load_assign_lhs_subreplacements): Updated comments.
	(sra_modify_assign): Move gsi to the next statmenent unconditionally.

Comments

Richard Biener July 20, 2010, 2:02 p.m. UTC | #1
On Tue, 20 Jul 2010, Martin Jambor wrote:

> Hi,
> 
> the simple patch below fixes PR 44900.  Apparently we rarely happen to
> land in the SRA_UDH_LEFT refreshing mode in
> load_assign_lhs_subreplacements because cleaning up after it has not
> worked for a long time.  But it does require a special combination of
> assignments between special unions to trigger, such as the one in the
> testcase.
> 
> This patch is aginst the 4.5 branch but the one for trunk is exactly
> the same.  I have bootstrapped and regression tested both without any
> problems.  OK for trunk and the branch?

I belive you can remove test_assert (and thus the printf decl and
the test macro) from the testcase.

Ok with that change.

Thanks,
RIchard.

> Thanks,
> 
> Martin
> 
> 
> 
> 2010-07-20  Martin Jambor  <mjambor@suse.cz>
> 
> 	PR tree-optimization/44900
> 	* tree-sra.c (load_assign_lhs_subreplacements): Updated comments.
> 	(sra_modify_assign): Move gsi to the next statmenent unconditionally.
> 
> Index: gcc-4_5-branch/gcc/testsuite/g++.dg/torture/pr44900.C
> ===================================================================
> --- /dev/null
> +++ gcc-4_5-branch/gcc/testsuite/g++.dg/torture/pr44900.C
> @@ -0,0 +1,91 @@
> +/* { dg-do run { target i?86-*-* x86_64-*-* } } */
> +/* { dg-options "-msse" } */
> +/* { dg-require-effective-target sse } */
> +
> +typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
> +typedef float __v4sf __attribute__ ((__vector_size__ (16)));
> +
> +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__,
> +__artificial__))
> +_mm_set_ps (const float __Z, const float __Y, const float __X, const float __W)
> +{
> +  return __extension__ (__m128)(__v4sf){ __W, __X, __Y, __Z };
> +}
> +
> +struct vec
> +{
> +        union {
> +                __m128 v;
> +                float  e[4];
> +        };
> +
> +        static const vec & zero()
> +        {
> +                static const vec v = _mm_set_ps(0, 0, 0, 0);
> +                return v;
> +        }
> +
> +        vec() {}
> +        vec(const __m128 & a) : v(a) {}
> +
> +        operator const __m128&() const { return v; }
> +};
> +
> +struct vec2
> +{
> +        vec _v1;
> +        vec _v2;
> +
> +        vec2() {}
> +        vec2(const vec & a, const vec & b) : _v1(a), _v2(b) {}
> +
> +        static vec2 load(const float * a)
> +        {
> +                return vec2(
> +                        __builtin_ia32_loadups(&a[0]),
> +                        __builtin_ia32_loadups(&a[4]));
> +        }
> +
> +        const vec & v1() const { return _v1; }
> +        const vec & v2() const { return _v2; }
> +};
> +
> +extern "C" {
> +        int  printf (const char*, ...);
> +        void abort(void);
> +}
> +
> +inline bool test_assert( bool is_succeed, const char * file_name, int line_num
> +)
> +{
> +        if ( !is_succeed )
> +        {
> +                printf("error: %s(%d)\n", file_name, line_num);
> +        }
> +
> +        return is_succeed;
> +}
> +
> +inline bool operator==(const vec & a, const vec & b)
> +{ return 0xf == __builtin_ia32_movmskps(__builtin_ia32_cmpeqps(a, b)); }
> +
> +#define test(x, y) test_assert( (x)==(y), __FILE__, __LINE__ )
> +
> +int main( int argc, char * argv[] )
> +{
> +        __attribute__((aligned(16))) float data[] =
> +        { 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5 };
> +
> +        float * p = &data[2];
> +        vec2 a;
> +
> +        a = vec2::load(p);
> +
> +        vec v1 = a.v1();
> +        vec v2 = a.v2();
> +
> +	if (v2.e[3] != 7.0)
> +	  abort();
> +
> +        return 0;
> +}
> Index: gcc-4_5-branch/gcc/tree-sra.c
> ===================================================================
> --- gcc-4_5-branch.orig/gcc/tree-sra.c
> +++ gcc-4_5-branch/gcc/tree-sra.c
> @@ -2445,9 +2445,11 @@ handle_unscalarized_data_in_subtree (str
>     (sub)tree.  If that is not possible, refresh the TOP_RACC base aggregate and
>     load the accesses from it.  LEFT_OFFSET is the offset of the left whole
>     subtree being copied, RIGHT_OFFSET is the same thing for the right subtree.
> -   GSI is stmt iterator used for statement insertions.  *REFRESHED is true iff
> -   the rhs top aggregate has already been refreshed by contents of its scalar
> -   reductions and is set to true if this function has to do it.  */
> +   NEW_GSI is stmt iterator used for statement insertions after the original
> +   assignment, OLD_GSI is used to insert statements before the assignment.
> +   *REFRESHED keeps the information whether we have needed to refresh
> +   replacements of the LHS and from which side of the assignments this takes
> +   place.  */
>  
>  static void
>  load_assign_lhs_subreplacements (struct access *lacc, struct access *top_racc,
> @@ -2747,9 +2749,7 @@ sra_modify_assign (gimple *stmt, gimple_
>  					   &orig_gsi, gsi, &refreshed, lhs);
>  	  if (refreshed != SRA_UDH_RIGHT)
>  	    {
> -	      if (*stmt == gsi_stmt (*gsi))
> -		gsi_next (gsi);
> -
> +	      gsi_next (gsi);
>  	      unlink_stmt_vdef (*stmt);
>  	      gsi_remove (&orig_gsi, true);
>  	      sra_stats.deleted++;
> 
>
diff mbox

Patch

Index: gcc-4_5-branch/gcc/testsuite/g++.dg/torture/pr44900.C
===================================================================
--- /dev/null
+++ gcc-4_5-branch/gcc/testsuite/g++.dg/torture/pr44900.C
@@ -0,0 +1,91 @@ 
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-msse" } */
+/* { dg-require-effective-target sse } */
+
+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__,
+__artificial__))
+_mm_set_ps (const float __Z, const float __Y, const float __X, const float __W)
+{
+  return __extension__ (__m128)(__v4sf){ __W, __X, __Y, __Z };
+}
+
+struct vec
+{
+        union {
+                __m128 v;
+                float  e[4];
+        };
+
+        static const vec & zero()
+        {
+                static const vec v = _mm_set_ps(0, 0, 0, 0);
+                return v;
+        }
+
+        vec() {}
+        vec(const __m128 & a) : v(a) {}
+
+        operator const __m128&() const { return v; }
+};
+
+struct vec2
+{
+        vec _v1;
+        vec _v2;
+
+        vec2() {}
+        vec2(const vec & a, const vec & b) : _v1(a), _v2(b) {}
+
+        static vec2 load(const float * a)
+        {
+                return vec2(
+                        __builtin_ia32_loadups(&a[0]),
+                        __builtin_ia32_loadups(&a[4]));
+        }
+
+        const vec & v1() const { return _v1; }
+        const vec & v2() const { return _v2; }
+};
+
+extern "C" {
+        int  printf (const char*, ...);
+        void abort(void);
+}
+
+inline bool test_assert( bool is_succeed, const char * file_name, int line_num
+)
+{
+        if ( !is_succeed )
+        {
+                printf("error: %s(%d)\n", file_name, line_num);
+        }
+
+        return is_succeed;
+}
+
+inline bool operator==(const vec & a, const vec & b)
+{ return 0xf == __builtin_ia32_movmskps(__builtin_ia32_cmpeqps(a, b)); }
+
+#define test(x, y) test_assert( (x)==(y), __FILE__, __LINE__ )
+
+int main( int argc, char * argv[] )
+{
+        __attribute__((aligned(16))) float data[] =
+        { 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5 };
+
+        float * p = &data[2];
+        vec2 a;
+
+        a = vec2::load(p);
+
+        vec v1 = a.v1();
+        vec v2 = a.v2();
+
+	if (v2.e[3] != 7.0)
+	  abort();
+
+        return 0;
+}
Index: gcc-4_5-branch/gcc/tree-sra.c
===================================================================
--- gcc-4_5-branch.orig/gcc/tree-sra.c
+++ gcc-4_5-branch/gcc/tree-sra.c
@@ -2445,9 +2445,11 @@  handle_unscalarized_data_in_subtree (str
    (sub)tree.  If that is not possible, refresh the TOP_RACC base aggregate and
    load the accesses from it.  LEFT_OFFSET is the offset of the left whole
    subtree being copied, RIGHT_OFFSET is the same thing for the right subtree.
-   GSI is stmt iterator used for statement insertions.  *REFRESHED is true iff
-   the rhs top aggregate has already been refreshed by contents of its scalar
-   reductions and is set to true if this function has to do it.  */
+   NEW_GSI is stmt iterator used for statement insertions after the original
+   assignment, OLD_GSI is used to insert statements before the assignment.
+   *REFRESHED keeps the information whether we have needed to refresh
+   replacements of the LHS and from which side of the assignments this takes
+   place.  */
 
 static void
 load_assign_lhs_subreplacements (struct access *lacc, struct access *top_racc,
@@ -2747,9 +2749,7 @@  sra_modify_assign (gimple *stmt, gimple_
 					   &orig_gsi, gsi, &refreshed, lhs);
 	  if (refreshed != SRA_UDH_RIGHT)
 	    {
-	      if (*stmt == gsi_stmt (*gsi))
-		gsi_next (gsi);
-
+	      gsi_next (gsi);
 	      unlink_stmt_vdef (*stmt);
 	      gsi_remove (&orig_gsi, true);
 	      sra_stats.deleted++;