diff mbox

[PR,49094] Refrain from creating misaligned accesses in SRA

Message ID 20110630133955.GB13681@virgil.arch.suse.de
State New
Headers show

Commit Message

Martin Jambor June 30, 2011, 1:39 p.m. UTC
Hi,

I had to add a test that the analyzed expression is not an SSA name.
This has been approved by Rchi on IRC yesterday.  Thus, I have
committed the following as revision 175703 after successful run of c
and c++ testsuite on sparc64 (and a bootstrap and test with another
patch on x86_64-linux).

Thanks,

Martin


2011-06-30  Martin Jambor  <mjambor@suse.cz>

	PR tree-optimization/49094
	* tree-sra.c (tree_non_mode_aligned_mem_p): New function.
	(build_accesses_from_assign): Use it.

	* testsuite/gcc.dg/tree-ssa/pr49094.c: New test.

Comments

Martin Jambor June 30, 2011, 4:36 p.m. UTC | #1
Hi,

On Thu, Jun 30, 2011 at 03:39:55PM +0200, Martin Jambor wrote:
> Hi,
> 
> I had to add a test that the analyzed expression is not an SSA name.
> This has been approved by Rchi on IRC yesterday.  Thus, I have
> committed the following as revision 175703 after successful run of c
> and c++ testsuite on sparc64 (and a bootstrap and test with another
> patch on x86_64-linux).

I also tested fortran on sparc64 but missed a regression there
(gfortran.dg/pr25923.f90).  The problem is that *arg_1(D) is also
scrutinized, get_object_alignment returns 8 for it and that is
obviously not enough for SImode required alignment.

I assume such loads have to be aligned for the mode on strict aligned
targets and therefore they are OK.  The question is, is that true for
all MEM_REFs or only for those with zero offset?  Since the original
problem was that the expander expanded

MEM[(struct ip *)ip_3 + 7B].s_addr;

as if it was aligned, I suppose that MEM_REFs are generally fine and
we can avoid this issue by skipping them just like the SSA_NAMEs.  Is
my reasoning correct?

Thanks,

Martin


> 
> Thanks,
> 
> Martin
> 
> 
> 2011-06-30  Martin Jambor  <mjambor@suse.cz>
> 
> 	PR tree-optimization/49094
> 	* tree-sra.c (tree_non_mode_aligned_mem_p): New function.
> 	(build_accesses_from_assign): Use it.
> 
> 	* testsuite/gcc.dg/tree-ssa/pr49094.c: New test.
> 
> 
> Index: src/gcc/tree-sra.c
> ===================================================================
> --- src.orig/gcc/tree-sra.c
> +++ src/gcc/tree-sra.c
> @@ -1050,6 +1050,26 @@ disqualify_ops_if_throwing_stmt (gimple
>    return false;
>  }
>  
> +/* Return true iff type of EXP is not sufficiently aligned.  */
> +
> +static bool
> +tree_non_mode_aligned_mem_p (tree exp)
> +{
> +  enum machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
> +  unsigned int align;
> +
> +  if (TREE_CODE (exp) == SSA_NAME
> +      || mode == BLKmode
> +      || !STRICT_ALIGNMENT)
> +    return false;
> +
> +  align = get_object_alignment (exp, BIGGEST_ALIGNMENT);
> +  if (GET_MODE_ALIGNMENT (mode) > align)
> +    return true;
> +
> +  return false;
> +}
> +
>  /* Scan expressions occuring in STMT, create access structures for all accesses
>     to candidates for scalarization and remove those candidates which occur in
>     statements or expressions that prevent them from being split apart.  Return
> @@ -1074,7 +1094,10 @@ build_accesses_from_assign (gimple stmt)
>    lacc = build_access_from_expr_1 (lhs, stmt, true);
>  
>    if (lacc)
> -    lacc->grp_assignment_write = 1;
> +    {
> +      lacc->grp_assignment_write = 1;
> +      lacc->grp_unscalarizable_region |= tree_non_mode_aligned_mem_p (rhs);
> +    }
>  
>    if (racc)
>      {
> @@ -1082,6 +1105,7 @@ build_accesses_from_assign (gimple stmt)
>        if (should_scalarize_away_bitmap && !gimple_has_volatile_ops (stmt)
>  	  && !is_gimple_reg_type (racc->type))
>  	bitmap_set_bit (should_scalarize_away_bitmap, DECL_UID (racc->base));
> +      racc->grp_unscalarizable_region |= tree_non_mode_aligned_mem_p (lhs);
>      }
>  
>    if (lacc && racc
> Index: src/gcc/testsuite/gcc.dg/tree-ssa/pr49094.c
> ===================================================================
> --- /dev/null
> +++ src/gcc/testsuite/gcc.dg/tree-ssa/pr49094.c
> @@ -0,0 +1,38 @@
> +/* { dg-do run } */
> +/* { dg-options "-O" } */
> +
> +struct in_addr {
> +	unsigned int s_addr;
> +};
> +
> +struct ip {
> +	unsigned char ip_p;
> +	unsigned short ip_sum;
> +	struct	in_addr ip_src,ip_dst;
> +} __attribute__ ((aligned(1), packed));
> +
> +struct ip ip_fw_fwd_addr;
> +
> +int test_alignment( char *m )
> +{
> +  struct ip *ip = (struct ip *) m;
> +  struct in_addr pkt_dst;
> +  pkt_dst = ip->ip_dst ;
> +  if( pkt_dst.s_addr == 0 )
> +    return 1;
> +  else
> +    return 0;
> +}
> +
> +int __attribute__ ((noinline, noclone))
> +intermediary (char *p)
> +{
> +  return test_alignment (p);
> +}
> +
> +int
> +main (int argc, char *argv[])
> +{
> +  ip_fw_fwd_addr.ip_dst.s_addr = 1;
> +  return intermediary ((void *) &ip_fw_fwd_addr);
> +}
Richard Biener July 1, 2011, 7:02 a.m. UTC | #2
On Thu, 30 Jun 2011, Martin Jambor wrote:

> Hi,
> 
> On Thu, Jun 30, 2011 at 03:39:55PM +0200, Martin Jambor wrote:
> > Hi,
> > 
> > I had to add a test that the analyzed expression is not an SSA name.
> > This has been approved by Rchi on IRC yesterday.  Thus, I have
> > committed the following as revision 175703 after successful run of c
> > and c++ testsuite on sparc64 (and a bootstrap and test with another
> > patch on x86_64-linux).
> 
> I also tested fortran on sparc64 but missed a regression there
> (gfortran.dg/pr25923.f90).  The problem is that *arg_1(D) is also
> scrutinized, get_object_alignment returns 8 for it and that is
> obviously not enough for SImode required alignment.
> 
> I assume such loads have to be aligned for the mode on strict aligned
> targets and therefore they are OK.  The question is, is that true for
> all MEM_REFs or only for those with zero offset?  Since the original
> problem was that the expander expanded
> 
> MEM[(struct ip *)ip_3 + 7B].s_addr;
> 
> as if it was aligned, I suppose that MEM_REFs are generally fine and
> we can avoid this issue by skipping them just like the SSA_NAMEs.  Is
> my reasoning correct?

Not really.  This just highlights the issue that alignment on
strict-align targets is broken for any non-component-ref style
access.  As we happily fold component-refs into the MEM_REFs offset
the issue is now more appearant.

For MEM_REFs the alignment is supposed to be at least that of
TYPE_ALIGN (TREE_TYPE (mem-ref)), even though the expanders
would ignore that.

Richard.

> Thanks,
> 
> Martin
> 
> 
> > 
> > Thanks,
> > 
> > Martin
> > 
> > 
> > 2011-06-30  Martin Jambor  <mjambor@suse.cz>
> > 
> > 	PR tree-optimization/49094
> > 	* tree-sra.c (tree_non_mode_aligned_mem_p): New function.
> > 	(build_accesses_from_assign): Use it.
> > 
> > 	* testsuite/gcc.dg/tree-ssa/pr49094.c: New test.
> > 
> > 
> > Index: src/gcc/tree-sra.c
> > ===================================================================
> > --- src.orig/gcc/tree-sra.c
> > +++ src/gcc/tree-sra.c
> > @@ -1050,6 +1050,26 @@ disqualify_ops_if_throwing_stmt (gimple
> >    return false;
> >  }
> >  
> > +/* Return true iff type of EXP is not sufficiently aligned.  */
> > +
> > +static bool
> > +tree_non_mode_aligned_mem_p (tree exp)
> > +{
> > +  enum machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
> > +  unsigned int align;
> > +
> > +  if (TREE_CODE (exp) == SSA_NAME
> > +      || mode == BLKmode
> > +      || !STRICT_ALIGNMENT)
> > +    return false;
> > +
> > +  align = get_object_alignment (exp, BIGGEST_ALIGNMENT);
> > +  if (GET_MODE_ALIGNMENT (mode) > align)
> > +    return true;
> > +
> > +  return false;
> > +}
> > +
> >  /* Scan expressions occuring in STMT, create access structures for all accesses
> >     to candidates for scalarization and remove those candidates which occur in
> >     statements or expressions that prevent them from being split apart.  Return
> > @@ -1074,7 +1094,10 @@ build_accesses_from_assign (gimple stmt)
> >    lacc = build_access_from_expr_1 (lhs, stmt, true);
> >  
> >    if (lacc)
> > -    lacc->grp_assignment_write = 1;
> > +    {
> > +      lacc->grp_assignment_write = 1;
> > +      lacc->grp_unscalarizable_region |= tree_non_mode_aligned_mem_p (rhs);
> > +    }
> >  
> >    if (racc)
> >      {
> > @@ -1082,6 +1105,7 @@ build_accesses_from_assign (gimple stmt)
> >        if (should_scalarize_away_bitmap && !gimple_has_volatile_ops (stmt)
> >  	  && !is_gimple_reg_type (racc->type))
> >  	bitmap_set_bit (should_scalarize_away_bitmap, DECL_UID (racc->base));
> > +      racc->grp_unscalarizable_region |= tree_non_mode_aligned_mem_p (lhs);
> >      }
> >  
> >    if (lacc && racc
> > Index: src/gcc/testsuite/gcc.dg/tree-ssa/pr49094.c
> > ===================================================================
> > --- /dev/null
> > +++ src/gcc/testsuite/gcc.dg/tree-ssa/pr49094.c
> > @@ -0,0 +1,38 @@
> > +/* { dg-do run } */
> > +/* { dg-options "-O" } */
> > +
> > +struct in_addr {
> > +	unsigned int s_addr;
> > +};
> > +
> > +struct ip {
> > +	unsigned char ip_p;
> > +	unsigned short ip_sum;
> > +	struct	in_addr ip_src,ip_dst;
> > +} __attribute__ ((aligned(1), packed));
> > +
> > +struct ip ip_fw_fwd_addr;
> > +
> > +int test_alignment( char *m )
> > +{
> > +  struct ip *ip = (struct ip *) m;
> > +  struct in_addr pkt_dst;
> > +  pkt_dst = ip->ip_dst ;
> > +  if( pkt_dst.s_addr == 0 )
> > +    return 1;
> > +  else
> > +    return 0;
> > +}
> > +
> > +int __attribute__ ((noinline, noclone))
> > +intermediary (char *p)
> > +{
> > +  return test_alignment (p);
> > +}
> > +
> > +int
> > +main (int argc, char *argv[])
> > +{
> > +  ip_fw_fwd_addr.ip_dst.s_addr = 1;
> > +  return intermediary ((void *) &ip_fw_fwd_addr);
> > +}
> 
>
Richard Biener July 20, 2011, 7:47 a.m. UTC | #3
On Tue, Jul 19, 2011 at 8:20 PM, Ulrich Weigand <uweigand@de.ibm.com> wrote:
> Martin Jambor wrote:
>
>> I had to add a test that the analyzed expression is not an SSA name.
>> This has been approved by Rchi on IRC yesterday.  Thus, I have
>> committed the following as revision 175703 after successful run of c
>> and c++ testsuite on sparc64 (and a bootstrap and test with another
>> patch on x86_64-linux).
>>
>> Thanks,
>>
>> Martin
>>
>>
>> 2011-06-30  Martin Jambor  <mjambor@suse.cz>
>>
>>       PR tree-optimization/49094
>>       * tree-sra.c (tree_non_mode_aligned_mem_p): New function.
>>       (build_accesses_from_assign): Use it.
>
> This causes a regression on spu-elf:
> FAIL: gcc.dg/tree-ssa/forwprop-5.c scan-tree-dump-times optimized "disappear" 0
>
> The problem is that in this expression
>  disappear = VIEW_CONVERT_EXPR<struct VecClass>(x_8);
> the rhs is considered unaligned and blocks the SRA transformation.
>
> The check you added for SSA_NAMEs doesn't hit, because the SSA_NAME is
> encapsulated in a VIEW_CONVERT_EXPR.  When get_object_alignment is called,
> the VIEW_CONVERT_EXPR is stripped off by get_inner_reference and the
> SSA_NAME appears, but then get_object_alignment doesn't handle it
> and just returns the default alignment of 8 bits.
>
> Maybe get_object_alignment should itself handle SSA_NAMEs?

But what should it return for a rvalue?  There is no "alignment" here.  I think
SRA should avoid asking for rvalues.

Richard.

> Bye,
> Ulrich
>
> --
>  Dr. Ulrich Weigand
>  GNU Toolchain for Linux on System z and Cell BE
>  Ulrich.Weigand@de.ibm.com
>
diff mbox

Patch

Index: src/gcc/tree-sra.c
===================================================================
--- src.orig/gcc/tree-sra.c
+++ src/gcc/tree-sra.c
@@ -1050,6 +1050,26 @@  disqualify_ops_if_throwing_stmt (gimple
   return false;
 }
 
+/* Return true iff type of EXP is not sufficiently aligned.  */
+
+static bool
+tree_non_mode_aligned_mem_p (tree exp)
+{
+  enum machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
+  unsigned int align;
+
+  if (TREE_CODE (exp) == SSA_NAME
+      || mode == BLKmode
+      || !STRICT_ALIGNMENT)
+    return false;
+
+  align = get_object_alignment (exp, BIGGEST_ALIGNMENT);
+  if (GET_MODE_ALIGNMENT (mode) > align)
+    return true;
+
+  return false;
+}
+
 /* Scan expressions occuring in STMT, create access structures for all accesses
    to candidates for scalarization and remove those candidates which occur in
    statements or expressions that prevent them from being split apart.  Return
@@ -1074,7 +1094,10 @@  build_accesses_from_assign (gimple stmt)
   lacc = build_access_from_expr_1 (lhs, stmt, true);
 
   if (lacc)
-    lacc->grp_assignment_write = 1;
+    {
+      lacc->grp_assignment_write = 1;
+      lacc->grp_unscalarizable_region |= tree_non_mode_aligned_mem_p (rhs);
+    }
 
   if (racc)
     {
@@ -1082,6 +1105,7 @@  build_accesses_from_assign (gimple stmt)
       if (should_scalarize_away_bitmap && !gimple_has_volatile_ops (stmt)
 	  && !is_gimple_reg_type (racc->type))
 	bitmap_set_bit (should_scalarize_away_bitmap, DECL_UID (racc->base));
+      racc->grp_unscalarizable_region |= tree_non_mode_aligned_mem_p (lhs);
     }
 
   if (lacc && racc
Index: src/gcc/testsuite/gcc.dg/tree-ssa/pr49094.c
===================================================================
--- /dev/null
+++ src/gcc/testsuite/gcc.dg/tree-ssa/pr49094.c
@@ -0,0 +1,38 @@ 
+/* { dg-do run } */
+/* { dg-options "-O" } */
+
+struct in_addr {
+	unsigned int s_addr;
+};
+
+struct ip {
+	unsigned char ip_p;
+	unsigned short ip_sum;
+	struct	in_addr ip_src,ip_dst;
+} __attribute__ ((aligned(1), packed));
+
+struct ip ip_fw_fwd_addr;
+
+int test_alignment( char *m )
+{
+  struct ip *ip = (struct ip *) m;
+  struct in_addr pkt_dst;
+  pkt_dst = ip->ip_dst ;
+  if( pkt_dst.s_addr == 0 )
+    return 1;
+  else
+    return 0;
+}
+
+int __attribute__ ((noinline, noclone))
+intermediary (char *p)
+{
+  return test_alignment (p);
+}
+
+int
+main (int argc, char *argv[])
+{
+  ip_fw_fwd_addr.ip_dst.s_addr = 1;
+  return intermediary ((void *) &ip_fw_fwd_addr);
+}