[PR,85762,87008,85459] Relax MEM_REF check in contains_vce_or_bfcref_p

Message ID ri6imwx9ycw.fsf@suse.cz
State New
Series [PR,85762,87008,85459] Relax MEM_REF check in contains_vce_or_bfcref_p

Commit Message

Martin Jambor March 5, 2019, 2:42 p.m. UTC
Hi,

after looking into the three PRs I found that the problem is the same in
all of them, introduced in revision 255510, with SRA treating completely
non-type-punning MEM_REFs to a field of a structure as a V_C_E (these
are typically introduced by inlining tiny C++ getters/setters and other
methods), sending it to a paranoid mode and leaving many unnecessary
memory accesses behind.
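
A minimal sketch of that pattern (purely illustrative, not one of the PR
testcases) would be something like:

  /* Hypothetical illustration only: set() is a member of the base class A,
     so after inlining, its stores reach the B-typed local through A-typed
     MEM_REFs of &b, although nothing is really type-punned.  */
  struct A {
    long a, b;
    void set (long x, long y) { a = x; b = y; }
  };
  struct B : A {};

  long f (long v)
  {
    B b;
    b.set (v, v + 1);   /* inlined into MEM_REF stores with type A */
    return b.a + b.b;   /* SRA should still scalarize b completely */
  }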

I believe the right fix is to relax the condition to what it was
intended to do in r255510 to fix PR 83141.  This particular behavior was
chosen because we were afraid floating-point accesses might be
introduced to load non-FP data, but as https://pastebin.com/Jk7ZPsVH
shows, that can still happen and so if it really must be avoided at all
costs, we have to deal with it differently.

The regressions this fixes are potentially severe, therefore I'd like to
backport this also to the gcc-8-branch.  The patch has passed bootstrap
and testing on x86_64-linux and aarch64-linux, testing and bootstrap on
i686-linux  and ppc64le-linux are in progress.

OK for trunk and then later on for the branch?

Thanks,


Martin


2019-03-01  Martin Jambor  <mjambor@suse.cz>

	PR tree-optimization/85762
	PR tree-optimization/87008
	PR tree-optimization/85459
	* tree-sra.c (contains_vce_or_bfcref_p): Relax MEM_REF type-conversion
	check.

	testsuite/
	* g++.dg/tree-ssa/pr87008.C: New test.
---
 gcc/testsuite/g++.dg/tree-ssa/pr87008.C | 17 +++++++++++++++++
 gcc/tree-sra.c                          | 13 ++++---------
 2 files changed, 21 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr87008.C

Comments

Richard Biener March 5, 2019, 2:51 p.m. UTC | #1
On Tue, 5 Mar 2019, Martin Jambor wrote:

> Hi,
> 
> after looking into the three PRs I found that the problem is the same in
> all of them, introduced in revision 255510, with SRA treating completely
> non-type-punning MEM_REFs to a field of a structure as a V_C_E (these
> are typically introduced by inlining tiny C++ getters/setters and other
> methods), sending it to a paranoid mode and leaving many unnecessary
> memory accesses behind.
> 
> I believe the right fix is to relax the condition to what it was
> intended to do in r255510 to fix PR 83141.  This particular behavior was
> chosen because we were afraid floating-point accesses might be
> introduced to load non-FP data, but as https://pastebin.com/Jk7ZPsVH
> shows, that can still happen and so if it really must be avoided at all
> costs, we have to deal with it differently.
> 
> The regressions this fixes are potentially severe, therefore I'd like to
> backport this also to the gcc-8-branch.  The patch has passed bootstrap
> and testing on x86_64-linux and aarch64-linux, testing and bootstrap on
> i686-linux  and ppc64le-linux are in progress.
> 
> OK for trunk and then later on for the branch?
> 
> Thanks,
> 
> 
> Martin
> 
> 
> 2019-03-01  Martin Jambor  <mjambor@suse.cz>
> 
> 	PR tree-optimization/85762
> 	PR tree-optimization/87008
> 	PR tree-optimization/85459
> 	* tree-sra.c (contains_vce_or_bfcref_p): Relax MEM_REF type-conversion
> 	check.
> 
> 	testsuite/
> 	* g++.dg/tree-ssa/pr87008.C: New test.
> ---
>  gcc/testsuite/g++.dg/tree-ssa/pr87008.C | 17 +++++++++++++++++
>  gcc/tree-sra.c                          | 13 ++++---------
>  2 files changed, 21 insertions(+), 9 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr87008.C
> 
> diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr87008.C b/gcc/testsuite/g++.dg/tree-ssa/pr87008.C
> new file mode 100644
> index 00000000000..eef521f9ad5
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/tree-ssa/pr87008.C
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +extern void dontcallthis();
> +
> +struct A { long a, b; };
> +struct B : A {};
> +template<class T>void cp(T&a,T const&b){a=b;}
> +long f(B x){
> +  B y; cp<A>(y,x);
> +  B z; cp<A>(z,x);
> +  if (y.a - z.a)
> +    dontcallthis ();
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "dontcallthis" "optimized" } } */
> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
> index eeef31ba496..bd30af5c6e0 100644
> --- a/gcc/tree-sra.c
> +++ b/gcc/tree-sra.c
> @@ -1151,7 +1151,7 @@ contains_view_convert_expr_p (const_tree ref)
>  }
>  
>  /* Return true if REF contains a VIEW_CONVERT_EXPR or a MEM_REF that performs
> -   type conversion or a COMPONENT_REF with a bit-field field declaration.  */
> +   memcpy or a COMPONENT_REF with a bit-field field declaration.  */
>  
>  static bool
>  contains_vce_or_bfcref_p (const_tree ref)
> @@ -1165,14 +1165,9 @@ contains_vce_or_bfcref_p (const_tree ref)
>        ref = TREE_OPERAND (ref, 0);
>      }
>  
> -  if (TREE_CODE (ref) != MEM_REF
> -      || TREE_CODE (TREE_OPERAND (ref, 0)) != ADDR_EXPR)
> -    return false;
> -
> -  tree mem = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
> -  if (TYPE_MAIN_VARIANT (TREE_TYPE (ref))
> -      != TYPE_MAIN_VARIANT (TREE_TYPE (mem)))
> -    return true;
> +  if (TREE_CODE (ref) == MEM_REF
> +      && TYPE_REF_CAN_ALIAS_ALL (TREE_TYPE (TREE_OPERAND (ref, 1))))
> +      return true;

This doesn't make much sense to me - why not simply go back to the
old implementation which would mean to just

   return false;

here?

Richard.
Richard Biener March 5, 2019, 2:55 p.m. UTC | #2
On Tue, 5 Mar 2019, Richard Biener wrote:

> On Tue, 5 Mar 2019, Martin Jambor wrote:
> 
> > Hi,
> > 
> > after looking into the three PRs I found that the problem is the same in
> > all of them, introduced in revision 255510, with SRA treating completely
> > non-type-punning MEM_REFs to a field of a structure as a V_C_E (these
> > are typically introduced by inlining tiny C++ getters/setters and other
> > methods), sending it to a paranoid mode and leaving many unnecessary
> > memory accesses behind.
> > 
> > I believe the right fix is to relax the condition to what it was
> > intended to do in r255510 to fix PR 83141.  This particular behavior was
> > chosen because we were afraid floating-point accesses might be
> > introduced to load non-FP data, but as https://pastebin.com/Jk7ZPsVH
> > shows, that can still happen and so if it really must be avoided at all
> > costs, we have to deal with it differently.
> > 
> > The regressions this fixes are potentially severe, therefore I'd like to
> > backport this also to the gcc-8-branch.  The patch has passed bootstrap
> > and testing on x86_64-linux and aarch64-linux, testing and bootstrap on
> > i686-linux  and ppc64le-linux are in progress.
> > 
> > OK for trunk and then later on for the branch?
> > 
> > Thanks,
> > 
> > 
> > Martin
> > 
> > 
> > 2019-03-01  Martin Jambor  <mjambor@suse.cz>
> > 
> > 	PR tree-optimization/85762
> > 	PR tree-optimization/87008
> > 	PR tree-optimization/85459
> > 	* tree-sra.c (contains_vce_or_bfcref_p): Relax MEM_REF type-conversion
> > 	check.
> > 
> > 	testsuite/
> > 	* g++.dg/tree-ssa/pr87008.C: New test.
> > ---
> >  gcc/testsuite/g++.dg/tree-ssa/pr87008.C | 17 +++++++++++++++++
> >  gcc/tree-sra.c                          | 13 ++++---------
> >  2 files changed, 21 insertions(+), 9 deletions(-)
> >  create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr87008.C
> > 
> > diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr87008.C b/gcc/testsuite/g++.dg/tree-ssa/pr87008.C
> > new file mode 100644
> > index 00000000000..eef521f9ad5
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/tree-ssa/pr87008.C
> > @@ -0,0 +1,17 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -fdump-tree-optimized" } */
> > +
> > +extern void dontcallthis();
> > +
> > +struct A { long a, b; };
> > +struct B : A {};
> > +template<class T>void cp(T&a,T const&b){a=b;}
> > +long f(B x){
> > +  B y; cp<A>(y,x);
> > +  B z; cp<A>(z,x);
> > +  if (y.a - z.a)
> > +    dontcallthis ();
> > +  return 0;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-not "dontcallthis" "optimized" } } */
> > diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
> > index eeef31ba496..bd30af5c6e0 100644
> > --- a/gcc/tree-sra.c
> > +++ b/gcc/tree-sra.c
> > @@ -1151,7 +1151,7 @@ contains_view_convert_expr_p (const_tree ref)
> >  }
> >  
> >  /* Return true if REF contains a VIEW_CONVERT_EXPR or a MEM_REF that performs
> > -   type conversion or a COMPONENT_REF with a bit-field field declaration.  */
> > +   memcpy or a COMPONENT_REF with a bit-field field declaration.  */
> >  
> >  static bool
> >  contains_vce_or_bfcref_p (const_tree ref)
> > @@ -1165,14 +1165,9 @@ contains_vce_or_bfcref_p (const_tree ref)
> >        ref = TREE_OPERAND (ref, 0);
> >      }
> >  
> > -  if (TREE_CODE (ref) != MEM_REF
> > -      || TREE_CODE (TREE_OPERAND (ref, 0)) != ADDR_EXPR)
> > -    return false;
> > -
> > -  tree mem = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
> > -  if (TYPE_MAIN_VARIANT (TREE_TYPE (ref))
> > -      != TYPE_MAIN_VARIANT (TREE_TYPE (mem)))
> > -    return true;
> > +  if (TREE_CODE (ref) == MEM_REF
> > +      && TYPE_REF_CAN_ALIAS_ALL (TREE_TYPE (TREE_OPERAND (ref, 1))))
> > +      return true;
> 
> This doesn't make much sense to me - why not simply go back to the
> old implementation which would mean to just
> 
>    return false;
> 
> here?

Ah - because the testcase from r255510 would break...

The above is still a "bit" very tied to how we fold memcpy and friends,
thus unreliable.

Isn't the issue for the memcpy thing that we fail to record an
access for the char[] access (remember it's now char[] MEM_REF,
no longer struct X * MEM_REF with ref-all!).  So maybe it now
_does_ work with just return false...

Richard.
Martin Jambor March 7, 2019, 10:53 a.m. UTC | #3
Hi,

sorry for a somewhat long turnaround...

On Tue, Mar 05 2019, Richard Biener wrote:
> On Tue, 5 Mar 2019, Richard Biener wrote:
>
>> On Tue, 5 Mar 2019, Martin Jambor wrote:
>> > @@ -1165,14 +1165,9 @@ contains_vce_or_bfcref_p (const_tree ref)
>> >        ref = TREE_OPERAND (ref, 0);
>> >      }
>> >  
>> > -  if (TREE_CODE (ref) != MEM_REF
>> > -      || TREE_CODE (TREE_OPERAND (ref, 0)) != ADDR_EXPR)
>> > -    return false;
>> > -
>> > -  tree mem = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
>> > -  if (TYPE_MAIN_VARIANT (TREE_TYPE (ref))
>> > -      != TYPE_MAIN_VARIANT (TREE_TYPE (mem)))
>> > -    return true;
>> > +  if (TREE_CODE (ref) == MEM_REF
>> > +      && TYPE_REF_CAN_ALIAS_ALL (TREE_TYPE (TREE_OPERAND (ref, 1))))
>> > +      return true;
>> 
>> This doesn't make much sense to me - why not simply go back to the
>> old implementation which would mean to just
>> 
>>    return false;
>> 
>> here?
>
> Ah - because the testcase from r255510 would break...

Yes.

>
> The above is still a "bit" very tied to how we fold memcpy and friends,
> thus unreliable.

Well, I thought about it a bit too but eventually decided the
unreliability is on the false positive side.
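For instance, an entirely innocent same-type copy such as the hypothetical
snippet below is (as far as I understand the folding) turned into a
character-array MEM_REF assignment of the whole object, so the check flags
it and we merely lose scalarization rather than correctness:

  #include <cstring>

  struct S { long a, b; };

  /* Hypothetical illustration only: dst is never type-punned, but the
     folded memcpy copies it as one block of bytes, which the relaxed
     check still treats conservatively.  */
  long copy_and_read (const S &src)
  {
    S dst;
    std::memcpy (&dst, &src, sizeof dst);
    return dst.a + dst.b;
  }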

>
> Isn't the issue for the memcpy thing that we fail to record an
> access for the char[] access (remember it's now char[] MEM_REF,
> no longer struct X * MEM_REF with ref-all!).  So maybe it now
> _does_ work with just return false...
>

No, it does not work, I had tried.  But yesterday I had another look at
the testcase and realized that the reason it does not is that total
scalarization kicks back in and we ignore data in padding in totally
scalarized aggregates (otherwise we would have never removed the
original accesses to the aggregate which is the point of total
scalarization).
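
To spell out the padding concern with a hypothetical example (not one of
the testcases): once data parked in padding is observable, dropping it
during total scalarization changes behavior, and a type-changing
whole-object access is exactly what makes it observable:

  #include <cstring>

  struct P { char c; /* 3 bytes of padding on common ABIs */ int i; };

  /* Hypothetical illustration only: the two memcpy calls copy the padding
     bytes in and back out, so keeping only the declared fields of p would
     lose the value returned below.  */
  int observe_padding (const unsigned char *in /* >= sizeof (P) bytes */)
  {
    P p;
    std::memcpy (&p, in, sizeof p);
    unsigned char out[sizeof p];
    std::memcpy (out, &p, sizeof p);
    return out[1];   /* a byte that lives in the padding of P */
  }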

So, if we can cope with the headache of yet another weird flag in the SRA
access structure, the following patch also fixes the issue because it
does consider padding important if it is accessed with a type-changing
MEM_REF - even when the aggregate is totally scalarized.

If we wanted to be even less conservative, the test in
contains_vce_or_bfcref_p could be extended along the lines of the code
in comment 6 of PR 85762 but for all three PRs this fixes, the produced
code is the same (or seems to be the same), so perhaps this is good
enough(TM).

So far I have only tested this on x86_64-linux (and found out I need to
un-XFAIL gcc.dg/guality/pr54970.c which is not so surprising because it
was XFAILed by r255510).  Should I test a bit more and commit this to
trunk (and then to gcc-8-branch)?

Thanks,

Martin



2019-03-06  Martin Jambor  <mjambor@suse.cz>

	PR tree-optimization/85762
	PR tree-optimization/87008
	PR tree-optimization/85459
	* tree-sra.c (struct access): New flag grp_type_chaning_ref.
	(dump_access): Dump it.
	(contains_vce_or_bfcref_p): New parameter, set the bool it points
	to if there is a type changing MEM_REF.  Adjust all callers.
	(build_accesses_from_assign): Set grp_type_chaning_ref if
	appropriate.
	(sort_and_splice_var_accesses): Merge grp_type_chaning_ref values.

	testsuite/
	* g++.dg/tree-ssa/pr87008.C: New test.
	* gcc.dg/tree-ssa/pr83141.c: Remove dump test.
---
 gcc/testsuite/g++.dg/tree-ssa/pr87008.C | 17 ++++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr83141.c |  3 +-
 gcc/tree-sra.c                          | 58 ++++++++++++++++++-------
 3 files changed, 60 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr87008.C

diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr87008.C b/gcc/testsuite/g++.dg/tree-ssa/pr87008.C
new file mode 100644
index 00000000000..eef521f9ad5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr87008.C
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+extern void dontcallthis();
+
+struct A { long a, b; };
+struct B : A {};
+template<class T>void cp(T&a,T const&b){a=b;}
+long f(B x){
+  B y; cp<A>(y,x);
+  B z; cp<A>(z,x);
+  if (y.a - z.a)
+    dontcallthis ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "dontcallthis" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83141.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83141.c
index 73ea45c613c..d1ad3340dbd 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr83141.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83141.c
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-O -fdump-tree-esra-details" } */
+/* { dg-options "-O" } */
 
 volatile short vs;
 volatile long vl;
@@ -34,4 +34,3 @@ int main()
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-not "Will attempt to totally scalarize" "esra" } } */
diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index eeef31ba496..003a54e99f2 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -224,6 +224,10 @@ struct access
      entirely? */
   unsigned grp_total_scalarization : 1;
 
+  /* Set when this portion of the aggregate is accessed either through a
+     non-register type VIEW_CONVERT_EXPR or a type changing MEM_REF.  */
+  unsigned grp_type_chaning_ref : 1;
+
   /* Other passes of the analysis use this bit to make function
      analyze_access_subtree create scalar replacements for this group if
      possible.  */
@@ -441,6 +445,7 @@ dump_access (FILE *f, struct access *access, bool grp)
     fprintf (f, ", grp_read = %d, grp_write = %d, grp_assignment_read = %d, "
 	     "grp_assignment_write = %d, grp_scalar_read = %d, "
 	     "grp_scalar_write = %d, grp_total_scalarization = %d, "
+	     "grp_type_chaning_ref = %d, "
 	     "grp_hint = %d, grp_covered = %d, "
 	     "grp_unscalarizable_region = %d, grp_unscalarized_data = %d, "
 	     "grp_partial_lhs = %d, grp_to_be_replaced = %d, "
@@ -449,6 +454,7 @@ dump_access (FILE *f, struct access *access, bool grp)
 	     access->grp_read, access->grp_write, access->grp_assignment_read,
 	     access->grp_assignment_write, access->grp_scalar_read,
 	     access->grp_scalar_write, access->grp_total_scalarization,
+	     access->grp_type_chaning_ref,
 	     access->grp_hint, access->grp_covered,
 	     access->grp_unscalarizable_region, access->grp_unscalarized_data,
 	     access->grp_partial_lhs, access->grp_to_be_replaced,
@@ -456,9 +462,9 @@ dump_access (FILE *f, struct access *access, bool grp)
 	     access->grp_not_necessarilly_dereferenced);
   else
     fprintf (f, ", write = %d, grp_total_scalarization = %d, "
-	     "grp_partial_lhs = %d\n",
+	     "grp_type_chaning_ref = %d, grp_partial_lhs = %d\n",
 	     access->write, access->grp_total_scalarization,
-	     access->grp_partial_lhs);
+	     access->grp_type_chaning_ref, access->grp_partial_lhs);
 }
 
 /* Dump a subtree rooted in ACCESS to file F, indent by LEVEL.  */
@@ -1150,29 +1156,36 @@ contains_view_convert_expr_p (const_tree ref)
   return false;
 }
 
-/* Return true if REF contains a VIEW_CONVERT_EXPR or a MEM_REF that performs
-   type conversion or a COMPONENT_REF with a bit-field field declaration.  */
+/* Return true if REF contains a VIEW_CONVERT_EXPR or a COMPONENT_REF with a
+   bit-field field declaration.  If TYPE_CHANGING_P is non-NULL, set the bool
+   it points to will be set if REF contains any of the above or a MEM_REF
+   expression that effectively performs type conversion.  */
 
 static bool
-contains_vce_or_bfcref_p (const_tree ref)
+contains_vce_or_bfcref_p (const_tree ref, bool *type_changing_p)
 {
   while (handled_component_p (ref))
     {
       if (TREE_CODE (ref) == VIEW_CONVERT_EXPR
 	  || (TREE_CODE (ref) == COMPONENT_REF
 	      && DECL_BIT_FIELD (TREE_OPERAND (ref, 1))))
-	return true;
+	{
+	  if (type_changing_p)
+	    *type_changing_p = true;
+	  return true;
+	}
       ref = TREE_OPERAND (ref, 0);
     }
 
-  if (TREE_CODE (ref) != MEM_REF
+  if (!type_changing_p
+      || TREE_CODE (ref) != MEM_REF
       || TREE_CODE (TREE_OPERAND (ref, 0)) != ADDR_EXPR)
     return false;
 
   tree mem = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
   if (TYPE_MAIN_VARIANT (TREE_TYPE (ref))
       != TYPE_MAIN_VARIANT (TREE_TYPE (mem)))
-    return true;
+    *type_changing_p = true;
 
   return false;
 }
@@ -1368,15 +1381,21 @@ build_accesses_from_assign (gimple *stmt)
       lacc->grp_assignment_write = 1;
       if (storage_order_barrier_p (rhs))
 	lacc->grp_unscalarizable_region = 1;
+      bool type_changing_p = false;
+      contains_vce_or_bfcref_p (lhs, &type_changing_p);
+      lacc->grp_type_chaning_ref |= type_changing_p;
     }
 
   if (racc)
     {
       racc->grp_assignment_read = 1;
-      if (should_scalarize_away_bitmap && !gimple_has_volatile_ops (stmt)
-	  && !is_gimple_reg_type (racc->type))
+      bool type_changing_p = false;
+      bool vce_or_bf = contains_vce_or_bfcref_p (rhs, &type_changing_p);
+      racc->grp_type_chaning_ref |= type_changing_p;
+      if (should_scalarize_away_bitmap && !is_gimple_reg_type (racc->type))
 	{
-	  if (contains_vce_or_bfcref_p (rhs))
+
+	  if (vce_or_bf || gimple_has_volatile_ops (stmt))
 	    bitmap_set_bit (cannot_scalarize_away_bitmap,
 			    DECL_UID (racc->base));
 	  else
@@ -2095,6 +2114,7 @@ sort_and_splice_var_accesses (tree var)
       bool grp_assignment_write = access->grp_assignment_write;
       bool multiple_scalar_reads = false;
       bool total_scalarization = access->grp_total_scalarization;
+      bool type_chaning_ref = access->grp_type_chaning_ref;
       bool grp_partial_lhs = access->grp_partial_lhs;
       bool first_scalar = is_gimple_reg_type (access->type);
       bool unscalarizable_region = access->grp_unscalarizable_region;
@@ -2144,6 +2164,7 @@ sort_and_splice_var_accesses (tree var)
 	  grp_partial_lhs |= ac2->grp_partial_lhs;
 	  unscalarizable_region |= ac2->grp_unscalarizable_region;
 	  total_scalarization |= ac2->grp_total_scalarization;
+	  type_chaning_ref |= ac2->grp_type_chaning_ref;
 	  relink_to_new_repr (access, ac2);
 
 	  /* If there are both aggregate-type and scalar-type accesses with
@@ -2182,6 +2203,7 @@ sort_and_splice_var_accesses (tree var)
       access->grp_hint = total_scalarization
 	|| (multiple_scalar_reads && !constant_decl_p (var));
       access->grp_total_scalarization = total_scalarization;
+      access->grp_type_chaning_ref = type_chaning_ref;
       access->grp_partial_lhs = grp_partial_lhs;
       access->grp_unscalarizable_region = unscalarizable_region;
 
@@ -2538,7 +2560,11 @@ analyze_access_subtree (struct access *root, struct access *parent,
 	root->grp_total_scalarization = 0;
     }
 
-  if (!hole || root->grp_total_scalarization)
+  if (!hole
+      /* We assume that totally scalarized aggregates do not have any
+	 meaningful data in holes (padding), but only if they are accessed as
+	 exactly the same types as they are declared.  */
+      || (root->grp_total_scalarization && !root->grp_type_chaning_ref))
     root->grp_covered = 1;
   else if (root->grp_write || comes_initialized_p (root->base))
     root->grp_unscalarized_data = 1; /* not covered and written to */
@@ -3557,7 +3583,7 @@ sra_modify_assign (gimple *stmt, gimple_stmt_iterator *gsi)
 	    }
 	  else if (lacc
 		   && AGGREGATE_TYPE_P (TREE_TYPE (rhs))
-		   && !contains_vce_or_bfcref_p (rhs))
+		   && !contains_vce_or_bfcref_p (rhs, NULL))
 	    rhs = build_ref_for_model (loc, rhs, 0, lacc, gsi, false);
 
 	  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
@@ -3578,7 +3604,7 @@ sra_modify_assign (gimple *stmt, gimple_stmt_iterator *gsi)
       if (!useless_type_conversion_p (TREE_TYPE (dlhs), TREE_TYPE (drhs)))
 	{
 	  if (AGGREGATE_TYPE_P (TREE_TYPE (drhs))
-	      && !contains_vce_or_bfcref_p (drhs))
+	      && !contains_vce_or_bfcref_p (drhs, NULL))
 	    drhs = build_debug_ref_for_model (loc, drhs, 0, lacc);
 	  if (drhs
 	      && !useless_type_conversion_p (TREE_TYPE (dlhs),
@@ -3625,8 +3651,8 @@ sra_modify_assign (gimple *stmt, gimple_stmt_iterator *gsi)
 
   if (modify_this_stmt
       || gimple_has_volatile_ops (stmt)
-      || contains_vce_or_bfcref_p (rhs)
-      || contains_vce_or_bfcref_p (lhs)
+      || contains_vce_or_bfcref_p (rhs, NULL)
+      || contains_vce_or_bfcref_p (lhs, NULL)
       || stmt_ends_bb_p (stmt))
     {
       /* No need to copy into a constant-pool, it comes pre-initialized.  */
Richard Biener March 8, 2019, 7:04 p.m. UTC | #4
On March 8, 2019 5:57:35 PM GMT+01:00, Martin Jambor <mjambor@suse.cz> wrote:
>Hi,
>
>BTW, we have dropped the gcc-patches from the CC quite some emails ago
>:-/
>
>On Fri, Mar 08 2019, Richard Biener wrote:
>> On Thu, 7 Mar 2019, Martin Jambor wrote:
>>
>>> Hello again,
>>> 
>>> On Thu, Mar 07 2019, Martin Jambor wrote:
>>> > Hi,
>>> >
>>> > On Thu, Mar 07 2019, Richard Biener wrote:
>>> >>
>>> >> So possibly we should restrict total scalarization to the case
>>> >> where all accesses are layout compatible (that is,
>>> >> types_compatible_p)?
>>> >>
>>> >
>>> > That would be the patch below (currently undergoing testing).  It is
>>> > slightly more strict than mine but on the three testcases in question,
>>> > it gives the same results too.  People memcpying their aggregates
>>> > around, even when not doing anything silly like storing stuff in
>>> > padding, will find their code slower but arguably closer to what they
>>> > wanted.
>>> 
>>> So, testing revealed that (unlike my previous version) this breaks two
>>> guality tests where we depend on total scalarization to convey to the
>>> debugger what is in bits of an aggregate.
>>> 
>>> In gcc.dg/guality/pr54970.c, the array a is no longer totally scalarized
>>> because it is memcpied into and gdb no longer knows what constant lurks
>>> in a[0], which is otherwise unused.  Probably not a big deal.
>>> 
>>> In gcc.dg/guality/pr59776.c, where the main type and function are:
>>> 
>>>      struct S { float f, g; };
>>>     
>>>      __attribute__((noinline, noclone)) void
>>>      foo (struct S *p)
>>>      {
>>>        struct S s1, s2;                      /* { dg-final { gdb-test pr59776.c:17 "s1.f" "5.0" } } */
>>>        s1 = *p;                              /* { dg-final { gdb-test pr59776.c:17 "s1.g" "6.0" } } */
>>>        s2 = s1;                              /* { dg-final { gdb-test pr59776.c:17 "s2.f" "0.0" } } */
>>>        *(int *) &s2.f = 0;                   /* { dg-final { gdb-test pr59776.c:17 "s2.g" "6.0" } } */
>>>        asm volatile (NOP : : : "memory");    /* { dg-final { gdb-test pr59776.c:20 "s1.f" "5.0" } } */
>>>        asm volatile (NOP : : : "memory");    /* { dg-final { gdb-test pr59776.c:20 "s1.g" "6.0" } } */
>>>        s2 = s1;                              /* { dg-final { gdb-test pr59776.c:20 "s2.f" "5.0" } } */
>>>        asm volatile (NOP : : : "memory");    /* { dg-final { gdb-test pr59776.c:20 "s2.g" "6.0" } } */
>>>        asm volatile (NOP : : : "memory");
>>>      }
>>>   
>>> the zeroing through the integer type means that the s2 variable is no
>>> longer totally scalarized and is thus somewhat opaque to the debugger.
>>> I don't think this particular test is a big concern either, but such
>>> scalar type-cast accesses were exactly the reason why I initially
>>> decided against completely disabling total scalarization when I see a
>>> types-incompatible access.
>>> 
>>> In any event, although I would still slightly prefer my patch from
>>> yesterday (or at least tolerating scalar type-incompatible accesses), I
>>> am also happy with the one from today, provided I have permission to
>>> XFAIL the guality tests.
>>
>> I guess I'd play safe at this point and make fewer adjustments.  The
>> second patch looks nicer, with
>>
>>  static bool
>> -contains_vce_or_bfcref_p (const_tree ref)
>> +contains_vce_or_bfcref_p (const_tree ref, bool *type_changing_p = NULL)
>
>OK
>
>>
>> (a default for type_changing_p) there would be even fewer hunks.  Is
>> it functionally equivalent to the first variant when you change
>> the types_compatible_p back to the TYPE_MAIN_VARIANT compare?
>> That is, is this the only real difference?
>
>I probably don't understand the question because I'd say that the second
>guality failure shows there is a difference between being careful that we
>just don't lose any data in padding and disabling total scalarization
>altogether.
>
>To give a non-guality example, the following will regress with
>yesterday's variant - compared to current trunk:
>
>  struct S {unsigned a,b;};
>
>  void foo (struct S *in, struct S *out)
>  {
>    struct S s;
>    s = *in;
>    *(int *)&s.b = -1;
>    *out = s;
>  }
>
>Currently s is totally scalarized away, but if we "restrict total
>scalarization to the case where all accesses are layout compatible," it
>of course isn't and s survives all the way to optimized dump, whereas
>currently we eliminate it even in early SRA.
>
>Even the assembly output changes from:
>
>	movl	(%rdi), %eax
>	movl	%eax, (%rsi)
>	movl	$-1, 4(%rsi)
>	ret
>
>to a bit unexpected:
>
>	movabsq	$-4294967296, %rax
>	orq	(%rdi), %rax
>	movq	%rax, (%rsi)
>	ret
>
>So I guess now I am even more convinced about what I wrote yesterday,
>i.e. that we should at least allow scalar accesses with incompatible types.
>The reason why this is safe is that when a scalar access is not a leaf
>of an access tree, scalarization of that portion of the aggregate fails
>(and thus total scalarization of the entire aggregate fails which we
>notice because that flag is &= propagated up in the access tree).
>
>Therefore, for total scalarization to proceed, a scalar access in a
>weird type must either exactly match a field/element of the declared
>type of the variable or entirely fit into some padding.  And in the
>latter case it is recorded appropriately and data will be copied in/out
>carefully.
>
>Patch doing that is below (currently undergoing bootstrap/testing).
>Or we can just focus on the padding which is the only thing known to
>cause problems and go with the Wednesday version of the patch.

The patch below is OK. 

Richard. 

>Martin
>
>
>2019-03-08  Martin Jambor  <mjambor@suse.cz>
>
>	PR tree-optimization/85762
>	PR tree-optimization/87008
>	PR tree-optimization/85459
>	* tree-sra.c (contains_vce_or_bfcref_p): New parameter, set the bool
>	it points to if there is a type changing MEM_REF.  Adjust all callers.
>	(build_accesses_from_assign): Disable total scalarization if
>	contains_vce_or_bfcref_p returns true through the new parameter, for
>	both rhs and lhs.
>
>	testsuite/
>	* g++.dg/tree-ssa/pr87008.C: New test.
>	* gcc.dg/guality/pr54970.c: Xfail tests querying a[0] everywhere.
>---
> gcc/testsuite/g++.dg/tree-ssa/pr87008.C | 17 ++++++++++++
> gcc/testsuite/gcc.dg/guality/pr54970.c  |  6 ++---
> gcc/tree-sra.c                          | 36 ++++++++++++++++++-------
> 3 files changed, 47 insertions(+), 12 deletions(-)
> create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr87008.C
>
>diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr87008.C b/gcc/testsuite/g++.dg/tree-ssa/pr87008.C
>new file mode 100644
>index 00000000000..eef521f9ad5
>--- /dev/null
>+++ b/gcc/testsuite/g++.dg/tree-ssa/pr87008.C
>@@ -0,0 +1,17 @@
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -fdump-tree-optimized" } */
>+
>+extern void dontcallthis();
>+
>+struct A { long a, b; };
>+struct B : A {};
>+template<class T>void cp(T&a,T const&b){a=b;}
>+long f(B x){
>+  B y; cp<A>(y,x);
>+  B z; cp<A>(z,x);
>+  if (y.a - z.a)
>+    dontcallthis ();
>+  return 0;
>+}
>+
>+/* { dg-final { scan-tree-dump-not "dontcallthis" "optimized" } } */
>diff --git a/gcc/testsuite/gcc.dg/guality/pr54970.c b/gcc/testsuite/gcc.dg/guality/pr54970.c
>index 5d32af07c32..2e0bc5784a9 100644
>--- a/gcc/testsuite/gcc.dg/guality/pr54970.c
>+++ b/gcc/testsuite/gcc.dg/guality/pr54970.c
>@@ -8,17 +8,17 @@
> int
> main ()
> {
>-  int a[] = { 1, 2, 3 };	/* { dg-final { gdb-test .+4 "a\[0\]" "1" } } */
>+  int a[] = { 1, 2, 3 };	/* { dg-final { gdb-test .+4 "a\[0\]" "1" { xfail { *-*-* } } } } */
>   int *p = a + 2;		/* { dg-final { gdb-test .+3 "a\[1\]" "2" } } */
>   int *q = a + 1;		/* { dg-final { gdb-test .+2 "a\[2\]" "3" } } */
> 				/* { dg-final { gdb-test .+1 "*p" "3" } } */
>   asm volatile (NOP);		/* { dg-final { gdb-test . "*q" "2" } } */
>-  *p += 10;			/* { dg-final { gdb-test .+4 "a\[0\]" "1" } } */
>+  *p += 10;			/* { dg-final { gdb-test .+4 "a\[0\]" "1" { xfail { *-*-* } } } } */
> 				/* { dg-final { gdb-test .+3 "a\[1\]" "2" } } */
> 				/* { dg-final { gdb-test .+2 "a\[2\]" "13" } } */
> 				/* { dg-final { gdb-test .+1 "*p" "13" } } */
>   asm volatile (NOP);		/* { dg-final { gdb-test . "*q" "2" } } */
>-  *q += 10;			/* { dg-final { gdb-test .+4 "a\[0\]" "1" } } */
>+  *q += 10;			/* { dg-final { gdb-test .+4 "a\[0\]" "1" { xfail { *-*-* } } } } */
> 				/* { dg-final { gdb-test .+3 "a\[1\]" "12" } } */
> 				/* { dg-final { gdb-test .+2 "a\[2\]" "13" } } */
> 				/* { dg-final { gdb-test .+1 "*p" "13" } } */
>diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
>index eeef31ba496..ca3858d5fc7 100644
>--- a/gcc/tree-sra.c
>+++ b/gcc/tree-sra.c
>@@ -1150,29 +1150,36 @@ contains_view_convert_expr_p (const_tree ref)
>   return false;
> }
> 
>-/* Return true if REF contains a VIEW_CONVERT_EXPR or a MEM_REF that performs
>-   type conversion or a COMPONENT_REF with a bit-field field declaration.  */
>+/* Return true if REF contains a VIEW_CONVERT_EXPR or a COMPONENT_REF with a
>+   bit-field field declaration.  If TYPE_CHANGING_P is non-NULL, set the bool
>+   it points to will be set if REF contains any of the above or a MEM_REF
>+   expression that effectively performs type conversion.  */
> 
> static bool
>-contains_vce_or_bfcref_p (const_tree ref)
>+contains_vce_or_bfcref_p (const_tree ref, bool *type_changing_p = NULL)
> {
>   while (handled_component_p (ref))
>     {
>       if (TREE_CODE (ref) == VIEW_CONVERT_EXPR
> 	  || (TREE_CODE (ref) == COMPONENT_REF
> 	      && DECL_BIT_FIELD (TREE_OPERAND (ref, 1))))
>-	return true;
>+	{
>+	  if (type_changing_p)
>+	    *type_changing_p = true;
>+	  return true;
>+	}
>       ref = TREE_OPERAND (ref, 0);
>     }
> 
>-  if (TREE_CODE (ref) != MEM_REF
>+  if (!type_changing_p
>+      || TREE_CODE (ref) != MEM_REF
>       || TREE_CODE (TREE_OPERAND (ref, 0)) != ADDR_EXPR)
>     return false;
> 
>   tree mem = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
>   if (TYPE_MAIN_VARIANT (TREE_TYPE (ref))
>       != TYPE_MAIN_VARIANT (TREE_TYPE (mem)))
>-    return true;
>+    *type_changing_p = true;
> 
>   return false;
> }
>@@ -1368,15 +1375,26 @@ build_accesses_from_assign (gimple *stmt)
>       lacc->grp_assignment_write = 1;
>       if (storage_order_barrier_p (rhs))
> 	lacc->grp_unscalarizable_region = 1;
>+
>+      if (should_scalarize_away_bitmap && !is_gimple_reg_type (lacc->type))
>+	{
>+	  bool type_changing_p = false;
>+	  contains_vce_or_bfcref_p (lhs, &type_changing_p);
>+	  if (type_changing_p)
>+	    bitmap_set_bit (cannot_scalarize_away_bitmap,
>+			    DECL_UID (lacc->base));
>+	}
>     }
> 
>   if (racc)
>     {
>       racc->grp_assignment_read = 1;
>-      if (should_scalarize_away_bitmap && !gimple_has_volatile_ops (stmt)
>-	  && !is_gimple_reg_type (racc->type))
>+      if (should_scalarize_away_bitmap && !is_gimple_reg_type (racc->type))
> 	{
>-	  if (contains_vce_or_bfcref_p (rhs))
>+	  bool type_changing_p = false;
>+	  contains_vce_or_bfcref_p (rhs, &type_changing_p);
>+
>+	  if (type_changing_p || gimple_has_volatile_ops (stmt))
> 	    bitmap_set_bit (cannot_scalarize_away_bitmap,
> 			    DECL_UID (racc->base));
> 	  else
Martin Jambor March 10, 2019, 4:12 p.m. UTC | #5
Hi,

after we have accidentally dropped the mailing list from our discussion
(my apologies for not spotting that in time), Richi has approved the
following patch which I have bootstrapped and tested on x86_64-linux
(all languages) and on i686-linux, aarch64-linux and ppc64-linux (C, C++
and Fortran) and so I am about to commit it to trunk.

It XFAILS three guality tests which pass at -O0, which means there are
three additional XPASSes - there already are 5 pre-existing XPASSes in
that testcase and 29 outright failures.  I will come back to this in
April and see whether I can make the tests pass by decoupling the
roles now played by cannot_scalarize_away_bitmap (or at least massage
the testcase to make the XPASSes go away).  But I won't have time to do
it in the next two weeks and this patch is important enough to have it in
trunk now.  I intend to backport it to gcc 8 in April too.

Thanks,

Martin


2019-03-08  Martin Jambor  <mjambor@suse.cz>

	PR tree-optimization/85762
	PR tree-optimization/87008
	PR tree-optimization/85459
	* tree-sra.c (contains_vce_or_bfcref_p): New parameter, set the bool
	it points to if there is a type changing MEM_REF.  Adjust all callers.
	(build_accesses_from_assign): Disable total scalarization if
	contains_vce_or_bfcref_p returns true through the new parameter, for
	both rhs and lhs.

	testsuite/
	* g++.dg/tree-ssa/pr87008.C: New test.
	* gcc.dg/guality/pr54970.c: Xfail tests querying a[0] everywhere.
---
 gcc/testsuite/g++.dg/tree-ssa/pr87008.C | 17 ++++++++++++
 gcc/testsuite/gcc.dg/guality/pr54970.c  |  6 ++---
 gcc/tree-sra.c                          | 36 ++++++++++++++++++-------
 3 files changed, 47 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr87008.C

diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr87008.C b/gcc/testsuite/g++.dg/tree-ssa/pr87008.C
new file mode 100644
index 00000000000..eef521f9ad5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr87008.C
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+extern void dontcallthis();
+
+struct A { long a, b; };
+struct B : A {};
+template<class T>void cp(T&a,T const&b){a=b;}
+long f(B x){
+  B y; cp<A>(y,x);
+  B z; cp<A>(z,x);
+  if (y.a - z.a)
+    dontcallthis ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "dontcallthis" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/guality/pr54970.c b/gcc/testsuite/gcc.dg/guality/pr54970.c
index 5d32af07c32..2e0bc5784a9 100644
--- a/gcc/testsuite/gcc.dg/guality/pr54970.c
+++ b/gcc/testsuite/gcc.dg/guality/pr54970.c
@@ -8,17 +8,17 @@
 int
 main ()
 {
-  int a[] = { 1, 2, 3 };	/* { dg-final { gdb-test .+4 "a\[0\]" "1" } } */
+  int a[] = { 1, 2, 3 };	/* { dg-final { gdb-test .+4 "a\[0\]" "1" { xfail { *-*-* } } } } */
   int *p = a + 2;		/* { dg-final { gdb-test .+3 "a\[1\]" "2" } } */
   int *q = a + 1;		/* { dg-final { gdb-test .+2 "a\[2\]" "3" } } */
 				/* { dg-final { gdb-test .+1 "*p" "3" } } */
   asm volatile (NOP);		/* { dg-final { gdb-test . "*q" "2" } } */
-  *p += 10;			/* { dg-final { gdb-test .+4 "a\[0\]" "1" } } */
+  *p += 10;			/* { dg-final { gdb-test .+4 "a\[0\]" "1" { xfail { *-*-* } } } } */
 				/* { dg-final { gdb-test .+3 "a\[1\]" "2" } } */
 				/* { dg-final { gdb-test .+2 "a\[2\]" "13" } } */
 				/* { dg-final { gdb-test .+1 "*p" "13" } } */
   asm volatile (NOP);		/* { dg-final { gdb-test . "*q" "2" } } */
-  *q += 10;			/* { dg-final { gdb-test .+4 "a\[0\]" "1" } } */
+  *q += 10;			/* { dg-final { gdb-test .+4 "a\[0\]" "1" { xfail { *-*-* } } } } */
 				/* { dg-final { gdb-test .+3 "a\[1\]" "12" } } */
 				/* { dg-final { gdb-test .+2 "a\[2\]" "13" } } */
 				/* { dg-final { gdb-test .+1 "*p" "13" } } */
diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index eeef31ba496..ca3858d5fc7 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -1150,29 +1150,36 @@ contains_view_convert_expr_p (const_tree ref)
   return false;
 }
 
-/* Return true if REF contains a VIEW_CONVERT_EXPR or a MEM_REF that performs
-   type conversion or a COMPONENT_REF with a bit-field field declaration.  */
+/* Return true if REF contains a VIEW_CONVERT_EXPR or a COMPONENT_REF with a
+   bit-field field declaration.  If TYPE_CHANGING_P is non-NULL, set the bool
+   it points to will be set if REF contains any of the above or a MEM_REF
+   expression that effectively performs type conversion.  */
 
 static bool
-contains_vce_or_bfcref_p (const_tree ref)
+contains_vce_or_bfcref_p (const_tree ref, bool *type_changing_p = NULL)
 {
   while (handled_component_p (ref))
     {
       if (TREE_CODE (ref) == VIEW_CONVERT_EXPR
 	  || (TREE_CODE (ref) == COMPONENT_REF
 	      && DECL_BIT_FIELD (TREE_OPERAND (ref, 1))))
-	return true;
+	{
+	  if (type_changing_p)
+	    *type_changing_p = true;
+	  return true;
+	}
       ref = TREE_OPERAND (ref, 0);
     }
 
-  if (TREE_CODE (ref) != MEM_REF
+  if (!type_changing_p
+      || TREE_CODE (ref) != MEM_REF
       || TREE_CODE (TREE_OPERAND (ref, 0)) != ADDR_EXPR)
     return false;
 
   tree mem = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
   if (TYPE_MAIN_VARIANT (TREE_TYPE (ref))
       != TYPE_MAIN_VARIANT (TREE_TYPE (mem)))
-    return true;
+    *type_changing_p = true;
 
   return false;
 }
@@ -1368,15 +1375,26 @@ build_accesses_from_assign (gimple *stmt)
       lacc->grp_assignment_write = 1;
       if (storage_order_barrier_p (rhs))
 	lacc->grp_unscalarizable_region = 1;
+
+      if (should_scalarize_away_bitmap && !is_gimple_reg_type (lacc->type))
+	{
+	  bool type_changing_p = false;
+	  contains_vce_or_bfcref_p (lhs, &type_changing_p);
+	  if (type_changing_p)
+	    bitmap_set_bit (cannot_scalarize_away_bitmap,
+			    DECL_UID (lacc->base));
+	}
     }
 
   if (racc)
     {
       racc->grp_assignment_read = 1;
-      if (should_scalarize_away_bitmap && !gimple_has_volatile_ops (stmt)
-	  && !is_gimple_reg_type (racc->type))
+      if (should_scalarize_away_bitmap && !is_gimple_reg_type (racc->type))
 	{
-	  if (contains_vce_or_bfcref_p (rhs))
+	  bool type_changing_p = false;
+	  contains_vce_or_bfcref_p (rhs, &type_changing_p);
+
+	  if (type_changing_p || gimple_has_volatile_ops (stmt))
 	    bitmap_set_bit (cannot_scalarize_away_bitmap,
 			    DECL_UID (racc->base));
 	  else
Martin Jambor April 17, 2019, 9:28 a.m. UTC | #6
Hello,

On Sun, Mar 10 2019, Martin Jambor wrote:
> Hi,
>
> after we have accidentally dropped the mailing list from our discussion
> (my apologies for not spotting that in time), Richi has approved the
> following patch which I have bootstrapped and tested on x86_64-linux
> (all languages) and on i686-linux, aarch64-linux and ppc64-linux (C, C++
> and Fortran) and so I am about to commit it to trunk.
>
> It XFAILS three guality tests which pass at -O0, which means there are
> three additional XPASSes - there already are 5 pre-existing XPASSes in
> that testcase and 29 outright failures.  I will come back to this in
> April and see whether I can make the tests pass by decoupling the
> roles now played by cannot_scalarize_away_bitmap (or at least massage
> the testcase to make the XPASSes go away).  But I won't have time to do
> it in the next two weeks and this patch is important enough to have it in
> trunk now.  I intend to backport it to gcc 8 in April too.
>
> Thanks,
>
> Martin
>
>
> 2019-03-08  Martin Jambor  <mjambor@suse.cz>
>
> 	PR tree-optimization/85762
> 	PR tree-optimization/87008
> 	PR tree-optimization/85459
> 	* tree-sra.c (contains_vce_or_bfcref_p): New parameter, set the bool
> 	it points to if there is a type changing MEM_REF.  Adjust all callers.
> 	(build_accesses_from_assign): Disable total scalarization if
> 	contains_vce_or_bfcref_p returns true through the new parameter, for
> 	both rhs and lhs.
>
> 	testsuite/
> 	* g++.dg/tree-ssa/pr87008.C: New test.
> 	* gcc.dg/guality/pr54970.c: Xfail tests querying a[0] everywhere.

this patch has been on trunk for over a month and at least so far nobody
complained.  I have applied it to gcc-8-branch and did a bootstrap and
testing on an x86_64-linux machine and there were no problems either.

Therefore I would propose to backport it - the other option being leaving
the gcc 8 regression(s) unfixed.  What do you think?

Martin


2019-04-16  Martin Jambor  <mjambor@suse.cz>

	Backport from mainline
	2019-03-10  Martin Jambor  <mjambor@suse.cz>

        PR tree-optimization/85762
        PR tree-optimization/87008
        PR tree-optimization/85459
        * tree-sra.c (contains_vce_or_bfcref_p): New parameter, set the bool
        it points to if there is a type changing MEM_REF.  Adjust all callers.
        (build_accesses_from_assign): Disable total scalarization if
        contains_vce_or_bfcref_p returns true through the new parameter, for
        both rhs and lhs.

        testsuite/
        * g++.dg/tree-ssa/pr87008.C: New test.
        * gcc.dg/guality/pr54970.c: Xfail tests querying a[0] everywhere.
---
 gcc/testsuite/g++.dg/tree-ssa/pr87008.C | 17 ++++++++++++
 gcc/testsuite/gcc.dg/guality/pr54970.c  |  6 ++---
 gcc/tree-sra.c                          | 36 ++++++++++++++++++-------
 3 files changed, 47 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr87008.C

diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr87008.C b/gcc/testsuite/g++.dg/tree-ssa/pr87008.C
new file mode 100644
index 00000000000..eef521f9ad5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr87008.C
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+extern void dontcallthis();
+
+struct A { long a, b; };
+struct B : A {};
+template<class T>void cp(T&a,T const&b){a=b;}
+long f(B x){
+  B y; cp<A>(y,x);
+  B z; cp<A>(z,x);
+  if (y.a - z.a)
+    dontcallthis ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "dontcallthis" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/guality/pr54970.c b/gcc/testsuite/gcc.dg/guality/pr54970.c
index 1819d023e21..f12a9aac1d2 100644
--- a/gcc/testsuite/gcc.dg/guality/pr54970.c
+++ b/gcc/testsuite/gcc.dg/guality/pr54970.c
@@ -8,17 +8,17 @@
 int
 main ()
 {
-  int a[] = { 1, 2, 3 };	/* { dg-final { gdb-test 15 "a\[0\]" "1" } } */
+  int a[] = { 1, 2, 3 };	/* { dg-final { gdb-test 15 "a\[0\]" "1" { xfail { *-*-* } } } } */
   int *p = a + 2;		/* { dg-final { gdb-test 15 "a\[1\]" "2" } } */
   int *q = a + 1;		/* { dg-final { gdb-test 15 "a\[2\]" "3" } } */
 				/* { dg-final { gdb-test 15 "*p" "3" } } */
   asm volatile (NOP);		/* { dg-final { gdb-test 15 "*q" "2" } } */
-  *p += 10;			/* { dg-final { gdb-test 20 "a\[0\]" "1" } } */
+  *p += 10;			/* { dg-final { gdb-test 20 "a\[0\]" "1" { xfail { *-*-* } } } } */
 				/* { dg-final { gdb-test 20 "a\[1\]" "2" } } */
 				/* { dg-final { gdb-test 20 "a\[2\]" "13" } } */
 				/* { dg-final { gdb-test 20 "*p" "13" } } */
   asm volatile (NOP);		/* { dg-final { gdb-test 20 "*q" "2" } } */
-  *q += 10;			/* { dg-final { gdb-test 25 "a\[0\]" "1" } } */
+  *q += 10;			/* { dg-final { gdb-test 25 "a\[0\]" "1" { xfail { *-*-* } } } } */
 				/* { dg-final { gdb-test 25 "a\[1\]" "12" } } */
 				/* { dg-final { gdb-test 25 "a\[2\]" "13" } } */
 				/* { dg-final { gdb-test 25 "*p" "13" } } */
diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index bb373b33b7a..e1ebdfaa225 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -1150,29 +1150,36 @@ contains_view_convert_expr_p (const_tree ref)
   return false;
 }
 
-/* Return true if REF contains a VIEW_CONVERT_EXPR or a MEM_REF that performs
-   type conversion or a COMPONENT_REF with a bit-field field declaration.  */
+/* Return true if REF contains a VIEW_CONVERT_EXPR or a COMPONENT_REF with a
+   bit-field field declaration.  If TYPE_CHANGING_P is non-NULL, set the bool
+   it points to will be set if REF contains any of the above or a MEM_REF
+   expression that effectively performs type conversion.  */
 
 static bool
-contains_vce_or_bfcref_p (const_tree ref)
+contains_vce_or_bfcref_p (const_tree ref, bool *type_changing_p = NULL)
 {
   while (handled_component_p (ref))
     {
       if (TREE_CODE (ref) == VIEW_CONVERT_EXPR
 	  || (TREE_CODE (ref) == COMPONENT_REF
 	      && DECL_BIT_FIELD (TREE_OPERAND (ref, 1))))
-	return true;
+	{
+	  if (type_changing_p)
+	    *type_changing_p = true;
+	  return true;
+	}
       ref = TREE_OPERAND (ref, 0);
     }
 
-  if (TREE_CODE (ref) != MEM_REF
+  if (!type_changing_p
+      || TREE_CODE (ref) != MEM_REF
       || TREE_CODE (TREE_OPERAND (ref, 0)) != ADDR_EXPR)
     return false;
 
   tree mem = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
   if (TYPE_MAIN_VARIANT (TREE_TYPE (ref))
       != TYPE_MAIN_VARIANT (TREE_TYPE (mem)))
-    return true;
+    *type_changing_p = true;
 
   return false;
 }
@@ -1368,15 +1375,26 @@ build_accesses_from_assign (gimple *stmt)
       lacc->grp_assignment_write = 1;
       if (storage_order_barrier_p (rhs))
 	lacc->grp_unscalarizable_region = 1;
+
+      if (should_scalarize_away_bitmap && !is_gimple_reg_type (lacc->type))
+	{
+	  bool type_changing_p = false;
+	  contains_vce_or_bfcref_p (lhs, &type_changing_p);
+	  if (type_changing_p)
+	    bitmap_set_bit (cannot_scalarize_away_bitmap,
+			    DECL_UID (lacc->base));
+	}
     }
 
   if (racc)
     {
       racc->grp_assignment_read = 1;
-      if (should_scalarize_away_bitmap && !gimple_has_volatile_ops (stmt)
-	  && !is_gimple_reg_type (racc->type))
+      if (should_scalarize_away_bitmap && !is_gimple_reg_type (racc->type))
 	{
-	  if (contains_vce_or_bfcref_p (rhs))
+	  bool type_changing_p = false;
+	  contains_vce_or_bfcref_p (rhs, &type_changing_p);
+
+	  if (type_changing_p || gimple_has_volatile_ops (stmt))
 	    bitmap_set_bit (cannot_scalarize_away_bitmap,
 			    DECL_UID (racc->base));
 	  else
Richard Biener April 17, 2019, 9:42 a.m. UTC | #7
On Wed, 17 Apr 2019, Martin Jambor wrote:

> Hello,
> 
> On Sun, Mar 10 2019, Martin Jambor wrote:
> > Hi,
> >
> > after we have accidentally dropped the mailing list from our discussion
> > (my apologies for not spotting that in time), Richi has approved the
> > following patch which I have bootstrapped and tested on x86_64-linux
> > (all languages) and on i686-linux, aarch64-linux and ppc64-linux (C, C++
> > and Fortran) and so I am about to commit it to trunk.
> >
> > It XFAILS three guality tests which pass at -O0, which means there are
> > three additional XPASSes - there already are 5 pre-existing XPASSes in
> > that testcase and 29 outright failures.  I will come back to this in
> > April and see whether I can make the tests pass by decoupling the
> > roles now played by cannot_scalarize_away_bitmap (or at least massage
> > the testcase to make the XPASSes go away).  But I won't have time to do
> > it in the next two weeks and this patch is important enough to have it in
> > trunk now.  I intend to backport it to gcc 8 in April too.
> >
> > Thanks,
> >
> > Martin
> >
> >
> > 2019-03-08  Martin Jambor  <mjambor@suse.cz>
> >
> > 	PR tree-optimization/85762
> > 	PR tree-optimization/87008
> > 	PR tree-optimization/85459
> > 	* tree-sra.c (contains_vce_or_bfcref_p): New parameter, set the bool
> > 	it points to if there is a type changing MEM_REF.  Adjust all callers.
> > 	(build_accesses_from_assign): Disable total scalarization if
> > 	contains_vce_or_bfcref_p returns true through the new parameter, for
> > 	both rhs and lhs.
> >
> > 	testsuite/
> > 	* g++.dg/tree-ssa/pr87008.C: New test.
> > 	* gcc.dg/guality/pr54970.c: Xfail tests querying a[0] everywhere.
> 
> this patch has been on trunk for over a month and at least so far nobody
> complained.  I have applied it to gcc-8-branch and did a bootstrap and
> testing on an x86_64-linux machine and there were no problems either.
> 
> Therefore I would propose to backport it - the other option being leaving
> the gcc 8 regression(s) unfixed.  What do you think?

Let's go for the backport.

Richard.

> Martin
> 
> 
> 2019-04-16  Martin Jambor  <mjambor@suse.cz>
> 
> 	Backport from mainline
> 	2019-03-10  Martin Jambor  <mjambor@suse.cz>
> 
>         PR tree-optimization/85762
>         PR tree-optimization/87008
>         PR tree-optimization/85459
>         * tree-sra.c (contains_vce_or_bfcref_p): New parameter, set the bool
>         it points to if there is a type changing MEM_REF.  Adjust all callers.
>         (build_accesses_from_assign): Disable total scalarization if
>         contains_vce_or_bfcref_p returns true through the new parameter, for
>         both rhs and lhs.
> 
>         testsuite/
>         * g++.dg/tree-ssa/pr87008.C: New test.
>         * gcc.dg/guality/pr54970.c: Xfail tests querying a[0] everywhere.
> ---
>  gcc/testsuite/g++.dg/tree-ssa/pr87008.C | 17 ++++++++++++
>  gcc/testsuite/gcc.dg/guality/pr54970.c  |  6 ++---
>  gcc/tree-sra.c                          | 36 ++++++++++++++++++-------
>  3 files changed, 47 insertions(+), 12 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr87008.C
> 
> diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr87008.C b/gcc/testsuite/g++.dg/tree-ssa/pr87008.C
> new file mode 100644
> index 00000000000..eef521f9ad5
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/tree-ssa/pr87008.C
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +extern void dontcallthis();
> +
> +struct A { long a, b; };
> +struct B : A {};
> +template<class T>void cp(T&a,T const&b){a=b;}
> +long f(B x){
> +  B y; cp<A>(y,x);
> +  B z; cp<A>(z,x);
> +  if (y.a - z.a)
> +    dontcallthis ();
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "dontcallthis" "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/guality/pr54970.c b/gcc/testsuite/gcc.dg/guality/pr54970.c
> index 1819d023e21..f12a9aac1d2 100644
> --- a/gcc/testsuite/gcc.dg/guality/pr54970.c
> +++ b/gcc/testsuite/gcc.dg/guality/pr54970.c
> @@ -8,17 +8,17 @@
>  int
>  main ()
>  {
> -  int a[] = { 1, 2, 3 };	/* { dg-final { gdb-test 15 "a\[0\]" "1" } } */
> +  int a[] = { 1, 2, 3 };	/* { dg-final { gdb-test 15 "a\[0\]" "1" { xfail { *-*-* } } } } */
>    int *p = a + 2;		/* { dg-final { gdb-test 15 "a\[1\]" "2" } } */
>    int *q = a + 1;		/* { dg-final { gdb-test 15 "a\[2\]" "3" } } */
>  				/* { dg-final { gdb-test 15 "*p" "3" } } */
>    asm volatile (NOP);		/* { dg-final { gdb-test 15 "*q" "2" } } */
> -  *p += 10;			/* { dg-final { gdb-test 20 "a\[0\]" "1" } } */
> +  *p += 10;			/* { dg-final { gdb-test 20 "a\[0\]" "1" { xfail { *-*-* } } } } */
>  				/* { dg-final { gdb-test 20 "a\[1\]" "2" } } */
>  				/* { dg-final { gdb-test 20 "a\[2\]" "13" } } */
>  				/* { dg-final { gdb-test 20 "*p" "13" } } */
>    asm volatile (NOP);		/* { dg-final { gdb-test 20 "*q" "2" } } */
> -  *q += 10;			/* { dg-final { gdb-test 25 "a\[0\]" "1" } } */
> +  *q += 10;			/* { dg-final { gdb-test 25 "a\[0\]" "1" { xfail { *-*-* } } } } */
>  				/* { dg-final { gdb-test 25 "a\[1\]" "12" } } */
>  				/* { dg-final { gdb-test 25 "a\[2\]" "13" } } */
>  				/* { dg-final { gdb-test 25 "*p" "13" } } */
> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
> index bb373b33b7a..e1ebdfaa225 100644
> --- a/gcc/tree-sra.c
> +++ b/gcc/tree-sra.c
> @@ -1150,29 +1150,36 @@ contains_view_convert_expr_p (const_tree ref)
>    return false;
>  }
>  
> -/* Return true if REF contains a VIEW_CONVERT_EXPR or a MEM_REF that performs
> -   type conversion or a COMPONENT_REF with a bit-field field declaration.  */
> +/* Return true if REF contains a VIEW_CONVERT_EXPR or a COMPONENT_REF with a
> +   bit-field field declaration.  If TYPE_CHANGING_P is non-NULL, set the bool
> +   it points to will be set if REF contains any of the above or a MEM_REF
> +   expression that effectively performs type conversion.  */
>  
>  static bool
> -contains_vce_or_bfcref_p (const_tree ref)
> +contains_vce_or_bfcref_p (const_tree ref, bool *type_changing_p = NULL)
>  {
>    while (handled_component_p (ref))
>      {
>        if (TREE_CODE (ref) == VIEW_CONVERT_EXPR
>  	  || (TREE_CODE (ref) == COMPONENT_REF
>  	      && DECL_BIT_FIELD (TREE_OPERAND (ref, 1))))
> -	return true;
> +	{
> +	  if (type_changing_p)
> +	    *type_changing_p = true;
> +	  return true;
> +	}
>        ref = TREE_OPERAND (ref, 0);
>      }
>  
> -  if (TREE_CODE (ref) != MEM_REF
> +  if (!type_changing_p
> +      || TREE_CODE (ref) != MEM_REF
>        || TREE_CODE (TREE_OPERAND (ref, 0)) != ADDR_EXPR)
>      return false;
>  
>    tree mem = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
>    if (TYPE_MAIN_VARIANT (TREE_TYPE (ref))
>        != TYPE_MAIN_VARIANT (TREE_TYPE (mem)))
> -    return true;
> +    *type_changing_p = true;
>  
>    return false;
>  }
> @@ -1368,15 +1375,26 @@ build_accesses_from_assign (gimple *stmt)
>        lacc->grp_assignment_write = 1;
>        if (storage_order_barrier_p (rhs))
>  	lacc->grp_unscalarizable_region = 1;
> +
> +      if (should_scalarize_away_bitmap && !is_gimple_reg_type (lacc->type))
> +	{
> +	  bool type_changing_p = false;
> +	  contains_vce_or_bfcref_p (lhs, &type_changing_p);
> +	  if (type_changing_p)
> +	    bitmap_set_bit (cannot_scalarize_away_bitmap,
> +			    DECL_UID (lacc->base));
> +	}
>      }
>  
>    if (racc)
>      {
>        racc->grp_assignment_read = 1;
> -      if (should_scalarize_away_bitmap && !gimple_has_volatile_ops (stmt)
> -	  && !is_gimple_reg_type (racc->type))
> +      if (should_scalarize_away_bitmap && !is_gimple_reg_type (racc->type))
>  	{
> -	  if (contains_vce_or_bfcref_p (rhs))
> +	  bool type_changing_p = false;
> +	  contains_vce_or_bfcref_p (rhs, &type_changing_p);
> +
> +	  if (type_changing_p || gimple_has_volatile_ops (stmt))
>  	    bitmap_set_bit (cannot_scalarize_away_bitmap,
>  			    DECL_UID (racc->base));
>  	  else
>

Patch

diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr87008.C b/gcc/testsuite/g++.dg/tree-ssa/pr87008.C
new file mode 100644
index 00000000000..eef521f9ad5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr87008.C
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+extern void dontcallthis();
+
+struct A { long a, b; };
+struct B : A {};
+template<class T>void cp(T&a,T const&b){a=b;}
+long f(B x){
+  B y; cp<A>(y,x);
+  B z; cp<A>(z,x);
+  if (y.a - z.a)
+    dontcallthis ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "dontcallthis" "optimized" } } */
diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index eeef31ba496..bd30af5c6e0 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -1151,7 +1151,7 @@  contains_view_convert_expr_p (const_tree ref)
 }
 
 /* Return true if REF contains a VIEW_CONVERT_EXPR or a MEM_REF that performs
-   type conversion or a COMPONENT_REF with a bit-field field declaration.  */
+   memcpy or a COMPONENT_REF with a bit-field field declaration.  */
 
 static bool
 contains_vce_or_bfcref_p (const_tree ref)
@@ -1165,14 +1165,9 @@  contains_vce_or_bfcref_p (const_tree ref)
       ref = TREE_OPERAND (ref, 0);
     }
 
-  if (TREE_CODE (ref) != MEM_REF
-      || TREE_CODE (TREE_OPERAND (ref, 0)) != ADDR_EXPR)
-    return false;
-
-  tree mem = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
-  if (TYPE_MAIN_VARIANT (TREE_TYPE (ref))
-      != TYPE_MAIN_VARIANT (TREE_TYPE (mem)))
-    return true;
+  if (TREE_CODE (ref) == MEM_REF
+      && TYPE_REF_CAN_ALIAS_ALL (TREE_TYPE (TREE_OPERAND (ref, 1))))
+      return true;
 
   return false;
 }