diff mbox

[1/4] Make SRA scalarize constant-pool loads

Message ID 1450958010-12882-1-git-send-email-alan.lawrence@arm.com
State New
Headers show

Commit Message

Alan Lawrence Dec. 24, 2015, 11:53 a.m. UTC
Here's a new version that fixes the gcc.dg/guality/pr54970.c failures seen on
aarch64 and powerpc64. Prior to SRA handling constant pool decls,
-fdump-tree-esra-details (at -O1 -g) had shown:
  <bb 2>:
  a = *.LC0;
  # DEBUG a$0 => MEM[(int[3] *)&*.LC0]
  a$4_3 = MEM[(int[3] *)&*.LC0 + 4B];
  # DEBUG a$4 => a$4_3
  a$8_4 = MEM[(int[3] *)&*.LC0 + 8B];

The previous patch changed this to:
  <bb 2>:
  SR.5_3 = *.LC0[0];
  SR.6_4 = *.LC0[1];
  SR.7_19 = *.LC0[2];
  SR.8_21 = *.LC1[0];
  SR.9_22 = *.LC1[1];
  SR.10_23 = *.LC1[2];
  # DEBUG a$0 => NULL   // Note here
  a$4_24 = SR.6_4;
  # DEBUG a$4 => a$4_24
  a$8_25 = SR.7_19;

Turns out the DEBUG a$0 => NULL was produced in load_assign_lhs_subreplacements:

	  if (lacc && lacc->grp_to_be_debug_replaced)
	    {
	      gdebug *ds;
	      tree drhs;
	      struct access *racc = find_access_in_subtree (sad->top_racc,
							    offset,
							    lacc->size);

	      if (racc && racc->grp_to_be_replaced)
		{
		  if (racc->grp_write)
		    drhs = get_access_replacement (racc);
		  else
		    drhs = NULL;  // <=== HERE
		}
...
	      ds = gimple_build_debug_bind (get_access_replacement (lacc),
					    drhs, gsi_stmt (sad->old_gsi));

Prior to the patch, we'd skipped around load_assign_lhs_subreplacements, because
access_has_children_p(racc) (for racc = *.LC0) didn't hold in sra_modify_assign.

I also added a constant_decl_p function, combining the two checks, plus some
testcase fixes.

This also fixes a bunch of other guality tests on AArch64 that were failing
prior to the patch series, and another bunch on PowerPC64 (bigendian -m32), listed below.

Bootstrapped + check-gcc,g++ on x86_64, ARM, AArch64,
  also on powerpc64{,le}-none-linux-gnu *in combination with the other patches
  in the series* (I haven't tested the individual patches on PPC),
  plus Ada on ARM and x86_64.

gcc/ChangeLog:

	PR target/63679
	* tree-sra.c (disqualified_constants, constant_decl_p): New.
	(sra_initialize): Allocate disqualified_constants.
	(sra_deinitialize): Free disqualified_constants.
	(disqualify_candidate): Update disqualified_constants when appropriate.
	(create_access): Scan for constant-pool entries as we go along.
	(scalarizable_type_p): Add check against type_contains_placeholder_p.
	(maybe_add_sra_candidate): Allow constant-pool entries.
	(load_assign_lhs_subreplacements): Bind debug for constant pool vars.
	(initialize_constant_pool_replacements): New.
	(sra_modify_assign): Avoid mangling assignments created by previous,
	and don't generate writes into constant pool.
	(sra_modify_function_body): Call initialize_constant_pool_replacements.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/sra-17.c: New.
	* gcc.dg/tree-ssa/sra-18.c: New.

Tests fixed on aarch64-none-linux-gnu:
gcc.dg/guality/pr54970.c   -O1  line 15 *p == 3
gcc.dg/guality/pr54970.c   -O1  line 15 *q == 2
gcc.dg/guality/pr54970.c   -O1  line 20 *p == 13
gcc.dg/guality/pr54970.c   -O1  line 20 *q == 2
gcc.dg/guality/pr54970.c   -O1  line 25 *p == 13
gcc.dg/guality/pr54970.c   -O1  line 25 *q == 12
gcc.dg/guality/pr54970.c   -O1  line 31 *p == 6
gcc.dg/guality/pr54970.c   -O1  line 31 *q == 5
gcc.dg/guality/pr54970.c   -O1  line 36 *p == 26
gcc.dg/guality/pr54970.c   -O1  line 36 *q == 5
gcc.dg/guality/pr54970.c   -O1  line 45 *p == 26
gcc.dg/guality/pr54970.c   -O1  line 45 *q == 25
gcc.dg/guality/pr54970.c   -O1  line 45 p[-1] == 25
gcc.dg/guality/pr54970.c   -O1  line 45 p[-2] == 4
gcc.dg/guality/pr54970.c   -O1  line 45 q[-1] == 4
gcc.dg/guality/pr54970.c   -O1  line 45 q[1] == 26
gcc.dg/guality/pr56154-1.c   -O1  line pr56154-1.c:20 x.a == 6
gcc.dg/guality/pr59776.c   -O1  line pr59776.c:17 s1.f == 5.0
gcc.dg/guality/pr59776.c   -O1  line pr59776.c:17 s1.g == 6.0
gcc.dg/guality/pr59776.c   -O1  line pr59776.c:17 s2.f == 0.0
gcc.dg/guality/pr59776.c   -O1  line pr59776.c:17 s2.g == 6.0
gcc.dg/guality/pr59776.c   -O1  line pr59776.c:20 s1.f == 5.0
gcc.dg/guality/pr59776.c   -O1  line pr59776.c:20 s1.g == 6.0
gcc.dg/guality/pr59776.c   -O1  line pr59776.c:20 s2.f == 5.0
gcc.dg/guality/pr59776.c   -O1  line pr59776.c:20 s2.g == 6.0
gcc.dg/guality/sra-1.c   -O1  line 21 a.j == 14
gcc.dg/guality/sra-1.c   -O1  line 32 a[1] == 14
gcc.dg/guality/sra-1.c   -O1  line 43 a.i == 4
gcc.dg/guality/sra-1.c   -O1  line 43 a.j == 14
(and other optimization levels)
On ppc64 bigendian, with the rest of the series (but I expect it's this patch):
unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 15 a[0] == 1
unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 20 a[0] == 1
unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 25 a[0] == 1
unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 31 a[0] == 4
unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 36 a[0] == 4
unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 45 a[0] == 4
unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 45 p[-2] == 4
unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 45 q[-1] == 4
(again, and other optimization levels).

---
 gcc/testsuite/gcc.dg/tree-ssa/sra-17.c |  19 ++++++
 gcc/testsuite/gcc.dg/tree-ssa/sra-18.c |  28 +++++++++
 gcc/tree-sra.c                         | 104 +++++++++++++++++++++++++++++++--
 3 files changed, 147 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sra-17.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sra-18.c

Comments

Alan Lawrence Jan. 15, 2016, 10:27 a.m. UTC | #1
On 24/12/15 11:53, Alan Lawrence wrote:
> Here's a new version that fixes the gcc.dg/guality/pr54970.c failures seen on
> aarch64 and powerpc64. Prior to SRA handling constant pool decls,
> -fdump-tree-esra-details (at -O1 -g) had shown:
>    <bb 2>:
>    a = *.LC0;
>    # DEBUG a$0 => MEM[(int[3] *)&*.LC0]
>    a$4_3 = MEM[(int[3] *)&*.LC0 + 4B];
>    # DEBUG a$4 => a$4_3
>    a$8_4 = MEM[(int[3] *)&*.LC0 + 8B];
>
> The previous patch changed this to:
>    <bb 2>:
>    SR.5_3 = *.LC0[0];
>    SR.6_4 = *.LC0[1];
>    SR.7_19 = *.LC0[2];
>    SR.8_21 = *.LC1[0];
>    SR.9_22 = *.LC1[1];
>    SR.10_23 = *.LC1[2];
>    # DEBUG a$0 => NULL   // Note here
>    a$4_24 = SR.6_4;
>    # DEBUG a$4 => a$4_24
>    a$8_25 = SR.7_19;
>
> Turns out the DEBUG a$0 => NULL was produced in load_assign_lhs_subreplacements:
>
> 	  if (lacc && lacc->grp_to_be_debug_replaced)
> 	    {
> 	      gdebug *ds;
> 	      tree drhs;
> 	      struct access *racc = find_access_in_subtree (sad->top_racc,
> 							    offset,
> 							    lacc->size);
>
> 	      if (racc && racc->grp_to_be_replaced)
> 		{
> 		  if (racc->grp_write)
> 		    drhs = get_access_replacement (racc);
> 		  else
> 		    drhs = NULL;  // <=== HERE
> 		}
> ...
> 	      ds = gimple_build_debug_bind (get_access_replacement (lacc),
> 					    drhs, gsi_stmt (sad->old_gsi));
>
> Prior to the patch, we'd skipped around load_assign_lhs_subreplacements, because
> access_has_children_p(racc) (for racc = *.LC0) didn't hold in sra_modify_assign.
>
> I also added a constant_decl_p function, combining the two checks, plus some
> testcase fixes.
>
> Bootstrapped + check-gcc,g++ on x86_64, ARM, AArch64,
>    also on powerpc64{,le}-none-linux-gnu *in combination with the other patches
>    in the series* (I haven't tested the individual patches on PPC),
>    plus Ada on ARM and x86_64.
>
> gcc/ChangeLog:
>
> 	PR target/63679
> 	* tree-sra.c (disqualified_constants, constant_decl_p): New.
> 	(sra_initialize): Allocate disqualified_constants.
> 	(sra_deinitialize): Free disqualified_constants.
> 	(disqualify_candidate): Update disqualified_constants when appropriate.
> 	(create_access): Scan for constant-pool entries as we go along.
> 	(scalarizable_type_p): Add check against type_contains_placeholder_p.
> 	(maybe_add_sra_candidate): Allow constant-pool entries.
> 	(load_assign_lhs_subreplacements): Bind debug for constant pool vars.
> 	(initialize_constant_pool_replacements): New.
> 	(sra_modify_assign): Avoid mangling assignments created by previous,
> 	and don't generate writes into constant pool.
> 	(sra_modify_function_body): Call initialize_constant_pool_replacements.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.dg/tree-ssa/sra-17.c: New.
> 	* gcc.dg/tree-ssa/sra-18.c: New.

Ping.

(The next bit is false, unless you force SRA to happen more widely, but all the 
above stands)

> This also fixes a bunch of other guality tests on AArch64 that were failing
> prior to the patch series, and another bunch on PowerPC64 (bigendian -m32), listed below.
 >
> Tests fixed on aarch64-none-linux-gnu:
> gcc.dg/guality/pr54970.c   -O1  line 15 *p == 3
> gcc.dg/guality/pr54970.c   -O1  line 15 *q == 2
> gcc.dg/guality/pr54970.c   -O1  line 20 *p == 13
> gcc.dg/guality/pr54970.c   -O1  line 20 *q == 2
> gcc.dg/guality/pr54970.c   -O1  line 25 *p == 13
> gcc.dg/guality/pr54970.c   -O1  line 25 *q == 12
> gcc.dg/guality/pr54970.c   -O1  line 31 *p == 6
> gcc.dg/guality/pr54970.c   -O1  line 31 *q == 5
> gcc.dg/guality/pr54970.c   -O1  line 36 *p == 26
> gcc.dg/guality/pr54970.c   -O1  line 36 *q == 5
> gcc.dg/guality/pr54970.c   -O1  line 45 *p == 26
> gcc.dg/guality/pr54970.c   -O1  line 45 *q == 25
> gcc.dg/guality/pr54970.c   -O1  line 45 p[-1] == 25
> gcc.dg/guality/pr54970.c   -O1  line 45 p[-2] == 4
> gcc.dg/guality/pr54970.c   -O1  line 45 q[-1] == 4
> gcc.dg/guality/pr54970.c   -O1  line 45 q[1] == 26
> gcc.dg/guality/pr56154-1.c   -O1  line pr56154-1.c:20 x.a == 6
> gcc.dg/guality/pr59776.c   -O1  line pr59776.c:17 s1.f == 5.0
> gcc.dg/guality/pr59776.c   -O1  line pr59776.c:17 s1.g == 6.0
> gcc.dg/guality/pr59776.c   -O1  line pr59776.c:17 s2.f == 0.0
> gcc.dg/guality/pr59776.c   -O1  line pr59776.c:17 s2.g == 6.0
> gcc.dg/guality/pr59776.c   -O1  line pr59776.c:20 s1.f == 5.0
> gcc.dg/guality/pr59776.c   -O1  line pr59776.c:20 s1.g == 6.0
> gcc.dg/guality/pr59776.c   -O1  line pr59776.c:20 s2.f == 5.0
> gcc.dg/guality/pr59776.c   -O1  line pr59776.c:20 s2.g == 6.0
> gcc.dg/guality/sra-1.c   -O1  line 21 a.j == 14
> gcc.dg/guality/sra-1.c   -O1  line 32 a[1] == 14
> gcc.dg/guality/sra-1.c   -O1  line 43 a.i == 4
> gcc.dg/guality/sra-1.c   -O1  line 43 a.j == 14
> (and other optimization levels)
> On ppc64 bigendian, with the rest of the series (but I expect it's this patch):
> unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 15 a[0] == 1
> unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 20 a[0] == 1
> unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 25 a[0] == 1
> unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 31 a[0] == 4
> unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 36 a[0] == 4
> unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 45 a[0] == 4
> unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 45 p[-2] == 4
> unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 45 q[-1] == 4
> (again, and other optimization levels).

Cheers, Alan

> ---
>   gcc/testsuite/gcc.dg/tree-ssa/sra-17.c |  19 ++++++
>   gcc/testsuite/gcc.dg/tree-ssa/sra-18.c |  28 +++++++++
>   gcc/tree-sra.c                         | 104 +++++++++++++++++++++++++++++++--
>   3 files changed, 147 insertions(+), 4 deletions(-)
>   create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sra-17.c
>   create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sra-18.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-17.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-17.c
> new file mode 100644
> index 0000000..25667f4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-17.c
> @@ -0,0 +1,19 @@
> +/* { dg-do run { target { aarch64*-*-* alpha*-*-* arm*-*-* hppa*-*-* powerpc*-*-* s390*-*-* } } } */
> +/* { dg-options "-O2 -fdump-tree-esra --param sra-max-scalarization-size-Ospeed=32" } */
> +
> +extern void abort (void);
> +
> +int
> +main (int argc, char **argv)
> +{
> +  long a[4] = { 7, 19, 11, 255 };
> +  int tot = 0;
> +  for (int i = 0; i < 4; i++)
> +    tot = (tot*256) + a[i];
> +  if (tot == 0x07130bff)
> +    return 0;
> +  abort ();
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Removing load: a = \\\*.LC0;" 1 "esra" } } */
> +/* { dg-final { scan-tree-dump-times "SR.\[0-9_\]+ = \\\*.LC0\\\[" 4 "esra" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-18.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-18.c
> new file mode 100644
> index 0000000..609fb11
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-18.c
> @@ -0,0 +1,28 @@
> +/* { dg-do run { target { aarch64*-*-* alpha*-*-* arm*-*-* hppa*-*-* powerpc*-*-* s390*-*-* } } } */
> +/* { dg-options "-O2 -fdump-tree-esra --param sra-max-scalarization-size-Ospeed=32" } */
> +
> +extern void abort (void);
> +struct foo { long x; };
> +
> +struct bar { struct foo f[2]; };
> +
> +struct baz { struct bar b[2]; };
> +
> +int
> +main (int argc, char **argv)
> +{
> +  struct baz a = { { { { { 4 }, { 5 } } }, { { { 6 }, { 7 } } }  } };
> +  int tot = 0;
> +  for (int i = 0; i < 2; i++)
> +    for (int j = 0; j < 2; j++)
> +      tot = (tot*256) + a.b[i].f[j].x;
> +  if (tot == 0x04050607)
> +    return 0;
> +  abort ();
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Removing load: a = \\\*.LC0;" 1 "esra" } } */
> +/* { dg-final { scan-tree-dump-times "SR.\[0-9_\]+ = \\\*.LC0\\.b\\\[0\\\]\\.f\\\[0\\\]\\.x" 1 "esra" } } */
> +/* { dg-final { scan-tree-dump-times "SR.\[0-9_\]+ = \\\*.LC0\\.b\\\[0\\\]\\.f\\\[1\\\]\\.x" 1 "esra" } } */
> +/* { dg-final { scan-tree-dump-times "SR.\[0-9_\]+ = \\\*.LC0\\.b\\\[1\\\]\\.f\\\[0\\\]\\.x" 1 "esra" } } */
> +/* { dg-final { scan-tree-dump-times "SR.\[0-9_\]+ = \\\*.LC0\\.b\\\[1\\\]\\.f\\\[1\\\]\\.x" 1 "esra" } } */
> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
> index c4fea5b..77a2e49 100644
> --- a/gcc/tree-sra.c
> +++ b/gcc/tree-sra.c
> @@ -328,6 +328,10 @@ candidate (unsigned uid)
>      those which cannot be (because they are and need be used as a whole).  */
>   static bitmap should_scalarize_away_bitmap, cannot_scalarize_away_bitmap;
>
> +/* Bitmap of candidates in the constant pool, which cannot be scalarized
> +   because this would produce non-constant expressions (e.g. Ada).  */
> +static bitmap disqualified_constants;
> +
>   /* Obstack for creation of fancy names.  */
>   static struct obstack name_obstack;
>
> @@ -652,6 +656,7 @@ sra_initialize (void)
>       (vec_safe_length (cfun->local_decls) / 2);
>     should_scalarize_away_bitmap = BITMAP_ALLOC (NULL);
>     cannot_scalarize_away_bitmap = BITMAP_ALLOC (NULL);
> +  disqualified_constants = BITMAP_ALLOC (NULL);
>     gcc_obstack_init (&name_obstack);
>     base_access_vec = new hash_map<tree, auto_vec<access_p> >;
>     memset (&sra_stats, 0, sizeof (sra_stats));
> @@ -670,6 +675,7 @@ sra_deinitialize (void)
>     candidates = NULL;
>     BITMAP_FREE (should_scalarize_away_bitmap);
>     BITMAP_FREE (cannot_scalarize_away_bitmap);
> +  BITMAP_FREE (disqualified_constants);
>     access_pool.release ();
>     assign_link_pool.release ();
>     obstack_free (&name_obstack, NULL);
> @@ -677,6 +683,13 @@ sra_deinitialize (void)
>     delete base_access_vec;
>   }
>
> +/* Return true if DECL is a VAR_DECL in the constant pool, false otherwise.  */
> +
> +static bool constant_decl_p (tree decl)
> +{
> +  return TREE_CODE (decl) == VAR_DECL && DECL_IN_CONSTANT_POOL (decl);
> +}
> +
>   /* Remove DECL from candidates for SRA and write REASON to the dump file if
>      there is one.  */
>   static void
> @@ -684,6 +697,8 @@ disqualify_candidate (tree decl, const char *reason)
>   {
>     if (bitmap_clear_bit (candidate_bitmap, DECL_UID (decl)))
>       candidates->remove_elt_with_hash (decl, DECL_UID (decl));
> +  if (constant_decl_p (decl))
> +    bitmap_set_bit (disqualified_constants, DECL_UID (decl));
>
>     if (dump_file && (dump_flags & TDF_DETAILS))
>       {
> @@ -835,8 +850,11 @@ create_access_1 (tree base, HOST_WIDE_INT offset, HOST_WIDE_INT size)
>     return access;
>   }
>
> +static bool maybe_add_sra_candidate (tree);
> +
>   /* Create and insert access for EXPR. Return created access, or NULL if it is
> -   not possible.  */
> +   not possible.  Also scan for uses of constant pool as we go along and add
> +   to candidates.  */
>
>   static struct access *
>   create_access (tree expr, gimple *stmt, bool write)
> @@ -859,6 +877,25 @@ create_access (tree expr, gimple *stmt, bool write)
>     else
>       ptr = false;
>
> +  /* For constant-pool entries, check we can substitute the constant value.  */
> +  if (constant_decl_p (base)
> +      && (sra_mode == SRA_MODE_EARLY_INTRA || sra_mode == SRA_MODE_INTRA))
> +    {
> +      gcc_assert (!bitmap_bit_p (disqualified_constants, DECL_UID (base)));
> +      if (expr != base
> +	  && !is_gimple_reg_type (TREE_TYPE (expr))
> +	  && dump_file && (dump_flags & TDF_DETAILS))
> +	{
> +	  /* This occurs in Ada with accesses to ARRAY_RANGE_REFs,
> +	     and elements of multidimensional arrays (which are
> +	     multi-element arrays in their own right).  */
> +	  fprintf (dump_file, "Allowing non-reg-type load of part"
> +			      " of constant-pool entry: ");
> +	  print_generic_expr (dump_file, expr, 0);
> +	}
> +      maybe_add_sra_candidate (base);
> +    }
> +
>     if (!DECL_P (base) || !bitmap_bit_p (candidate_bitmap, DECL_UID (base)))
>       return NULL;
>
> @@ -918,6 +955,8 @@ static bool
>   scalarizable_type_p (tree type)
>   {
>     gcc_assert (!is_gimple_reg_type (type));
> +  if (type_contains_placeholder_p (type))
> +    return false;
>
>     switch (TREE_CODE (type))
>     {
> @@ -1852,7 +1891,10 @@ maybe_add_sra_candidate (tree var)
>         reject (var, "not aggregate");
>         return false;
>       }
> -  if (needs_to_live_in_memory (var))
> +  /* Allow constant-pool entries (that "need to live in memory")
> +     unless we are doing IPA SRA.  */
> +  if (needs_to_live_in_memory (var)
> +      && (sra_mode == SRA_MODE_EARLY_IPA || !constant_decl_p (var)))
>       {
>         reject (var, "needs to live in memory");
>         return false;
> @@ -3113,7 +3155,7 @@ load_assign_lhs_subreplacements (struct access *lacc,
>
>   	      if (racc && racc->grp_to_be_replaced)
>   		{
> -		  if (racc->grp_write)
> +		  if (racc->grp_write || constant_decl_p (racc->base))
>   		    drhs = get_access_replacement (racc);
>   		  else
>   		    drhs = NULL;
> @@ -3272,6 +3314,9 @@ sra_modify_assign (gimple *stmt, gimple_stmt_iterator *gsi)
>     racc = get_access_for_expr (rhs);
>     if (!lacc && !racc)
>       return SRA_AM_NONE;
> +  /* Avoid modifying initializations of constant-pool replacements.  */
> +  if (racc && (racc->replacement_decl == lhs))
> +    return SRA_AM_NONE;
>
>     loc = gimple_location (stmt);
>     if (lacc && lacc->grp_to_be_replaced)
> @@ -3388,7 +3433,8 @@ sra_modify_assign (gimple *stmt, gimple_stmt_iterator *gsi)
>         || contains_vce_or_bfcref_p (lhs)
>         || stmt_ends_bb_p (stmt))
>       {
> -      if (access_has_children_p (racc))
> +      /* No need to copy into a constant-pool, it comes pre-initialized.  */
> +      if (access_has_children_p (racc) && !constant_decl_p (racc->base))
>   	generate_subtree_copies (racc->first_child, rhs, racc->offset, 0, 0,
>   				 gsi, false, false, loc);
>         if (access_has_children_p (lacc))
> @@ -3491,6 +3537,54 @@ sra_modify_assign (gimple *stmt, gimple_stmt_iterator *gsi)
>       }
>   }
>
> +/* Set any scalar replacements of values in the constant pool to the initial
> +   value of the constant.  (Constant-pool decls like *.LC0 have effectively
> +   been initialized before the program starts, we must do the same for their
> +   replacements.)  Thus, we output statements like 'SR.1 = *.LC0[0];' into
> +   the function's entry block.  */
> +
> +static void
> +initialize_constant_pool_replacements (void)
> +{
> +  gimple_seq seq = NULL;
> +  gimple_stmt_iterator gsi = gsi_start (seq);
> +  bitmap_iterator bi;
> +  unsigned i;
> +
> +  EXECUTE_IF_SET_IN_BITMAP (candidate_bitmap, 0, i, bi)
> +    if (bitmap_bit_p (should_scalarize_away_bitmap, i)
> +	&& !bitmap_bit_p (cannot_scalarize_away_bitmap, i))
> +      {
> +	tree var = candidate (i);
> +	if (!constant_decl_p (var))
> +	  continue;
> +	vec<access_p> *access_vec = get_base_access_vector (var);
> +	if (!access_vec)
> +	  continue;
> +	for (unsigned i = 0; i < access_vec->length (); i++)
> +	  {
> +	    struct access *access = (*access_vec)[i];
> +	    if (!access->replacement_decl)
> +	      continue;
> +	    gassign *stmt = gimple_build_assign (
> +	      get_access_replacement (access), unshare_expr (access->expr));
> +	    if (dump_file && (dump_flags & TDF_DETAILS))
> +	      {
> +		fprintf (dump_file, "Generating constant initializer: ");
> +		print_gimple_stmt (dump_file, stmt, 0, 1);
> +		fprintf (dump_file, "\n");
> +	      }
> +	    gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
> +	    update_stmt (stmt);
> +	  }
> +      }
> +
> +  seq = gsi_seq (gsi);
> +  if (seq)
> +    gsi_insert_seq_on_edge_immediate (
> +      single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)), seq);
> +}
> +
>   /* Traverse the function body and all modifications as decided in
>      analyze_all_variable_accesses.  Return true iff the CFG has been
>      changed.  */
> @@ -3501,6 +3595,8 @@ sra_modify_function_body (void)
>     bool cfg_changed = false;
>     basic_block bb;
>
> +  initialize_constant_pool_replacements ();
> +
>     FOR_EACH_BB_FN (bb, cfun)
>       {
>         gimple_stmt_iterator gsi = gsi_start_bb (bb);
>
Richard Biener Jan. 18, 2016, 11:04 a.m. UTC | #2
On Fri, Jan 15, 2016 at 11:27 AM, Alan Lawrence
<alan.lawrence@foss.arm.com> wrote:
> On 24/12/15 11:53, Alan Lawrence wrote:
>>
>> Here's a new version that fixes the gcc.dg/guality/pr54970.c failures seen
>> on
>> aarch64 and powerpc64. Prior to SRA handling constant pool decls,
>> -fdump-tree-esra-details (at -O1 -g) had shown:
>>    <bb 2>:
>>    a = *.LC0;
>>    # DEBUG a$0 => MEM[(int[3] *)&*.LC0]
>>    a$4_3 = MEM[(int[3] *)&*.LC0 + 4B];
>>    # DEBUG a$4 => a$4_3
>>    a$8_4 = MEM[(int[3] *)&*.LC0 + 8B];
>>
>> The previous patch changed this to:
>>    <bb 2>:
>>    SR.5_3 = *.LC0[0];
>>    SR.6_4 = *.LC0[1];
>>    SR.7_19 = *.LC0[2];
>>    SR.8_21 = *.LC1[0];
>>    SR.9_22 = *.LC1[1];
>>    SR.10_23 = *.LC1[2];
>>    # DEBUG a$0 => NULL   // Note here
>>    a$4_24 = SR.6_4;
>>    # DEBUG a$4 => a$4_24
>>    a$8_25 = SR.7_19;
>>
>> Turns out the DEBUG a$0 => NULL was produced in
>> load_assign_lhs_subreplacements:
>>
>>           if (lacc && lacc->grp_to_be_debug_replaced)
>>             {
>>               gdebug *ds;
>>               tree drhs;
>>               struct access *racc = find_access_in_subtree (sad->top_racc,
>>                                                             offset,
>>                                                             lacc->size);
>>
>>               if (racc && racc->grp_to_be_replaced)
>>                 {
>>                   if (racc->grp_write)
>>                     drhs = get_access_replacement (racc);
>>                   else
>>                     drhs = NULL;  // <=== HERE
>>                 }
>> ...
>>               ds = gimple_build_debug_bind (get_access_replacement (lacc),
>>                                             drhs, gsi_stmt
>> (sad->old_gsi));
>>
>> Prior to the patch, we'd skipped around load_assign_lhs_subreplacements,
>> because
>> access_has_children_p(racc) (for racc = *.LC0) didn't hold in
>> sra_modify_assign.
>>
>> I also added a constant_decl_p function, combining the two checks, plus
>> some
>> testcase fixes.
>>
>> Bootstrapped + check-gcc,g++ on x86_64, ARM, AArch64,
>>    also on powerpc64{,le}-none-linux-gnu *in combination with the other
>> patches
>>    in the series* (I haven't tested the individual patches on PPC),
>>    plus Ada on ARM and x86_64.
>>
>> gcc/ChangeLog:
>>
>>         PR target/63679
>>         * tree-sra.c (disqualified_constants, constant_decl_p): New.
>>         (sra_initialize): Allocate disqualified_constants.
>>         (sra_deinitialize): Free disqualified_constants.
>>         (disqualify_candidate): Update disqualified_constants when
>> appropriate.
>>         (create_access): Scan for constant-pool entries as we go along.
>>         (scalarizable_type_p): Add check against
>> type_contains_placeholder_p.
>>         (maybe_add_sra_candidate): Allow constant-pool entries.
>>         (load_assign_lhs_subreplacements): Bind debug for constant pool
>> vars.
>>         (initialize_constant_pool_replacements): New.
>>         (sra_modify_assign): Avoid mangling assignments created by
>> previous,
>>         and don't generate writes into constant pool.
>>         (sra_modify_function_body): Call
>> initialize_constant_pool_replacements.
>>
>> gcc/testsuite/ChangeLog:
>>
>>         * gcc.dg/tree-ssa/sra-17.c: New.
>>         * gcc.dg/tree-ssa/sra-18.c: New.
>
>
> Ping.

Ok.

Thanks,
Richard.

> (The next bit is false, unless you force SRA to happen more widely, but all
> the above stands)
>
>> This also fixes a bunch of other guality tests on AArch64 that were
>> failing
>> prior to the patch series, and another bunch on PowerPC64 (bigendian
>> -m32), listed below.
>
>>
>>
>> Tests fixed on aarch64-none-linux-gnu:
>>
>> gcc.dg/guality/pr54970.c   -O1  line 15 *p == 3
>> gcc.dg/guality/pr54970.c   -O1  line 15 *q == 2
>> gcc.dg/guality/pr54970.c   -O1  line 20 *p == 13
>> gcc.dg/guality/pr54970.c   -O1  line 20 *q == 2
>> gcc.dg/guality/pr54970.c   -O1  line 25 *p == 13
>> gcc.dg/guality/pr54970.c   -O1  line 25 *q == 12
>> gcc.dg/guality/pr54970.c   -O1  line 31 *p == 6
>> gcc.dg/guality/pr54970.c   -O1  line 31 *q == 5
>> gcc.dg/guality/pr54970.c   -O1  line 36 *p == 26
>> gcc.dg/guality/pr54970.c   -O1  line 36 *q == 5
>> gcc.dg/guality/pr54970.c   -O1  line 45 *p == 26
>> gcc.dg/guality/pr54970.c   -O1  line 45 *q == 25
>> gcc.dg/guality/pr54970.c   -O1  line 45 p[-1] == 25
>> gcc.dg/guality/pr54970.c   -O1  line 45 p[-2] == 4
>> gcc.dg/guality/pr54970.c   -O1  line 45 q[-1] == 4
>> gcc.dg/guality/pr54970.c   -O1  line 45 q[1] == 26
>> gcc.dg/guality/pr56154-1.c   -O1  line pr56154-1.c:20 x.a == 6
>> gcc.dg/guality/pr59776.c   -O1  line pr59776.c:17 s1.f == 5.0
>> gcc.dg/guality/pr59776.c   -O1  line pr59776.c:17 s1.g == 6.0
>> gcc.dg/guality/pr59776.c   -O1  line pr59776.c:17 s2.f == 0.0
>> gcc.dg/guality/pr59776.c   -O1  line pr59776.c:17 s2.g == 6.0
>> gcc.dg/guality/pr59776.c   -O1  line pr59776.c:20 s1.f == 5.0
>> gcc.dg/guality/pr59776.c   -O1  line pr59776.c:20 s1.g == 6.0
>> gcc.dg/guality/pr59776.c   -O1  line pr59776.c:20 s2.f == 5.0
>> gcc.dg/guality/pr59776.c   -O1  line pr59776.c:20 s2.g == 6.0
>> gcc.dg/guality/sra-1.c   -O1  line 21 a.j == 14
>> gcc.dg/guality/sra-1.c   -O1  line 32 a[1] == 14
>> gcc.dg/guality/sra-1.c   -O1  line 43 a.i == 4
>> gcc.dg/guality/sra-1.c   -O1  line 43 a.j == 14
>> (and other optimization levels)
>> On ppc64 bigendian, with the rest of the series (but I expect it's this
>> patch):
>> unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 15 a[0] == 1
>> unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 20 a[0] == 1
>> unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 25 a[0] == 1
>> unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 31 a[0] == 4
>> unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 36 a[0] == 4
>> unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 45 a[0] == 4
>> unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 45 p[-2] == 4
>> unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 45 q[-1] == 4
>> (again, and other optimization levels).
>
>
> Cheers, Alan
>
>
>> ---
>>   gcc/testsuite/gcc.dg/tree-ssa/sra-17.c |  19 ++++++
>>   gcc/testsuite/gcc.dg/tree-ssa/sra-18.c |  28 +++++++++
>>   gcc/tree-sra.c                         | 104
>> +++++++++++++++++++++++++++++++--
>>   3 files changed, 147 insertions(+), 4 deletions(-)
>>   create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sra-17.c
>>   create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sra-18.c
>>
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-17.c
>> b/gcc/testsuite/gcc.dg/tree-ssa/sra-17.c
>> new file mode 100644
>> index 0000000..25667f4
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-17.c
>> @@ -0,0 +1,19 @@
>> +/* { dg-do run { target { aarch64*-*-* alpha*-*-* arm*-*-* hppa*-*-*
>> powerpc*-*-* s390*-*-* } } } */
>> +/* { dg-options "-O2 -fdump-tree-esra --param
>> sra-max-scalarization-size-Ospeed=32" } */
>> +
>> +extern void abort (void);
>> +
>> +int
>> +main (int argc, char **argv)
>> +{
>> +  long a[4] = { 7, 19, 11, 255 };
>> +  int tot = 0;
>> +  for (int i = 0; i < 4; i++)
>> +    tot = (tot*256) + a[i];
>> +  if (tot == 0x07130bff)
>> +    return 0;
>> +  abort ();
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "Removing load: a = \\\*.LC0;" 1
>> "esra" } } */
>> +/* { dg-final { scan-tree-dump-times "SR.\[0-9_\]+ = \\\*.LC0\\\[" 4
>> "esra" } } */
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-18.c
>> b/gcc/testsuite/gcc.dg/tree-ssa/sra-18.c
>> new file mode 100644
>> index 0000000..609fb11
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-18.c
>> @@ -0,0 +1,28 @@
>> +/* { dg-do run { target { aarch64*-*-* alpha*-*-* arm*-*-* hppa*-*-*
>> powerpc*-*-* s390*-*-* } } } */
>> +/* { dg-options "-O2 -fdump-tree-esra --param
>> sra-max-scalarization-size-Ospeed=32" } */
>> +
>> +extern void abort (void);
>> +struct foo { long x; };
>> +
>> +struct bar { struct foo f[2]; };
>> +
>> +struct baz { struct bar b[2]; };
>> +
>> +int
>> +main (int argc, char **argv)
>> +{
>> +  struct baz a = { { { { { 4 }, { 5 } } }, { { { 6 }, { 7 } } }  } };
>> +  int tot = 0;
>> +  for (int i = 0; i < 2; i++)
>> +    for (int j = 0; j < 2; j++)
>> +      tot = (tot*256) + a.b[i].f[j].x;
>> +  if (tot == 0x04050607)
>> +    return 0;
>> +  abort ();
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "Removing load: a = \\\*.LC0;" 1
>> "esra" } } */
>> +/* { dg-final { scan-tree-dump-times "SR.\[0-9_\]+ =
>> \\\*.LC0\\.b\\\[0\\\]\\.f\\\[0\\\]\\.x" 1 "esra" } } */
>> +/* { dg-final { scan-tree-dump-times "SR.\[0-9_\]+ =
>> \\\*.LC0\\.b\\\[0\\\]\\.f\\\[1\\\]\\.x" 1 "esra" } } */
>> +/* { dg-final { scan-tree-dump-times "SR.\[0-9_\]+ =
>> \\\*.LC0\\.b\\\[1\\\]\\.f\\\[0\\\]\\.x" 1 "esra" } } */
>> +/* { dg-final { scan-tree-dump-times "SR.\[0-9_\]+ =
>> \\\*.LC0\\.b\\\[1\\\]\\.f\\\[1\\\]\\.x" 1 "esra" } } */
>> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
>> index c4fea5b..77a2e49 100644
>> --- a/gcc/tree-sra.c
>> +++ b/gcc/tree-sra.c
>> @@ -328,6 +328,10 @@ candidate (unsigned uid)
>>      those which cannot be (because they are and need be used as a whole).
>> */
>>   static bitmap should_scalarize_away_bitmap,
>> cannot_scalarize_away_bitmap;
>>
>> +/* Bitmap of candidates in the constant pool, which cannot be scalarized
>> +   because this would produce non-constant expressions (e.g. Ada).  */
>> +static bitmap disqualified_constants;
>> +
>>   /* Obstack for creation of fancy names.  */
>>   static struct obstack name_obstack;
>>
>> @@ -652,6 +656,7 @@ sra_initialize (void)
>>       (vec_safe_length (cfun->local_decls) / 2);
>>     should_scalarize_away_bitmap = BITMAP_ALLOC (NULL);
>>     cannot_scalarize_away_bitmap = BITMAP_ALLOC (NULL);
>> +  disqualified_constants = BITMAP_ALLOC (NULL);
>>     gcc_obstack_init (&name_obstack);
>>     base_access_vec = new hash_map<tree, auto_vec<access_p> >;
>>     memset (&sra_stats, 0, sizeof (sra_stats));
>> @@ -670,6 +675,7 @@ sra_deinitialize (void)
>>     candidates = NULL;
>>     BITMAP_FREE (should_scalarize_away_bitmap);
>>     BITMAP_FREE (cannot_scalarize_away_bitmap);
>> +  BITMAP_FREE (disqualified_constants);
>>     access_pool.release ();
>>     assign_link_pool.release ();
>>     obstack_free (&name_obstack, NULL);
>> @@ -677,6 +683,13 @@ sra_deinitialize (void)
>>     delete base_access_vec;
>>   }
>>
>> +/* Return true if DECL is a VAR_DECL in the constant pool, false
>> otherwise.  */
>> +
>> +static bool constant_decl_p (tree decl)
>> +{
>> +  return TREE_CODE (decl) == VAR_DECL && DECL_IN_CONSTANT_POOL (decl);
>> +}
>> +
>>   /* Remove DECL from candidates for SRA and write REASON to the dump file
>> if
>>      there is one.  */
>>   static void
>> @@ -684,6 +697,8 @@ disqualify_candidate (tree decl, const char *reason)
>>   {
>>     if (bitmap_clear_bit (candidate_bitmap, DECL_UID (decl)))
>>       candidates->remove_elt_with_hash (decl, DECL_UID (decl));
>> +  if (constant_decl_p (decl))
>> +    bitmap_set_bit (disqualified_constants, DECL_UID (decl));
>>
>>     if (dump_file && (dump_flags & TDF_DETAILS))
>>       {
>> @@ -835,8 +850,11 @@ create_access_1 (tree base, HOST_WIDE_INT offset,
>> HOST_WIDE_INT size)
>>     return access;
>>   }
>>
>> +static bool maybe_add_sra_candidate (tree);
>> +
>>   /* Create and insert access for EXPR. Return created access, or NULL if
>> it is
>> -   not possible.  */
>> +   not possible.  Also scan for uses of constant pool as we go along and
>> add
>> +   to candidates.  */
>>
>>   static struct access *
>>   create_access (tree expr, gimple *stmt, bool write)
>> @@ -859,6 +877,25 @@ create_access (tree expr, gimple *stmt, bool write)
>>     else
>>       ptr = false;
>>
>> +  /* For constant-pool entries, check we can substitute the constant
>> value.  */
>> +  if (constant_decl_p (base)
>> +      && (sra_mode == SRA_MODE_EARLY_INTRA || sra_mode ==
>> SRA_MODE_INTRA))
>> +    {
>> +      gcc_assert (!bitmap_bit_p (disqualified_constants, DECL_UID
>> (base)));
>> +      if (expr != base
>> +         && !is_gimple_reg_type (TREE_TYPE (expr))
>> +         && dump_file && (dump_flags & TDF_DETAILS))
>> +       {
>> +         /* This occurs in Ada with accesses to ARRAY_RANGE_REFs,
>> +            and elements of multidimensional arrays (which are
>> +            multi-element arrays in their own right).  */
>> +         fprintf (dump_file, "Allowing non-reg-type load of part"
>> +                             " of constant-pool entry: ");
>> +         print_generic_expr (dump_file, expr, 0);
>> +       }
>> +      maybe_add_sra_candidate (base);
>> +    }
>> +
>>     if (!DECL_P (base) || !bitmap_bit_p (candidate_bitmap, DECL_UID
>> (base)))
>>       return NULL;
>>
>> @@ -918,6 +955,8 @@ static bool
>>   scalarizable_type_p (tree type)
>>   {
>>     gcc_assert (!is_gimple_reg_type (type));
>> +  if (type_contains_placeholder_p (type))
>> +    return false;
>>
>>     switch (TREE_CODE (type))
>>     {
>> @@ -1852,7 +1891,10 @@ maybe_add_sra_candidate (tree var)
>>         reject (var, "not aggregate");
>>         return false;
>>       }
>> -  if (needs_to_live_in_memory (var))
>> +  /* Allow constant-pool entries (that "need to live in memory")
>> +     unless we are doing IPA SRA.  */
>> +  if (needs_to_live_in_memory (var)
>> +      && (sra_mode == SRA_MODE_EARLY_IPA || !constant_decl_p (var)))
>>       {
>>         reject (var, "needs to live in memory");
>>         return false;
>> @@ -3113,7 +3155,7 @@ load_assign_lhs_subreplacements (struct access
>> *lacc,
>>
>>               if (racc && racc->grp_to_be_replaced)
>>                 {
>> -                 if (racc->grp_write)
>> +                 if (racc->grp_write || constant_decl_p (racc->base))
>>                     drhs = get_access_replacement (racc);
>>                   else
>>                     drhs = NULL;
>> @@ -3272,6 +3314,9 @@ sra_modify_assign (gimple *stmt,
>> gimple_stmt_iterator *gsi)
>>     racc = get_access_for_expr (rhs);
>>     if (!lacc && !racc)
>>       return SRA_AM_NONE;
>> +  /* Avoid modifying initializations of constant-pool replacements.  */
>> +  if (racc && (racc->replacement_decl == lhs))
>> +    return SRA_AM_NONE;
>>
>>     loc = gimple_location (stmt);
>>     if (lacc && lacc->grp_to_be_replaced)
>> @@ -3388,7 +3433,8 @@ sra_modify_assign (gimple *stmt,
>> gimple_stmt_iterator *gsi)
>>         || contains_vce_or_bfcref_p (lhs)
>>         || stmt_ends_bb_p (stmt))
>>       {
>> -      if (access_has_children_p (racc))
>> +      /* No need to copy into a constant-pool, it comes pre-initialized.
>> */
>> +      if (access_has_children_p (racc) && !constant_decl_p (racc->base))
>>         generate_subtree_copies (racc->first_child, rhs, racc->offset, 0,
>> 0,
>>                                  gsi, false, false, loc);
>>         if (access_has_children_p (lacc))
>> @@ -3491,6 +3537,54 @@ sra_modify_assign (gimple *stmt,
>> gimple_stmt_iterator *gsi)
>>       }
>>   }
>>
>> +/* Set any scalar replacements of values in the constant pool to the
>> initial
>> +   value of the constant.  (Constant-pool decls like *.LC0 have
>> effectively
>> +   been initialized before the program starts, we must do the same for
>> their
>> +   replacements.)  Thus, we output statements like 'SR.1 = *.LC0[0];'
>> into
>> +   the function's entry block.  */
>> +
>> +static void
>> +initialize_constant_pool_replacements (void)
>> +{
>> +  gimple_seq seq = NULL;
>> +  gimple_stmt_iterator gsi = gsi_start (seq);
>> +  bitmap_iterator bi;
>> +  unsigned i;
>> +
>> +  EXECUTE_IF_SET_IN_BITMAP (candidate_bitmap, 0, i, bi)
>> +    if (bitmap_bit_p (should_scalarize_away_bitmap, i)
>> +       && !bitmap_bit_p (cannot_scalarize_away_bitmap, i))
>> +      {
>> +       tree var = candidate (i);
>> +       if (!constant_decl_p (var))
>> +         continue;
>> +       vec<access_p> *access_vec = get_base_access_vector (var);
>> +       if (!access_vec)
>> +         continue;
>> +       for (unsigned i = 0; i < access_vec->length (); i++)
>> +         {
>> +           struct access *access = (*access_vec)[i];
>> +           if (!access->replacement_decl)
>> +             continue;
>> +           gassign *stmt = gimple_build_assign (
>> +             get_access_replacement (access), unshare_expr
>> (access->expr));
>> +           if (dump_file && (dump_flags & TDF_DETAILS))
>> +             {
>> +               fprintf (dump_file, "Generating constant initializer: ");
>> +               print_gimple_stmt (dump_file, stmt, 0, 1);
>> +               fprintf (dump_file, "\n");
>> +             }
>> +           gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
>> +           update_stmt (stmt);
>> +         }
>> +      }
>> +
>> +  seq = gsi_seq (gsi);
>> +  if (seq)
>> +    gsi_insert_seq_on_edge_immediate (
>> +      single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)), seq);
>> +}
>> +
>>   /* Traverse the function body and all modifications as decided in
>>      analyze_all_variable_accesses.  Return true iff the CFG has been
>>      changed.  */
>> @@ -3501,6 +3595,8 @@ sra_modify_function_body (void)
>>     bool cfg_changed = false;
>>     basic_block bb;
>>
>> +  initialize_constant_pool_replacements ();
>> +
>>     FOR_EACH_BB_FN (bb, cfun)
>>       {
>>         gimple_stmt_iterator gsi = gsi_start_bb (bb);
>>
>
diff mbox

Patch

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-17.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-17.c
new file mode 100644
index 0000000..25667f4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-17.c
@@ -0,0 +1,19 @@ 
+/* { dg-do run { target { aarch64*-*-* alpha*-*-* arm*-*-* hppa*-*-* powerpc*-*-* s390*-*-* } } } */
+/* { dg-options "-O2 -fdump-tree-esra --param sra-max-scalarization-size-Ospeed=32" } */
+
+extern void abort (void);
+
+int
+main (int argc, char **argv)
+{
+  long a[4] = { 7, 19, 11, 255 };
+  int tot = 0;
+  for (int i = 0; i < 4; i++)
+    tot = (tot*256) + a[i];
+  if (tot == 0x07130bff)
+    return 0;
+  abort ();
+}
+
+/* { dg-final { scan-tree-dump-times "Removing load: a = \\\*.LC0;" 1 "esra" } } */
+/* { dg-final { scan-tree-dump-times "SR.\[0-9_\]+ = \\\*.LC0\\\[" 4 "esra" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-18.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-18.c
new file mode 100644
index 0000000..609fb11
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-18.c
@@ -0,0 +1,28 @@ 
+/* { dg-do run { target { aarch64*-*-* alpha*-*-* arm*-*-* hppa*-*-* powerpc*-*-* s390*-*-* } } } */
+/* { dg-options "-O2 -fdump-tree-esra --param sra-max-scalarization-size-Ospeed=32" } */
+
+extern void abort (void);
+struct foo { long x; };
+
+struct bar { struct foo f[2]; };
+
+struct baz { struct bar b[2]; };
+
+int
+main (int argc, char **argv)
+{
+  struct baz a = { { { { { 4 }, { 5 } } }, { { { 6 }, { 7 } } }  } };
+  int tot = 0;
+  for (int i = 0; i < 2; i++)
+    for (int j = 0; j < 2; j++)
+      tot = (tot*256) + a.b[i].f[j].x;
+  if (tot == 0x04050607)
+    return 0;
+  abort ();
+}
+
+/* { dg-final { scan-tree-dump-times "Removing load: a = \\\*.LC0;" 1 "esra" } } */
+/* { dg-final { scan-tree-dump-times "SR.\[0-9_\]+ = \\\*.LC0\\.b\\\[0\\\]\\.f\\\[0\\\]\\.x" 1 "esra" } } */
+/* { dg-final { scan-tree-dump-times "SR.\[0-9_\]+ = \\\*.LC0\\.b\\\[0\\\]\\.f\\\[1\\\]\\.x" 1 "esra" } } */
+/* { dg-final { scan-tree-dump-times "SR.\[0-9_\]+ = \\\*.LC0\\.b\\\[1\\\]\\.f\\\[0\\\]\\.x" 1 "esra" } } */
+/* { dg-final { scan-tree-dump-times "SR.\[0-9_\]+ = \\\*.LC0\\.b\\\[1\\\]\\.f\\\[1\\\]\\.x" 1 "esra" } } */
diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index c4fea5b..77a2e49 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -328,6 +328,10 @@  candidate (unsigned uid)
    those which cannot be (because they are and need be used as a whole).  */
 static bitmap should_scalarize_away_bitmap, cannot_scalarize_away_bitmap;
 
+/* Bitmap of candidates in the constant pool, which cannot be scalarized
+   because this would produce non-constant expressions (e.g. Ada).  */
+static bitmap disqualified_constants;
+
 /* Obstack for creation of fancy names.  */
 static struct obstack name_obstack;
 
@@ -652,6 +656,7 @@  sra_initialize (void)
     (vec_safe_length (cfun->local_decls) / 2);
   should_scalarize_away_bitmap = BITMAP_ALLOC (NULL);
   cannot_scalarize_away_bitmap = BITMAP_ALLOC (NULL);
+  disqualified_constants = BITMAP_ALLOC (NULL);
   gcc_obstack_init (&name_obstack);
   base_access_vec = new hash_map<tree, auto_vec<access_p> >;
   memset (&sra_stats, 0, sizeof (sra_stats));
@@ -670,6 +675,7 @@  sra_deinitialize (void)
   candidates = NULL;
   BITMAP_FREE (should_scalarize_away_bitmap);
   BITMAP_FREE (cannot_scalarize_away_bitmap);
+  BITMAP_FREE (disqualified_constants);
   access_pool.release ();
   assign_link_pool.release ();
   obstack_free (&name_obstack, NULL);
@@ -677,6 +683,13 @@  sra_deinitialize (void)
   delete base_access_vec;
 }
 
+/* Return true if DECL is a VAR_DECL in the constant pool, false otherwise.  */
+
+static bool constant_decl_p (tree decl)
+{
+  return TREE_CODE (decl) == VAR_DECL && DECL_IN_CONSTANT_POOL (decl);
+}
+
 /* Remove DECL from candidates for SRA and write REASON to the dump file if
    there is one.  */
 static void
@@ -684,6 +697,8 @@  disqualify_candidate (tree decl, const char *reason)
 {
   if (bitmap_clear_bit (candidate_bitmap, DECL_UID (decl)))
     candidates->remove_elt_with_hash (decl, DECL_UID (decl));
+  if (constant_decl_p (decl))
+    bitmap_set_bit (disqualified_constants, DECL_UID (decl));
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
@@ -835,8 +850,11 @@  create_access_1 (tree base, HOST_WIDE_INT offset, HOST_WIDE_INT size)
   return access;
 }
 
+static bool maybe_add_sra_candidate (tree);
+
 /* Create and insert access for EXPR. Return created access, or NULL if it is
-   not possible.  */
+   not possible.  Also scan for uses of constant pool as we go along and add
+   to candidates.  */
 
 static struct access *
 create_access (tree expr, gimple *stmt, bool write)
@@ -859,6 +877,25 @@  create_access (tree expr, gimple *stmt, bool write)
   else
     ptr = false;
 
+  /* For constant-pool entries, check we can substitute the constant value.  */
+  if (constant_decl_p (base)
+      && (sra_mode == SRA_MODE_EARLY_INTRA || sra_mode == SRA_MODE_INTRA))
+    {
+      gcc_assert (!bitmap_bit_p (disqualified_constants, DECL_UID (base)));
+      if (expr != base
+	  && !is_gimple_reg_type (TREE_TYPE (expr))
+	  && dump_file && (dump_flags & TDF_DETAILS))
+	{
+	  /* This occurs in Ada with accesses to ARRAY_RANGE_REFs,
+	     and elements of multidimensional arrays (which are
+	     multi-element arrays in their own right).  */
+	  fprintf (dump_file, "Allowing non-reg-type load of part"
+			      " of constant-pool entry: ");
+	  print_generic_expr (dump_file, expr, 0);
+	}
+      maybe_add_sra_candidate (base);
+    }
+
   if (!DECL_P (base) || !bitmap_bit_p (candidate_bitmap, DECL_UID (base)))
     return NULL;
 
@@ -918,6 +955,8 @@  static bool
 scalarizable_type_p (tree type)
 {
   gcc_assert (!is_gimple_reg_type (type));
+  if (type_contains_placeholder_p (type))
+    return false;
 
   switch (TREE_CODE (type))
   {
@@ -1852,7 +1891,10 @@  maybe_add_sra_candidate (tree var)
       reject (var, "not aggregate");
       return false;
     }
-  if (needs_to_live_in_memory (var))
+  /* Allow constant-pool entries (that "need to live in memory")
+     unless we are doing IPA SRA.  */
+  if (needs_to_live_in_memory (var)
+      && (sra_mode == SRA_MODE_EARLY_IPA || !constant_decl_p (var)))
     {
       reject (var, "needs to live in memory");
       return false;
@@ -3113,7 +3155,7 @@  load_assign_lhs_subreplacements (struct access *lacc,
 
 	      if (racc && racc->grp_to_be_replaced)
 		{
-		  if (racc->grp_write)
+		  if (racc->grp_write || constant_decl_p (racc->base))
 		    drhs = get_access_replacement (racc);
 		  else
 		    drhs = NULL;
@@ -3272,6 +3314,9 @@  sra_modify_assign (gimple *stmt, gimple_stmt_iterator *gsi)
   racc = get_access_for_expr (rhs);
   if (!lacc && !racc)
     return SRA_AM_NONE;
+  /* Avoid modifying initializations of constant-pool replacements.  */
+  if (racc && (racc->replacement_decl == lhs))
+    return SRA_AM_NONE;
 
   loc = gimple_location (stmt);
   if (lacc && lacc->grp_to_be_replaced)
@@ -3388,7 +3433,8 @@  sra_modify_assign (gimple *stmt, gimple_stmt_iterator *gsi)
       || contains_vce_or_bfcref_p (lhs)
       || stmt_ends_bb_p (stmt))
     {
-      if (access_has_children_p (racc))
+      /* No need to copy into a constant-pool, it comes pre-initialized.  */
+      if (access_has_children_p (racc) && !constant_decl_p (racc->base))
 	generate_subtree_copies (racc->first_child, rhs, racc->offset, 0, 0,
 				 gsi, false, false, loc);
       if (access_has_children_p (lacc))
@@ -3491,6 +3537,54 @@  sra_modify_assign (gimple *stmt, gimple_stmt_iterator *gsi)
     }
 }
 
+/* Set any scalar replacements of values in the constant pool to the initial
+   value of the constant.  (Constant-pool decls like *.LC0 have effectively
+   been initialized before the program starts, we must do the same for their
+   replacements.)  Thus, we output statements like 'SR.1 = *.LC0[0];' into
+   the function's entry block.  */
+
+static void
+initialize_constant_pool_replacements (void)
+{
+  gimple_seq seq = NULL;
+  gimple_stmt_iterator gsi = gsi_start (seq);
+  bitmap_iterator bi;
+  unsigned i;
+
+  EXECUTE_IF_SET_IN_BITMAP (candidate_bitmap, 0, i, bi)
+    if (bitmap_bit_p (should_scalarize_away_bitmap, i)
+	&& !bitmap_bit_p (cannot_scalarize_away_bitmap, i))
+      {
+	tree var = candidate (i);
+	if (!constant_decl_p (var))
+	  continue;
+	vec<access_p> *access_vec = get_base_access_vector (var);
+	if (!access_vec)
+	  continue;
+	for (unsigned i = 0; i < access_vec->length (); i++)
+	  {
+	    struct access *access = (*access_vec)[i];
+	    if (!access->replacement_decl)
+	      continue;
+	    gassign *stmt = gimple_build_assign (
+	      get_access_replacement (access), unshare_expr (access->expr));
+	    if (dump_file && (dump_flags & TDF_DETAILS))
+	      {
+		fprintf (dump_file, "Generating constant initializer: ");
+		print_gimple_stmt (dump_file, stmt, 0, 1);
+		fprintf (dump_file, "\n");
+	      }
+	    gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
+	    update_stmt (stmt);
+	  }
+      }
+
+  seq = gsi_seq (gsi);
+  if (seq)
+    gsi_insert_seq_on_edge_immediate (
+      single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)), seq);
+}
+
 /* Traverse the function body and all modifications as decided in
    analyze_all_variable_accesses.  Return true iff the CFG has been
    changed.  */
@@ -3501,6 +3595,8 @@  sra_modify_function_body (void)
   bool cfg_changed = false;
   basic_block bb;
 
+  initialize_constant_pool_replacements ();
+
   FOR_EACH_BB_FN (bb, cfun)
     {
       gimple_stmt_iterator gsi = gsi_start_bb (bb);