Patchwork Improve out-of-SSA coalescing

login
register
mail settings
Submitter Richard Guenther
Date Sept. 25, 2013, 11:30 a.m.
Message ID <alpine.LNX.2.00.1309251325110.29411@zhemvz.fhfr.qr>
Download mbox | patch
Permalink /patch/277813/
State New
Headers show

Comments

Richard Guenther - Sept. 25, 2013, 11:30 a.m.
This loosens the restriction of only coalescing SSA names with
the same base variable by ignoring that restriction for DECL_INGORED_P
base variables (ok, all of them can and should be anonymous SSA names
now, but code obviously hasn't catched up 100%).

This improves the code generated for the loop in the testcase to

        <fallthru>
        .p2align 4,,10
        .p2align 3
.L4:
        xorps   %xmm1, %xmm1
        cvtsi2ss        %eax, %xmm1
        addl    $1, %eax
        cmpl    %edi, %eax
        addss   %xmm1, %xmm0
        jne     .L4

from

        jmp     .L4
        .p2align 4,,10
        .p2align 3
.L6:
        movaps  %xmm0, %xmm1
.L4:
        xorps   %xmm0, %xmm0
        cvtsi2ss        %eax, %xmm0
        addl    $1, %eax
        cmpl    %edi, %eax
        addss   %xmm1, %xmm0
        jne     .L6

avoiding the copy on the backedge and the loop entry jump.  Overall
this is similar to what Jeff was after with his latest adjustment
of this code.

Bootstrap and regtest ongoing on x86_64-unknown-linux-gnu.

Richard.

2013-09-25  Richard Biener  <rguenther@suse.de>

	* tree-ssa-live.c (var_map_base_init): Handle SSA names with
	DECL_IGNORED_P base variables like anonymous SSA names.
	(loe_visit_block): Use gcc_checking_assert.
	* tree-ssa-coalesce.c (create_outofssa_var_map): Use
	gimple_assign_ssa_name_copy_p.
	(gimple_can_coalesce_p): Adjust according to the var_map_base_init
	change.

	* gcc.dg/tree-ssa/coalesce-2.c: New testcase.

Index: gcc/tree-ssa-live.c
===================================================================
*** gcc/tree-ssa-live.c	(revision 202883)
--- gcc/tree-ssa-live.c	(working copy)
*************** var_map_base_init (var_map map)
*** 104,110 ****
        struct tree_int_map **slot;
        unsigned baseindex;
        var = partition_to_var (map, x);
!       if (SSA_NAME_VAR (var))
  	m->base.from = SSA_NAME_VAR (var);
        else
  	/* This restricts what anonymous SSA names we can coalesce
--- 104,111 ----
        struct tree_int_map **slot;
        unsigned baseindex;
        var = partition_to_var (map, x);
!       if (SSA_NAME_VAR (var)
! 	  && !DECL_IGNORED_P (SSA_NAME_VAR (var)))
  	m->base.from = SSA_NAME_VAR (var);
        else
  	/* This restricts what anonymous SSA names we can coalesce
*************** loe_visit_block (tree_live_info_p live,
*** 990,998 ****
    edge_iterator ei;
    basic_block pred_bb;
    bitmap loe;
-   gcc_assert (!bitmap_bit_p (visited, bb->index));
  
    bitmap_set_bit (visited, bb->index);
    loe = live_on_entry (live, bb);
  
    FOR_EACH_EDGE (e, ei, bb->preds)
--- 993,1002 ----
    edge_iterator ei;
    basic_block pred_bb;
    bitmap loe;
  
+   gcc_checking_assert (!bitmap_bit_p (visited, bb->index));
    bitmap_set_bit (visited, bb->index);
+ 
    loe = live_on_entry (live, bb);
  
    FOR_EACH_EDGE (e, ei, bb->preds)
Index: gcc/tree-ssa-coalesce.c
===================================================================
*** gcc/tree-ssa-coalesce.c	(revision 202883)
--- gcc/tree-ssa-coalesce.c	(working copy)
*************** create_outofssa_var_map (coalesce_list_p
*** 980,989 ****
  	      {
  		tree lhs = gimple_assign_lhs (stmt);
  		tree rhs1 = gimple_assign_rhs1 (stmt);
! 
! 		if (gimple_assign_copy_p (stmt)
!                     && TREE_CODE (lhs) == SSA_NAME
! 		    && TREE_CODE (rhs1) == SSA_NAME
  		    && gimple_can_coalesce_p (lhs, rhs1))
  		  {
  		    v1 = SSA_NAME_VERSION (lhs);
--- 982,988 ----
  	      {
  		tree lhs = gimple_assign_lhs (stmt);
  		tree rhs1 = gimple_assign_rhs1 (stmt);
! 		if (gimple_assign_ssa_name_copy_p (stmt)
  		    && gimple_can_coalesce_p (lhs, rhs1))
  		  {
  		    v1 = SSA_NAME_VERSION (lhs);
*************** gimple_can_coalesce_p (tree name1, tree
*** 1347,1353 ****
  {
    /* First check the SSA_NAME's associated DECL.  We only want to
       coalesce if they have the same DECL or both have no associated DECL.  */
!   if (SSA_NAME_VAR (name1) != SSA_NAME_VAR (name2))
      return false;
  
    /* Now check the types.  If the types are the same, then we should
--- 1346,1356 ----
  {
    /* First check the SSA_NAME's associated DECL.  We only want to
       coalesce if they have the same DECL or both have no associated DECL.  */
!   tree var1 = SSA_NAME_VAR (name1);
!   tree var2 = SSA_NAME_VAR (name2);
!   var1 = (var1 && !DECL_IGNORED_P (var1)) ? var1 : NULL_TREE;
!   var2 = (var2 && !DECL_IGNORED_P (var2)) ? var2 : NULL_TREE;
!   if (var1 != var2)
      return false;
  
    /* Now check the types.  If the types are the same, then we should
Richard Guenther - Sept. 25, 2013, 12:57 p.m.
On Wed, 25 Sep 2013, Richard Biener wrote:

> 
> This loosens the restriction of only coalescing SSA names with
> the same base variable by ignoring that restriction for DECL_INGORED_P
> base variables (ok, all of them can and should be anonymous SSA names
> now, but code obviously hasn't catched up 100%).
> 
> This improves the code generated for the loop in the testcase to
> 
>         <fallthru>
>         .p2align 4,,10
>         .p2align 3
> .L4:
>         xorps   %xmm1, %xmm1
>         cvtsi2ss        %eax, %xmm1
>         addl    $1, %eax
>         cmpl    %edi, %eax
>         addss   %xmm1, %xmm0
>         jne     .L4
> 
> from
> 
>         jmp     .L4
>         .p2align 4,,10
>         .p2align 3
> .L6:
>         movaps  %xmm0, %xmm1
> .L4:
>         xorps   %xmm0, %xmm0
>         cvtsi2ss        %eax, %xmm0
>         addl    $1, %eax
>         cmpl    %edi, %eax
>         addss   %xmm1, %xmm0
>         jne     .L6
> 
> avoiding the copy on the backedge and the loop entry jump.  Overall
> this is similar to what Jeff was after with his latest adjustment
> of this code.
> 
> Bootstrap and regtest ongoing on x86_64-unknown-linux-gnu.

For some reason it miscompiles GCC itself.  Hmm.  Cannot spot the
obvious error yet.

Richard.
Richard Guenther - Sept. 26, 2013, 11:22 a.m.
On Wed, 25 Sep 2013, Steven Bosscher wrote:

> On Wednesday, September 25, 2013, Richard Biener <rguenther@suse.de> wrote:
> > On Wed, 25 Sep 2013, Richard Biener wrote:
> >
> >>
> >> This loosens the restriction of only coalescing SSA names with
> >> the same base variable by ignoring that restriction for DECL_INGORED_P
> >> base variables (ok, all of them can and should be anonymous SSA names
> >> now, but code obviously hasn't catched up 100%).
> >>
> >> This improves the code generated for the loop in the testcase to
> >>
> >>         <fallthru>
> >>         .p2align 4,,10
> >>         .p2align 3
> >> .L4:
> >>         xorps   %xmm1, %xmm1
> >>         cvtsi2ss        %eax, %xmm1
> >>         addl    $1, %eax
> >>         cmpl    %edi, %eax
> >>         addss   %xmm1, %xmm0
> >>         jne     .L4
> >>
> >> from
> >>
> >>         jmp     .L4
> >>         .p2align 4,,10
> >>         .p2align 3
> >> .L6:
> >>         movaps  %xmm0, %xmm1
> >> .L4:
> >>         xorps   %xmm0, %xmm0
> >>         cvtsi2ss        %eax, %xmm0
> >>         addl    $1, %eax
> >>         cmpl    %edi, %eax
> >>         addss   %xmm1, %xmm0
> >>         jne     .L6
> >>
> >> avoiding the copy on the backedge and the loop entry jump.  Overall
> >> this is similar to what Jeff was after with his latest adjustment
> >> of this code.
> >>
> >> Bootstrap and regtest ongoing on x86_64-unknown-linux-gnu.
> >
> > For some reason it miscompiles GCC itself.  Hmm.  Cannot spot the
> > obvious error yet.
> >
> Try reverting the gcc_assert change. With the checking_assert there will be
> different code for checking enabled or disabled.

Nah, it was us coalescing PARM_DECLs and VAR_DECLs (and RESULT_DECLs).
Fixed by restricting this handling to VAR_DECLs only.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2013-09-25  Richard Biener  <rguenther@suse.de>

	* tree-ssa-live.c (var_map_base_init): Handle SSA names with
	DECL_IGNORED_P base VAR_DECLs like anonymous SSA names.
	(loe_visit_block): Use gcc_checking_assert.
	* tree-ssa-coalesce.c (create_outofssa_var_map): Use
	gimple_assign_ssa_name_copy_p.
	(gimple_can_coalesce_p): Adjust according to the var_map_base_init
	change.

	* gcc.dg/tree-ssa/coalesce-2.c: New testcase.

Index: gcc/tree-ssa-live.c
===================================================================
*** gcc/tree-ssa-live.c.orig	2013-09-26 11:50:32.000000000 +0200
--- gcc/tree-ssa-live.c	2013-09-26 12:11:49.412951758 +0200
*************** var_map_base_init (var_map map)
*** 104,110 ****
        struct tree_int_map **slot;
        unsigned baseindex;
        var = partition_to_var (map, x);
!       if (SSA_NAME_VAR (var))
  	m->base.from = SSA_NAME_VAR (var);
        else
  	/* This restricts what anonymous SSA names we can coalesce
--- 104,112 ----
        struct tree_int_map **slot;
        unsigned baseindex;
        var = partition_to_var (map, x);
!       if (SSA_NAME_VAR (var)
! 	  && (!VAR_P (SSA_NAME_VAR (var))
! 	      || !DECL_IGNORED_P (SSA_NAME_VAR (var))))
  	m->base.from = SSA_NAME_VAR (var);
        else
  	/* This restricts what anonymous SSA names we can coalesce
*************** loe_visit_block (tree_live_info_p live,
*** 992,1000 ****
    edge_iterator ei;
    basic_block pred_bb;
    bitmap loe;
-   gcc_assert (!bitmap_bit_p (visited, bb->index));
  
    bitmap_set_bit (visited, bb->index);
    loe = live_on_entry (live, bb);
  
    FOR_EACH_EDGE (e, ei, bb->preds)
--- 994,1003 ----
    edge_iterator ei;
    basic_block pred_bb;
    bitmap loe;
  
+   gcc_checking_assert (!bitmap_bit_p (visited, bb->index));
    bitmap_set_bit (visited, bb->index);
+ 
    loe = live_on_entry (live, bb);
  
    FOR_EACH_EDGE (e, ei, bb->preds)
Index: gcc/tree-ssa-coalesce.c
===================================================================
*** gcc/tree-ssa-coalesce.c.orig	2013-09-26 11:50:32.000000000 +0200
--- gcc/tree-ssa-coalesce.c	2013-09-26 13:12:48.848382555 +0200
*************** create_outofssa_var_map (coalesce_list_p
*** 982,991 ****
  	      {
  		tree lhs = gimple_assign_lhs (stmt);
  		tree rhs1 = gimple_assign_rhs1 (stmt);
! 
! 		if (gimple_assign_copy_p (stmt)
!                     && TREE_CODE (lhs) == SSA_NAME
! 		    && TREE_CODE (rhs1) == SSA_NAME
  		    && gimple_can_coalesce_p (lhs, rhs1))
  		  {
  		    v1 = SSA_NAME_VERSION (lhs);
--- 982,988 ----
  	      {
  		tree lhs = gimple_assign_lhs (stmt);
  		tree rhs1 = gimple_assign_rhs1 (stmt);
! 		if (gimple_assign_ssa_name_copy_p (stmt)
  		    && gimple_can_coalesce_p (lhs, rhs1))
  		  {
  		    v1 = SSA_NAME_VERSION (lhs);
*************** gimple_can_coalesce_p (tree name1, tree
*** 1349,1355 ****
  {
    /* First check the SSA_NAME's associated DECL.  We only want to
       coalesce if they have the same DECL or both have no associated DECL.  */
!   if (SSA_NAME_VAR (name1) != SSA_NAME_VAR (name2))
      return false;
  
    /* Now check the types.  If the types are the same, then we should
--- 1346,1356 ----
  {
    /* First check the SSA_NAME's associated DECL.  We only want to
       coalesce if they have the same DECL or both have no associated DECL.  */
!   tree var1 = SSA_NAME_VAR (name1);
!   tree var2 = SSA_NAME_VAR (name2);
!   var1 = (var1 && (!VAR_P (var1) || !DECL_IGNORED_P (var1))) ? var1 : NULL_TREE;
!   var2 = (var2 && (!VAR_P (var2) || !DECL_IGNORED_P (var2))) ? var2 : NULL_TREE;
!   if (var1 != var2)
      return false;
  
    /* Now check the types.  If the types are the same, then we should
Index: gcc/testsuite/gcc.dg/tree-ssa/coalesce-2.c
===================================================================
*** /dev/null	1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/tree-ssa/coalesce-2.c	2013-09-26 11:54:38.847720838 +0200
***************
*** 0 ****
--- 1,16 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -fdump-rtl-expand-details" } */
+ 
+ float total = 0.2;
+ void foo(int n)
+ {
+   int i;
+   for (i = 0; i < n; i++)
+     total += i;
+ }
+ 
+ /* Verify that out-of-ssa coalescing did its job by verifying there are not
+    any partition copies inserted.  */
+ 
+ /* { dg-final { scan-rtl-dump-not "partition copy" "expand"} } */
+ /* { dg-final { cleanup-rtl-dump "expand" } } */

Patch

Index: gcc/testsuite/gcc.dg/tree-ssa/coalesce-2.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/coalesce-2.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/coalesce-2.c	(working copy)
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-expand-details" } */
+
+float total = 0.2;
+void foo(int n)
+{
+  int i;
+  for (i = 0; i < n; i++)
+    total += i;
+}
+
+/* Verify that out-of-ssa coalescing did its job by verifying there are not
+   any partition copies inserted.  */
+
+/* { dg-final { scan-rtl-dump-not "partition copy" "expand"} } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */