Patchwork PR middle-end/55401: uninstrument relevant path in TM clone

login
register
mail settings
Submitter Aldy Hernandez
Date Nov. 29, 2012, 1:05 a.m.
Message ID <50B6B4C5.9050402@redhat.com>
Download mbox | patch
Permalink /patch/202631/
State New
Headers show

Comments

Aldy Hernandez - Nov. 29, 2012, 1:05 a.m.
In this testcase:

__attribute__((transaction_callable))
void q1()
{
   ringo=666;
   __transaction_atomic {
       george=999;
   }
}

...the clone for q1() ends up with instrumented code on both pathways:

   <bb 6>:
   _12 = tm_state.5_11 & 2;
   if (_12 != 0)
     goto <bb 3>;
   else
     goto <bb 4>;

   <bb 3>:
   _13 = 999;
   __builtin__ITM_WU4 (&george, _13);
   __builtin__ITM_commitTransaction ();
   goto <bb 5>;

   <bb 4>:
   _15 = 999;
   __builtin__ITM_WU4 (&george, _15);
   __builtin__ITM_commitTransaction ();

The reason this happens is because collect_bb2reg() follows the 
uninstrumented edges while traversing a transaction.

In the attached patch, I added yet another flag (*sigh*) to 
get_tm_region_blocks() specifying whether we should skip or include the 
uninstrumented blocks while accumulating basic blocks.  To assuage the 
grief, I make the new argument optional.

With the attached path, we generate this:

   <bb 6>:
   _12 = tm_state.5_11 & 2;
   if (_12 != 0)
     goto <bb 3>;
   else
     goto <bb 4>;

   <bb 3>:
   george = 999;
   __builtin__ITM_commitTransaction ();
   goto <bb 5>;

   <bb 4>:
   _13 = 999;
   __builtin__ITM_WU4 (&george, _13);
   __builtin__ITM_commitTransaction ();

...respecting the uninstrumented flag.

BTW, collect_bb2reg() is also called from execute_tm_edges() (via 
get_bb_regions_instrumented), hence the additional callback data to 
collect_bb2reg().  From TMMARK we wish to exclude the instrumented path, 
but in TMEDGE we should include it, so we can properly add cancel edges 
from the uninstrumented code path.

[I could abstract get_bb_regions_instrumented into two versions, an 
instrumented one (as the name suggests) and an uninstrumented one. 
Perhaps this would be clearer].

Blah.

OK for trunk?
commit d0d101b53552336f6133cced41aff746319cd074
Author: Aldy Hernandez <aldyh@redhat.com>
Date:   Wed Nov 28 18:17:33 2012 -0600

    	PR middle-end/55401
    	* trans-mem.c (get_tm_region_blocks): Exclude uninstrumented
    	blocks from vector if requested.
    	(collect_bb2reg): Pass new argument to
    	get_tm_region_blocks.
    	(get_bb_regions_instrumented): Add INCLUDE_UNINSTRUMENTED_P
    	argument, and pass it to expand_regions.
    	(execute_tm_mark): Pass new argument to
    	get_bb_regions_instrumented.
    	(execute_tm_edges): Same.
Richard Henderson - Nov. 29, 2012, 6:41 p.m.
On 2012-11-28 17:05, Aldy Hernandez wrote:
> Author: Aldy Hernandez <aldyh@redhat.com>
> Date:   Wed Nov 28 18:17:33 2012 -0600
> 
>     	PR middle-end/55401
>     	* trans-mem.c (get_tm_region_blocks): Exclude uninstrumented
>     	blocks from vector if requested.
>     	(collect_bb2reg): Pass new argument to
>     	get_tm_region_blocks.
>     	(get_bb_regions_instrumented): Add INCLUDE_UNINSTRUMENTED_P
>     	argument, and pass it to expand_regions.
>     	(execute_tm_mark): Pass new argument to
>     	get_bb_regions_instrumented.
>     	(execute_tm_edges): Same.

Ok.


r~

Patch

diff --git a/gcc/testsuite/gcc.dg/tm/pr55401.c b/gcc/testsuite/gcc.dg/tm/pr55401.c
new file mode 100644
index 0000000..1ac7d97
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tm/pr55401.c
@@ -0,0 +1,22 @@ 
+/* { dg-do compile } */
+/* { dg-options "-fgnu-tm -O0 -fdump-tree-optimized" } */
+
+int george;
+int ringo;
+
+__attribute__((transaction_callable))
+void foo()
+{
+  ringo=666;
+  __transaction_atomic {
+      george=999;
+  }
+}
+
+/* There should only be 2 instrumented writes to GEORGE: one in FOO,
+   and one in the transactional clone to FOO.  There should NOT be
+   more than one instrumented write to GEORGE in the clone of
+   FOO.  */
+/* { dg-final { scan-tree-dump-times "ITM_WU\[0-9\] \\(&george," 2 "optimized" } } */
+
+/* { dg-final { cleanup-tree-dump "optimized" } } */
diff --git a/gcc/trans-mem.c b/gcc/trans-mem.c
index 0a428fe..8762ad3 100644
--- a/gcc/trans-mem.c
+++ b/gcc/trans-mem.c
@@ -2394,14 +2394,19 @@  expand_block_tm (struct tm_region *region, basic_block bb)
 /* Return the list of basic-blocks in REGION.
 
    STOP_AT_IRREVOCABLE_P is true if caller is uninterested in blocks
-   following a TM_IRREVOCABLE call.  */
+   following a TM_IRREVOCABLE call.
+
+   INCLUDE_UNINSTRUMENTED_P is TRUE if we should include the
+   uninstrumented code path blocks in the list of basic blocks
+   returned, false otherwise.  */
 
 static vec<basic_block> 
 get_tm_region_blocks (basic_block entry_block,
 		      bitmap exit_blocks,
 		      bitmap irr_blocks,
 		      bitmap all_region_blocks,
-		      bool stop_at_irrevocable_p)
+		      bool stop_at_irrevocable_p,
+		      bool include_uninstrumented_p = true)
 {
   vec<basic_block> bbs = vNULL;
   unsigned i;
@@ -2427,7 +2432,9 @@  get_tm_region_blocks (basic_block entry_block,
 	continue;
 
       FOR_EACH_EDGE (e, ei, bb->succs)
-	if (!bitmap_bit_p (visited_blocks, e->dest->index))
+	if ((include_uninstrumented_p
+	     || !(e->flags & EDGE_TM_UNINSTRUMENTED))
+	    && !bitmap_bit_p (visited_blocks, e->dest->index))
 	  {
 	    bitmap_set_bit (visited_blocks, e->dest->index);
 	    bbs.safe_push (e->dest);
@@ -2442,11 +2449,19 @@  get_tm_region_blocks (basic_block entry_block,
   return bbs;
 }
 
+// Callback data for collect_bb2reg.
+struct bb2reg_stuff
+{
+  vec<tm_region_p> *bb2reg;
+  bool include_uninstrumented_p;
+};
+
 // Callback for expand_regions, collect innermost region data for each bb.
 static void *
 collect_bb2reg (struct tm_region *region, void *data)
 {
-  vec<tm_region_p> *bb2reg = (vec<tm_region_p> *) data;
+  struct bb2reg_stuff *stuff = (struct bb2reg_stuff *)data;
+  vec<tm_region_p> *bb2reg = stuff->bb2reg;
   vec<basic_block> queue;
   unsigned int i;
   basic_block bb;
@@ -2455,7 +2470,8 @@  collect_bb2reg (struct tm_region *region, void *data)
 				region->exit_blocks,
 				region->irr_blocks,
 				NULL,
-				/*stop_at_irr_p=*/true);
+				/*stop_at_irr_p=*/true,
+				stuff->include_uninstrumented_p);
 
   // We expect expand_region to perform a post-order traversal of the region
   // tree.  Therefore the last region seen for any bb is the innermost.
@@ -2468,7 +2484,8 @@  collect_bb2reg (struct tm_region *region, void *data)
 
 // Returns a vector, indexed by BB->INDEX, of the innermost tm_region to
 // which a basic block belongs.  Note that we only consider the instrumented
-// code paths for the region; the uninstrumented code paths are ignored.
+// code paths for the region; the uninstrumented code paths are ignored if
+// INCLUDE_UNINSTRUMENTED_P is false.
 //
 // ??? This data is very similar to the bb_regions array that is collected
 // during tm_region_init.  Or, rather, this data is similar to what could
@@ -2489,14 +2506,18 @@  collect_bb2reg (struct tm_region *region, void *data)
 // only known instance of this block sharing.
 
 static vec<tm_region_p>
-get_bb_regions_instrumented (bool traverse_clones)
+get_bb_regions_instrumented (bool traverse_clones,
+			     bool include_uninstrumented_p)
 {
   unsigned n = last_basic_block;
+  struct bb2reg_stuff stuff;
   vec<tm_region_p> ret;
 
   ret.create (n);
   ret.safe_grow_cleared (n);
-  expand_regions (all_tm_regions, collect_bb2reg, &ret, traverse_clones);
+  stuff.bb2reg = &ret;
+  stuff.include_uninstrumented_p = include_uninstrumented_p;
+  expand_regions (all_tm_regions, collect_bb2reg, &stuff, traverse_clones);
 
   return ret;
 }
@@ -2830,7 +2851,8 @@  execute_tm_mark (void)
   tm_log_init ();
 
   vec<tm_region_p> bb_regions
-    = get_bb_regions_instrumented (/*traverse_clones=*/true);
+    = get_bb_regions_instrumented (/*traverse_clones=*/true,
+				   /*include_uninstrumented_p=*/false);
   struct tm_region *r;
   unsigned i;
 
@@ -3004,7 +3026,8 @@  static unsigned int
 execute_tm_edges (void)
 {
   vec<tm_region_p> bb_regions
-    = get_bb_regions_instrumented (/*traverse_clones=*/false);
+    = get_bb_regions_instrumented (/*traverse_clones=*/false,
+				   /*include_uninstrumented_p=*/true);
   struct tm_region *r;
   unsigned i;