Patchwork [trans-mem] do not go through the run-time for clone calls

login
register
mail settings
Submitter Aldy Hernandez
Date Feb. 21, 2011, 2:29 p.m.
Message ID <4D6276AF.9040709@redhat.com>
Download mbox | patch
Permalink /patch/83847/
State New
Headers show

Comments

Aldy Hernandez - Feb. 21, 2011, 2:29 p.m.
On 02/16/11 13:23, Richard Henderson wrote:
> On 02/16/2011 08:59 AM, Aldy Hernandez wrote:
>> And now for the actual optimization...
>>
>> There is no need to go through the run-time when calling a clone.  We can just call the clone directly.
>>
>> OK?
>
> I think this is a lot closer to correct.  In the process I found some bits
> that look... odd to say the least.
>

Funny how we're moving all the logic back into 
ipa_tm_transform_calls_redirect().  I should've left 
ipa_tm_transform_calls_1() alone :).

Thanks for looking into this.  I have tested your patch, and added a 
changelog entry.  Below is the patch I am committing.

Aldy
* trans-mem.c (ipa_tm_transform_calls_redirect): Optimize for
	recursive calls inside clones.
	Abstract the logic from...
	(ipa_tm_transform_calls_1): ...here.

Patch

Index: testsuite/gcc.dg/tm/20110216.c
===================================================================
--- testsuite/gcc.dg/tm/20110216.c	(revision 0)
+++ testsuite/gcc.dg/tm/20110216.c	(revision 0)
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-options "-fgnu-tm" } */
+
+int george;
+
+__attribute__((transaction_callable))
+void q1()
+{
+  __transaction [[atomic]] {
+      george=999;
+  }
+  q1();
+}
+
+/* { dg-final { scan-assembler-not "_ITM_getTMCloneOrIrrevocable" } } */
Index: trans-mem.c
===================================================================
--- trans-mem.c	(revision 170359)
+++ trans-mem.c	(working copy)
@@ -4294,7 +4294,28 @@  ipa_tm_transform_calls_redirect (struct 
   struct cgraph_edge *e = cgraph_edge (node, stmt);
   tree fndecl = gimple_call_fndecl (stmt);
 
-  /* If there is a replacement, use it, otherwise use the clone.  */
+  /* For indirect calls, pass the address through the runtime.  */
+  if (fndecl == NULL)
+    {
+      *need_ssa_rename_p |=
+	ipa_tm_insert_gettmclone_call (node, region, gsi, stmt);
+      return;
+    }
+
+  /* If the call is to the TM runtime, do nothing further.  */
+  if (flags_from_decl_or_type (fndecl) & ECF_TM_OPS)
+    return;
+
+  /* Fixup recursive calls inside clones.  */
+  /* ??? Why did cgraph_copy_node_for_versioning update the call edges 
+     for recursion but not update the call statements themselves?  */
+  if (e->caller == e->callee && DECL_IS_TM_CLONE (current_function_decl))
+    {
+      gimple_call_set_fndecl (stmt, current_function_decl);
+      return;
+    }
+
+  /* If there is a replacement, use it.  */
   fndecl = find_tm_replacement_function (fndecl);
   if (fndecl)
     {
@@ -4313,6 +4334,11 @@  ipa_tm_transform_calls_redirect (struct 
 
 	 We do need to mark these nodes so that we get the proper
 	 result in expand_call_tm.  */
+      /* ??? This seems broken.  How is it that we're marking the
+	 CALLEE as may_enter_irr?  Surely we should be marking the
+	 CALLER.  Also note that find_tm_replacement_function also
+	 contains mappings into the TM runtime, e.g. memcpy.  These
+	 we know won't go irrevocable.  */
       new_node->local.tm_may_enter_irr = 1;
     }
   else
@@ -4358,27 +4384,12 @@  ipa_tm_transform_calls_1 (struct cgraph_
   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     {
       gimple stmt = gsi_stmt (gsi);
-      tree fndecl;
 
       if (!is_gimple_call (stmt))
 	continue;
       if (is_tm_pure_call (stmt))
 	continue;
 
-      fndecl = gimple_call_fndecl (stmt);
-
-      /* For indirect calls, pass the address through the runtime.  */
-      if (fndecl == NULL)
-	{
-	  need_ssa_rename |=
-	    ipa_tm_insert_gettmclone_call (node, region, &gsi, stmt);
-	  continue;
-	}
-
-      /* Don't scan past the end of the transaction.  */
-      if (is_tm_ending_fndecl (fndecl))
-	break;
-
       /* Redirect edges to the appropriate replacement or clone.  */
       ipa_tm_transform_calls_redirect (node, region, &gsi, &need_ssa_rename);
     }