From patchwork Thu Jul 29 17:43:17 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [trans-mem] optimize exception object memory Date: Thu, 29 Jul 2010 07:43:17 -0000 From: Aldy Hernandez X-Patchwork-Id: 60288 Message-Id: <20100729174317.GA2836@redhat.com> To: Richard Henderson Cc: gcc-patches@gcc.gnu.org > Wouldn't it be better to invent a new attribute that users could put > on their own functions as well? I'm not sure what the best name might > be, but perhaps just "thread_local"? I opted to use the "thread_local" attribute, instead of some transactional amalgamation since perhaps we can optimize other (non-transactional) code throughout the compiler with this attribute (once we merge). Also, once we can determine thread-localness, we can calculate transaction-localness as well. See patch. > Bonus points if we can figure out a way to apply this idea to > function arguments, as opposed to just function returns as above. I can do that in a follow up patch. In the meantime, how about this? ----------------------------- libitm/ * libitm.h (THROW): Define. Add attributes to _ITM_cxa_allocate_exception and _ITM_cxa_begin_catch. * eh_cpp.cc (_ITM_cxa_throw): Fix prototype argument. (_ITM_cxa_throw) Fix argument type. gcc/ * trans-mem.c (thread_private_new_memory): Handle `thread_local' attribute. (is_thread_local): New. * c-common.c (handle_thread_local_attribute): New. Add `thread_local' entry to c_common_att. * doc/extend.texi: Add documentation for `thread_local' attribute. Index: libitm/libitm.h =================================================================== --- libitm/libitm.h (revision 162234) +++ libitm/libitm.h (working copy) @@ -33,7 +33,10 @@ #include #ifdef __cplusplus +#define THROW throw() extern "C" { +#else +#define THROW #endif #ifdef __i386__ @@ -278,9 +281,9 @@ extern void *_ITM_getTMCloneSafe (void * extern void _ITM_registerTMCloneTable (void *, size_t); extern void _ITM_deregisterTMCloneTable (void *); -extern void *_ITM_cxa_allocate_exception (size_t); -extern void _ITM_cxa_throw (void *obj, void *tinfo, void *dest); -extern void *_ITM_cxa_begin_catch (void *exc_ptr); +extern void *_ITM_cxa_allocate_exception (size_t) THROW __attribute__((malloc, thread_local)); +extern void _ITM_cxa_throw (void *obj, void *tinfo, void (*)(void*)); +extern void *_ITM_cxa_begin_catch (void *exc_ptr) __attribute__((thread_local)); extern void _ITM_cxa_end_catch (void); extern void _ITM_commitTransactionEH(void *exc_ptr) ITM_REGPARM; Index: libitm/eh_cpp.cc =================================================================== --- libitm/eh_cpp.cc (revision 162234) +++ libitm/eh_cpp.cc (working copy) @@ -34,7 +34,7 @@ using namespace GTM; extern "C" { extern void *__cxa_allocate_exception (size_t) WEAK; -extern void __cxa_throw (void *, void *, void *) WEAK; +extern void __cxa_throw (void *, void *, void (*) (void*)) WEAK; extern void *__cxa_begin_catch (void *) WEAK; extern void *__cxa_end_catch (void) WEAK; extern void __cxa_tm_cleanup (void *, void *, unsigned int) WEAK; @@ -51,7 +51,7 @@ _ITM_cxa_allocate_exception (size_t size } void -_ITM_cxa_throw (void *obj, void *tinfo, void *dest) +_ITM_cxa_throw (void *obj, void *tinfo, void (*dest)(void*)) { gtm_tx()->cxa_unthrown = NULL; __cxa_throw (obj, tinfo, dest); Index: gcc/doc/extend.texi =================================================================== --- gcc/doc/extend.texi (revision 161318) +++ gcc/doc/extend.texi (working copy) @@ -1916,6 +1916,7 @@ attributes are currently defined for fun @code{destructor}, @code{used}, @code{unused}, @code{deprecated}, @code{weak}, @code{malloc}, @code{alias}, @code{warn_unused_result}, @code{nonnull}, @code{gnu_inline}, @code{externally_visible}, +@code{thread_local}, @code{hot}, @code{cold}, @code{artificial}, @code{error} and @code{warning}. Several other attributes are defined for functions on particular target systems. Other attributes, including @code{section} @@ -2572,6 +2573,19 @@ void __attribute__ ((interrupt, use_shad use_debug_exception_return)) v7 (); @end smallexample +@item thread_local +@cindex @code{thread_local} attribute. +This attribute, attached to a function definition, specifies that the +memory returned by this function lives entirely within the current +thread and is not visible to any other threads. This attribute is +only applicable to functions returning pointers. + +If this attribute is present, the compiler is able to optimize certain +memory accesses that are known to only affect the current thread. + +Currently, GCC only uses this attribute for transactional memory +code (available with @option{-fgnu-tm}). + @item interrupt_handler @cindex interrupt handler functions on the Blackfin, m68k, H8/300 and SH processors Use this attribute on the Blackfin, m68k, H8/300, H8/300H, H8S, and SH to Index: gcc/testsuite/c-c++-common/tm/thread-local.c =================================================================== --- gcc/testsuite/c-c++-common/tm/thread-local.c (revision 0) +++ gcc/testsuite/c-c++-common/tm/thread-local.c (revision 0) @@ -0,0 +1,17 @@ +// { dg-do compile } +// { dg-options "-fgnu-tm -O -fdump-tree-optimized" } + +/* Test that `thread_local' attribute inhibits memory instrumentation. */ + +extern char *funky (void) __attribute__((transaction_pure, thread_local)); + +void tranfunc() { + __transaction { + char *p; + p = funky(); + *p = 5; /* This should not be instrumented. */ + } +} + +// { dg-final { scan-tree-dump-times "_ITM_WU" 0 "optimized" } } +// { dg-final { cleanup-tree-dump "optimized" } } Index: gcc/trans-mem.c =================================================================== --- gcc/trans-mem.c (revision 162577) +++ gcc/trans-mem.c (working copy) @@ -222,6 +222,18 @@ is_tm_safe (tree x) return false; } +/* Return true if X has been marked `thread_local'. */ + +static bool +is_thread_local (tree x) +{ + tree attrs = get_attrs_for (x); + + if (attrs) + return lookup_attribute ("thread_local", attrs) != NULL; + return false; +} + /* Return true if CALL is const, or tm_pure. */ static bool @@ -1258,6 +1270,7 @@ thread_private_new_memory (basic_block e tm_new_mem_map_t elt, *elt_p; tree val = x; enum thread_memory_type retval = mem_transaction_local; + bool escapes_function_p = false; if (!entry_block || TREE_CODE (x) != SSA_NAME @@ -1283,17 +1296,14 @@ thread_private_new_memory (basic_block e do { if (ptr_deref_may_alias_global_p (x)) - { - /* Address escapes. This is not thread-private. */ - retval = mem_non_local; - goto new_memory_ret; - } + escapes_function_p = true; stmt = SSA_NAME_DEF_STMT (x); /* If the malloc call is outside the transaction, this is thread-local. */ if (retval != mem_thread_local + && gimple_code (stmt) != GIMPLE_NOP && !dominated_by_p (CDI_DOMINATORS, gimple_bb (stmt), entry_block)) retval = mem_thread_local; @@ -1357,6 +1367,34 @@ thread_private_new_memory (basic_block e retval = mem_non_local; new_memory_ret: + if (escapes_function_p) + { + if (is_gimple_call (stmt)) + { + tree fn = gimple_call_fn (stmt); + + /* If the address has been declared thread-local, believe it. */ + if (is_thread_local (TREE_TYPE (fn))) + { + /* If the thread-local call is inside the transaction, + the memory is transaction local. */ + if (dominated_by_p (CDI_DOMINATORS, + gimple_bb (stmt), entry_block)) + retval = mem_transaction_local; + else + retval = mem_thread_local; + } + else + { + /* Otherwise, we have to assume that any address that + escapes the function also escapes the thread. */ + retval = mem_non_local; + } + } + else + retval = mem_non_local; + } + elt_p->local_new_memory = retval; return retval; } Index: gcc/c-common.c =================================================================== --- gcc/c-common.c (revision 161318) +++ gcc/c-common.c (working copy) @@ -517,6 +517,7 @@ static tree handle_no_limit_stack_attrib static tree handle_pure_attribute (tree *, tree, tree, int, bool *); static tree handle_tm_attribute (tree *, tree, tree, int, bool *); static tree handle_tm_wrap_attribute (tree *, tree, tree, int, bool *); +static tree handle_thread_local_attribute (tree *, tree, tree, int, bool *); static tree handle_novops_attribute (tree *, tree, tree, int, bool *); static tree handle_deprecated_attribute (tree *, tree, tree, int, bool *); @@ -807,6 +808,8 @@ const struct attribute_spec c_common_att handle_tm_attribute }, { "transaction_wrap", 1, 1, true, false, false, handle_tm_wrap_attribute }, + { "thread_local", 0, 0, false, true, false, + handle_thread_local_attribute }, /* For internal use (marking of builtins) only. The name contains space to prevent its usage in source code. */ { "no vops", 0, 0, true, false, false, @@ -7205,6 +7208,30 @@ handle_pure_attribute (tree *node, tree return NULL_TREE; } +/* Handle a "thread_local" attribute; arguments as in + struct attribute_spec.handler. */ + +static tree +handle_thread_local_attribute (tree *node, tree name, tree ARG_UNUSED (args), + int ARG_UNUSED (flags), bool *no_add_attrs) +{ + if (TREE_CODE (*node) == FUNCTION_TYPE + && TREE_CODE (TREE_TYPE (*node)) == POINTER_TYPE) + { + /* Do nothing. No sense wasting a bit to store this + information. We can just get at the attribute string + directly later. */ + ; + } + else + { + warning (OPT_Wattributes, "%qE attribute ignored", name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + /* Digest an attribute list destined for a transactional memory statement. ALLOWED is the set of attributes that are allowed for this statement; return the attribute we parsed. Multiple attributes are never allowed. */