diff mbox

[gomp4] Add tables generation

Message ID 532B1C45.9020308@codesourcery.com
State New
Headers show

Commit Message

Bernd Schmidt March 20, 2014, 4:50 p.m. UTC
This is based on Michael Zolotukhin's patch 2/3 from a while ago. It 
adds functionality to build function/variable tables that will allow 
libgomp to look up offload target code based on the address of the 
corresponding host function. There are two alternatives, one based on 
named sections, and one based on a target hook when named sections are 
unavailable (as on ptx).

Committed on gomp-4_0-branch.


Bernd

Comments

Jakub Jelinek March 20, 2014, 6:56 p.m. UTC | #1
On Thu, Mar 20, 2014 at 05:50:13PM +0100, Bernd Schmidt wrote:
> --- libgcc/crtstuff.c	(revision 208706)
> +++ libgcc/crtstuff.c	(working copy)
> @@ -311,6 +311,15 @@ register_tm_clones (void)
>  }
>  #endif /* USE_TM_CLONE_REGISTRY */
>  
> +#if defined(HAVE_GAS_HIDDEN) && defined(ENABLE_OFFLOADING)
> +void *_omp_func_table[0]
> +  __attribute__ ((__used__, visibility ("protected"),
> +		  section (".offload_func_table_section"))) = { };
> +void *_omp_var_table[0]
> +  __attribute__ ((__used__, visibility ("protected"),
> +		  section (".offload_var_table_section"))) = { };
> +#endif
> +
>  #if defined(INIT_SECTION_ASM_OP) || defined(INIT_ARRAY_SECTION_ASM_OP)
>  
>  #ifdef OBJECT_FORMAT_ELF
> @@ -752,6 +761,23 @@ __do_global_ctors (void)
>  #error "What are you doing with crtstuff.c, then?"
>  #endif
>  
> +#if defined(HAVE_GAS_HIDDEN) && defined(ENABLE_OFFLOADING)
> +void *_omp_funcs_end[0]
> +  __attribute__ ((__used__, visibility ("protected"),
> +		  section (".offload_func_table_section"))) = { };
> +void *_omp_vars_end[0]
> +  __attribute__ ((__used__, visibility ("protected"),
> +		  section (".offload_var_table_section"))) = { };
> +extern void *_omp_func_table[];
> +extern void *_omp_var_table[];
> +void *__OPENMP_TARGET__[] __attribute__ ((__visibility__ ("protected"))) =
> +{
> +  &_omp_func_table, &_omp_funcs_end,
> +  &_omp_var_table, &_omp_vars_end
> +};
> +#endif
> +
> +
>  #else /* ! CRT_BEGIN && ! CRT_END */
>  #error "One of CRT_BEGIN or CRT_END must be defined."
>  #endif

I don't like these libgcc changes at all.

First of all, we should find a way which has no runtime costs for at least
programs not compiled with -fopenmp/-fopenacc at all, preferrably no runtime
cost for any program or shared library that actually doesn't contain any
offloading code.  The above costs every single binary/shared library 5
exported symbols (with the worst ever visibility, protected should basically
never be used, it is even more costly than normal symbol visibility, why it
isn't just hidden?) and 4 * sizeof (void *) bytes in data section and 4
runtime relocations (with the protected visibility costly ones).

When we were discussing the design last year, my strong preference was that
either this lives in some other crt object that mkoffload/linker plugin adds
to link, or that it would be completely mkoffload synthetized.

Also, I'd prefer if __OPENMP_TARGET__ header was as compact as possible for the
case when there is nothing to offload (ideally, if __OPENMP_TARGET__ symbol
is never referenced, not create it at all, if it is referenced, but there is
nothing to offload, say just a single 0 byte, otherwise say an uleb128
number how many different kinds of offload data there are and then for each
one some identification which offload it is for, the tables, where to find
it.

	Jakub
Ilya Verbin March 27, 2014, 1:31 p.m. UTC | #2
>+#ifdef ACCEL_COMPILER
>+  /* Decls are placed in reversed order in fat-objects, so we need to
>+     revert them back if we compile target.  */
>...

Actually this change is incorrect.  If host binary is built with -flto, then
both host gcc and target gcc read decls from lto and target_lto sections in the
same order, and resulting tables are identical.
So, in this case there is no need to change the order.

But what if one wants to link non-lto host object files with a target image,
produced from target_lto sections?
In this case the order of host table, produced during ordinary compilation will
differ from the order of target table, produced during lto compilation.

Jakub, what do you think?


Here is a simple example with 4 functions and 4 global variables:

#define N 100

#pragma omp declare target
int arr1[N];
int arr2[N];
int arr3[N];
int arr4[N];
#pragma omp end declare target

void foo ()
{
  #pragma omp target
  for (int i = 0; i < N; i++)
    arr1[i] = 41 + i;

  #pragma omp target
  for (int i = 0; i < N; i++)
    arr2[i] = 42 + i;

  #pragma omp target
  for (int i = 0; i < N; i++)
    arr3[i] = 43 + i;

  #pragma omp target
  for (int i = 0; i < N; i++)
    arr4[i] = 44 + i;
}


I print DECL_NAME ((*v_funcs)[i]) and DECL_NAME ((*v_vars)[i]) in
omp_finish_file:

Host compilation:
$ gcc -std=c99 -fopenmp -flto -c test.c -o test.o

host func 0: foo._omp_fn.0
host func 1: foo._omp_fn.1
host func 2: foo._omp_fn.2
host func 3: foo._omp_fn.3
host var 0:  arr4
host var 1:  arr3
host var 2:  arr2
host var 3:  arr1

Host lto and target lto:
$ gcc -std=c99 -fopenmp -flto test.o -o test

host func 0: foo._omp_fn.3
host func 1: foo._omp_fn.2
host func 2: foo._omp_fn.1
host func 3: foo._omp_fn.0
host var 0:  arr4
host var 1:  arr3
host var 2:  arr2
host var 3:  arr1

target func 0: foo._omp_fn.3
target func 1: foo._omp_fn.2
target func 2: foo._omp_fn.1
target func 3: foo._omp_fn.0
target var 0:  arr4
target var 1:  arr3
target var 2:  arr2
target var 3:  arr1

The func tables produced during ordinary compilation and lto are different.

  -- Ilya
Bernd Schmidt March 27, 2014, 1:41 p.m. UTC | #3
On 03/27/2014 02:31 PM, Ilya Verbin wrote:
>> +#ifdef ACCEL_COMPILER
>> +  /* Decls are placed in reversed order in fat-objects, so we need to
>> +     revert them back if we compile target.  */
>> ...
>
> Actually this change is incorrect.  If host binary is built with -flto, then
> both host gcc and target gcc read decls from lto and target_lto sections in the
> same order, and resulting tables are identical.
> So, in this case there is no need to change the order.
>
> But what if one wants to link non-lto host object files with a target image,
> produced from target_lto sections?
> In this case the order of host table, produced during ordinary compilation will
> differ from the order of target table, produced during lto compilation.

I haven't looked into the ordering issue here (the reversing of the 
order is from Michael's original patch), because I still think the whole 
scheme can't work and I was intending to produce a testcase to 
demonstrate that. Looks like you saved me some time here :)

My suggestion would be to augment the tables with the unique-name scheme 
I posted previously. I think the objections against it were a little 
exaggerated, and it would ensure reliability.


Bernd
Bernd Schmidt March 27, 2014, 1:42 p.m. UTC | #4
On 03/27/2014 02:31 PM, Ilya Verbin wrote:
>> +#ifdef ACCEL_COMPILER
>> +  /* Decls are placed in reversed order in fat-objects, so we need to
>> +     revert them back if we compile target.  */
>> ...
>
> Actually this change is incorrect.  If host binary is built with -flto, then
> both host gcc and target gcc read decls from lto and target_lto sections in the
> same order, and resulting tables are identical.
> So, in this case there is no need to change the order.
>
> But what if one wants to link non-lto host object files with a target image,
> produced from target_lto sections?
> In this case the order of host table, produced during ordinary compilation will
> differ from the order of target table, produced during lto compilation.

I haven't looked into the ordering issue here (the reversing of the 
order is from Michael's original patch), because I still think the whole 
scheme can't work and I was intending to produce a testcase to 
demonstrate that. Looks like you saved me some time here :)

My suggestion would be to augment the tables with the unique-name scheme 
I posted previously. I think the objections against it were a little 
exaggerated, and it would ensure reliability.


Bernd
Jakub Jelinek March 27, 2014, 2:02 p.m. UTC | #5
On Thu, Mar 27, 2014 at 05:31:29PM +0400, Ilya Verbin wrote:
> >+#ifdef ACCEL_COMPILER
> >+  /* Decls are placed in reversed order in fat-objects, so we need to
> >+     revert them back if we compile target.  */
> >...
> 
> Actually this change is incorrect.  If host binary is built with -flto, then
> both host gcc and target gcc read decls from lto and target_lto sections in the
> same order, and resulting tables are identical.
> So, in this case there is no need to change the order.
> 
> But what if one wants to link non-lto host object files with a target image,
> produced from target_lto sections?
> In this case the order of host table, produced during ordinary compilation will
> differ from the order of target table, produced during lto compilation.
> 
> Jakub, what do you think?

The tables need to be created before IPA, that way it really shouldn't
matter in what order you emit them.  E.g. the outlined target functions
could be added to the table during ompexp pass which actually creates the
outlined functions, the vars need to be added before target lto or host lto
is streamed.

	Jakub
Ilya Verbin March 27, 2014, 4:13 p.m. UTC | #6
On 27 Mar 15:02, Jakub Jelinek wrote:
> The tables need to be created before IPA, that way it really shouldn't
> matter in what order you emit them.  E.g. the outlined target functions
> could be added to the table during ompexp pass which actually creates the
> outlined functions, the vars need to be added before target lto or host lto
> is streamed.

For host tables it's ok, but when target compiler will create tables with functions?
It reads bytecode from target_lto sections, so it never executes ompexp pass.

  -- Ilya
Jakub Jelinek March 27, 2014, 4:16 p.m. UTC | #7
On Thu, Mar 27, 2014 at 08:13:00PM +0400, Ilya Verbin wrote:
> On 27 Mar 15:02, Jakub Jelinek wrote:
> > The tables need to be created before IPA, that way it really shouldn't
> > matter in what order you emit them.  E.g. the outlined target functions
> > could be added to the table during ompexp pass which actually creates the
> > outlined functions, the vars need to be added before target lto or host lto
> > is streamed.
> 
> For host tables it's ok, but when target compiler will create tables with functions?
> It reads bytecode from target_lto sections, so it never executes ompexp pass.

Which is why the table created for host by the ompexp pass should be
streamed into the target_lto sections (marked specially somehow, special
attribute or whatever), and then corresponding target table created from
that, rather then created from some possibly different ordering there.

	Jakub
Ilya Verbin March 27, 2014, 6:45 p.m. UTC | #8
On 27 Mar 17:16, Jakub Jelinek wrote:
> Which is why the table created for host by the ompexp pass should be
> streamed into the target_lto sections (marked specially somehow, special
> attribute or whatever), and then corresponding target table created from
> that, rather then created from some possibly different ordering there.

Ok, this should work.  I'll rewrite tables generation.

  -- Ilya
Thomas Schwinge April 2, 2014, 7:34 a.m. UTC | #9
Hi!

On Thu, 20 Mar 2014 17:50:13 +0100, Bernd Schmidt <bernds@codesourcery.com> wrote:
> This is based on Michael Zolotukhin's patch 2/3 from a while ago. It 
> adds functionality to build function/variable tables that will allow 
> libgomp to look up offload target code based on the address of the 
> corresponding host function. There are two alternatives, one based on 
> named sections, and one based on a target hook when named sections are 
> unavailable (as on ptx).
> 
> Committed on gomp-4_0-branch.

I see regressions in the libgomp testsuite for configurations where
offloading is not enabled:

    spawn [...]/build/gcc/xgcc -B[...]/build/gcc/ [...]/source/libgomp/testsuite/libgomp.c/for-3.c -B[...]/build/x86_64-unknown-linux-gnu/./libgomp/ -B[...]/build/x86_64-unknown-linux-gnu/./libgomp/.libs -I[...]/build/x86_64-unknown-linux-gnu/./libgomp -I[...]/source/libgomp/testsuite/.. -fmessage-length=0 -fno-diagnostics-show-caret -fdiagnostics-color=never -fopenmp -std=gnu99 -fopenmp -L[...]/build/x86_64-unknown-linux-gnu/./libgomp/.libs -lm -o ./for-3.exe
    /tmp/ccGnT0ei.o: In function `main':
    for-3.c:(.text+0x21032): undefined reference to `__OPENMP_TARGET__'
    collect2: error: ld returned 1 exit status

I suppose that's because even if...

> --- gcc/configure.ac	(revision 208715)
> +++ gcc/configure.ac	(working copy)
> @@ -887,6 +887,10 @@ AC_SUBST(enable_accelerator)
>  offload_targets=`echo $offload_targets | sed -e 's#,#:#'`
>  AC_DEFINE_UNQUOTED(OFFLOAD_TARGETS, "$offload_targets",
>   [Define to hold the list of target names suitable for offloading.])
> +if test x$offload_targets != x; then
> +  AC_DEFINE(ENABLE_OFFLOADING, 1,
> +    [Define this to enable support for offloading.])
> +fi

... offloading is not enabled, this...

> --- gcc/omp-low.c	(revision 208706)
> +++ gcc/omp-low.c	(working copy)
> @@ -8671,19 +8672,22 @@ expand_omp_target (struct omp_region *re
>      }
>  
>    gimple g;
> -  /* FIXME: This will be address of
> -     extern char __OPENMP_TARGET__[] __attribute__((visibility ("hidden")))
> -     symbol, as soon as the linker plugin is able to create it for us.  */
> -  tree openmp_target = build_zero_cst (ptr_type_node);
> +  tree openmp_target
> +    = build_decl (UNKNOWN_LOCATION, VAR_DECL,
> +		  get_identifier ("__OPENMP_TARGET__"), ptr_type_node);
> +  TREE_PUBLIC (openmp_target) = 1;
> +  DECL_EXTERNAL (openmp_target) = 1;
>    if (kind == GF_OMP_TARGET_KIND_REGION)
>      {
>        tree fnaddr = build_fold_addr_expr (child_fn);
> -      g = gimple_build_call (builtin_decl_explicit (start_ix), 7,
> -			     device, fnaddr, openmp_target, t1, t2, t3, t4);
> +      g = gimple_build_call (builtin_decl_explicit (start_ix), 7, device,
> +			     fnaddr, build_fold_addr_expr (openmp_target),
> +			     t1, t2, t3, t4);
>      }
>    else
> -    g = gimple_build_call (builtin_decl_explicit (start_ix), 6,
> -			   device, openmp_target, t1, t2, t3, t4);
> +    g = gimple_build_call (builtin_decl_explicit (start_ix), 6, device,
> +			   build_fold_addr_expr (openmp_target),
> +			   t1, t2, t3, t4);

... will now cause a reference to __OPENMP_TARGET__, but...

> --- libgcc/crtstuff.c	(revision 208706)
> +++ libgcc/crtstuff.c	(working copy)
> @@ -311,6 +311,15 @@ register_tm_clones (void)
>  }
>  #endif /* USE_TM_CLONE_REGISTRY */
>  
> +#if defined(HAVE_GAS_HIDDEN) && defined(ENABLE_OFFLOADING)
> +void *_omp_func_table[0]
> +  __attribute__ ((__used__, visibility ("protected"),
> +		  section (".offload_func_table_section"))) = { };
> +void *_omp_var_table[0]
> +  __attribute__ ((__used__, visibility ("protected"),
> +		  section (".offload_var_table_section"))) = { };
> +#endif
> +
>  #if defined(INIT_SECTION_ASM_OP) || defined(INIT_ARRAY_SECTION_ASM_OP)
>  
>  #ifdef OBJECT_FORMAT_ELF
> @@ -752,6 +761,23 @@ __do_global_ctors (void)
>  #error "What are you doing with crtstuff.c, then?"
>  #endif
>  
> +#if defined(HAVE_GAS_HIDDEN) && defined(ENABLE_OFFLOADING)
> +void *_omp_funcs_end[0]
> +  __attribute__ ((__used__, visibility ("protected"),
> +		  section (".offload_func_table_section"))) = { };
> +void *_omp_vars_end[0]
> +  __attribute__ ((__used__, visibility ("protected"),
> +		  section (".offload_var_table_section"))) = { };
> +extern void *_omp_func_table[];
> +extern void *_omp_var_table[];
> +void *__OPENMP_TARGET__[] __attribute__ ((__visibility__ ("protected"))) =
> +{
> +  &_omp_func_table, &_omp_funcs_end,
> +  &_omp_var_table, &_omp_vars_end
> +};
> +#endif

... __OPENMP_TARGET__ is not being defined here for the
!ENABLE_OFFLOADING case.  In
<http://news.gmane.org/find-root.php?message_id=%3C20130905082455.GH23437%40tucnak.redhat.com%3E>,
Jakub had suggested this to be a weak symbol, so we'd get NULL in this
case, which would be what's needed here, I think?


Also, I'd suggest to rename __OPENMP_TARGET__ (and similar ones) to
__GNU_OFFLOAD__ (or similar).  As we're using this offloading stuff for
both OpenACC and OpenMP target, it makes sense to me to use a generic
name; we still have the chance to do so now while this stuff is not yet
in trunk.


Grüße,
 Thomas
diff mbox

Patch

Index: libgcc/ChangeLog
===================================================================
--- libgcc/ChangeLog	(revision 208706)
+++ libgcc/ChangeLog	(working copy)
@@ -1,3 +1,9 @@ 
+2014-03-20  Bernd Schmidt  <bernds@codesourcery.com>
+
+	* crtstuff.c (_omp_func_table, _omp_var_table, _omp_funcs_end,
+	_omp_vars_end): New array fragments.
+	(__OPENMP_TARGET__): New variable.
+
 2014-02-28  Joey Ye  <joey.ye@arm.com>
 
 	PR libgcc/60166
Index: gcc/ChangeLog
===================================================================
--- gcc/ChangeLog	(revision 208720)
+++ gcc/ChangeLog	(working copy)
@@ -1,5 +1,20 @@ 
 2014-03-20  Bernd Schmidt  <bernds@codesourcery.com>
 
+	Mostly by Michael Zolotukhin:
+	* omp-low.c: Include "common/common-target.h".
+	(expand_omp_target): Pass in address of __OPENMP_TARGET__.
+	(add_decls_addresses_to_decl_constructor, omp_finish_file): New
+	functions.
+	* omp-low.h (omp_finish_file): Declare.
+	* toplev.c: Include "omp-low.h".
+	(compile_file): Call omp_finish_file.
+	* target.def (record_offload_symbol): New hook.
+	* doc/tm.texi.in (TARGET_RECORD_OFFLOAD_SYMBOL): Add.
+	* doc/tm.texi: Regenerate.
+	* configure.ac (ENABLE_OFFLOADING): Define if we have offload_targets.
+	* configure: Regenerate.
+	* config.in: Regenerate.
+
 	* config/darwin.c: Include "lto-section-names.h".
 	(LTO_SEGMENT_NAME): Don't define.
 	* config/i386/winnt.c: Include "lto-section-names.h".
Index: gcc/config.in
===================================================================
--- gcc/config.in	(revision 208715)
+++ gcc/config.in	(working copy)
@@ -139,6 +139,12 @@ 
 #endif
 
 
+/* Define this to enable support for offloading. */
+#ifndef USED_FOR_TARGET
+#undef ENABLE_OFFLOADING
+#endif
+
+
 /* Define to enable plugin support. */
 #ifndef USED_FOR_TARGET
 #undef ENABLE_PLUGIN
Index: gcc/configure
===================================================================
--- gcc/configure	(revision 208715)
+++ gcc/configure	(working copy)
@@ -7363,6 +7363,11 @@  cat >>confdefs.h <<_ACEOF
 #define OFFLOAD_TARGETS "$offload_targets"
 _ACEOF
 
+if test x$offload_targets != x; then
+
+$as_echo "#define ENABLE_OFFLOADING 1" >>confdefs.h
+
+fi
 
 
 # Check whether --with-multilib-list was given.
@@ -18008,7 +18013,7 @@  else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 18011 "configure"
+#line 18016 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -18114,7 +18119,7 @@  else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 18117 "configure"
+#line 18122 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
Index: gcc/configure.ac
===================================================================
--- gcc/configure.ac	(revision 208715)
+++ gcc/configure.ac	(working copy)
@@ -887,6 +887,10 @@  AC_SUBST(enable_accelerator)
 offload_targets=`echo $offload_targets | sed -e 's#,#:#'`
 AC_DEFINE_UNQUOTED(OFFLOAD_TARGETS, "$offload_targets",
  [Define to hold the list of target names suitable for offloading.])
+if test x$offload_targets != x; then
+  AC_DEFINE(ENABLE_OFFLOADING, 1,
+    [Define this to enable support for offloading.])
+fi
 
 AC_ARG_WITH(multilib-list,
 [AS_HELP_STRING([--with-multilib-list], [select multilibs (AArch64, SH and x86-64 only)])],
Index: gcc/doc/tm.texi
===================================================================
--- gcc/doc/tm.texi	(revision 208706)
+++ gcc/doc/tm.texi	(working copy)
@@ -11418,3 +11418,9 @@  If defined, this function returns an app
 @deftypefn {Target Hook} void TARGET_ATOMIC_ASSIGN_EXPAND_FENV (tree *@var{hold}, tree *@var{clear}, tree *@var{update})
 ISO C11 requires atomic compound assignments that may raise floating-point exceptions to raise exceptions corresponding to the arithmetic operation whose result was successfully stored in a compare-and-exchange sequence.  This requires code equivalent to calls to @code{feholdexcept}, @code{feclearexcept} and @code{feupdateenv} to be generated at appropriate points in the compare-and-exchange sequence.  This hook should set @code{*@var{hold}} to an expression equivalent to the call to @code{feholdexcept}, @code{*@var{clear}} to an expression equivalent to the call to @code{feclearexcept} and @code{*@var{update}} to an expression equivalent to the call to @code{feupdateenv}.  The three expressions are @code{NULL_TREE} on entry to the hook and may be left as @code{NULL_TREE} if no code is required in a particular place.  The default implementation leaves all three expressions as @code{NULL_TREE}.  The @code{__atomic_feraiseexcept} function from @code{libatomic} may be of use as part of the code generated in @code{*@var{update}}.
 @end deftypefn
+
+@deftypefn {Target Hook} void TARGET_RECORD_OFFLOAD_SYMBOL (tree)
+Used when offloaded functions are seen in the compilation unit and no named
+sections are available.  It is called once for each symbol that must be
+recorded in the offload function and variable table.
+@end deftypefn
Index: gcc/doc/tm.texi.in
===================================================================
--- gcc/doc/tm.texi.in	(revision 208706)
+++ gcc/doc/tm.texi.in	(working copy)
@@ -8414,3 +8414,5 @@  and the associated definitions of those
 @hook TARGET_ATOMIC_ALIGN_FOR_MODE
 
 @hook TARGET_ATOMIC_ASSIGN_EXPAND_FENV
+
+@hook TARGET_RECORD_OFFLOAD_SYMBOL
Index: gcc/omp-low.c
===================================================================
--- gcc/omp-low.c	(revision 208706)
+++ gcc/omp-low.c	(working copy)
@@ -64,6 +64,7 @@  along with GCC; see the file COPYING3.
 #include "optabs.h"
 #include "cfgloop.h"
 #include "target.h"
+#include "common/common-target.h"
 #include "omp-low.h"
 #include "gimple-low.h"
 #include "tree-cfgcleanup.h"
@@ -8671,19 +8672,22 @@  expand_omp_target (struct omp_region *re
     }
 
   gimple g;
-  /* FIXME: This will be address of
-     extern char __OPENMP_TARGET__[] __attribute__((visibility ("hidden")))
-     symbol, as soon as the linker plugin is able to create it for us.  */
-  tree openmp_target = build_zero_cst (ptr_type_node);
+  tree openmp_target
+    = build_decl (UNKNOWN_LOCATION, VAR_DECL,
+		  get_identifier ("__OPENMP_TARGET__"), ptr_type_node);
+  TREE_PUBLIC (openmp_target) = 1;
+  DECL_EXTERNAL (openmp_target) = 1;
   if (kind == GF_OMP_TARGET_KIND_REGION)
     {
       tree fnaddr = build_fold_addr_expr (child_fn);
-      g = gimple_build_call (builtin_decl_explicit (start_ix), 7,
-			     device, fnaddr, openmp_target, t1, t2, t3, t4);
+      g = gimple_build_call (builtin_decl_explicit (start_ix), 7, device,
+			     fnaddr, build_fold_addr_expr (openmp_target),
+			     t1, t2, t3, t4);
     }
   else
-    g = gimple_build_call (builtin_decl_explicit (start_ix), 6,
-			   device, openmp_target, t1, t2, t3, t4);
+    g = gimple_build_call (builtin_decl_explicit (start_ix), 6, device,
+			   build_fold_addr_expr (openmp_target),
+			   t1, t2, t3, t4);
   gimple_set_location (g, gimple_location (entry_stmt));
   gsi_insert_before (&gsi, g, GSI_SAME_STMT);
   if (kind != GF_OMP_TARGET_KIND_REGION)
@@ -12801,4 +12805,139 @@  make_pass_omp_simd_clone (gcc::context *
   return new pass_omp_simd_clone (ctxt);
 }
 
+/* Helper function for omp_finish_file routine.
+   Takes decls from V_DECLS and adds their addresses and sizes to
+   constructor-vector V_CTOR.  It will be later used as DECL_INIT for decl
+   representing a global symbol for OpenMP descriptor.  */
+static void
+add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
+					 vec<constructor_elt, va_gc> *v_ctor)
+{
+  unsigned len = vec_safe_length (v_decls);
+  for (unsigned i = 0; i < len; i++)
+    {
+      tree it = (*v_decls)[i];
+      bool is_function = TREE_CODE (it) != VAR_DECL;
+
+      CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, build_fold_addr_expr (it));
+      if (!is_function)
+	CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE,
+				fold_convert (const_ptr_type_node,
+					      DECL_SIZE (it)));
+    }
+}
+
+/* Create new symbol containing (address, size) pairs for omp-marked
+   functions and global variables.  */
+void
+omp_finish_file (void)
+{
+  struct cgraph_node *node;
+  struct varpool_node *vnode;
+  const char *funcs_section_name = ".offload_func_table_section";
+  const char *vars_section_name = ".offload_var_table_section";
+  vec<tree, va_gc> *v_funcs, *v_vars;
+
+  vec_alloc (v_vars, 0);
+  vec_alloc (v_funcs, 0);
+
+  /* Collect all omp-target functions.  */
+  FOR_EACH_DEFINED_FUNCTION (node)
+    {
+      /* TODO: This check could fail on functions, created by omp
+	 parallel/task pragmas.  It's better to name outlined for offloading
+	 functions in some different way and to check here the function name.
+	 It could be something like "*_omp_tgtfn" in contrast with "*_omp_fn"
+	 for functions from omp parallel/task pragmas.  */
+      if (!lookup_attribute ("omp declare target",
+			     DECL_ATTRIBUTES (node->decl))
+	  || !DECL_ARTIFICIAL (node->decl))
+	continue;
+      vec_safe_push (v_funcs, node->decl);
+    }
+  /* Collect all omp-target global variables.  */
+  FOR_EACH_DEFINED_VARIABLE (vnode)
+    {
+      if (!lookup_attribute ("omp declare target",
+			     DECL_ATTRIBUTES (vnode->decl))
+	  || TREE_CODE (vnode->decl) != VAR_DECL
+	  || DECL_SIZE (vnode->decl) == 0)
+	continue;
+
+      vec_safe_push (v_vars, vnode->decl);
+    }
+  unsigned num_vars = vec_safe_length (v_vars);
+  unsigned num_funcs = vec_safe_length (v_funcs);
+
+  if (num_vars == 0 && num_funcs == 0)
+    return;
+
+#ifdef ACCEL_COMPILER
+  /* Decls are placed in reversed order in fat-objects, so we need to
+     revert them back if we compile target.  */
+  for (unsigned i = 0; i < num_funcs / 2; i++)
+    {
+      tree it = (*v_funcs)[i];
+      (*v_funcs)[i] = (*v_funcs)[num_funcs - i - 1];
+      (*v_funcs)[num_funcs - i - 1] = it;
+    }
+  for (unsigned i = 0; i < num_vars / 2; i++)
+    {
+      tree it = (*v_vars)[i];
+      (*v_vars)[i] = (*v_vars)[num_vars - i - 1];
+      (*v_vars)[num_vars - i - 1] = it;
+    }
+#endif
+
+  if (targetm_common.have_named_sections)
+    {
+      vec<constructor_elt, va_gc> *v_f, *v_v;
+      vec_alloc (v_f, num_funcs);
+      vec_alloc (v_v, num_vars * 2);
+
+      add_decls_addresses_to_decl_constructor (v_funcs, v_f);
+      add_decls_addresses_to_decl_constructor (v_vars, v_v);
+
+      tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
+						    num_vars * 2);
+      tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
+						     num_funcs);
+      TYPE_ALIGN (vars_decl_type) = TYPE_ALIGN (pointer_sized_int_node);
+      TYPE_ALIGN (funcs_decl_type) = TYPE_ALIGN (pointer_sized_int_node);
+      tree ctor_v = build_constructor (vars_decl_type, v_v);
+      tree ctor_f = build_constructor (funcs_decl_type, v_f);
+      TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1;
+      TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1;
+      tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
+				    get_identifier (".omp_func_table"),
+				    funcs_decl_type);
+      tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
+				   get_identifier (".omp_var_table"),
+				   vars_decl_type);
+      TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1;
+      DECL_INITIAL (funcs_decl) = ctor_f;
+      DECL_INITIAL (vars_decl) = ctor_v;
+      DECL_SECTION_NAME (funcs_decl)
+	= build_string (strlen (funcs_section_name), funcs_section_name);
+      DECL_SECTION_NAME (vars_decl)
+	= build_string (strlen (vars_section_name), vars_section_name);
+ 
+      varpool_assemble_decl (varpool_node_for_decl (vars_decl));
+      varpool_assemble_decl (varpool_node_for_decl (funcs_decl));
+   }
+  else
+    {
+      for (unsigned i = 0; i < num_funcs; i++)
+	{
+	  tree it = (*v_funcs)[i];
+	  targetm.record_offload_symbol (it);
+	}  
+      for (unsigned i = 0; i < num_funcs; i++)
+	{
+	  tree it = (*v_vars)[i];
+	  targetm.record_offload_symbol (it);
+	}  
+    }
+}
+
 #include "gt-omp-low.h"
Index: gcc/omp-low.h
===================================================================
--- gcc/omp-low.h	(revision 208706)
+++ gcc/omp-low.h	(working copy)
@@ -27,5 +27,6 @@  extern void omp_expand_local (basic_bloc
 extern void free_omp_regions (void);
 extern tree omp_reduction_init (tree, tree);
 extern bool make_gimple_omp_edges (basic_block, struct omp_region **, int *);
+extern void omp_finish_file (void);
 
 #endif /* GCC_OMP_LOW_H */
Index: gcc/target.def
===================================================================
--- gcc/target.def	(revision 208706)
+++ gcc/target.def	(working copy)
@@ -1772,6 +1772,14 @@  HOOK_VECTOR_END (vectorize)
 #undef HOOK_PREFIX
 #define HOOK_PREFIX "TARGET_"
 
+DEFHOOK
+(record_offload_symbol,
+ "Used when offloaded functions are seen in the compilation unit and no named\n\
+sections are available.  It is called once for each symbol that must be\n\
+recorded in the offload function and variable table.",
+ void, (tree),
+ hook_void_tree)
+
 /* Allow target specific overriding of option settings after options have
   been changed by an attribute or pragma or when it is reset at the
   end of the code affected by an attribute or pragma.  */
Index: gcc/toplev.c
===================================================================
--- gcc/toplev.c	(revision 208706)
+++ gcc/toplev.c	(working copy)
@@ -79,6 +79,7 @@  along with GCC; see the file COPYING3.
 #include "context.h"
 #include "pass_manager.h"
 #include "optabs.h"
+#include "omp-low.h"
 
 #if defined(DBX_DEBUGGING_INFO) || defined(XCOFF_DEBUGGING_INFO)
 #include "dbxout.h"
@@ -577,6 +578,8 @@  compile_file (void)
       if (flag_sanitize & SANITIZE_THREAD)
 	tsan_finish_file ();
 
+      omp_finish_file ();
+
       output_shared_constant_pool ();
       output_object_blocks ();
       finish_tm_clone_pairs ();
Index: libgcc/crtstuff.c
===================================================================
--- libgcc/crtstuff.c	(revision 208706)
+++ libgcc/crtstuff.c	(working copy)
@@ -311,6 +311,15 @@  register_tm_clones (void)
 }
 #endif /* USE_TM_CLONE_REGISTRY */
 
+#if defined(HAVE_GAS_HIDDEN) && defined(ENABLE_OFFLOADING)
+void *_omp_func_table[0]
+  __attribute__ ((__used__, visibility ("protected"),
+		  section (".offload_func_table_section"))) = { };
+void *_omp_var_table[0]
+  __attribute__ ((__used__, visibility ("protected"),
+		  section (".offload_var_table_section"))) = { };
+#endif
+
 #if defined(INIT_SECTION_ASM_OP) || defined(INIT_ARRAY_SECTION_ASM_OP)
 
 #ifdef OBJECT_FORMAT_ELF
@@ -752,6 +761,23 @@  __do_global_ctors (void)
 #error "What are you doing with crtstuff.c, then?"
 #endif
 
+#if defined(HAVE_GAS_HIDDEN) && defined(ENABLE_OFFLOADING)
+void *_omp_funcs_end[0]
+  __attribute__ ((__used__, visibility ("protected"),
+		  section (".offload_func_table_section"))) = { };
+void *_omp_vars_end[0]
+  __attribute__ ((__used__, visibility ("protected"),
+		  section (".offload_var_table_section"))) = { };
+extern void *_omp_func_table[];
+extern void *_omp_var_table[];
+void *__OPENMP_TARGET__[] __attribute__ ((__visibility__ ("protected"))) =
+{
+  &_omp_func_table, &_omp_funcs_end,
+  &_omp_var_table, &_omp_vars_end
+};
+#endif
+
+
 #else /* ! CRT_BEGIN && ! CRT_END */
 #error "One of CRT_BEGIN or CRT_END must be defined."
 #endif