diff mbox

[v4] SH FDPIC backend support

Message ID 1445783331.8060.3.camel@t-online.de
State New
Headers show

Commit Message

Oleg Endo Oct. 25, 2015, 2:28 p.m. UTC
On Fri, 2015-10-23 at 02:32 -0400, Rich Felker wrote:
> Here's my updated version of the FDPIC patch with all requested
> changes made and Changelog added. I've included all the original
> authors. This is my first time writing such an extensive Changelog
> entry so please let me know if there are things I got wrong.

I took the liberty and fixed some minor formatting trivia and extracted
functions sh_emit_storesi and sh_emit_storehi which are used in
 sh_trampoline_init to effectively memcpy code into the trampoline
area.  Can you please check it?  If it's OK I'll commit the attached
patch to trunk.

Cheers,
Oleg

Comments

Rich Felker Oct. 27, 2015, 2:47 a.m. UTC | #1
On Sun, Oct 25, 2015 at 11:28:51PM +0900, Oleg Endo wrote:
> On Fri, 2015-10-23 at 02:32 -0400, Rich Felker wrote:
> > Here's my updated version of the FDPIC patch with all requested
> > changes made and Changelog added. I've included all the original
> > authors. This is my first time writing such an extensive Changelog
> > entry so please let me know if there are things I got wrong.
> 
> I took the liberty and fixed some minor formatting trivia and extracted
> functions sh_emit_storesi and sh_emit_storehi which are used in
>  sh_trampoline_init to effectively memcpy code into the trampoline
> area.  Can you please check it?  If it's OK I'll commit the attached
> patch to trunk.

Is there anything in particular you'd like me to check? It builds fine
for fdpic target, successfully compiles musl libc.so, and busybox runs
with the resulting libc.so. I did a quick visual inspection of the
diff between my version and yours too and didn't see anything that
looked suspicious to me.

Rich
Oleg Endo Oct. 27, 2015, 2:01 p.m. UTC | #2
On Mon, 2015-10-26 at 22:47 -0400, Rich Felker wrote:
> On Sun, Oct 25, 2015 at 11:28:51PM +0900, Oleg Endo wrote:
> > On Fri, 2015-10-23 at 02:32 -0400, Rich Felker wrote:
> > > Here's my updated version of the FDPIC patch with all requested
> > > changes made and Changelog added. I've included all the original
> > > authors. This is my first time writing such an extensive
> > > Changelog
> > > entry so please let me know if there are things I got wrong.
> > 
> > I took the liberty and fixed some minor formatting trivia and
> > extracted
> > functions sh_emit_storesi and sh_emit_storehi which are used in
> >  sh_trampoline_init to effectively memcpy code into the trampoline
> > area.  Can you please check it?  If it's OK I'll commit the
> > attached
> > patch to trunk.
> 
> Is there anything in particular you'd like me to check? It builds
> fine
> for fdpic target, successfully compiles musl libc.so, and busybox
> runs
> with the resulting libc.so. I did a quick visual inspection of the
> diff between my version and yours too and didn't see anything that
> looked suspicious to me.

Thanks.  I have committed it as r229438 after a sanity check with "make
all" on sh-elf.

The way libcalls are now emitted is a bit unhandy.  If more special-ABI
libcalls are to be added in the future, they all have to do the jsr vs.
bsrf handling (some potential candidates for new libcalls are optimized
soft FP routines).  Then we still have PR 65374 and PR 54019. In the
future maybe we should come up with something that allows emitting
libcalls in a more transparent way...

Cheers,
Oleg
Rich Felker Nov. 10, 2015, 8:07 p.m. UTC | #3
On Tue, Oct 27, 2015 at 11:01:39PM +0900, Oleg Endo wrote:
> On Mon, 2015-10-26 at 22:47 -0400, Rich Felker wrote:
> > On Sun, Oct 25, 2015 at 11:28:51PM +0900, Oleg Endo wrote:
> > > On Fri, 2015-10-23 at 02:32 -0400, Rich Felker wrote:
> > > > Here's my updated version of the FDPIC patch with all requested
> > > > changes made and Changelog added. I've included all the original
> > > > authors. This is my first time writing such an extensive
> > > > Changelog
> > > > entry so please let me know if there are things I got wrong.
> > > 
> > > I took the liberty and fixed some minor formatting trivia and
> > > extracted
> > > functions sh_emit_storesi and sh_emit_storehi which are used in
> > >  sh_trampoline_init to effectively memcpy code into the trampoline
> > > area.  Can you please check it?  If it's OK I'll commit the
> > > attached
> > > patch to trunk.
> > 
> > Is there anything in particular you'd like me to check? It builds
> > fine
> > for fdpic target, successfully compiles musl libc.so, and busybox
> > runs
> > with the resulting libc.so. I did a quick visual inspection of the
> > diff between my version and yours too and didn't see anything that
> > looked suspicious to me.
> 
> Thanks.  I have committed it as r229438 after a sanity check with "make
> all" on sh-elf.
> 
> The way libcalls are now emitted is a bit unhandy.  If more special-ABI
> libcalls are to be added in the future, they all have to do the jsr vs.
> bsrf handling (some potential candidates for new libcalls are optimized
> soft FP routines).  Then we still have PR 65374 and PR 54019. In the
> future maybe we should come up with something that allows emitting
> libcalls in a more transparent way...

I'd like to look into improving this at some point in the near future.
On further reading of the changes made, I think there's a lot of code
we could reduce or simplify.

In all the places where new RTL patterns were added for *call*_fdpic,
the main constraint change vs the non-fdpic version is using REG_PIC.
Is it possible to make a REG_GOT_ARG macro or similar that's defined
as something like TARGET_FDPIC ? REG_PIC : nonexistent_or_dummy?

As for the call site stuff, I wonder why the existing call site stuff
used by "call_pcrel" can't be used for SFUNC_STATIC. I'm actually
trying to prepare a simpler FDPIC patch for other gcc versions we're
interested in that's not so invasive, and for now I'm just having
function_symbol replace SFUNC_STATIC with SFUNC_GOT on TARGET_FDPIC to
avoid needing all the label stuff, but it would be nice to find a way
to reuse the existing framework.

Rich
Oleg Endo Nov. 11, 2015, 2:36 p.m. UTC | #4
On Tue, 2015-11-10 at 15:07 -0500, Rich Felker wrote:

> > The way libcalls are now emitted is a bit unhandy.  If more special
> > -ABI
> > libcalls are to be added in the future, they all have to do the jsr
> > vs.
> > bsrf handling (some potential candidates for new libcalls are
> > optimized
> > soft FP routines).  Then we still have PR 65374 and PR 54019. In
> > the
> > future maybe we should come up with something that allows emitting
> > libcalls in a more transparent way...
> 
> I'd like to look into improving this at some point in the near
> future.
> On further reading of the changes made, I think there's a lot of code
> we could reduce or simplify.
> 
> In all the places where new RTL patterns were added for *call*_fdpic,
> the main constraint change vs the non-fdpic version is using REG_PIC.
> Is it possible to make a REG_GOT_ARG macro or similar that's defined
> as something like TARGET_FDPIC ? REG_PIC : nonexistent_or_dummy?

I'm not sure I understand what you mean by that.  Do you have a small
code snippet example?

> As for the call site stuff, I wonder why the existing call site stuff
> used by "call_pcrel" can't be used for SFUNC_STATIC. 

"call_pcrel" is a real call insn.  The libcalls are not expanded as
real call insns to avoid the regular register save/restores etc. which
are needed to do a normal function call.
I guess the generic fix for this issue would be some mechanism to
specify which regs are clobbered/preserved and then provide the right
settings for the libcall functions.


> I'm actually
> trying to prepare a simpler FDPIC patch for other gcc versions we're
> interested in that's not so invasive, and for now I'm just having
> function_symbol replace SFUNC_STATIC with SFUNC_GOT on TARGET_FDPIC
> to
> avoid needing all the label stuff, but it would be nice to find a way
> to reuse the existing framework.

Do you know how this affects code size (and inherently performance)?

Cheers,
Oleg
Rich Felker Nov. 11, 2015, 2:56 p.m. UTC | #5
On Wed, Nov 11, 2015 at 11:36:26PM +0900, Oleg Endo wrote:
> On Tue, 2015-11-10 at 15:07 -0500, Rich Felker wrote:
> 
> > > The way libcalls are now emitted is a bit unhandy.  If more special
> > > -ABI
> > > libcalls are to be added in the future, they all have to do the jsr
> > > vs.
> > > bsrf handling (some potential candidates for new libcalls are
> > > optimized
> > > soft FP routines).  Then we still have PR 65374 and PR 54019. In
> > > the
> > > future maybe we should come up with something that allows emitting
> > > libcalls in a more transparent way...
> > 
> > I'd like to look into improving this at some point in the near
> > future.
> > On further reading of the changes made, I think there's a lot of code
> > we could reduce or simplify.
> > 
> > In all the places where new RTL patterns were added for *call*_fdpic,
> > the main constraint change vs the non-fdpic version is using REG_PIC.
> > Is it possible to make a REG_GOT_ARG macro or similar that's defined
> > as something like TARGET_FDPIC ? REG_PIC : nonexistent_or_dummy?
> 
> I'm not sure I understand what you mean by that.  Do you have a small
> code snippet example?

Sorry, I don't really understand RTL well enough to make a code
snippet. What I want to express is that an insn "uses" (in the (use
...) sense) a register (r12) conditionally depending on a runtime
option (TARGET_FDPIC).

> > As for the call site stuff, I wonder why the existing call site stuff
> > used by "call_pcrel" can't be used for SFUNC_STATIC. 
> 
> "call_pcrel" is a real call insn.  The libcalls are not expanded as
> real call insns to avoid the regular register save/restores etc which
> is needed to do a normal function call.

Yes, I see that. What I was really wondering though is why the new
call site generation code and constraint was added when the call_pcrel
code already has mechanisms for this, rather than just duplicating the
internals that call_pcrel uses. It seems like we're doing things in a
gratuitously different way here.

> I guess the generic fix for this issue would be some mechanism to
> specify which regs are clobbered/preserved and then provide the right
> settings for the libcall functions.

Is this possible in the sh backend or does it need changes to
higher-level gcc code? (i.e. is it presently possible to make an insn
that conditionally clobbers different things rather than having to
make tons of different insns for each possible set of clobbers?)

> > I'm actually
> > trying to prepare a simpler FDPIC patch for other gcc versions we're
> > interested in that's not so invasive, and for now I'm just having
> > function_symbol replace SFUNC_STATIC with SFUNC_GOT on TARGET_FDPIC
> > to
> > avoid needing all the label stuff, but it would be nice to find a way
> > to reuse the existing framework.
> 
> Do you know how this affects code size (and inherently performance)?

I suspect it makes very little difference, but to compare I'd need to
do the same hack on 5.2.0 or trunk. The only difference should be one
additional load per call, and one additional GOT slot per function
called this way (but just once per executable/library).

Another issue I've started looking at is how r12 is put in fixed_regs,
which is conceptually wrong. Preliminary tests show that removing it
from fixed_regs doesn't break and produces much better code -- r12
gets used as a temp register in functions that don't need it, and in
one function that made multiple calls, the saving of initial r12 to a
call-saved register even happened in the delay slot of the call. I've
been discussing it with Alexander Monakov on IRC (#musl) and based on
my understanding so far of how gcc works (which admittedly may be
wrong) the current FDPIC code looks like it's written not to depend on
r12 being 'fixed'. Also I think I'm pretty close to understanding how
we could make the same improvements for non-FDPIC PIC codegen: instead
of loading r12 in the prologue, load a pseudo, then use that pseudo
for GOT access and force it into r12 the same way FDPIC call code does
for PLT calls. Does this sound correct?

Rich
Rich Felker Nov. 11, 2015, 4:41 p.m. UTC | #6
On Wed, Nov 11, 2015 at 09:56:42AM -0500, Rich Felker wrote:
> > > I'm actually
> > > trying to prepare a simpler FDPIC patch for other gcc versions we're
> > > interested in that's not so invasive, and for now I'm just having
> > > function_symbol replace SFUNC_STATIC with SFUNC_GOT on TARGET_FDPIC
> > > to
> > > avoid needing all the label stuff, but it would be nice to find a way
> > > to reuse the existing framework.
> > 
> > Do you know how this affects code size (and inherently performance)?
> 
> I suspect it makes very little difference, but to compare I'd need to
> do the same hack on 5.2.0 or trunk. The only difference should be one
> additional load per call, and one additional GOT slot per function
> called this way (but just once per executable/library).

Actually I think this is not quite right: if the call takes place via
the GOT, this also requires the initial r12 to be preserved somewhere
in order to load the function address, whereas for SFUNC_STATIC, the
initial r12 can be completely discarded, right? (SFUNC functions are
not permitted to use the GOT themselves as far as I can tell, and thus
do not receive the hidden GOT argument in r12.)

Rich
Oleg Endo Nov. 15, 2015, 5:08 a.m. UTC | #7
On Wed, 2015-11-11 at 09:56 -0500, Rich Felker wrote:

> Sorry, I don't really understand RTL well enough to make a code
> snippet. What I want to express is that an insn "uses" (in the (use
> ...) sense) a register (r12) conditionally depending on a runtime
> option (TARGET_FDPIC).

As far as I know this is not possible.  It would require two variants
of the same pattern, one with the use and another without the use.  

> Is this possible in the sh backend or does it need changes to
> higher-level gcc code? (i.e. is it presently possible to make an insn
> that conditionally clobbers different things rather than having to
> make tons of different insns for each possible set of clobbers?)

This is basically the same as above ... it's not possible to
conditionally construct/modify pattern descriptions in the .md. 
 However, it's possible to modify the CALL_INSN_FUNCTION_USAGE field of
call insns -- for some examples see 'grep -r CALL_INSN_FUNCTION_USAGE
gcc/config/*'.  Also, it seems the SH backend doesn't make use of some
existing libcall related parameters and target hooks/macros.  Maybe
those could be helpful.

> Another issue I've started looking at is how r12 is put in 
> fixed_regs, which is conceptually wrong. Preliminary tests show that 
> removing it from fixed_regs doesn't break and produces much better 
> code -- r12 gets used as a temp register in functions that don't need 
> it, and in one function that made multiple calls, the saving of 
> initial r12 to a call-saved register even happened in the delay slot 
> of the call. I've been discussing it with Alexander Monakov on IRC 
> (#musl) and based on my understanding so far of how gcc works (which 
> admittedly may be wrong) the current FDPIC code looks like it's 
> written not to depend on r12 being 'fixed'. Also I think I'm pretty 
> close to understanding how we could make the same improvements for 
> non-FDPIC PIC codegen: instead of loading r12 in the prologue, load a 
> pseudo, then use that pseudo for GOT access and force it into r12 the 
> same way FDPIC call code does for PLT calls. Does this sound correct?

Maybe TARGET_USE_PSEUDO_PIC_REG could be useful?

Cheers,
Oleg
Rich Felker Nov. 15, 2015, 8:39 p.m. UTC | #8
On Sun, Nov 15, 2015 at 02:08:34PM +0900, Oleg Endo wrote:
> On Wed, 2015-11-11 at 09:56 -0500, Rich Felker wrote:
> 
> > Sorry, I don't really understand RTL well enough to make a code
> > snippet. What I want to express is that an insn "uses" (in the (use
> > ...) sense) a register (r12) conditionally depending on a runtime
> > option (TARGET_FDPIC).
> 
> As far as I know this is not possible.  It would require two variants
> of the same pattern, one with the use and another without the use.  

OK. That's exactly what we've got now.

> > Is this possible in the sh backend or does it need changes to
> > higher-level gcc code? (i.e. is it presently possible to make an insn
> > that conditionally clobbers different things rather than having to
> > make tons of different insns for each possible set of clobbers?)
> 
> This is basically the same as above ... it's not possible to
> conditionally construct/modify pattern descriptions in the .md. 
>  However, it's possible to modify the CALL_INSN_FUNCTION_USAGE field of
> call insns -- for some examples see 'grep -r CALL_INSN_FUNCTION_USAGE
> gcc/config/*'.  Also, it seems the SH backend doesn't make use of some
> existing libcall related parameters and target hooks/macros.  Maybe
> those could be helpful.

I'll take a look at this. Let me know if you turn up anything
interesting.

> > Another issue I've started looking at is how r12 is put in 
> > fixed_regs, which is conceptually wrong. Preliminary tests show that 
> > removing it from fixed_regs doesn't break and produces much better 
> > code -- r12 gets used as a temp register in functions that don't need 
> > it, and in one function that made multiple calls, the saving of 
> > initial r12 to a call-saved register even happened in the delay slot 
> > of the call. I've been discussing it with Alexander Monakov on IRC 
> > (#musl) and based on my understanding so far of how gcc works (which 
> > admittedly may be wrong) the current FDPIC code looks like it's 
> > written not to depend on r12 being 'fixed'. Also I think I'm pretty 
> > close to understanding how we could make the same improvements for 
> > non-FDPIC PIC codegen: instead of loading r12 in the prologue, load a 
> > pseudo, then use that pseudo for GOT access and force it into r12 the 
> > same way FDPIC call code does for PLT calls. Does this sound correct?
> 
> Maybe TARGET_USE_PSEUDO_PIC_REG could be useful?

Yes. Is there any documentation on using it? I came across that but
couldn't figure out how it compares to just doing the pseudo yourself
in the target files. Is non-target-specific code affected by this?

Rich
Oleg Endo Nov. 16, 2015, 2:54 p.m. UTC | #9
On Sun, 2015-11-15 at 15:39 -0500, Rich Felker wrote:

> > This is basically the same as above ... it's not possible to
> > conditionally construct/modify pattern descriptions in the .md. 
> >  However, it's possible to modify the CALL_INSN_FUNCTION_USAGE
> > field of
> > call insns -- for some examples see 'grep -r
> > CALL_INSN_FUNCTION_USAGE
> > gcc/config/*'.  Also, it seems the SH backend doesn't make use of
> > some
> > existing libcall related parameters and target hooks/macros.  Maybe
> > those could be helpful.
> 
> I'll take a look at this. Let me know if you turn up anything
> interesting.

I'm currently working on other things, sorry.


> > 
> > Maybe TARGET_USE_PSEUDO_PIC_REG could be useful?
> 
> Yes. Is there any documentation on using it? I came across that but
> couldn't figure out how it compares to just doing the pseudo yourself
> in the target files. Is non-target-specific code affected by this?

Yes, non-target-specific code seems to be affected by this in some way,
although I don't know any details.  Due to the lack of documentation you'll
have to grep your way through it by looking for "USE_PSEUDO_PIC_REG"
and "use_pseudo_pic_reg" to find the places where it's used.

Cheers,
Oleg
diff mbox

Patch

Index: gcc/config/sh/constraints.md
===================================================================
--- gcc/config/sh/constraints.md	(revision 229290)
+++ gcc/config/sh/constraints.md	(working copy)
@@ -25,6 +25,7 @@ 
 ;;  Bsc: SCRATCH - for the scratch register in movsi_ie in the
 ;;       fldi0 / fldi0 cases
 ;; Cxx: Constants other than only CONST_INT
+;;  Ccl: call site label
 ;;  Css: signed 16-bit constant, literal or symbolic
 ;;  Csu: unsigned 16-bit constant, literal or symbolic
 ;;  Csy: label or symbol
@@ -233,6 +234,11 @@ 
    hence mova is being used, hence do not select this pattern."
   (match_code "scratch"))
 
+(define_constraint "Ccl"
+  "A call site label, for bsrf."
+  (and (match_code "unspec")
+       (match_test "XINT (op, 1) == UNSPEC_CALLER")))
+
 (define_constraint "Css"
   "A signed 16-bit constant, literal or symbolic."
   (and (match_code "const")
Index: gcc/config/sh/linux.h
===================================================================
--- gcc/config/sh/linux.h	(revision 229290)
+++ gcc/config/sh/linux.h	(working copy)
@@ -67,7 +67,8 @@ 
 #define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2"
 
 #undef SUBTARGET_LINK_EMUL_SUFFIX
-#define SUBTARGET_LINK_EMUL_SUFFIX "_linux"
+#define SUBTARGET_LINK_EMUL_SUFFIX "%{mfdpic:_fd;:_linux}"
+
 #undef SUBTARGET_LINK_SPEC
 #define SUBTARGET_LINK_SPEC \
   "%{shared:-shared} \
Index: gcc/config/sh/sh-c.c
===================================================================
--- gcc/config/sh/sh-c.c	(revision 229290)
+++ gcc/config/sh/sh-c.c	(working copy)
@@ -137,6 +137,11 @@ 
     builtin_define ("__HITACHI__");
   if (TARGET_FMOVD)
     builtin_define ("__FMOVD_ENABLED__");
+  if (TARGET_FDPIC)
+    {
+      builtin_define ("__SH_FDPIC__");
+      builtin_define ("__FDPIC__");
+    }
   builtin_define (TARGET_LITTLE_ENDIAN
 		  ? "__LITTLE_ENDIAN__" : "__BIG_ENDIAN__");
 
Index: gcc/config/sh/sh-mem.cc
===================================================================
--- gcc/config/sh/sh-mem.cc	(revision 229290)
+++ gcc/config/sh/sh-mem.cc	(working copy)
@@ -108,29 +108,30 @@ 
 	  rtx r4 = gen_rtx_REG (SImode, 4);
 	  rtx r5 = gen_rtx_REG (SImode, 5);
 
-	  function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
+	  rtx lab = function_symbol (func_addr_rtx, "__movmemSI12_i4",
+				     SFUNC_STATIC).lab;
 	  force_into (XEXP (operands[0], 0), r4);
 	  force_into (XEXP (operands[1], 0), r5);
-	  emit_insn (gen_block_move_real_i4 (func_addr_rtx));
+	  emit_insn (gen_block_move_real_i4 (func_addr_rtx, lab));
 	  return true;
 	}
       else if (! optimize_size)
 	{
-	  const char *entry_name;
 	  rtx func_addr_rtx = gen_reg_rtx (Pmode);
-	  int dwords;
 	  rtx r4 = gen_rtx_REG (SImode, 4);
 	  rtx r5 = gen_rtx_REG (SImode, 5);
 	  rtx r6 = gen_rtx_REG (SImode, 6);
 
-	  entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
-	  function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
+	  rtx lab = function_symbol (func_addr_rtx, bytes & 4
+						    ? "__movmem_i4_odd"
+						    : "__movmem_i4_even",
+				     SFUNC_STATIC).lab;
 	  force_into (XEXP (operands[0], 0), r4);
 	  force_into (XEXP (operands[1], 0), r5);
 
-	  dwords = bytes >> 3;
+	  int dwords = bytes >> 3;
 	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
-	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
+	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab));
 	  return true;
 	}
       else
@@ -144,10 +145,10 @@ 
       rtx r5 = gen_rtx_REG (SImode, 5);
 
       sprintf (entry, "__movmemSI%d", bytes);
-      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
+      rtx lab = function_symbol (func_addr_rtx, entry, SFUNC_STATIC).lab;
       force_into (XEXP (operands[0], 0), r4);
       force_into (XEXP (operands[1], 0), r5);
-      emit_insn (gen_block_move_real (func_addr_rtx));
+      emit_insn (gen_block_move_real (func_addr_rtx, lab));
       return true;
     }
 
@@ -161,7 +162,7 @@ 
       rtx r5 = gen_rtx_REG (SImode, 5);
       rtx r6 = gen_rtx_REG (SImode, 6);
 
-      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
+      rtx lab = function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC).lab;
       force_into (XEXP (operands[0], 0), r4);
       force_into (XEXP (operands[1], 0), r5);
 
@@ -174,7 +175,7 @@ 
       final_switch = 16 - ((bytes / 4) % 16);
       while_loop = ((bytes / 4) / 16 - 1) * 16;
       emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
-      emit_insn (gen_block_lump_real (func_addr_rtx));
+      emit_insn (gen_block_lump_real (func_addr_rtx, lab));
       return true;
     }
 
Index: gcc/config/sh/sh-protos.h
===================================================================
--- gcc/config/sh/sh-protos.h	(revision 229290)
+++ gcc/config/sh/sh-protos.h	(working copy)
@@ -377,7 +377,19 @@ 
 extern void sh_pr_interrupt (struct cpp_reader *);
 extern void sh_pr_trapa (struct cpp_reader *);
 extern void sh_pr_nosave_low_regs (struct cpp_reader *);
-extern rtx function_symbol (rtx, const char *, enum sh_function_kind);
+
+struct function_symbol_result
+{
+  function_symbol_result (void) : sym (NULL), lab (NULL) { }
+  function_symbol_result (rtx s, rtx l) : sym (s), lab (l) { }
+
+  rtx sym;
+  rtx lab;
+};
+
+extern function_symbol_result function_symbol (rtx, const char *,
+					       sh_function_kind);
+extern rtx sh_get_fdpic_reg_initial_val (void);
 extern rtx sh_get_pr_initial_val (void);
 
 extern void sh_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree,
@@ -396,4 +408,5 @@ 
 extern machine_mode sh_hard_regno_caller_save_mode (unsigned int, unsigned int,
 						    machine_mode);
 extern bool sh_can_use_simple_return_p (void);
+extern rtx sh_load_function_descriptor (rtx);
 #endif /* ! GCC_SH_PROTOS_H */
Index: gcc/config/sh/sh.c
===================================================================
--- gcc/config/sh/sh.c	(revision 229290)
+++ gcc/config/sh/sh.c	(working copy)
@@ -251,6 +251,7 @@ 
 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
 				HOST_WIDE_INT, tree);
 static void sh_file_start (void);
+static bool sh_assemble_integer (rtx, unsigned int, int);
 static bool flow_dependent_p (rtx, rtx);
 static void flow_dependent_p_1 (rtx, const_rtx, void *);
 static int shiftcosts (rtx);
@@ -259,6 +260,7 @@ 
 static int multcosts (rtx);
 static bool unspec_caller_rtx_p (rtx);
 static bool sh_cannot_copy_insn_p (rtx_insn *);
+static bool sh_cannot_force_const_mem_p (machine_mode, rtx);
 static bool sh_rtx_costs (rtx, machine_mode, int, int, int *, bool);
 static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
 static int sh_pr_n_sets (void);
@@ -404,6 +406,9 @@ 
 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
 
+#undef TARGET_ASM_INTEGER
+#define TARGET_ASM_INTEGER sh_assemble_integer
+
 #undef TARGET_REGISTER_MOVE_COST
 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
 
@@ -662,6 +667,9 @@ 
 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
 
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM sh_cannot_force_const_mem_p
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 
@@ -979,6 +987,13 @@ 
   if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
     TARGET_ZDCBRANCH = 1;
 
+  /* FDPIC code is a special form of PIC, and the vast majority of code
+     generation constraints that apply to PIC also apply to FDPIC, so we
+     set flag_pic to avoid the need to check TARGET_FDPIC everywhere
+     flag_pic is checked. */
+  if (TARGET_FDPIC && !flag_pic)
+    flag_pic = 2;
+
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
     if (! VALID_REGISTER_P (regno))
       sh_register_names[regno][0] = '\0';
@@ -1670,6 +1685,14 @@ 
 	  output_addr_const (file, XVECEXP (x, 0, 1));
 	  fputs ("-.)", file);
 	  break;
+	case UNSPEC_GOTFUNCDESC:
+	  output_addr_const (file, XVECEXP (x, 0, 0));
+	  fputs ("@GOTFUNCDESC", file);
+	  break;
+	case UNSPEC_GOTOFFFUNCDESC:
+	  output_addr_const (file, XVECEXP (x, 0, 0));
+	  fputs ("@GOTOFFFUNCDESC", file);
+	  break;
 	default:
 	  return false;
 	}
@@ -1854,6 +1877,9 @@ 
 	    {
 	    case TLS_MODEL_GLOBAL_DYNAMIC:
 	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
+	      if (TARGET_FDPIC)
+		emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
+				sh_get_fdpic_reg_initial_val ());
 	      emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
 	      tmp = gen_reg_rtx (Pmode);
 	      emit_move_insn (tmp, tga_ret);
@@ -1862,6 +1888,9 @@ 
 
 	    case TLS_MODEL_LOCAL_DYNAMIC:
 	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
+	      if (TARGET_FDPIC)
+		emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
+				sh_get_fdpic_reg_initial_val ());
 	      emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
 
 	      tmp = gen_reg_rtx (Pmode);
@@ -1879,6 +1908,9 @@ 
 	    case TLS_MODEL_INITIAL_EXEC:
 	      tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
 	      tmp = gen_sym2GOTTPOFF (op1);
+	      if (TARGET_FDPIC)
+		emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
+				sh_get_fdpic_reg_initial_val ());
 	      emit_insn (gen_tls_initial_exec (tga_op1, tmp));
 	      op1 = tga_op1;
 	      break;
@@ -1905,6 +1937,22 @@ 
 	  operands[1] = op1;
 	}
     }
+
+  if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
+    {
+      rtx base, offset;
+      split_const (operands[1], &base, &offset);
+
+      if (GET_CODE (base) == SYMBOL_REF
+	  && !offset_within_block_p (base, INTVAL (offset)))
+	{
+	  rtx tmp = can_create_pseudo_p () ? gen_reg_rtx (mode) : operands[0];
+	  emit_move_insn (tmp, base);
+	  if (!arith_operand (offset, mode))
+	    offset = force_reg (mode, offset);
+	  emit_insn (gen_add3_insn (operands[0], tmp, offset));
+	}
+    }
 }
 
 /* Implement the canonicalize_comparison target hook for the combine
@@ -3009,6 +3057,24 @@ 
     }
 }
 
+/* Implementation of TARGET_ASM_INTEGER for SH.  Pointers to functions
+   need to be output as pointers to function descriptors for
+   FDPIC.  */
+
+static bool
+sh_assemble_integer (rtx value, unsigned int size, int aligned_p)
+{
+  if (TARGET_FDPIC && size == UNITS_PER_WORD
+      && GET_CODE (value) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (value))
+    {
+      fputs ("\t.long\t", asm_out_file);
+      output_addr_const (asm_out_file, value);
+      fputs ("@FUNCDESC\n", asm_out_file);
+      return true;
+    }
+  return default_assemble_integer (value, size, aligned_p);
+}
+
 /* Check if PAT includes UNSPEC_CALLER unspec pattern.  */
 static bool
 unspec_caller_rtx_p (rtx pat)
@@ -3044,6 +3110,17 @@ 
     return false;
 
   pat = PATTERN (insn);
+
+  if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == USE)
+    return false;
+
+  if (TARGET_FDPIC && GET_CODE (pat) == PARALLEL)
+    {
+      rtx t = XVECEXP (pat, 0, XVECLEN (pat, 0) - 1);
+      if (GET_CODE (t) == USE && unspec_caller_rtx_p (XEXP (t, 0)))
+	return true;
+    }
+
   if (GET_CODE (pat) != SET)
     return false;
   pat = SET_SRC (pat);
@@ -4085,8 +4162,8 @@ 
   /* Load the value into an arg reg and call a helper.  */
   emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
   sprintf (func, "__ashiftrt_r4_%d", value);
-  function_symbol (wrk, func, SFUNC_STATIC);
-  emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
+  rtx lab = function_symbol (wrk, func, SFUNC_STATIC).lab;
+  emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk, lab));
   emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
   return true;
 }
@@ -7937,7 +8014,8 @@ 
       stack_usage += d;
     }
 
-  if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
+  if (flag_pic && !TARGET_FDPIC
+      && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
     emit_insn (gen_GOTaddr2picreg (const0_rtx));
 
   if (SHMEDIA_REGS_STACK_ADJUST ())
@@ -10438,7 +10516,9 @@ 
 	  || XINT (x, 1) == UNSPEC_PLT
 	  || XINT (x, 1) == UNSPEC_PCREL
 	  || XINT (x, 1) == UNSPEC_SYMOFF
-	  || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
+	  || XINT (x, 1) == UNSPEC_PCREL_SYMOFF
+	  || XINT (x, 1) == UNSPEC_GOTFUNCDESC
+	  || XINT (x, 1) == UNSPEC_GOTOFFFUNCDESC))
     return false;
 
   fmt = GET_RTX_FORMAT (GET_CODE (x));
@@ -10473,7 +10553,26 @@ 
       if (reg == NULL_RTX)
 	reg = gen_reg_rtx (Pmode);
 
-      emit_insn (gen_symGOTOFF2reg (reg, orig));
+      if (TARGET_FDPIC
+	  && GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (orig))
+	{
+	  /* Weak functions may be NULL which doesn't work with
+	     GOTOFFFUNCDESC because the runtime offset is not known.  */
+	  if (SYMBOL_REF_WEAK (orig))
+	    emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
+	  else
+	    emit_insn (gen_symGOTOFFFUNCDESC2reg (reg, orig));
+	}
+      else if (TARGET_FDPIC
+	       && (GET_CODE (orig) == LABEL_REF
+		   || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_DECL (orig)
+		       && (TREE_READONLY (SYMBOL_REF_DECL (orig))
+			   || SYMBOL_REF_EXTERNAL_P (orig)
+			   || DECL_SECTION_NAME(SYMBOL_REF_DECL (orig))))))
+	/* In FDPIC, GOTOFF can only be used for writable data.  */
+	emit_insn (gen_symGOT2reg (reg, orig));
+      else
+	emit_insn (gen_symGOTOFF2reg (reg, orig));
       return reg;
     }
   else if (GET_CODE (orig) == SYMBOL_REF)
@@ -10481,7 +10580,10 @@ 
       if (reg == NULL_RTX)
 	reg = gen_reg_rtx (Pmode);
 
-      emit_insn (gen_symGOT2reg (reg, orig));
+      if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (orig))
+	emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
+      else
+	emit_insn (gen_symGOT2reg (reg, orig));
       return reg;
     }
   return orig;
@@ -11519,9 +11621,40 @@ 
    5 0008 00000000 	l1:  	.long   area
    6 000c 00000000 	l2:	.long   function
 
+   FDPIC needs a form that includes a function descriptor and
+   code to load the GOT register:
+   0 0000 00000000		.long	l0
+   1 0004 00000000		.long	gotval
+   2 0008 D302    	l0:	mov.l	l1,r3
+   3 000a D203    		mov.l	l2,r2
+   4 000c 6122    		mov.l	@r2,r1
+   5 000e 5C21    		mov.l	@(4,r2),r12
+   6 0010 412B    		jmp	@r1
+   7 0012 0009    		nop
+   8 0014 00000000	l1:	.long	area
+   9 0018 00000000	l2:	.long	function
+
    SH5 (compact) uses r1 instead of r3 for the static chain.  */
 
+/* Emit insns to store VALUE into the SImode word OFFSET bytes into ADDR.  */
+static void
+sh_emit_storesi (rtx addr, HOST_WIDE_INT offset, rtx value)
+{
+  gcc_assert ((offset & 3) == 0);
+  rtx slot = offset != 0 ? adjust_address (addr, SImode, offset)
+			 : change_address (addr, SImode, NULL_RTX);
+  emit_move_insn (slot, value);
+}
 
+/* Emit insns to store W0 at ADDR + OFFSET and W1 at ADDR + OFFSET + 2.  */
+static void
+sh_emit_storehi (rtx addr, HOST_WIDE_INT offset, uint16_t w0, uint16_t w1)
+{
+  unsigned HOST_WIDE_INT v = TARGET_LITTLE_ENDIAN ? w1 : w0; /* High half.  */
+  v = (v << 16) | (TARGET_LITTLE_ENDIAN ? w0 : w1); /* Wide: no overflow.  */
+  sh_emit_storesi (addr, offset, gen_int_mode (v, SImode));
+}
+
 /* Emit RTL insns to initialize the variable parts of a trampoline.
    FNADDR is an RTX for the address of the function's pure code.
    CXT is an RTX for the static chain value for the function.  */
@@ -11655,20 +11788,34 @@ 
       emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
       return;
     }
-  emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
-		  gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
-				SImode));
-  emit_move_insn (adjust_address (tramp_mem, SImode, 4),
-		  gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
-				SImode));
-  emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
-  emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
+  if (TARGET_FDPIC)
+    {
+      rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (tramp_mem, 0), 8));
+
+      sh_emit_storesi (tramp_mem, 0, a);
+      sh_emit_storesi (tramp_mem, 4, sh_get_fdpic_reg_initial_val ());
+
+      sh_emit_storehi (tramp_mem,  8, 0xd302, 0xd203);
+      sh_emit_storehi (tramp_mem, 12, 0x6122, 0x5c21);
+      sh_emit_storehi (tramp_mem, 16, 0x412b, 0x0009);
+
+      sh_emit_storesi (tramp_mem, 20, cxt);
+      sh_emit_storesi (tramp_mem, 24, fnaddr);
+    }
+  else
+    {
+      sh_emit_storehi (tramp_mem, 0, 0xd202, 0xd301);
+      sh_emit_storehi (tramp_mem, 4, 0x422b, 0x0009);
+
+      sh_emit_storesi (tramp_mem,  8, cxt);
+      sh_emit_storesi (tramp_mem, 12, fnaddr);
+    }
   if (TARGET_HARD_SH4 || TARGET_SH5)
     {
       if (!TARGET_INLINE_IC_INVALIDATE
 	  || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
 	emit_library_call (function_symbol (NULL, "__ic_invalidate",
-					    FUNCTION_ORDINARY),
+					    FUNCTION_ORDINARY).sym,
 			   LCT_NORMAL, VOIDmode, 1, tramp, SImode);
       else
 	emit_insn (gen_ic_invalidate_line (tramp));
@@ -11698,7 +11845,7 @@ 
 	  && (! TARGET_SHCOMPACT
 	      || crtl->args.info.stack_regs == 0)
 	  && ! sh_cfun_interrupt_handler_p ()
-	  && (! flag_pic
+	  && (! flag_pic || TARGET_FDPIC
 	      || (decl && ! (TREE_PUBLIC (decl) || DECL_WEAK (decl)))
 	      || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
 }
@@ -11712,7 +11859,7 @@ 
 
   if (!is_weak && SYMBOL_REF_LOCAL_P (sym))
     emit_insn (gen_sym_label2reg (reg, sym, lab));
-  else if (sibcall_p)
+  else if (sibcall_p && SYMBOL_REF_LOCAL_P (sym))
     emit_insn (gen_symPCREL_label2reg (reg, sym, lab));
   else
     emit_insn (gen_symPLT_label2reg (reg, sym, lab));
@@ -12715,8 +12862,16 @@ 
 #endif
   if (TARGET_SH2 && flag_pic)
     {
-      sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
-      XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
+      if (TARGET_FDPIC)
+	{
+	  sibcall = gen_sibcall_pcrel_fdpic (funexp, const0_rtx);
+	  XEXP (XVECEXP (sibcall, 0, 3), 0) = scratch2;
+	}
+      else
+	{
+	  sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
+	  XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
+	}
     }
   else
     {
@@ -12757,17 +12912,25 @@ 
   epilogue_completed = 0;
 }
 
-rtx
-function_symbol (rtx target, const char *name, enum sh_function_kind kind)
+/* Return an RTX pair for the address and call site label of a function
+   NAME of kind KIND, placing the result in TARGET if not NULL.  For
+   SFUNC_STATIC, if FDPIC, the LAB member of result will be set to
+   (const_int 0) if jsr should be used, or a label_ref if bsrf should
+   be used.  For FDPIC, both SFUNC_GOT and SFUNC_STATIC will return the
+   address of the function itself, not a function descriptor, so they
+   can only be used with functions not using the FDPIC register that
+   are known to be called directly without a PLT entry.  */
+
+function_symbol_result
+function_symbol (rtx target, const char *name, sh_function_kind kind)
 {
-  rtx sym;
-
   /* If this is not an ordinary function, the name usually comes from a
      string literal or an sprintf buffer.  Make sure we use the same
      string consistently, so that cse will be able to unify address loads.  */
   if (kind != FUNCTION_ORDINARY)
     name = IDENTIFIER_POINTER (get_identifier (name));
-  sym = gen_rtx_SYMBOL_REF (Pmode, name);
+  rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
+  rtx lab = const0_rtx;
   SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
   if (flag_pic)
     switch (kind)
@@ -12784,14 +12947,25 @@ 
 	}
       case SFUNC_STATIC:
 	{
-	  /* ??? To allow cse to work, we use GOTOFF relocations.
-	     We could add combiner patterns to transform this into
-	     straight pc-relative calls with sym2PIC / bsrf when
-	     label load and function call are still 1:1 and in the
-	     same basic block during combine.  */
 	  rtx reg = target ? target : gen_reg_rtx (Pmode);
 
-	  emit_insn (gen_symGOTOFF2reg (reg, sym));
+	  if (TARGET_FDPIC)
+	    {
+	      /* We use PC-relative calls, since GOTOFF can only refer
+		 to writable data.  This works along with sh_sfunc_call.  */
+ 	      lab = PATTERN (gen_call_site ());
+	      emit_insn (gen_sym_label2reg (reg, sym, lab));
+	    }
+	  else
+	    {
+	      /* ??? To allow cse to work, we use GOTOFF relocations.
+		 We could add combiner patterns to transform this into
+		 straight pc-relative calls with sym2PIC / bsrf when
+		 label load and function call are still 1:1 and in the
+		 same basic block during combine.  */
+	      emit_insn (gen_symGOTOFF2reg (reg, sym));
+	    }
+
 	  sym = reg;
 	  break;
 	}
@@ -12799,9 +12973,9 @@ 
   if (target && sym != target)
     {
       emit_move_insn (target, sym);
-      return target;
+      return function_symbol_result (target, lab);
     }
-  return sym;
+  return function_symbol_result (sym, lab);
 }
 
 /* Find the number of a general purpose register in S.  */
@@ -13414,6 +13588,12 @@ 
       fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
       call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
     }
+  if (TARGET_FDPIC)
+    {
+      fixed_regs[PIC_REG] = 1;
+      call_used_regs[PIC_REG] = 1;
+      call_really_used_regs[PIC_REG] = 1;
+    }
   /* Renesas saves and restores mac registers on call.  */
   if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
     {
@@ -13442,14 +13622,32 @@ 
 static bool
 sh_legitimate_constant_p (machine_mode mode, rtx x)
 {
-  return (TARGET_SHMEDIA
-	  ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
-	     || x == CONST0_RTX (mode)
-	     || !TARGET_SHMEDIA_FPU
-	     || TARGET_SHMEDIA64)
-	  : (GET_CODE (x) != CONST_DOUBLE
-	     || mode == DFmode || mode == SFmode
-	     || mode == DImode || GET_MODE (x) == VOIDmode));
+  if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
+    {
+      rtx base, offset;
+      split_const (x, &base, &offset);
+
+      if (GET_CODE (base) == SYMBOL_REF
+	  && !offset_within_block_p (base, INTVAL (offset)))
+	return false;
+    }
+
+  if (TARGET_FDPIC
+      && (SYMBOLIC_CONST_P (x)
+	  || (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
+	      && SYMBOLIC_CONST_P (XEXP (XEXP (x, 0), 0)))))
+    return false;
+
+  /* SHmedia constants are judged solely by this test.  */
+  if (TARGET_SHMEDIA)
+    return ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
+	    || x == CONST0_RTX (mode)
+	    || !TARGET_SHMEDIA_FPU
+	    || TARGET_SHMEDIA64);
+
+  return GET_CODE (x) != CONST_DOUBLE
+	 || mode == DFmode || mode == SFmode
+	 || mode == DImode || GET_MODE (x) == VOIDmode;
 }
 
 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
@@ -14540,4 +14738,41 @@ 
     }
 }
 
+bool
+sh_cannot_force_const_mem_p (machine_mode mode ATTRIBUTE_UNUSED,
+			     rtx x ATTRIBUTE_UNUSED)
+{
+  return TARGET_FDPIC;
+}
+
+/* Emit insns to load the function address from FUNCDESC (an FDPIC
+   function descriptor) into r1 and the GOT address into r12,
+   returning an rtx for r1.  */
+
+rtx
+sh_load_function_descriptor (rtx funcdesc)
+{
+  rtx r1 = gen_rtx_REG (Pmode, R1_REG);
+  rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+  rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
+  rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
+
+  emit_move_insn (r1, fnaddr);
+  /* The ABI requires the entry point address to be loaded first, so
+     prevent the load from being moved after that of the GOT
+     address.  */
+  emit_insn (gen_blockage ());
+  emit_move_insn (pic_reg, gotaddr);
+  return r1;
+}
+
+/* Return an rtx holding the initial value of the FDPIC register (the
+   FDPIC pointer passed in from the caller).  */
+
+rtx
+sh_get_fdpic_reg_initial_val (void)
+{
+  return get_hard_reg_initial_val (Pmode, PIC_REG);
+}
+
 #include "gt-sh.h"
Index: gcc/config/sh/sh.h
===================================================================
--- gcc/config/sh/sh.h	(revision 229290)
+++ gcc/config/sh/sh.h	(working copy)
@@ -316,7 +316,7 @@ 
 #endif
 
 #ifndef SUBTARGET_ASM_SPEC
-#define SUBTARGET_ASM_SPEC ""
+#define SUBTARGET_ASM_SPEC "%{mfdpic:--fdpic}"
 #endif
 
 #if TARGET_ENDIAN_DEFAULT == MASK_LITTLE_ENDIAN
@@ -344,7 +344,7 @@ 
 #define ASM_ISA_DEFAULT_SPEC ""
 #endif /* MASK_SH5 */
 
-#define SUBTARGET_LINK_EMUL_SUFFIX ""
+#define SUBTARGET_LINK_EMUL_SUFFIX "%{mfdpic:_fd}"
 #define SUBTARGET_LINK_SPEC ""
 
 /* Go via SH_LINK_SPEC to avoid code replication.  */
@@ -378,9 +378,19 @@ 
 "%{m2a*:%eSH2a does not support little-endian}}"
 #endif
 
+#ifdef FDPIC_DEFAULT
+#define FDPIC_SELF_SPECS "%{!mno-fdpic:-mfdpic}"
+#else
+#define FDPIC_SELF_SPECS
+#endif
+
 #undef DRIVER_SELF_SPECS
-#define DRIVER_SELF_SPECS UNSUPPORTED_SH2A
+#define DRIVER_SELF_SPECS UNSUPPORTED_SH2A SUBTARGET_DRIVER_SELF_SPECS \
+  FDPIC_SELF_SPECS
 
+#undef SUBTARGET_DRIVER_SELF_SPECS
+#define SUBTARGET_DRIVER_SELF_SPECS
+
 #define ASSEMBLER_DIALECT assembler_dialect
 
 extern int assembler_dialect;
@@ -937,6 +947,10 @@ 
    code access to data items.  */
 #define PIC_OFFSET_TABLE_REGNUM	(flag_pic ? PIC_REG : INVALID_REGNUM)
 
+/* For FDPIC, the FDPIC register is call-clobbered (otherwise PLT
+   entries would need to handle saving and restoring it).  */
+#define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED TARGET_FDPIC
+
 #define GOT_SYMBOL_NAME "*_GLOBAL_OFFSET_TABLE_"
 
 /* Definitions for register eliminations.
@@ -1561,7 +1575,8 @@ 
    6 000c 00000000 	l2:	.long   function  */
 
 /* Length in units of the trampoline for entering a nested function.  */
-#define TRAMPOLINE_SIZE  (TARGET_SHMEDIA64 ? 40 : TARGET_SH5 ? 24 : 16)
+#define TRAMPOLINE_SIZE \
+  (TARGET_SHMEDIA64 ? 40 : TARGET_SH5 ? 24 : TARGET_FDPIC ? 32 : 16)
 
 /* Alignment required for a trampoline in bits.  */
 #define TRAMPOLINE_ALIGNMENT \
@@ -1617,6 +1632,10 @@ 
       || GENERAL_REGISTER_P ((unsigned) reg_renumber[(REGNO)])) \
    : (REGNO) == R0_REG || (unsigned) reg_renumber[(REGNO)] == R0_REG)
 
+/* True if SYMBOL + OFFSET constants must refer to something within
+   SYMBOL's section.  */
+#define SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P TARGET_FDPIC
+
 /* Maximum number of registers that can appear in a valid memory
    address.  */
 #define MAX_REGS_PER_ADDRESS 2
@@ -2257,9 +2276,11 @@ 
 /* We have to distinguish between code and data, so that we apply
    datalabel where and only where appropriate.  Use sdataN for data.  */
 #define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
- ((flag_pic && (GLOBAL) ? DW_EH_PE_indirect : 0) \
-  | (flag_pic ? DW_EH_PE_pcrel : DW_EH_PE_absptr) \
-  | ((CODE) ? 0 : (TARGET_SHMEDIA64 ? DW_EH_PE_sdata8 : DW_EH_PE_sdata4)))
+  ((TARGET_FDPIC \
+    ? ((GLOBAL) ? DW_EH_PE_indirect | DW_EH_PE_datarel : DW_EH_PE_pcrel) \
+    : ((flag_pic && (GLOBAL) ? DW_EH_PE_indirect : 0) \
+       | (flag_pic ? DW_EH_PE_pcrel : DW_EH_PE_absptr))) \
+   | ((CODE) ? 0 : (TARGET_SHMEDIA64 ? DW_EH_PE_sdata8 : DW_EH_PE_sdata4)))
 
 /* Handle special EH pointer encodings.  Absolute, pc-relative, and
    indirect are handled automatically.  */
@@ -2272,6 +2293,17 @@ 
 	SYMBOL_REF_FLAGS (ADDR) |= SYMBOL_FLAG_FUNCTION; \
 	if (0) goto DONE; \
       } \
+    if (TARGET_FDPIC \
+	&& ((ENCODING) & 0xf0) == (DW_EH_PE_indirect | DW_EH_PE_datarel)) \
+      { \
+	fputs ("\t.ualong ", FILE); \
+	output_addr_const (FILE, ADDR); \
+	if (GET_CODE (ADDR) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (ADDR)) \
+	  fputs ("@GOTFUNCDESC", FILE); \
+	else \
+	  fputs ("@GOT", FILE); \
+	goto DONE; \
+      } \
   } while (0)
 
 #if (defined CRT_BEGIN || defined CRT_END) && ! __SHMEDIA__
Index: gcc/config/sh/sh.md
===================================================================
--- gcc/config/sh/sh.md	(revision 229290)
+++ gcc/config/sh/sh.md	(working copy)
@@ -170,6 +170,9 @@ 
   UNSPEC_SYMOFF
   ;; (unspec [OFFSET ANCHOR] UNSPEC_PCREL_SYMOFF) == OFFSET - (ANCHOR - .).
   UNSPEC_PCREL_SYMOFF
+  ;; For FDPIC
+  UNSPEC_GOTFUNCDESC
+  UNSPEC_GOTOFFFUNCDESC
   ;; Misc builtins
   UNSPEC_BUILTIN_STRLEN
 ])
@@ -2591,15 +2594,18 @@ 
 ;; This reload would clobber the value in r0 we are trying to store.
 ;; If we let reload allocate r0, then this problem can never happen.
 (define_insn "udivsi3_i1"
-  [(set (match_operand:SI 0 "register_operand" "=z")
+  [(set (match_operand:SI 0 "register_operand" "=z,z")
 	(udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
    (clobber (reg:SI T_REG))
    (clobber (reg:SI PR_REG))
    (clobber (reg:SI R1_REG))
    (clobber (reg:SI R4_REG))
-   (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+   (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+   (use (match_operand 2 "" "Z,Ccl"))]
   "TARGET_SH1 && TARGET_DIVIDE_CALL_DIV1"
-  "jsr	@%1%#"
+  "@
+	jsr	@%1%#
+	bsrf	%1\n%O2:%#"
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])
 
@@ -2648,7 +2654,7 @@ 
 })
 
 (define_insn "udivsi3_i4"
-  [(set (match_operand:SI 0 "register_operand" "=y")
+  [(set (match_operand:SI 0 "register_operand" "=y,y")
 	(udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
    (clobber (reg:SI T_REG))
    (clobber (reg:SI PR_REG))
@@ -2660,16 +2666,19 @@ 
    (clobber (reg:SI R4_REG))
    (clobber (reg:SI R5_REG))
    (clobber (reg:SI FPSCR_STAT_REG))
-   (use (match_operand:SI 1 "arith_reg_operand" "r"))
+   (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+   (use (match_operand 2 "" "Z,Ccl"))
    (use (reg:SI FPSCR_MODES_REG))]
   "TARGET_FPU_DOUBLE && ! TARGET_FPU_SINGLE"
-  "jsr	@%1%#"
+  "@
+	jsr	@%1%#
+	bsrf	%1\n%O2:%#"
   [(set_attr "type" "sfunc")
    (set_attr "fp_mode" "double")
    (set_attr "needs_delay_slot" "yes")])
 
 (define_insn "udivsi3_i4_single"
-  [(set (match_operand:SI 0 "register_operand" "=y")
+  [(set (match_operand:SI 0 "register_operand" "=y,y")
 	(udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
    (clobber (reg:SI T_REG))
    (clobber (reg:SI PR_REG))
@@ -2680,10 +2689,13 @@ 
    (clobber (reg:SI R1_REG))
    (clobber (reg:SI R4_REG))
    (clobber (reg:SI R5_REG))
-   (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+   (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+   (use (match_operand 2 "" "Z,Ccl"))]
   "(TARGET_FPU_SINGLE_ONLY || TARGET_FPU_DOUBLE || TARGET_SHCOMPACT)
    && TARGET_FPU_SINGLE"
-  "jsr	@%1%#"
+  "@
+	jsr	@%1%#
+	bsrf	%1\n%O2:%#"
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])
 
@@ -2742,11 +2754,11 @@ 
     }
   else if (TARGET_DIVIDE_CALL_FP)
     {
-      function_symbol (operands[3], "__udivsi3_i4", SFUNC_STATIC);
+      rtx lab = function_symbol (operands[3], "__udivsi3_i4", SFUNC_STATIC).lab;
       if (TARGET_FPU_SINGLE)
-	last = gen_udivsi3_i4_single (operands[0], operands[3]);
+	last = gen_udivsi3_i4_single (operands[0], operands[3], lab);
       else
-	last = gen_udivsi3_i4 (operands[0], operands[3]);
+	last = gen_udivsi3_i4 (operands[0], operands[3], lab);
     }
   else if (TARGET_SHMEDIA_FPU)
     {
@@ -2771,14 +2783,14 @@ 
       if (TARGET_SHMEDIA)
 	last = gen_udivsi3_i1_media (operands[0], operands[3]);
       else if (TARGET_FPU_ANY)
-	last = gen_udivsi3_i4_single (operands[0], operands[3]);
+	last = gen_udivsi3_i4_single (operands[0], operands[3], const0_rtx);
       else
-	last = gen_udivsi3_i1 (operands[0], operands[3]);
+	last = gen_udivsi3_i1 (operands[0], operands[3], const0_rtx);
     }
   else
     {
-      function_symbol (operands[3], "__udivsi3", SFUNC_STATIC);
-      last = gen_udivsi3_i1 (operands[0], operands[3]);
+      rtx lab = function_symbol (operands[3], "__udivsi3", SFUNC_STATIC).lab;
+      last = gen_udivsi3_i1 (operands[0], operands[3], lab);
     }
   emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
   emit_move_insn (gen_rtx_REG (SImode, 5), operands[2]);
@@ -2906,7 +2918,7 @@ 
       emit_move_insn (gen_rtx_REG (DImode, R20_REG), x);
       break;
     }
-  sym = function_symbol (NULL, name, kind);
+  sym = function_symbol (NULL, name, kind).sym;
   emit_insn (gen_divsi3_media_2 (operands[0], sym));
   DONE;
 }
@@ -2926,31 +2938,37 @@ 
 })
 
 (define_insn "divsi3_i4"
-  [(set (match_operand:SI 0 "register_operand" "=y")
+  [(set (match_operand:SI 0 "register_operand" "=y,y")
 	(div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
    (clobber (reg:SI PR_REG))
    (clobber (reg:DF DR0_REG))
    (clobber (reg:DF DR2_REG))
    (clobber (reg:SI FPSCR_STAT_REG))
-   (use (match_operand:SI 1 "arith_reg_operand" "r"))
+   (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+   (use (match_operand 2 "" "Z,Ccl"))
    (use (reg:SI FPSCR_MODES_REG))]
   "TARGET_FPU_DOUBLE && ! TARGET_FPU_SINGLE"
-  "jsr	@%1%#"
+  "@
+	jsr	@%1%#
+	bsrf	%1\n%O2:%#"
   [(set_attr "type" "sfunc")
    (set_attr "fp_mode" "double")
    (set_attr "needs_delay_slot" "yes")])
 
 (define_insn "divsi3_i4_single"
-  [(set (match_operand:SI 0 "register_operand" "=y")
+  [(set (match_operand:SI 0 "register_operand" "=y,y")
 	(div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
    (clobber (reg:SI PR_REG))
    (clobber (reg:DF DR0_REG))
    (clobber (reg:DF DR2_REG))
    (clobber (reg:SI R2_REG))
-   (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+   (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+   (use (match_operand 2 "" "Z,Ccl"))]
   "(TARGET_FPU_SINGLE_ONLY || TARGET_FPU_DOUBLE || TARGET_SHCOMPACT)
    && TARGET_FPU_SINGLE"
-  "jsr	@%1%#"
+  "@
+	jsr	@%1%#
+	bsrf	%1\n%O2:%#"
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])
 
@@ -2994,11 +3012,12 @@ 
     }
   else if (TARGET_DIVIDE_CALL_FP)
     {
-      function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_STATIC);
+      rtx lab = function_symbol (operands[3], sh_divsi3_libfunc,
+				 SFUNC_STATIC).lab;
       if (TARGET_FPU_SINGLE)
-	last = gen_divsi3_i4_single (operands[0], operands[3]);
+	last = gen_divsi3_i4_single (operands[0], operands[3], lab);
       else
-	last = gen_divsi3_i4 (operands[0], operands[3]);
+	last = gen_divsi3_i4 (operands[0], operands[3], lab);
     }
   else if (TARGET_SH2A)
     {
@@ -3113,7 +3132,7 @@ 
 	last = ((TARGET_DIVIDE_CALL2 ? gen_divsi3_media_2 : gen_divsi3_i1_media)
 		(operands[0], operands[3]));
       else if (TARGET_FPU_ANY)
-	last = gen_divsi3_i4_single (operands[0], operands[3]);
+	last = gen_divsi3_i4_single (operands[0], operands[3], const0_rtx);
       else
 	last = gen_divsi3_i1 (operands[0], operands[3]);
     }
@@ -3713,7 +3732,7 @@ 
     {
       /* The address must be set outside the libcall,
 	 since it goes into a pseudo.  */
-      rtx sym = function_symbol (NULL, "__mulsi3", SFUNC_STATIC);
+      rtx sym = function_symbol (NULL, "__mulsi3", SFUNC_STATIC).sym;
       rtx addr = force_reg (SImode, sym);
       rtx insns = gen_mulsi3_call (operands[0], operands[1],
 				   operands[2], addr);
@@ -4970,8 +4989,8 @@ 
     {
       emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]);
       rtx funcaddr = gen_reg_rtx (Pmode);
-      function_symbol (funcaddr, "__ashlsi3_r0", SFUNC_STATIC);
-      emit_insn (gen_ashlsi3_d_call (operands[0], operands[2], funcaddr));
+      rtx lab = function_symbol (funcaddr, "__ashlsi3_r0", SFUNC_STATIC).lab;
+      emit_insn (gen_ashlsi3_d_call (operands[0], operands[2], funcaddr, lab));
 
       DONE;
     }
@@ -5024,15 +5043,18 @@ 
 ;; In order to make combine understand the truncation of the shift amount
 ;; operand we have to allow it to use pseudo regs for the shift operands.
 (define_insn "ashlsi3_d_call"
-  [(set (match_operand:SI 0 "arith_reg_dest" "=z")
+  [(set (match_operand:SI 0 "arith_reg_dest" "=z,z")
 	(ashift:SI (reg:SI R4_REG)
-		   (and:SI (match_operand:SI 1 "arith_reg_operand" "z")
+		   (and:SI (match_operand:SI 1 "arith_reg_operand" "z,z")
 			   (const_int 31))))
-   (use (match_operand:SI 2 "arith_reg_operand" "r"))
+   (use (match_operand:SI 2 "arith_reg_operand" "r,r"))
+   (use (match_operand 3 "" "Z,Ccl"))
    (clobber (reg:SI T_REG))
    (clobber (reg:SI PR_REG))]
   "TARGET_SH1 && !TARGET_DYNSHIFT"
-  "jsr	@%2%#"
+  "@
+	jsr	@%2%#
+	bsrf	%2\n%O3:%#"
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])
 
@@ -5374,12 +5396,15 @@ 
 (define_insn "ashrsi3_n"
   [(set (reg:SI R4_REG)
 	(ashiftrt:SI (reg:SI R4_REG)
-		     (match_operand:SI 0 "const_int_operand" "i")))
+		     (match_operand:SI 0 "const_int_operand" "i,i")))
    (clobber (reg:SI T_REG))
    (clobber (reg:SI PR_REG))
-   (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+   (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
+   (use (match_operand 2 "" "Z,Ccl"))]
   "TARGET_SH1"
-  "jsr	@%1%#"
+  "@
+	jsr	@%1%#
+	bsrf	%1\n%O2:%#"
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])
 
@@ -5532,8 +5557,8 @@ 
     {
       emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]);
       rtx funcaddr = gen_reg_rtx (Pmode);
-      function_symbol (funcaddr, "__lshrsi3_r0", SFUNC_STATIC);
-      emit_insn (gen_lshrsi3_d_call (operands[0], operands[2], funcaddr));
+      rtx lab = function_symbol (funcaddr, "__lshrsi3_r0", SFUNC_STATIC).lab;
+      emit_insn (gen_lshrsi3_d_call (operands[0], operands[2], funcaddr, lab));
       DONE;
     }
 })
@@ -5585,15 +5610,18 @@ 
 ;; In order to make combine understand the truncation of the shift amount
 ;; operand we have to allow it to use pseudo regs for the shift operands.
 (define_insn "lshrsi3_d_call"
-  [(set (match_operand:SI 0 "arith_reg_dest" "=z")
+  [(set (match_operand:SI 0 "arith_reg_dest" "=z,z")
 	(lshiftrt:SI (reg:SI R4_REG)
-		     (and:SI (match_operand:SI 1 "arith_reg_operand" "z")
+		     (and:SI (match_operand:SI 1 "arith_reg_operand" "z,z")
 			     (const_int 31))))
-   (use (match_operand:SI 2 "arith_reg_operand" "r"))
+   (use (match_operand:SI 2 "arith_reg_operand" "r,r"))
+   (use (match_operand 3 "" "Z,Ccl"))
    (clobber (reg:SI T_REG))
    (clobber (reg:SI PR_REG))]
   "TARGET_SH1 && !TARGET_DYNSHIFT"
-  "jsr	@%2%#"
+  "@
+	jsr	@%2%#
+	bsrf	%2\n%O3:%#"
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])
 
@@ -7315,7 +7343,7 @@ 
     }
   else if (TARGET_SHCOMPACT)
     {
-      operands[1] = function_symbol (NULL, "__ic_invalidate", SFUNC_STATIC);
+      operands[1] = function_symbol (NULL, "__ic_invalidate", SFUNC_STATIC).sym;
       operands[1] = force_reg (Pmode, operands[1]);
       emit_insn (gen_ic_invalidate_line_compact (operands[0], operands[1]));
       DONE;
@@ -7397,7 +7425,7 @@ 
 
   tramp = force_reg (Pmode, operands[0]);
   sfun = force_reg (Pmode, function_symbol (NULL, "__init_trampoline",
-					    SFUNC_STATIC));
+					    SFUNC_STATIC).sym);
   emit_move_insn (gen_rtx_REG (SImode, R2_REG), operands[1]);
   emit_move_insn (gen_rtx_REG (SImode, R3_REG), operands[2]);
 
@@ -9455,9 +9483,9 @@ 
 	 (match_operand 1 "" ""))
    (use (reg:SI FPSCR_MODES_REG))
    (clobber (reg:SI PR_REG))]
-  "TARGET_SH1"
+  "TARGET_SH1 && !TARGET_FDPIC"
 {
-  if (TARGET_SH2A && (dbr_sequence_length () == 0))
+  if (TARGET_SH2A && dbr_sequence_length () == 0)
     return "jsr/n	@%0";
   else
     return "jsr	@%0%#";
@@ -9469,6 +9497,26 @@ 
    (set_attr "needs_delay_slot" "yes")
    (set_attr "fp_set" "unknown")])
 
+(define_insn "calli_fdpic"
+  [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r"))
+	 (match_operand 1))
+   (use (reg:SI FPSCR_MODES_REG))
+   (use (reg:SI PIC_REG))
+   (clobber (reg:SI PR_REG))]
+  "TARGET_FDPIC"
+{
+  if (TARGET_SH2A && dbr_sequence_length () == 0)
+    return "jsr/n	@%0";
+  else
+    return "jsr	@%0%#";
+}
+  [(set_attr "type" "call")
+   (set (attr "fp_mode")
+	(if_then_else (eq_attr "fpu_single" "yes")
+		      (const_string "single") (const_string "double")))
+   (set_attr "needs_delay_slot" "yes")
+   (set_attr "fp_set" "unknown")])
+
 ;; This is TBR relative jump instruction for SH2A architecture.
 ;; Its use is enabled by assigning an attribute "function_vector"
 ;; and the vector number to a function during its declaration.
@@ -9584,9 +9632,9 @@ 
 	      (match_operand 2 "" "")))
    (use (reg:SI FPSCR_MODES_REG))
    (clobber (reg:SI PR_REG))]
-  "TARGET_SH1"
+  "TARGET_SH1 && !TARGET_FDPIC"
 {
-  if (TARGET_SH2A && (dbr_sequence_length () == 0))
+  if (TARGET_SH2A && dbr_sequence_length () == 0)
     return "jsr/n	@%1";
   else
     return "jsr	@%1%#";
@@ -9598,6 +9646,27 @@ 
    (set_attr "needs_delay_slot" "yes")
    (set_attr "fp_set" "unknown")])
 
+(define_insn "call_valuei_fdpic"
+  [(set (match_operand 0 "" "=rf")
+	(call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r"))
+	      (match_operand 2)))
+   (use (reg:SI FPSCR_MODES_REG))	;; Same FPSCR use as calli_fdpic.
+   (use (reg:SI PIC_REG))
+   (clobber (reg:SI PR_REG))]
+  "TARGET_FDPIC"
+{
+  if (TARGET_SH2A && dbr_sequence_length () == 0)
+    return "jsr/n	@%1";
+  else
+    return "jsr	@%1%#";
+}
+  [(set_attr "type" "call")
+   (set (attr "fp_mode")
+	(if_then_else (eq_attr "fpu_single" "yes")
+		      (const_string "single") (const_string "double")))
+   (set_attr "needs_delay_slot" "yes")
+   (set_attr "fp_set" "unknown")])
+
 ;; This is TBR relative jump instruction for SH2A architecture.
 ;; Its use is enabled by assigning an attribute "function_vector"
 ;; and the vector number to a function during its declaration.
@@ -9721,6 +9790,12 @@ 
 	      (clobber (reg:SI PR_REG))])]
   ""
 {
+  if (TARGET_FDPIC)
+    {
+      rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+      emit_move_insn (pic_reg, sh_get_fdpic_reg_initial_val ());
+    }
+
   if (TARGET_SHMEDIA)
     {
       operands[0] = shmedia_prepare_call_address (operands[0], 0);
@@ -9755,8 +9830,8 @@ 
 	 run out of registers when adjusting fpscr for the call.  */
       emit_insn (gen_force_mode_for_call ());
 
-      operands[0]
-	= function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT);
+      operands[0] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
+				     SFUNC_GOT).sym;
       operands[0] = force_reg (SImode, operands[0]);
 
       emit_move_insn (r0, func);
@@ -9804,7 +9879,13 @@ 
     operands[1] = operands[2];
   }
 
-  emit_call_insn (gen_calli (operands[0], operands[1]));
+  if (TARGET_FDPIC)
+    {
+      operands[0] = sh_load_function_descriptor (operands[0]);
+      emit_call_insn (gen_calli_fdpic (operands[0], operands[1]));
+    }
+  else
+    emit_call_insn (gen_calli (operands[0], operands[1]));
   DONE;
 })
 
@@ -9884,7 +9965,7 @@ 
   emit_insn (gen_force_mode_for_call ());
 
   operands[0] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
-				 SFUNC_GOT);
+				 SFUNC_GOT).sym;
   operands[0] = force_reg (SImode, operands[0]);
 
   emit_move_insn (r0, func);
@@ -9909,6 +9990,12 @@ 
 	      (clobber (reg:SI PR_REG))])]
   ""
 {
+  if (TARGET_FDPIC)
+    {
+      rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+      emit_move_insn (pic_reg, sh_get_fdpic_reg_initial_val ());
+    }
+
   if (TARGET_SHMEDIA)
     {
       operands[1] = shmedia_prepare_call_address (operands[1], 0);
@@ -9944,8 +10031,8 @@ 
 	 run out of registers when adjusting fpscr for the call.  */
       emit_insn (gen_force_mode_for_call ());
 
-      operands[1]
-	= function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT);
+      operands[1] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
+				     SFUNC_GOT).sym;
       operands[1] = force_reg (SImode, operands[1]);
 
       emit_move_insn (r0, func);
@@ -9993,7 +10080,14 @@ 
   else
     operands[1] = force_reg (SImode, XEXP (operands[1], 0));
 
-  emit_call_insn (gen_call_valuei (operands[0], operands[1], operands[2]));
+  if (TARGET_FDPIC)
+    {
+      operands[1] = sh_load_function_descriptor (operands[1]);
+      emit_call_insn (gen_call_valuei_fdpic (operands[0], operands[1],
+					     operands[2]));
+    }
+  else
+    emit_call_insn (gen_call_valuei (operands[0], operands[1], operands[2]));
   DONE;
 })
 
@@ -10002,7 +10096,7 @@ 
 	 (match_operand 1 "" ""))
    (use (reg:SI FPSCR_MODES_REG))
    (return)]
-  "TARGET_SH1"
+  "TARGET_SH1 && !TARGET_FDPIC"
   "jmp	@%0%#"
   [(set_attr "needs_delay_slot" "yes")
    (set (attr "fp_mode")
@@ -10010,6 +10104,20 @@ 
 		      (const_string "single") (const_string "double")))
    (set_attr "type" "jump_ind")])
 
+(define_insn "sibcalli_fdpic"
+  [(call (mem:SI (match_operand:SI 0 "register_operand" "k"))
+	 (match_operand 1))
+   (use (reg:SI FPSCR_MODES_REG))
+   (use (reg:SI PIC_REG))
+   (return)]
+  "TARGET_FDPIC"
+  "jmp	@%0%#"
+  [(set_attr "needs_delay_slot" "yes")
+   (set (attr "fp_mode")
+	(if_then_else (eq_attr "fpu_single" "yes")
+		      (const_string "single") (const_string "double")))
+   (set_attr "type" "jump_ind")])
+
 (define_insn "sibcalli_pcrel"
   [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "k"))
 	 (match_operand 1 "" ""))
@@ -10016,7 +10124,7 @@ 
    (use (match_operand 2 "" ""))
    (use (reg:SI FPSCR_MODES_REG))
    (return)]
-  "TARGET_SH2"
+  "TARGET_SH2 && !TARGET_FDPIC"
 {
   return       "braf	%0"	"\n"
 	 "%O2:%#";
@@ -10027,6 +10135,24 @@ 
 		      (const_string "single") (const_string "double")))
    (set_attr "type" "jump_ind")])
 
+(define_insn "sibcalli_pcrel_fdpic"
+  [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "k"))
+	 (match_operand 1))
+   (use (match_operand 2))
+   (use (reg:SI FPSCR_MODES_REG))
+   (use (reg:SI PIC_REG))
+   (return)]
+  "TARGET_SH2 && TARGET_FDPIC"
+{
+  return       "braf	%0"	"\n"
+	 "%O2:%#";
+}
+  [(set_attr "needs_delay_slot" "yes")
+   (set (attr "fp_mode")
+	(if_then_else (eq_attr "fpu_single" "yes")
+		      (const_string "single") (const_string "double")))
+   (set_attr "type" "jump_ind")])
+
 ;; This uses an unspec to describe that the symbol_ref is very close.
 (define_insn "sibcalli_thunk"
   [(call (mem:SI (unspec:SI [(match_operand:SI 0 "symbol_ref_operand" "")]
@@ -10049,7 +10175,7 @@ 
    (use (reg:SI FPSCR_MODES_REG))
    (clobber (match_scratch:SI 2 "=&k"))
    (return)]
-  "TARGET_SH2"
+  "TARGET_SH2 && !TARGET_FDPIC"
   "#"
   "reload_completed"
   [(const_int 0)]
@@ -10069,6 +10195,32 @@ 
 		      (const_string "single") (const_string "double")))
    (set_attr "type" "jump_ind")])
 
+(define_insn_and_split "sibcall_pcrel_fdpic"
+  [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand"))
+	 (match_operand 1))
+   (use (reg:SI FPSCR_MODES_REG))
+   (use (reg:SI PIC_REG))
+   (clobber (match_scratch:SI 2 "=k"))
+   (return)]
+  "TARGET_SH2 && TARGET_FDPIC"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx lab = PATTERN (gen_call_site ());
+
+  sh_expand_sym_label2reg (operands[2], operands[0], lab, true);
+  rtx i = emit_call_insn (gen_sibcalli_pcrel_fdpic (operands[2], operands[1],
+						    copy_rtx (lab)));
+  SIBLING_CALL_P (i) = 1;
+  DONE;
+}
+  [(set_attr "needs_delay_slot" "yes")
+   (set (attr "fp_mode")
+	(if_then_else (eq_attr "fpu_single" "yes")
+		      (const_string "single") (const_string "double")))
+   (set_attr "type" "jump_ind")])
+
 (define_insn "sibcall_compact"
   [(call (mem:SI (match_operand:SI 0 "register_operand" "k,k"))
 	 (match_operand 1 "" ""))
@@ -10113,6 +10265,12 @@ 
      (return)])]
   ""
 {
+  if (TARGET_FDPIC)
+    {
+      rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+      emit_move_insn (pic_reg, sh_get_fdpic_reg_initial_val ());
+    }
+
   if (TARGET_SHMEDIA)
     {
       operands[0] = shmedia_prepare_call_address (operands[0], 1);
@@ -10157,8 +10315,8 @@ 
 	 run out of registers when adjusting fpscr for the call.  */
       emit_insn (gen_force_mode_for_call ());
 
-      operands[0]
-	= function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT);
+      operands[0] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
+				     SFUNC_GOT).sym;
       operands[0] = force_reg (SImode, operands[0]);
 
       /* We don't need a return trampoline, since the callee will
@@ -10192,13 +10350,23 @@ 
 	 static functions.  */
       && SYMBOL_REF_LOCAL_P (XEXP (operands[0], 0)))
     {
-      emit_call_insn (gen_sibcall_pcrel (XEXP (operands[0], 0), operands[1]));
+      if (TARGET_FDPIC)
+	emit_call_insn (gen_sibcall_pcrel_fdpic (XEXP (operands[0], 0),
+						 operands[1]));
+      else
+	emit_call_insn (gen_sibcall_pcrel (XEXP (operands[0], 0), operands[1]));
       DONE;
     }
   else
     operands[0] = force_reg (SImode, XEXP (operands[0], 0));
 
-  emit_call_insn (gen_sibcalli (operands[0], operands[1]));
+  if (TARGET_FDPIC)
+    {
+      operands[0] = sh_load_function_descriptor (operands[0]);
+      emit_call_insn (gen_sibcalli_fdpic (operands[0], operands[1]));
+    }
+  else
+    emit_call_insn (gen_sibcalli (operands[0], operands[1]));
   DONE;
 })
 
@@ -10208,10 +10376,25 @@ 
 	      (match_operand 2 "" "")))
    (use (reg:SI FPSCR_MODES_REG))
    (return)]
-  "TARGET_SH1"
+  "TARGET_SH1 && !TARGET_FDPIC"
   "jmp	@%1%#"
   [(set_attr "needs_delay_slot" "yes")
    (set (attr "fp_mode")
+       (if_then_else (eq_attr "fpu_single" "yes")
+		     (const_string "single") (const_string "double")))
+   (set_attr "type" "jump_ind")])
+
+(define_insn "sibcall_valuei_fdpic"
+  [(set (match_operand 0 "" "=rf")
+	(call (mem:SI (match_operand:SI 1 "register_operand" "k"))
+	      (match_operand 2)))
+   (use (reg:SI FPSCR_MODES_REG))
+   (use (reg:SI PIC_REG))
+   (return)]
+  "TARGET_FDPIC"
+  "jmp	@%1%#"
+  [(set_attr "needs_delay_slot" "yes")
+   (set (attr "fp_mode")
 	(if_then_else (eq_attr "fpu_single" "yes")
 		      (const_string "single") (const_string "double")))
    (set_attr "type" "jump_ind")])
@@ -10223,7 +10406,7 @@ 
    (use (match_operand 3 "" ""))
    (use (reg:SI FPSCR_MODES_REG))
    (return)]
-  "TARGET_SH2"
+  "TARGET_SH2 && !TARGET_FDPIC"
 {
   return       "braf	%1"	"\n"
 	 "%O3:%#";
@@ -10234,6 +10417,25 @@ 
 		      (const_string "single") (const_string "double")))
    (set_attr "type" "jump_ind")])
 
+(define_insn "sibcall_valuei_pcrel_fdpic"
+  [(set (match_operand 0 "" "=rf")
+	(call (mem:SI (match_operand:SI 1 "arith_reg_operand" "k"))
+	      (match_operand 2)))
+   (use (match_operand 3))
+   (use (reg:SI FPSCR_MODES_REG))
+   (use (reg:SI PIC_REG))
+   (return)]
+  "TARGET_SH2 && TARGET_FDPIC"
+{
+  return       "braf	%1"	"\n"
+	 "%O3:%#";
+}
+  [(set_attr "needs_delay_slot" "yes")
+   (set (attr "fp_mode")
+	(if_then_else (eq_attr "fpu_single" "yes")
+		      (const_string "single") (const_string "double")))
+   (set_attr "type" "jump_ind")])
+
 (define_insn_and_split "sibcall_value_pcrel"
   [(set (match_operand 0 "" "=rf")
 	(call (mem:SI (match_operand:SI 1 "symbol_ref_operand" ""))
@@ -10241,7 +10443,7 @@ 
    (use (reg:SI FPSCR_MODES_REG))
    (clobber (match_scratch:SI 3 "=&k"))
    (return)]
-  "TARGET_SH2"
+  "TARGET_SH2 && !TARGET_FDPIC"
   "#"
   "reload_completed"
   [(const_int 0)]
@@ -10263,6 +10465,35 @@ 
 		      (const_string "single") (const_string "double")))
    (set_attr "type" "jump_ind")])
 
+(define_insn_and_split "sibcall_value_pcrel_fdpic"
+  [(set (match_operand 0 "" "=rf")
+	(call (mem:SI (match_operand:SI 1 "symbol_ref_operand"))
+	      (match_operand 2)))
+   (use (reg:SI FPSCR_MODES_REG))
+   (use (reg:SI PIC_REG))
+   (clobber (match_scratch:SI 3 "=k"))
+   (return)]
+  "TARGET_SH2 && TARGET_FDPIC"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx lab = PATTERN (gen_call_site ());
+
+  sh_expand_sym_label2reg (operands[3], operands[1], lab, true);
+  rtx i = emit_call_insn (gen_sibcall_valuei_pcrel_fdpic (operands[0],
+							  operands[3],
+							  operands[2],
+							  copy_rtx (lab)));
+  SIBLING_CALL_P (i) = 1;
+  DONE;
+}
+  [(set_attr "needs_delay_slot" "yes")
+   (set (attr "fp_mode")
+	(if_then_else (eq_attr "fpu_single" "yes")
+		      (const_string "single") (const_string "double")))
+   (set_attr "type" "jump_ind")])
+
 (define_insn "sibcall_value_compact"
   [(set (match_operand 0 "" "=rf,rf")
 	(call (mem:SI (match_operand:SI 1 "register_operand" "k,k"))
@@ -10310,6 +10541,12 @@ 
      (return)])]
   ""
 {
+  if (TARGET_FDPIC)
+    {
+      rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+      emit_move_insn (pic_reg, sh_get_fdpic_reg_initial_val ());
+    }
+
   if (TARGET_SHMEDIA)
     {
       operands[1] = shmedia_prepare_call_address (operands[1], 1);
@@ -10355,8 +10592,8 @@ 
 	 run out of registers when adjusting fpscr for the call.  */
       emit_insn (gen_force_mode_for_call ());
 
-      operands[1]
-	= function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT);
+      operands[1] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
+				     SFUNC_GOT).sym;
       operands[1] = force_reg (SImode, operands[1]);
 
       /* We don't need a return trampoline, since the callee will
@@ -10391,15 +10628,27 @@ 
 	 static functions.  */
       && SYMBOL_REF_LOCAL_P (XEXP (operands[1], 0)))
     {
-      emit_call_insn (gen_sibcall_value_pcrel (operands[0],
-					       XEXP (operands[1], 0),
-					       operands[2]));
+      if (TARGET_FDPIC)
+       emit_call_insn (gen_sibcall_value_pcrel_fdpic (operands[0],
+						      XEXP (operands[1], 0),
+						      operands[2]));
+      else
+       emit_call_insn (gen_sibcall_value_pcrel (operands[0],
+						XEXP (operands[1], 0),
+						operands[2]));
       DONE;
     }
   else
     operands[1] = force_reg (SImode, XEXP (operands[1], 0));
 
-  emit_call_insn (gen_sibcall_valuei (operands[0], operands[1], operands[2]));
+  if (TARGET_FDPIC)
+    {
+      operands[1] = sh_load_function_descriptor (operands[1]);
+      emit_call_insn (gen_sibcall_valuei_fdpic (operands[0], operands[1],
+						operands[2]));
+    }
+  else
+    emit_call_insn (gen_sibcall_valuei (operands[0], operands[1], operands[2]));
   DONE;
 })
 
@@ -10483,7 +10732,7 @@ 
   emit_insn (gen_force_mode_for_call ());
 
   operands[1] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
-				 SFUNC_GOT);
+				 SFUNC_GOT).sym;
   operands[1] = force_reg (SImode, operands[1]);
 
   emit_move_insn (r0, func);
@@ -10681,6 +10930,13 @@ 
       DONE;
     }
 
+  if (TARGET_FDPIC)
+    {
+      rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
+      emit_move_insn (pic_reg, sh_get_fdpic_reg_initial_val ());
+      DONE;
+    }
+
   operands[1] = gen_rtx_REG (Pmode, PIC_REG);
   operands[2] = gen_rtx_SYMBOL_REF (VOIDmode, GOT_SYMBOL_NAME);
 
@@ -10816,6 +11072,9 @@ 
   rtx mem;
   bool stack_chk_guard_p = false;
 
+  rtx picreg = TARGET_FDPIC ? sh_get_fdpic_reg_initial_val ()
+			    : gen_rtx_REG (Pmode, PIC_REG);
+
   operands[2] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
   operands[3] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
 
@@ -10858,8 +11117,7 @@ 
   if (stack_chk_guard_p)
     emit_insn (gen_chk_guard_add (operands[3], operands[2]));
   else
-    emit_move_insn (operands[3], gen_rtx_PLUS (Pmode, operands[2],
-					       gen_rtx_REG (Pmode, PIC_REG)));
+    emit_move_insn (operands[3], gen_rtx_PLUS (Pmode, operands[2], picreg));
 
   /* N.B. This is not constant for a GOTPLT relocation.  */
   mem = gen_rtx_MEM (Pmode, operands[3]);
@@ -10890,6 +11148,23 @@ 
   DONE;
 })
 
+(define_expand "sym2GOTFUNCDESC"
+  [(const (unspec [(match_operand 0)] UNSPEC_GOTFUNCDESC))]
+  "TARGET_FDPIC")
+
+(define_expand "symGOTFUNCDESC2reg"
+  [(match_operand 0) (match_operand 1)]
+  "TARGET_FDPIC"
+{
+  rtx gotsym = gen_sym2GOTFUNCDESC (operands[1]);
+  PUT_MODE (gotsym, Pmode);
+  rtx insn = emit_insn (gen_symGOT_load (operands[0], gotsym));
+
+  MEM_READONLY_P (SET_SRC (PATTERN (insn))) = 1;
+
+  DONE;
+})
+
 (define_expand "symGOTPLT2reg"
   [(match_operand 0 "" "") (match_operand 1 "" "")]
   ""
@@ -10916,12 +11191,13 @@ 
 	   ? operands[0]
 	   : gen_reg_rtx (GET_MODE (operands[0])));
 
+  rtx picreg = TARGET_FDPIC ? sh_get_fdpic_reg_initial_val ()
+			    : gen_rtx_REG (Pmode, PIC_REG);
+
   gotoffsym = gen_sym2GOTOFF (operands[1]);
   PUT_MODE (gotoffsym, Pmode);
   emit_move_insn (t, gotoffsym);
-  insn = emit_move_insn (operands[0],
-			 gen_rtx_PLUS (Pmode, t,
-				       gen_rtx_REG (Pmode, PIC_REG)));
+  insn = emit_move_insn (operands[0], gen_rtx_PLUS (Pmode, t, picreg));
 
   set_unique_reg_note (insn, REG_EQUAL, operands[1]);
 
@@ -10928,6 +11204,26 @@ 
   DONE;
 })
 
+(define_expand "sym2GOTOFFFUNCDESC"
+  [(const (unspec [(match_operand 0)] UNSPEC_GOTOFFFUNCDESC))]
+  "TARGET_FDPIC")
+
+(define_expand "symGOTOFFFUNCDESC2reg"
+  [(match_operand 0) (match_operand 1)]
+  "TARGET_FDPIC"
+{
+  rtx picreg = sh_get_fdpic_reg_initial_val ();
+  rtx t = !can_create_pseudo_p ()
+	  ? operands[0]
+	  : gen_reg_rtx (GET_MODE (operands[0]));
+
+  rtx gotoffsym = gen_sym2GOTOFFFUNCDESC (operands[1]);
+  PUT_MODE (gotoffsym, Pmode);
+  emit_move_insn (t, gotoffsym);
+  emit_move_insn (operands[0], gen_rtx_PLUS (Pmode, t, picreg));
+  DONE;
+})
+
 (define_expand "symPLT_label2reg"
   [(set (match_operand:SI 0 "" "")
 	(const:SI
@@ -12678,11 +12974,14 @@ 
 (define_insn "block_move_real"
   [(parallel [(set (mem:BLK (reg:SI R4_REG))
 		   (mem:BLK (reg:SI R5_REG)))
-	      (use (match_operand:SI 0 "arith_reg_operand" "r"))
+	      (use (match_operand:SI 0 "arith_reg_operand" "r,r"))
+	      (use (match_operand 1 "" "Z,Ccl"))
 	      (clobber (reg:SI PR_REG))
 	      (clobber (reg:SI R0_REG))])]
   "TARGET_SH1 && ! TARGET_HARD_SH4"
-  "jsr	@%0%#"
+  "@
+	jsr	@%0%#
+	bsrf	%0\n%O1:%#"
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])
 
@@ -12689,7 +12988,8 @@ 
 (define_insn "block_lump_real"
   [(parallel [(set (mem:BLK (reg:SI R4_REG))
 		   (mem:BLK (reg:SI R5_REG)))
-	      (use (match_operand:SI 0 "arith_reg_operand" "r"))
+	      (use (match_operand:SI 0 "arith_reg_operand" "r,r"))
+	      (use (match_operand 1 "" "Z,Ccl"))
 	      (use (reg:SI R6_REG))
 	      (clobber (reg:SI PR_REG))
 	      (clobber (reg:SI T_REG))
@@ -12698,7 +12998,9 @@ 
 	      (clobber (reg:SI R6_REG))
 	      (clobber (reg:SI R0_REG))])]
   "TARGET_SH1 && ! TARGET_HARD_SH4"
-  "jsr	@%0%#"
+  "@
+	jsr	@%0%#
+	bsrf	%0\n%O1:%#"
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])
 
@@ -12705,13 +13007,16 @@ 
 (define_insn "block_move_real_i4"
   [(parallel [(set (mem:BLK (reg:SI R4_REG))
 		   (mem:BLK (reg:SI R5_REG)))
-	      (use (match_operand:SI 0 "arith_reg_operand" "r"))
+	      (use (match_operand:SI 0 "arith_reg_operand" "r,r"))
+	      (use (match_operand 1 "" "Z,Ccl"))
 	      (clobber (reg:SI PR_REG))
 	      (clobber (reg:SI R0_REG))
 	      (clobber (reg:SI R1_REG))
 	      (clobber (reg:SI R2_REG))])]
   "TARGET_HARD_SH4"
-  "jsr	@%0%#"
+  "@
+	jsr	@%0%#
+	bsrf	%0\n%O1:%#"
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])
 
@@ -12718,7 +13023,8 @@ 
 (define_insn "block_lump_real_i4"
   [(parallel [(set (mem:BLK (reg:SI R4_REG))
 		   (mem:BLK (reg:SI R5_REG)))
-	      (use (match_operand:SI 0 "arith_reg_operand" "r"))
+	      (use (match_operand:SI 0 "arith_reg_operand" "r,r"))
+	      (use (match_operand 1 "" "Z,Ccl"))
 	      (use (reg:SI R6_REG))
 	      (clobber (reg:SI PR_REG))
 	      (clobber (reg:SI T_REG))
@@ -12730,7 +13036,9 @@ 
 	      (clobber (reg:SI R2_REG))
 	      (clobber (reg:SI R3_REG))])]
   "TARGET_HARD_SH4"
-  "jsr	@%0%#"
+  "@
+	jsr	@%0%#
+	bsrf	%0\n%O1:%#"
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])
 
Index: gcc/config/sh/sh.opt
===================================================================
--- gcc/config/sh/sh.opt	(revision 229290)
+++ gcc/config/sh/sh.opt	(working copy)
@@ -260,6 +260,10 @@ 
 Target RejectNegative Joined Var(sh_divsi3_libfunc) Init("")
 Specify name for 32 bit signed division function.
 
+mfdpic
+Target Report Var(TARGET_FDPIC) Init(0)
+Generate ELF FDPIC code.
+
 mfmovd
 Target RejectNegative Mask(FMOVD)
 Enable the use of 64-bit floating point registers in fmov instructions.  See -mdalign if 64-bit alignment is required.
Index: gcc/config.gcc
===================================================================
--- gcc/config.gcc	(revision 229290)
+++ gcc/config.gcc	(working copy)
@@ -2628,6 +2628,9 @@ 
 	tm_file="${tm_file} dbxelf.h elfos.h sh/elf.h"
 	case ${target} in
 	sh*-*-linux*)	tmake_file="${tmake_file} sh/t-linux"
+			if test x$enable_fdpic = xyes; then
+				tm_defines="$tm_defines FDPIC_DEFAULT=1"
+			fi
 			tm_file="${tm_file} gnu-user.h linux.h glibc-stdint.h sh/linux.h" ;;
 	sh*-*-netbsd*)
 			tm_file="${tm_file} netbsd.h netbsd-elf.h sh/netbsd-elf.h"
Index: gcc/doc/install.texi
===================================================================
--- gcc/doc/install.texi	(revision 229290)
+++ gcc/doc/install.texi	(working copy)
@@ -1810,6 +1810,9 @@ 
 128-bit @code{long double} when built against GNU C Library 2.4 and later,
 64-bit @code{long double} otherwise.
 
+@item --enable-fdpic
+On SH Linux systems, generate ELF FDPIC code.
+
 @item --with-gmp=@var{pathname}
 @itemx --with-gmp-include=@var{pathname}
 @itemx --with-gmp-lib=@var{pathname}
Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi	(revision 229290)
+++ gcc/doc/invoke.texi	(working copy)
@@ -21244,6 +21244,10 @@ 
 Prefer zero-displacement conditional branches for conditional move instruction
 patterns.  This can result in faster code on the SH4 processor.
 
+@item -mfdpic
+@opindex mfdpic
+Generate code using the FDPIC ABI.
+
 @end table
 
 @node Solaris 2 Options
Index: include/longlong.h
===================================================================
--- include/longlong.h	(revision 229290)
+++ include/longlong.h	(working copy)
@@ -1102,6 +1102,11 @@ 
 /* This is the same algorithm as __udiv_qrnnd_c.  */
 #define UDIV_NEEDS_NORMALIZATION 1
 
+#ifdef __FDPIC__
+/* FDPIC needs a special version of the asm fragment to extract the
+   code address from the function descriptor.  __udiv_qrnnd_16 is
+   assumed to be local and not to use the GOT, so loading r12 is
+   not needed.  */
 #define udiv_qrnnd(q, r, n1, n0, d) \
   do {									\
     extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)			\
@@ -1108,6 +1113,28 @@ 
 			__attribute__ ((visibility ("hidden")));	\
     /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */	\
     __asm__ (								\
+	"mov%M4	%4,r5\n"						\
+"	swap.w	%3,r4\n"						\
+"	swap.w	r5,r6\n"						\
+"	mov.l	@%5,r2\n"						\
+"	jsr	@r2\n"							\
+"	shll16	r6\n"							\
+"	swap.w	r4,r4\n"						\
+"	mov.l	@%5,r2\n"						\
+"	jsr	@r2\n"							\
+"	swap.w	r1,%0\n"						\
+"	or	r1,%0"							\
+	: "=r" (q), "=&z" (r)						\
+	: "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)		\
+	: "r1", "r2", "r4", "r5", "r6", "pr", "t");			\
+  } while (0)
+#else
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  do {									\
+    extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)			\
+			__attribute__ ((visibility ("hidden")));	\
+    /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */	\
+    __asm__ (								\
 	"mov%M4 %4,r5\n"						\
 "	swap.w %3,r4\n"							\
 "	swap.w r5,r6\n"							\
@@ -1121,6 +1148,7 @@ 
 	: "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)		\
 	: "r1", "r2", "r4", "r5", "r6", "pr", "t");			\
   } while (0)
+#endif /* __FDPIC__  */
 
 #define UDIV_TIME 80
 
Index: libitm/config/sh/sjlj.S
===================================================================
--- libitm/config/sh/sjlj.S	(revision 229290)
+++ libitm/config/sh/sjlj.S	(working copy)
@@ -58,9 +58,6 @@ 
 	jsr	@r1
 	 mov	r15, r5
 #else
-	mova	.Lgot, r0
-	mov.l	.Lgot, r12
-	add	r0, r12
 	mov.l	.Lbegin, r1
 	bsrf	r1
 	 mov	r15, r5
@@ -79,14 +76,12 @@ 
 	 nop
 	cfi_endproc
 
-        .align  2
-.Lgot:
-	.long	_GLOBAL_OFFSET_TABLE_
+	.align  2
 .Lbegin:
 #if defined HAVE_ATTRIBUTE_VISIBILITY || !defined __PIC__
 	.long	GTM_begin_transaction
 #else
-	.long	GTM_begin_transaction@PLT-(.Lbegin0-.)
+	.long	GTM_begin_transaction@PCREL-(.Lbegin0-.)
 #endif
 	.size	_ITM_beginTransaction, . - _ITM_beginTransaction